Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fluent Compatibility #240

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 58 additions & 12 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ def __init__(self, graph, compatibility_mode=False):
# _license().
self._licenceregister_cache = None

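# Cache for organization dicts, keyed by organization id.
# NOTE(review): initialised to None, but graph_from_dataset uses `in` and
# item assignment on it -- confirm it is rebound to a dict before first use.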
self._org_cache = None

def _datasets(self):
'''
Generator that returns all DCAT datasets on the graph
Expand Down Expand Up @@ -727,22 +730,25 @@ def _add_list_triples_from_dict(self, _dict, subject, items):

def _add_triples_from_dict(self, _dict, subject, items,
list_value=False,
date_value=False):
date_value=False,
translated=False):
for item in items:
key, predicate, fallbacks, _type = item
self._add_triple_from_dict(_dict, subject, predicate, key,
fallbacks=fallbacks,
list_value=list_value,
date_value=date_value,
_type=_type)
_type=_type,
translated=translated)

def _add_triple_from_dict(self, _dict, subject, predicate, key,
fallbacks=None,
list_value=False,
date_value=False,
_type=Literal,
_datatype=None,
value_modifier=None):
value_modifier=None,
translated=False):
'''
Adds a new triple to the graph with the provided parameters

Expand Down Expand Up @@ -776,6 +782,12 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key,
self._add_date_triple(subject, predicate, value, _type)
elif value:
# Normal text value
if translated and isinstance(value, dict):
# We assume that all translated field values are Literals
for lang, translated_value in value.items():
object = Literal(translated_value, lang=lang)
self.g.add((subject, predicate, object))
return
# ensure URIRef items are preprocessed (space removal/url encoding)
if _type == URIRef:
_type = CleanedURIRef
Expand Down Expand Up @@ -1207,10 +1219,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):

g.add((dataset_ref, RDF.type, DCAT.Dataset))

# Multilingual fields
title_key = 'title_translated' if 'title_translated' in dataset_dict else 'title'
notes_key = 'notes_translated' if 'notes_translated' in dataset_dict else 'notes'
items = [
(title_key, DCT.title, None, Literal),
(notes_key, DCT.description, None, Literal),
]
self._add_triples_from_dict(dataset_dict, dataset_ref, items, translated=True)

# Basic fields
items = [
('title', DCT.title, None, Literal),
JVickery-TBS marked this conversation as resolved.
Show resolved Hide resolved
('notes', DCT.description, None, Literal),
('url', DCAT.landingPage, None, URIRef),
('identifier', DCT.identifier, ['guid', 'id'], URIRefOrLiteral),
('version', OWL.versionInfo, ['dcat_version'], Literal),
Expand All @@ -1223,8 +1242,14 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
self._add_triples_from_dict(dataset_dict, dataset_ref, items)

# Tags
for tag in dataset_dict.get('tags', []):
g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))
tags = dataset_dict.get('tags_translated', dataset_dict.get('tags', []))
for tag in tags:
if 'name' in tag:
g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))
else:
# translated tags are stored as {'lang': ['tag1', 'tag2', ...]}
for translated_value in tags[tag]:
g.add((dataset_ref, DCAT.keyword, Literal(translated_value, lang=tag)))

# Dates
items = [
Expand Down Expand Up @@ -1306,9 +1331,23 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
# If no name but a URI is available, the name literal remains empty to
# avoid mixing organization and dataset values.
if not publisher_name and not publisher_uri and dataset_dict.get('organization'):
publisher_name = dataset_dict['organization']['title']

g.add((publisher_details, FOAF.name, Literal(publisher_name)))
org_id = dataset_dict["organization"]["id"]
org_dict = None
if org_id in self._org_cache:
org_dict = self._org_cache[org_id]
else:
try:
org_dict = toolkit.get_action(u'organization_show')({u'ignore_auth': True},
{u'id': org_id})
self._org_cache[org_id] = org_dict
except toolkit.ObjectNotFound:
pass
if org_dict:
title_key = 'title_translated' if 'title_translated' in org_dict else 'title'
items = [(title_key, FOAF.name, None, Literal)]
self._add_triples_from_dict(org_dict, publisher_details, items, translated=True)
else:
g.add((publisher_details, FOAF.name, Literal(publisher_name)))
# TODO: It would make sense to fallback these to organization
# fields but they are not in the default schema and the
# `organization` object in the dataset_dict does not include
Expand Down Expand Up @@ -1364,10 +1403,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):

g.add((distribution, RDF.type, DCAT.Distribution))

# Multilingual fields
name_key = 'name_translated' if 'name_translated' in resource_dict else 'name'
description_key = 'description_translated' if 'description_translated' in resource_dict else 'description'
items = [
(name_key, DCT.title, None, Literal),
(description_key, DCT.description, None, Literal),
]
self._add_triples_from_dict(resource_dict, distribution, items, translated=True)

# Simple values
items = [
('name', DCT.title, None, Literal),
('description', DCT.description, None, Literal),
('status', ADMS.status, None, URIRefOrLiteral),
('rights', DCT.rights, None, URIRefOrLiteral),
('license', DCT.license, None, URIRefOrLiteral),
Expand Down
10 changes: 6 additions & 4 deletions ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ckanext.dcat import utils
from ckanext.dcat.processors import RDFSerializer
from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT,
DISTRIBUTION_LICENSE_FALLBACK_CONFIG)
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
from ckanext.dcat.tests.utils import BaseSerializeTest
Expand Down Expand Up @@ -398,13 +398,15 @@ def test_publisher_extras(self):
assert self._triple(g, publisher, DCT.type, URIRef(extras['publisher_type']))

def test_publisher_org(self):
org = factories.Organization()

dataset = {
'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
'name': 'test-dataset',
'organization': {
'id': '',
'name': 'publisher1',
'title': 'Example Publisher from Org',
'id': org['id'],
'name': org['name'],
'title': org['title'],
}
}

Expand Down
Loading