From 5483a80dbbb4034337323cedc674a56dc218184c Mon Sep 17 00:00:00 2001 From: steveoni Date: Mon, 20 Nov 2023 18:13:15 +0100 Subject: [PATCH 01/33] dcat json-rpc --- ckanext/opendatani/json_dcat.py | 392 ++++++++++++++++++++++++++++++++ ckanext/opendatani/plugin.py | 2 +- setup.py | 2 + 3 files changed, 395 insertions(+), 1 deletion(-) create mode 100644 ckanext/opendatani/json_dcat.py diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py new file mode 100644 index 0000000..3bc513c --- /dev/null +++ b/ckanext/opendatani/json_dcat.py @@ -0,0 +1,392 @@ +from builtins import str +import json +import logging +from hashlib import sha1 +import traceback +import uuid + +import requests + +from ckan import model +from ckan import logic +from ckan import plugins as p +from ckanext.harvest.model import HarvestObject, HarvestObjectExtra + +from ckanext.dcat import converters +from ckanext.dcat.harvesters.base import DCATHarvester +from sqlalchemy.orm import Query + +log = logging.getLogger(__name__) + + +def _remove_extra(key, dataset_dict): + dataset_dict['extras'][:] = [e + for e in dataset_dict['extras'] + if e['key'] != key] + +class NsiraJSONHarvester(DCATHarvester): + + def info(self): + return { + 'name': 'nsira_dcatjson', + 'title': 'NSIRA DCAT JSON Harvester', + 'description': 'Harvester for DCAT dataset descriptions ' + + 'serialized as JSON' + } + + + def _get_guids_and_datasets(self, content): + doc = json.loads(content) + + + if isinstance(doc, list): + # Assume a list of datasets + datasets = doc + elif isinstance(doc, dict): + datasets = [doc] + else: + raise ValueError('Wrong JSON object') + + for dataset in datasets: + + dataset_copy = { + "title": dataset['label'], + "name": dataset['extension']['matrix'], + "description": dataset['note'][0], + "identifier": dataset['extension']['matrix'], + "modified": dataset['updated'], + "landingPage": "", + "publisher": { + "name": dataset['extension']['contact'].get('name', ''), + "mbox": dataset['extension']['contact'].get('email', '') + }, + + "language": [ + "en" + ], + "distribution": [] + } + + for resource in dataset['link']['alternate']: + dataset_copy['distribution'].append({ + 'title': resource['type'].split("/")[1], + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': resource['type'] + }) + + + + as_string = json.dumps(dataset_copy) + # Get identifier + guid = dataset_copy.get('identifier') + if not guid: + # This is bad, any ideas welcomed + guid = sha1(as_string).hexdigest() + + yield guid, as_string + + def _get_package_dict(self, harvest_object): + + content = harvest_object.content + + dcat_dict = json.loads(content) + + package_dict = converters.dcat_to_ckan(dcat_dict) + + package_dict['name'] = dcat_dict['name'].lower() + + return package_dict, dcat_dict + + def gather_stage(self, harvest_job): + log.debug('In DCATJSONHarvester gather_stage steve') + + ids = [] + + # Get the previous guids for this source + query = \ + model.Session.query(HarvestObject.guid, HarvestObject.package_id) \ + .filter(HarvestObject.current == True) \ + .filter(HarvestObject.harvest_source_id == harvest_job.source.id) + guid_to_package_id = {} + + for guid, package_id in query: + guid_to_package_id[guid] = package_id + + guids_in_db = list(guid_to_package_id.keys()) + guids_in_source = [] + + # Get file contents + url = harvest_job.source.url + + previous_guids = [] + page = 1 + while True: + + try: + content, content_type = \ + self._get_content_and_type(url, harvest_job, page) + except requests.exceptions.HTTPError as error: + if error.response.status_code == 404: + if page > 1: + # Server returned a 404 after the first page, no more + # records + log.debug('404 after first page, no more pages') + break + else: + # Proper 404 + msg = 'Could not get content. Server responded with ' \ + '404 Not Found' + self._save_gather_error(msg, harvest_job) + return None + else: + # This should never happen. Raising just in case. + raise + + if not content: + return None + + try: + + batch_guids = [] + for guid, as_string in self._get_guids_and_datasets(content): + log.debug('Got identifier: {0}' + .format(guid.encode('utf8'))) + + batch_guids.append(guid) + + if guid not in previous_guids: + + if guid in guids_in_db: + # Dataset needs to be udpated + obj = HarvestObject( + guid=guid, job=harvest_job, + package_id=guid_to_package_id[guid], + content=as_string, + extras=[HarvestObjectExtra(key='status', + value='change')]) + else: + # Dataset needs to be created + obj = HarvestObject( + guid=guid, job=harvest_job, + content=as_string, + extras=[HarvestObjectExtra(key='status', + value='new')]) + obj.save() + ids.append(obj.id) + + if len(batch_guids) > 0: + guids_in_source.extend(set(batch_guids) + - set(previous_guids)) + else: + log.debug('Empty document, no more records') + # Empty document, no more ids + break + + except ValueError as e: + msg = 'Error parsing file: {0}'.format(str(e)) + self._save_gather_error(msg, harvest_job) + return None + + if sorted(previous_guids) == sorted(batch_guids): + # Server does not support pagination or no more pages + log.debug('Same content, no more pages') + break + + page = page + 1 + + previous_guids = batch_guids + + # Check datasets that need to be deleted + guids_to_delete = set(guids_in_db) - set(guids_in_source) + for guid in guids_to_delete: + obj = HarvestObject( + guid=guid, job=harvest_job, + package_id=guid_to_package_id[guid], + extras=[HarvestObjectExtra(key='status', value='delete')]) + ids.append(obj.id) + model.Session.query(HarvestObject).\ + filter_by(guid=guid).\ + update({'current': False}, False) + obj.save() + + return ids + + def fetch_stage(self, harvest_object): + return True + + def import_stage(self, harvest_object): + log.debug('In DCATJSONHarvester import_stage') + if not harvest_object: + log.error('No harvest object received') + return False + + if self.force_import: + status = 'change' + else: + status = self._get_object_extra(harvest_object, 'status') + + if status == 'delete': + # Delete package + context = {'model': model, 'session': model.Session, + 'user': self._get_user_name()} + + p.toolkit.get_action('package_delete')( + context, {'id': harvest_object.package_id}) + log.info('Deleted package {0} with guid {1}' + .format(harvest_object.package_id, harvest_object.guid)) + + return True + + if harvest_object.content is None: + self._save_object_error( + 'Empty content for object %s' % harvest_object.id, + harvest_object, 'Import') + return False + + # Get the last harvested object (if any) + previous_object = model.Session.query(HarvestObject) \ + .filter(HarvestObject.guid == harvest_object.guid) \ + .filter(HarvestObject.current == True) \ + .first() + + # Flag previous object as not current anymore + if previous_object and not self.force_import: + previous_object.current = False + previous_object.add() + + package_dict, dcat_dict = self._get_package_dict(harvest_object) + if not package_dict: + return False + + if not package_dict.get('name'): + package_dict['name'] = \ + self._get_package_name(harvest_object, package_dict['title']) + + # copy across resource ids from the existing dataset, otherwise they'll + # be recreated with new ids + if status == 'change': + existing_dataset = self._get_existing_dataset(harvest_object.guid) + if existing_dataset: + copy_across_resource_ids(existing_dataset, package_dict) + + # Allow custom harvesters to modify the package dict before creating + # or updating the package + package_dict = self.modify_package_dict(package_dict, + dcat_dict, + harvest_object) + # Unless already set by an extension, get the owner organization (if + # any) from the harvest source dataset + if not package_dict.get('owner_org'): + source_dataset = model.Package.get(harvest_object.source.id) + if source_dataset.owner_org: + package_dict['owner_org'] = source_dataset.owner_org + + # Flag this object as the current one + harvest_object.current = True + harvest_object.add() + + context = { + 'user': self._get_user_name(), + 'return_id_only': True, + 'ignore_auth': True, + } + + try: + if status == 'new': + package_schema = logic.schema.default_create_package_schema() + log.info("steve here: package_schema is: {}".format(package_schema)) + context['schema'] = package_schema + + # We need to explicitly provide a package ID + package_dict['id'] = str(uuid.uuid4()) + package_schema['id'] = [str] + + # Save reference to the package on the object + harvest_object.package_id = package_dict['id'] + harvest_object.add() + + # Defer constraints and flush so the dataset can be indexed with + # the harvest object id (on the after_show hook from the harvester + # plugin) + model.Session.execute( + 'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.flush() + + elif status == 'change': + package_dict['id'] = harvest_object.package_id + + if status in ['new', 'change']: + action = 'package_create' if status == 'new' else 'package_update' + message_status = 'Created' if status == 'new' else 'Updated' + package_dict['frequency'] = 'notPlanned' + package_dict['topic_category'] = 'location' + package_dict['lineage'] = 'NISRA' + package_dict['contact_name'] = 'OSNI Mapping Helpdesk' + package_dict['contact_email'] = 'osniopendata@dfpni.gov.uk' + package_dict['license_id'] = 'uk-ogl' + _remove_extra('contact_name', package_dict) + _remove_extra('contact_email', package_dict) + package_id = p.toolkit.get_action(action)(context, package_dict) + log.info('%s dataset with id %s', message_status, package_id) + + except Exception as e: + dataset = json.loads(harvest_object.content) + dataset_name = dataset.get('name', '') + + self._save_object_error('Error importing dataset %s: %r / %s' % (dataset_name, e, traceback.format_exc()), harvest_object, 'Import') + return False + + finally: + model.Session.commit() + + return True + +def copy_across_resource_ids(existing_dataset, harvested_dataset): + '''Compare the resources in a dataset existing in the CKAN database with + the resources in a freshly harvested copy, and for any resources that are + the same, copy the resource ID into the harvested_dataset dict. + ''' + # take a copy of the existing_resources so we can remove them when they are + # matched - we don't want to match them more than once. + existing_resources_still_to_match = \ + [r for r in existing_dataset.get('resources')] + + # we match resources a number of ways. we'll compute an 'identity' of a + # resource in both datasets and see if they match. + # start with the surest way of identifying a resource, before reverting + # to closest matches. + resource_identity_functions = [ + lambda r: r['uri'], # URI is best + lambda r: (r['url'], r['title'], r['format']), + lambda r: (r['url'], r['title']), + lambda r: r['url'], # same URL is fine if nothing else matches + ] + + for resource_identity_function in resource_identity_functions: + # calculate the identities of the existing_resources + existing_resource_identities = {} + for r in existing_resources_still_to_match: + try: + identity = resource_identity_function(r) + existing_resource_identities[identity] = r + except KeyError: + pass + + # calculate the identities of the harvested_resources + for resource in harvested_dataset.get('resources'): + try: + identity = resource_identity_function(resource) + except KeyError: + identity = None + if identity and identity in existing_resource_identities: + # we got a match with the existing_resources - copy the id + matching_existing_resource = \ + existing_resource_identities[identity] + resource['id'] = matching_existing_resource['id'] + # make sure we don't match this existing_resource again + del existing_resource_identities[identity] + existing_resources_still_to_match.remove( + matching_existing_resource) + if not existing_resources_still_to_match: + break diff --git a/ckanext/opendatani/plugin.py b/ckanext/opendatani/plugin.py index 361f709..6bfc83e 100644 --- a/ckanext/opendatani/plugin.py +++ b/ckanext/opendatani/plugin.py @@ -1,7 +1,7 @@ import datetime # from pylons import config from ckan.common import config -import routes.mapper +# import routes.mapper import logging import ckan.plugins as plugins diff --git a/setup.py b/setup.py index e275997..bf0b7aa 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ entry_points=''' [ckan.plugins] opendatani=ckanext.opendatani.plugin:OpendataniPlugin + nisra_jsondcat=ckanext.opendatani.json_dcat:NsiraJSONHarvester [paste.paster_command] create_featured_groups=ckanext.opendatani.commands:CreateFeaturedGroups @@ -91,6 +92,7 @@ esri_arcgis_profile=ckanext.opendatani.dcat:EsriArcGISProfile daera_core_profile=ckanext.opendatani.dcat:DaeraCoreProfile nisra_profile=ckanext.opendatani.dcat:NisraProfile + ''', ) From 99a462bc6ff356ae3f4e45d63186e371eac47da0 Mon Sep 17 00:00:00 2001 From: steveoni Date: Fri, 22 Mar 2024 17:20:50 +0100 Subject: [PATCH 02/33] change frequency --- ckanext/opendatani/json_dcat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 3bc513c..531ad7c 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -15,6 +15,7 @@ from ckanext.dcat import converters from ckanext.dcat.harvesters.base import DCATHarvester from sqlalchemy.orm import Query +import datetime log = logging.getLogger(__name__) @@ -319,7 +320,7 @@ def import_stage(self, harvest_object): if status in ['new', 'change']: action = 'package_create' if status == 'new' else 'package_update' message_status = 'Created' if status == 'new' else 'Updated' - package_dict['frequency'] = 'notPlanned' + package_dict['frequency'] = 'montly' package_dict['topic_category'] = 'location' package_dict['lineage'] = 'NISRA' package_dict['contact_name'] = 'OSNI Mapping Helpdesk' From 004040a484a67b29fca5b43e417ef85768f14378 Mon Sep 17 00:00:00 2001 From: steveoni Date: Fri, 22 Mar 2024 19:31:32 +0100 Subject: [PATCH 03/33] fix created date --- .../templates/package/resource_read.html | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 ckanext/opendatani/templates/package/resource_read.html diff --git a/ckanext/opendatani/templates/package/resource_read.html b/ckanext/opendatani/templates/package/resource_read.html new file mode 100644 index 0000000..5bebbc9 --- /dev/null +++ b/ckanext/opendatani/templates/package/resource_read.html @@ -0,0 +1,62 @@ +{% ckan_extends %} {% block primary_content %} +{% block resource_additional_information %} +{% if res %} +
+
+ {{res}} +

{{ _('Additional Information') }}

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% for key, value in h.format_resource_items(res.items()) %} {% if key + not in ('created','Created', 'metadata modified', 'last modified', + 'format') %} + + + + + {% endif %} {% endfor %} + +
{{ _('Field') }}{{ _('Value') }}
{{ _('Data last updated') }} + {{ h.render_datetime(res.last_modified) or + h.render_datetime(res.created) or _('unknown') }} +
{{ _('Metadata last updated') }} + {{ h.render_datetime(res.metadata_modified) or + h.render_datetime(res.created) or _('unknown') }} +
{{ _('Created') }}{{ h.render_datetime(res.Created) or _('unknown') }}
{{ _('Format') }} + {{ res.format or res.mimetype_inner or res.mimetype or _('unknown') + }} +
{{ _('License') }} + {% snippet "snippets/license.html", pkg_dict=pkg, text_only=True %} +
{{ key | capitalize }}{{ value }}
+
+
+{% endif %} {% endblock %} {% endblock %} From 9e106c9977f31ba2b9e6ecec0b7937a652b5c916 Mon Sep 17 00:00:00 2001 From: steveoni Date: Fri, 22 Mar 2024 19:34:44 +0100 Subject: [PATCH 04/33] update created --- ckanext/opendatani/templates/package/resource_read.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/opendatani/templates/package/resource_read.html b/ckanext/opendatani/templates/package/resource_read.html index 5bebbc9..23eae37 100644 --- a/ckanext/opendatani/templates/package/resource_read.html +++ b/ckanext/opendatani/templates/package/resource_read.html @@ -32,7 +32,7 @@

{{ _('Additional Information') }}

{{ _('Created') }} - {{ h.render_datetime(res.Created) or _('unknown') }} + {{ h.render_datetime(res.created) or h.render_datetime(res.Created) or _('unknown') }} {{ _('Format') }} From 558aea1880745fc48873ce127f7c4512efa64747 Mon Sep 17 00:00:00 2001 From: steveoni Date: Fri, 22 Mar 2024 19:35:04 +0100 Subject: [PATCH 05/33] remove comment --- ckanext/opendatani/templates/package/resource_read.html | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/opendatani/templates/package/resource_read.html b/ckanext/opendatani/templates/package/resource_read.html index 23eae37..0cc98a9 100644 --- a/ckanext/opendatani/templates/package/resource_read.html +++ b/ckanext/opendatani/templates/package/resource_read.html @@ -3,7 +3,6 @@ {% if res %}
- {{res}}

{{ _('Additional Information') }}

Date: Fri, 22 Mar 2024 20:32:19 +0100 Subject: [PATCH 06/33] updated frequency value --- ckanext/opendatani/json_dcat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 531ad7c..27acbc5 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -320,7 +320,7 @@ def import_stage(self, harvest_object): if status in ['new', 'change']: action = 'package_create' if status == 'new' else 'package_update' message_status = 'Created' if status == 'new' else 'Updated' - package_dict['frequency'] = 'montly' + package_dict['frequency'] = 'monthly' package_dict['topic_category'] = 'location' package_dict['lineage'] = 'NISRA' package_dict['contact_name'] = 'OSNI Mapping Helpdesk' From d973332ee2bf8c28c621f8e0b5e5dbd52da8dcaf Mon Sep 17 00:00:00 2001 From: steveoni Date: Tue, 26 Mar 2024 17:19:52 +0100 Subject: [PATCH 07/33] update readme --- README.rst | 22 ++++++++++++++++++++++ ckanext/opendatani/json_dcat.py | 4 ++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index e286af2..edf8c43 100644 --- a/README.rst +++ b/README.rst @@ -136,3 +136,25 @@ To publish a new version to PyPI follow these steps: git tag 0.0.2 git push --tags + +-------------------------------------- +NSIRA Json (Restful) Harvester Mapping +-------------------------------------- + +The following is a mapping of the NSIRA Json to the DCAT schema before it is ingested into CKAN. + +``` +title <-- label +name <-- extension.matrix +description <-- note[0] +identifier <-- extension.matrix +modified <-- updated +language <-- en +distribution <-- link.alternate +distribution.title <-- link.alternate[i].type.split("/")[1] +distribution.accessURL <-- link.alternate[i].href +distribution.downloadURL <-- link.alternate[i].href +distribution.format <-- link.alternate[i].type +``` + + diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 27acbc5..193a80b 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -30,8 +30,8 @@ class NsiraJSONHarvester(DCATHarvester): def info(self): return { 'name': 'nsira_dcatjson', - 'title': 'NSIRA DCAT JSON Harvester', - 'description': 'Harvester for DCAT dataset descriptions ' + + 'title': 'NSIRA JSON (Restful) Harvester', + 'description': 'Harvester for Restful dataset descriptions ' + 'serialized as JSON' } From 3f39d8fb4ad18055d35a181143312a81b978923e Mon Sep 17 00:00:00 2001 From: steveoni Date: Wed, 27 Mar 2024 15:17:21 +0100 Subject: [PATCH 08/33] udpate contact name, update tags and categrory --- ckanext/opendatani/json_dcat.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 193a80b..40a6176 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -61,6 +61,8 @@ def _get_guids_and_datasets(self, content): "name": dataset['extension']['contact'].get('name', ''), "mbox": dataset['extension']['contact'].get('email', '') }, + "fn": dataset['extension']['contact'].get('name', ''), + "hasEmail": dataset['extension']['contact'].get('email', ''), "language": [ "en" @@ -256,6 +258,7 @@ def import_stage(self, harvest_object): previous_object.current = False previous_object.add() + package_dict, dcat_dict = self._get_package_dict(harvest_object) if not package_dict: return False @@ -321,13 +324,12 @@ def import_stage(self, harvest_object): action = 'package_create' if status == 'new' else 'package_update' message_status = 'Created' if status == 'new' else 'Updated' package_dict['frequency'] = 'monthly' - package_dict['topic_category'] = 'location' + package_dict['topic_category'] = 'governmentstatistics' package_dict['lineage'] = 'NISRA' - package_dict['contact_name'] = 'OSNI Mapping Helpdesk' - package_dict['contact_email'] = 'osniopendata@dfpni.gov.uk' + package_dict['contact_name'] = dcat_dict.get('fn', '') + package_dict['contact_email'] = dcat_dict.get('hasEmail', '') + package_dict['tags'] = [{'name': 'Goverment statistics'}] package_dict['license_id'] = 'uk-ogl' - _remove_extra('contact_name', package_dict) - _remove_extra('contact_email', package_dict) package_id = p.toolkit.get_action(action)(context, package_dict) log.info('%s dataset with id %s', message_status, package_id) From bae7c6a88092bcfe45c6d8ba1af222825b2d21e0 Mon Sep 17 00:00:00 2001 From: steveoni Date: Wed, 27 Mar 2024 15:17:46 +0100 Subject: [PATCH 09/33] add government statistics to Topics category --- ckanext/opendatani/dataset_schema.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ckanext/opendatani/dataset_schema.json b/ckanext/opendatani/dataset_schema.json index 486933f..4819c52 100644 --- a/ckanext/opendatani/dataset_schema.json +++ b/ckanext/opendatani/dataset_schema.json @@ -33,6 +33,10 @@ "validators": "scheming_required at_least_n_choices(1) scheming_multiple_choice", "output_validators": "scheming_multiple_choice_output", "choices": [ + { + "value": "governmentstatistics", + "label": "Government statistics" + }, { "value": "farming", "label": "Farming" From 101e11ea38c50cb55ae4d9b95b50cb52453bee94 Mon Sep 17 00:00:00 2001 From: steveoni Date: Tue, 21 May 2024 17:31:34 +0100 Subject: [PATCH 10/33] add multiple list fetch --- ckanext/opendatani/json_dcat.py | 137 +++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 4 deletions(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 40a6176..a35b4eb 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -6,6 +6,8 @@ import uuid import requests +import rdflib +import os from ckan import model from ckan import logic @@ -16,6 +18,9 @@ from ckanext.dcat.harvesters.base import DCATHarvester from sqlalchemy.orm import Query import datetime +import six +from ckanext.dcat.interfaces import IDCATRDFHarvester + log = logging.getLogger(__name__) @@ -34,6 +39,123 @@ def info(self): 'description': 'Harvester for Restful dataset descriptions ' + 'serialized as JSON' } + + def _get_content_and_type(self, url, harvest_job, page=1, + content_type=None): + ''' + Gets the content and type of the given url. + + :param url: a web url (starting with http) or a local path + :param harvest_job: the job, used for error reporting + :param page: adds paging to the url + :param content_type: will be returned as type + :return: a tuple containing the content and content-type + ''' + + if not url.lower().startswith('http'): + # Check local file + if os.path.exists(url): + with open(url, 'r') as f: + content = f.read() + content_type = content_type or rdflib.util.guess_format(url) + return content, content_type + else: + self._save_gather_error('Could not get content for this url', + harvest_job) + return None, None + + try: + + if page > 1: + url = url + '&' if '?' in url else url + '?' + url = url + 'page={0}'.format(page) + + log.debug('Getting file %s', url) + + # get the `requests` session object + session = requests.Session() + for harvester in p.PluginImplementations(IDCATRDFHarvester): + session = harvester.update_session(session) + + # first we try a HEAD request which may not be supported + did_get = False + r = session.head(url) + + if r.status_code == 405 or r.status_code == 400: + r = session.get(url, stream=True) + did_get = True + r.raise_for_status() + + cl = r.headers.get('content-length') + if cl and int(cl) > self.MAX_FILE_SIZE: + msg = '''Remote file is too big. Allowed + file size: {allowed}, Content-Length: {actual}.'''.format( + allowed=self.MAX_FILE_SIZE, actual=cl) + self._save_gather_error(msg, harvest_job) + return None, None + + if not did_get: + r = session.get(url, stream=True) + + length = 0 + content = '' if six.PY2 else b'' + for chunk in r.iter_content(chunk_size=self.CHUNK_SIZE): + content = content + chunk + + length += len(chunk) + + if length >= self.MAX_FILE_SIZE: + self._save_gather_error('Remote file is too big.', + harvest_job) + return None, None + + if not six.PY2: + content = content.decode('utf-8') + + if content_type is None and r.headers.get('content-type'): + content_type = r.headers.get('content-type').split(";", 1)[0] + + + # if content is a JSON array of URLS, fetch each url + try: + urls = json.loads(content) + if isinstance(urls, list) and all(isinstance(u, str) for u in urls): + combined_content = [] + for package_url in urls: + package_content, _ = self._get_content_and_type(package_url, harvest_job) + if package_content: + combined_content.append(json.loads(package_content)) + content = json.dumps(combined_content).encode('utf-8') + content_type = 'application/json' + if not six.PY2: + content = content.decode('utf-8') + except json.JSONDecodeError: + self._save_gather_error('Could not parse content as JSON', harvest_job) + return None, None + + + return content, content_type + + except requests.exceptions.HTTPError as error: + if page > 1 and error.response.status_code == 404: + # We want to catch these ones later on + raise + + msg = 'Could not get content from %s. Server responded with %s %s'\ + % (url, error.response.status_code, error.response.reason) + self._save_gather_error(msg, harvest_job) + return None, None + except requests.exceptions.ConnectionError as error: + msg = '''Could not get content from %s because a + connection error occurred. %s''' % (url, error) + self._save_gather_error(msg, harvest_job) + return None, None + except requests.exceptions.Timeout as error: + msg = 'Could not get content from %s because the connection timed'\ + ' out.' % url + self._save_gather_error(msg, harvest_job) + return None, None + def _get_guids_and_datasets(self, content): @@ -49,9 +171,16 @@ def _get_guids_and_datasets(self, content): raise ValueError('Wrong JSON object') for dataset in datasets: - + filtered_keys = [key for key in dataset["dimension"] if key not in ("STATISTIC", "TLIST(A1)")] + labels = [dataset["dimension"][key]["label"] for key in filtered_keys] + + if len(labels) == 1: + output_string = labels[0] + else: + output_string = " by ".join(labels[:-1]) + " and " + labels[-1] + dataset_copy = { - "title": dataset['label'], + "title": dataset['label'] + " "+ "by " + output_string, "name": dataset['extension']['matrix'], "description": dataset['note'][0], "identifier": dataset['extension']['matrix'], @@ -299,7 +428,7 @@ def import_stage(self, harvest_object): try: if status == 'new': package_schema = logic.schema.default_create_package_schema() - log.info("steve here: package_schema is: {}".format(package_schema)) + context['schema'] = package_schema # We need to explicitly provide a package ID @@ -328,7 +457,7 @@ def import_stage(self, harvest_object): package_dict['lineage'] = 'NISRA' package_dict['contact_name'] = dcat_dict.get('fn', '') package_dict['contact_email'] = dcat_dict.get('hasEmail', '') - package_dict['tags'] = [{'name': 'Goverment statistics'}] + package_dict['tags'] = [{'name': 'Experimental'}, {'name': 'Official Statistics'}] package_dict['license_id'] = 'uk-ogl' package_id = p.toolkit.get_action(action)(context, package_dict) log.info('%s dataset with id %s', message_status, package_id) From c977a05d048b0ebc6a989d14e344733506d87def Mon Sep 17 00:00:00 2001 From: steveoni Date: Thu, 23 May 2024 07:32:22 +0100 Subject: [PATCH 11/33] add new metadata --- ckanext/opendatani/dataset_schema.json | 5 +++++ ckanext/opendatani/json_dcat.py | 1 + ckanext/opendatani/templates/package/resource_read.html | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ckanext/opendatani/dataset_schema.json b/ckanext/opendatani/dataset_schema.json index 4819c52..4ab3133 100644 --- a/ckanext/opendatani/dataset_schema.json +++ b/ckanext/opendatani/dataset_schema.json @@ -209,6 +209,11 @@ "required": true, "validators": "boolean_validator datasets_with_no_organization_cannot_be_private opendatani_private_datasets", "form_snippet": "visibility_form_field.html" + }, + { + "field_name": "source_last_updated", + "preset": "datetime", + "label": "Source last updated" } ], "resource_fields": [ diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index a35b4eb..5c8be4c 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -459,6 +459,7 @@ def import_stage(self, harvest_object): package_dict['contact_email'] = dcat_dict.get('hasEmail', '') package_dict['tags'] = [{'name': 'Experimental'}, {'name': 'Official Statistics'}] package_dict['license_id'] = 'uk-ogl' + package_dict['source_last_updated'] = dcat_dict.get('modified', '')[:19].replace('.', '') package_id = p.toolkit.get_action(action)(context, package_dict) log.info('%s dataset with id %s', message_status, package_id) diff --git a/ckanext/opendatani/templates/package/resource_read.html b/ckanext/opendatani/templates/package/resource_read.html index 0cc98a9..0174071 100644 --- a/ckanext/opendatani/templates/package/resource_read.html +++ b/ckanext/opendatani/templates/package/resource_read.html @@ -15,18 +15,18 @@

{{ _('Additional Information') }}

- + From 6d604018c109a898e735d008b533476f93fbe347 Mon Sep 17 00:00:00 2001 From: steveoni Date: Mon, 27 May 2024 15:32:18 +0100 Subject: [PATCH 12/33] transform markup syntax to markdown --- ckanext/opendatani/json_dcat.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 5c8be4c..e9622aa 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -20,10 +20,28 @@ import datetime import six from ckanext.dcat.interfaces import IDCATRDFHarvester - +import re log = logging.getLogger(__name__) +def convert_to_html(text): + """Converts text with formatting to HTML. + + Args: + text: The text to be converted. + + Returns: + The converted HTML string. + """ + # Replace bold tags + text = text.replace("[b]", "").replace("[/b]", "") + + # Replace URL links + url_pattern = r'\[url=(https?://[^\]]+|mailto:[^\]]+)\](.*?)\[/url\]' + replacement = r'[\2](\1)' + text = re.sub(url_pattern, replacement, text) + + return text def _remove_extra(key, dataset_dict): dataset_dict['extras'][:] = [e @@ -182,7 +200,7 @@ def _get_guids_and_datasets(self, content): dataset_copy = { "title": dataset['label'] + " "+ "by " + output_string, "name": dataset['extension']['matrix'], - "description": dataset['note'][0], + "description": convert_to_html(dataset['note'][0]), "identifier": dataset['extension']['matrix'], "modified": dataset['updated'], "landingPage": "", From 84a1a095e155cac389ea32c82d0f0ede77c78063 Mon Sep 17 00:00:00 2001 From: steveoni Date: Wed, 29 May 2024 16:32:20 +0100 Subject: [PATCH 13/33] add new metadata --- ckanext/opendatani/dataset_schema.json | 4 ++++ ckanext/opendatani/json_dcat.py | 23 +++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/ckanext/opendatani/dataset_schema.json b/ckanext/opendatani/dataset_schema.json index 4ab3133..67daca1 100644 --- a/ckanext/opendatani/dataset_schema.json +++ b/ckanext/opendatani/dataset_schema.json @@ -214,6 +214,10 @@ "field_name": "source_last_updated", "preset": "datetime", "label": "Source last updated" + }, + { + "field_name": "time_period", + "label": "Time period" } ], "resource_fields": [ diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index e9622aa..d3c7fc5 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -187,6 +187,13 @@ def _get_guids_and_datasets(self, content): datasets = [doc] else: raise ValueError('Wrong JSON object') + + + frequency = { + "TLIST(A1)": "annually", + "TLIST(Q1)": "quarterly", + "TLIST(M1)": "monthly", + } for dataset in datasets: filtered_keys = [key for key in dataset["dimension"] if key not in ("STATISTIC", "TLIST(A1)")] @@ -196,6 +203,13 @@ def _get_guids_and_datasets(self, content): output_string = labels[0] else: output_string = " by ".join(labels[:-1]) + " and " + labels[-1] + + + # get Tlist from dataset using keys in frquency + frequency_key = [key for key in dataset["dimension"] if key in ("TLIST(A1)", "TLIST(Q1)", "TLIST(M1)")] + frequency_key = frequency_key[0] + frequency_value = dataset["dimension"][frequency_key]["category"]["index"] + time_period = f"{frequency_value[0]} - {frequency_value[-1]}" dataset_copy = { "title": dataset['label'] + " "+ "by " + output_string, @@ -214,7 +228,9 @@ def _get_guids_and_datasets(self, content): "language": [ "en" ], - "distribution": [] + "distribution": [], + "frequency": frequency[frequency_key], + "timePeriod": time_period, } for resource in dataset['link']['alternate']: @@ -407,6 +423,8 @@ def import_stage(self, harvest_object): package_dict, dcat_dict = self._get_package_dict(harvest_object) + + if not package_dict: return False @@ -470,7 +488,7 @@ def import_stage(self, harvest_object): if status in ['new', 'change']: action = 'package_create' if status == 'new' else 'package_update' message_status = 'Created' if status == 'new' else 'Updated' - package_dict['frequency'] = 'monthly' + package_dict['frequency'] = dcat_dict.get('frequency', '') package_dict['topic_category'] = 'governmentstatistics' package_dict['lineage'] = 'NISRA' package_dict['contact_name'] = dcat_dict.get('fn', '') @@ -478,6 +496,7 @@ def import_stage(self, harvest_object): package_dict['tags'] = [{'name': 'Experimental'}, {'name': 'Official Statistics'}] package_dict['license_id'] = 'uk-ogl' package_dict['source_last_updated'] = dcat_dict.get('modified', '')[:19].replace('.', '') + package_dict['time_period'] = dcat_dict.get('timePeriod', '') package_id = p.toolkit.get_action(action)(context, package_dict) log.info('%s dataset with id %s', message_status, package_id) From 228ef5005428d8cd2ca258b54a4a59fadd0d2cb6 Mon Sep 17 00:00:00 2001 From: steveoni Date: Wed, 12 Jun 2024 15:17:03 +0100 Subject: [PATCH 14/33] update schema and add new meta --- ckanext/opendatani/dataset_schema.json | 10 +++++++ ckanext/opendatani/json_dcat.py | 41 ++++++++++++++++++++------ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/ckanext/opendatani/dataset_schema.json b/ckanext/opendatani/dataset_schema.json index 67daca1..27e402f 100644 --- a/ckanext/opendatani/dataset_schema.json +++ b/ckanext/opendatani/dataset_schema.json @@ -218,6 +218,16 @@ { "field_name": "time_period", "label": "Time period" + }, + { + "field_name": "title_tags", + "label": "Title+Tags" + }, + { + "field_name": "metatags", + "label": "Meta Tags", + "form_snippet": "markdown.html", + "form_placeholder": "Some useful json element" } ], "resource_fields": [ diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index d3c7fc5..dc830ef 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -210,9 +210,13 @@ def _get_guids_and_datasets(self, content): frequency_key = frequency_key[0] frequency_value = dataset["dimension"][frequency_key]["category"]["index"] time_period = f"{frequency_value[0]} - {frequency_value[-1]}" - + allowed_keys = {"exceptional", "official", "reservation", "archive", "experimental", "analytical"} + tags = {k: v for k, v in dataset["extension"].items() if not isinstance(v, dict) and k in allowed_keys} + + dataset_copy = { - "title": dataset['label'] + " "+ "by " + output_string, + "title": dataset['label'], + "titleTags": dataset['label'] + " "+ "by " + output_string, "name": dataset['extension']['matrix'], "description": convert_to_html(dataset['note'][0]), "identifier": dataset['extension']['matrix'], @@ -231,15 +235,33 @@ def _get_guids_and_datasets(self, content): "distribution": [], "frequency": frequency[frequency_key], "timePeriod": time_period, + "metaTags": json.dumps(tags), } for resource in dataset['link']['alternate']: - dataset_copy['distribution'].append({ - 'title': resource['type'].split("/")[1], - 'accessURL': resource['href'], - 'downloadURL': resource['href'], - 'format': resource['type'] - }) + if resource['type'] == "application/base64": + dataset_copy['distribution'].append({ + 'title': "Xlsx", + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': "xlsx" + }) + + elif resource['type'] == "application/json": + dataset_copy['distribution'].append({ + 'title': f"JSON {resource['href'].split('/')[-2]}", + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': resource['type'] + }) + + else: + dataset_copy['distribution'].append({ + 'title': resource['type'].split("/")[1], + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': resource['type'] + }) @@ -493,10 +515,11 @@ def import_stage(self, harvest_object): package_dict['lineage'] = 'NISRA' package_dict['contact_name'] = dcat_dict.get('fn', '') package_dict['contact_email'] = dcat_dict.get('hasEmail', '') - package_dict['tags'] = [{'name': 'Experimental'}, {'name': 'Official Statistics'}] package_dict['license_id'] = 'uk-ogl' package_dict['source_last_updated'] = dcat_dict.get('modified', '')[:19].replace('.', '') package_dict['time_period'] = dcat_dict.get('timePeriod', '') + package_dict['title_tags'] = dcat_dict.get('titleTags', '') + package_dict['metatags'] = dcat_dict.get('metaTags', '') package_id = p.toolkit.get_action(action)(context, package_dict) log.info('%s dataset with id %s', message_status, package_id) From 85a495eec383335307a563c468fc9e734b09ebb0 Mon Sep 17 00:00:00 2001 From: steveoni Date: Wed, 12 Jun 2024 15:54:27 +0100 Subject: [PATCH 15/33] update json format --- ckanext/opendatani/json_dcat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index dc830ef..ea7a44f 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -252,7 +252,7 @@ def _get_guids_and_datasets(self, content): 'title': f"JSON {resource['href'].split('/')[-2]}", 'accessURL': resource['href'], 'downloadURL': resource['href'], - 'format': resource['type'] + 'format': resource['type']+f"{resource['href'].split('/')[-2]}" }) else: From 8a231e16619191771f32eddc66de33bcd988151e Mon Sep 17 00:00:00 2001 From: steveoni Date: Thu, 13 Jun 2024 18:22:48 +0100 Subject: [PATCH 16/33] update type --- ckanext/opendatani/json_dcat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index ea7a44f..6b96518 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -252,7 +252,7 @@ def _get_guids_and_datasets(self, content): 'title': f"JSON {resource['href'].split('/')[-2]}", 'accessURL': resource['href'], 'downloadURL': resource['href'], - 'format': resource['type']+f"{resource['href'].split('/')[-2]}" + 'format': f"json{resource['href'].split('/')[-2]}" }) else: From 0e62b04aa0f27045b93c19725decc7304c7e0b98 Mon Sep 17 00:00:00 2001 From: steveoni Date: Fri, 5 Jul 2024 13:22:53 +0100 Subject: [PATCH 17/33] comment out --- ckanext/opendatani/plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/opendatani/plugin.py b/ckanext/opendatani/plugin.py index 6bfc83e..ac3ebe4 100644 --- a/ckanext/opendatani/plugin.py +++ b/ckanext/opendatani/plugin.py @@ -171,8 +171,8 @@ def custom_user_create(context, data_dict): def custom_user_update(context, data_dict): - context['schema'] = custom_update_user_schema( - form_schema='password1' in context.get('schema', {})) + # context['schema'] = custom_update_user_schema( + # form_schema='password1' in context.get('schema', {})) return core_user_update(context, data_dict) From c3dc375adb3da12b5346917121bbf28f9dc8e5fa Mon Sep 17 00:00:00 2001 From: steveoni Date: Thu, 29 Aug 2024 17:16:13 +0100 Subject: [PATCH 18/33] fix api token regeneration --- ckanext/opendatani/plugin.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ckanext/opendatani/plugin.py b/ckanext/opendatani/plugin.py index 6bfc83e..5ac8b74 100644 --- a/ckanext/opendatani/plugin.py +++ b/ckanext/opendatani/plugin.py @@ -171,9 +171,6 @@ def custom_user_create(context, data_dict): def custom_user_update(context, data_dict): - context['schema'] = custom_update_user_schema( - form_schema='password1' in context.get('schema', {})) - return core_user_update(context, data_dict) From 6642c6a1d4797b57e15682dbdd8d80bf22e94f9f Mon Sep 17 00:00:00 2001 From: steveoni Date: Thu, 29 Aug 2024 17:16:31 +0100 Subject: [PATCH 19/33] fix ckanext-pages edit button --- ckanext/opendatani/templates/ckanext_pages/base_form.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/opendatani/templates/ckanext_pages/base_form.html b/ckanext/opendatani/templates/ckanext_pages/base_form.html index 40e5dcc..6b97fd3 100644 --- a/ckanext/opendatani/templates/ckanext_pages/base_form.html +++ b/ckanext/opendatani/templates/ckanext_pages/base_form.html @@ -30,7 +30,7 @@ {% set slug_prefix = cancel_url ~ '/' %} {% set slug_domain = h.url_for('pages.index', qualified=true) %} {% if page %} - {% set delete_url = h.url_for('pages.pages_delete', page=data.name) %} + {% set delete_url = h.url_for('pages.delete', page=data.name) %} {% endif %} {% endif %} From e889bffa964499e2451eaf2850669de241c3b1f2 Mon Sep 17 00:00:00 2001 From: steveoni Date: Thu, 29 Aug 2024 17:17:12 +0100 Subject: [PATCH 20/33] fix edit button in organization and also updat tab to have icons --- ckanext/opendatani/templates/organization/edit_base.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckanext/opendatani/templates/organization/edit_base.html b/ckanext/opendatani/templates/organization/edit_base.html index 20d95b2..5e71ea9 100644 --- a/ckanext/opendatani/templates/organization/edit_base.html +++ b/ckanext/opendatani/templates/organization/edit_base.html @@ -1,8 +1,8 @@ {% ckan_extends %} {% block content_primary_nav %} - {{ h.build_nav_icon('organization_edit', _('Edit'), id=c.group_dict.name) }} - {{ h.build_nav_icon('organization_bulk_process', _('Datasets'), id=c.group_dict.name) }} - {{ h.build_nav_icon('organization_members', _('Members'), id=c.group_dict.name) }} - {{ h.build_nav_icon('add_groups', _('Add groups'), id=c.group_dict.name) }} + {{ h.build_nav_icon(group_type + '.edit', _('Edit'), id=group_dict.name, icon='pencil-square-o') }} + {{ h.build_nav_icon(group_type + '.bulk_process', _('Datasets'), id=group_dict.name, icon='sitemap') }} + {{ h.build_nav_icon(group_type + '.members', _('Members'), id=group_dict.name, icon='users') }} + {{ h.build_nav_icon('odni_organization.add_groups', _('Add groups'), id=c.group_dict.name, icon='file') }} {% endblock %} From 9529cff44af25a9ece3f6b9af03717d9633e8467 Mon Sep 17 00:00:00 2001 From: steveoni Date: Thu, 29 Aug 2024 17:17:53 +0100 Subject: [PATCH 21/33] update nav to have the right icon and fix the edit settings button link --- ckanext/opendatani/templates/user/dashboard.html | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ckanext/opendatani/templates/user/dashboard.html b/ckanext/opendatani/templates/user/dashboard.html index a25e369..c232278 100644 --- a/ckanext/opendatani/templates/user/dashboard.html +++ b/ckanext/opendatani/templates/user/dashboard.html @@ -12,19 +12,19 @@ {% block page_header %} {% endblock %} From f7bf133af0e3e5f3492e17847e3cd62d7c3be999 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Thu, 29 Aug 2024 18:09:10 +0000 Subject: [PATCH 22/33] fix: change groups layout via bootstrap v3 --- ckanext/opendatani/assets/css/theme.css | 1 + .../templates/home/snippets/group_item.html | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index c529990..6021c3f 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -449,6 +449,7 @@ h6 { } .homepage .featured-groups li { background-color: #25374d; + margin-bottom: 20px; } .homepage .featured-groups li a { display: table; diff --git a/ckanext/opendatani/templates/home/snippets/group_item.html b/ckanext/opendatani/templates/home/snippets/group_item.html index f1557ab..8f9239e 100644 --- a/ckanext/opendatani/templates/home/snippets/group_item.html +++ b/ckanext/opendatani/templates/home/snippets/group_item.html @@ -1,14 +1,16 @@ {% set type = group.type or 'group' %} {% set url = h.url_for(type ~ '_read', action='read', id=group.name) %} {% block item %} -
  • - - - {{ group.name }} - -

    {{ group.display_name }}

    -
    -
  • + {% endblock %} {% if position is divisibleby 3 %} {% endif %} From 16cfe22702f1ad7c8e2623eeb1be29f3345f0ab9 Mon Sep 17 00:00:00 2001 From: steveoni Date: Fri, 30 Aug 2024 14:05:19 +0100 Subject: [PATCH 23/33] add default values and new method from parent --- ckanext/opendatani/json_dcat.py | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py index 6b96518..07231c2 100644 --- a/ckanext/opendatani/json_dcat.py +++ b/ckanext/opendatani/json_dcat.py @@ -57,6 +57,36 @@ def info(self): 'description': 'Harvester for Restful dataset descriptions ' + 'serialized as JSON' } + + def _read_datasets_from_db(self, guid): + ''' + Returns a database result of datasets matching the given guid. + ''' + + datasets = model.Session.query(model.Package.id) \ + .join(model.PackageExtra) \ + .filter(model.PackageExtra.key == 'guid') \ + .filter(model.PackageExtra.value == guid) \ + .filter(model.Package.state == 'active') \ + .all() + return datasets + + def _get_existing_dataset(self, guid): + ''' + Checks if a dataset with a certain guid extra already exists + + Returns a dict as the ones returned by package_show + ''' + + datasets = self._read_datasets_from_db(guid) + + if not datasets: + return None + elif len(datasets) > 1: + log.error('Found more than one dataset with the same guid: {0}' + .format(guid)) + + return p.toolkit.get_action('package_show')({}, {'id': datasets[0][0]}) def _get_content_and_type(self, url, harvest_job, page=1, content_type=None): @@ -226,8 +256,8 @@ def _get_guids_and_datasets(self, content): "name": dataset['extension']['contact'].get('name', ''), "mbox": dataset['extension']['contact'].get('email', '') }, - "fn": dataset['extension']['contact'].get('name', ''), - "hasEmail": dataset['extension']['contact'].get('email', ''), + "fn": dataset['extension']['contact'].get('name', 'not-provided'), + "hasEmail": dataset['extension']['contact'].get('email', 'notprovided@mail.com'), "language": [ "en" From a13e403aa42b72aa5ec49c8e95ba6d2bcab1c949 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Sat, 31 Aug 2024 04:32:24 +0000 Subject: [PATCH 24/33] fix: search input design --- ckanext/opendatani/assets/css/theme.css | 3 ++- ckanext/opendatani/templates/header.html | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index 6021c3f..f53d59f 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -355,6 +355,7 @@ h6 { text-align: center; line-height: 30px; color: rgba(77, 77, 77, 0.25); + left: 10px; } .masthead .site-search label:after { display: none; @@ -371,7 +372,7 @@ h6 { } .masthead .site-search input { padding: 12px 50px 12px 45px; - width: 210px; + width: 306px; background-color: #e8eaed; border-radius: 0; border-color: #e8eaed; diff --git a/ckanext/opendatani/templates/header.html b/ckanext/opendatani/templates/header.html index 4d88189..c83b2d6 100644 --- a/ckanext/opendatani/templates/header.html +++ b/ckanext/opendatani/templates/header.html @@ -72,7 +72,7 @@ ('datarequests.index', _('Suggest Data') + h.get_open_datarequests_badge()), ) }} {% endblock %} -
  • {{ _('News') }}
  • +
  • {{ _('News') }}aa
  • {{ _('About').format(g.site_title) }}
  • {{ _('Contact') }}
  • @@ -80,10 +80,10 @@ {% endblock %} {% block header_site_search %} - -
    - - + +
    + +
    From 5bd45f62df1a3ae1e82e7cf0b6d3f19c2b350043 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Sat, 31 Aug 2024 17:09:53 +0000 Subject: [PATCH 25/33] fix: footer responsive --- ckanext/opendatani/assets/css/theme.css | 2 +- ckanext/opendatani/templates/footer.html | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index f53d59f..c535299 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -372,7 +372,7 @@ h6 { } .masthead .site-search input { padding: 12px 50px 12px 45px; - width: 306px; + width: 300px; background-color: #e8eaed; border-radius: 0; border-color: #e8eaed; diff --git a/ckanext/opendatani/templates/footer.html b/ckanext/opendatani/templates/footer.html index 72178cf..9b86da0 100644 --- a/ckanext/opendatani/templates/footer.html +++ b/ckanext/opendatani/templates/footer.html @@ -2,16 +2,16 @@ {% block footer_content %}
    -
    +
    NI Direct
    -
    +
    Digital NI
    -
    +
    CKAN
    -
    +
    Datopian
    From d718fd9e77128ce7e75d3a3852d4510c44248674 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Sat, 31 Aug 2024 18:59:38 +0000 Subject: [PATCH 26/33] fix: posts ui --- ckanext/opendatani/assets/css/theme.css | 22 ++++++++++++++----- ckanext/opendatani/fanstatic/css/theme.css | 9 ++++---- ckanext/opendatani/fanstatic/less/_reset.less | 4 ++-- ckanext/opendatani/templates/header.html | 1 + ckanext/opendatani/templates/home/index.html | 10 +++------ 5 files changed, 28 insertions(+), 18 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index c535299..3543150 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -29,12 +29,17 @@ body, background-image: none; background-color: inherit; } -.masthead .nav-collapse, + +.navbar { + border: none; /* Overriden main.css */ +} + +.masthead .navbar-collapse, .masthead .section, .navbar .nav { float: none; } -.masthead .nav-collapse { +.masthead .navbar-collapse { padding: 0; } html, @@ -324,17 +329,18 @@ h6 { } .masthead { color: #4d4d4d; + padding: 0; /* Overriden main.css */ } .masthead .logo { display: block; width: 282px; margin: 22px 0; } -.masthead .nav-collapse { +.masthead .navbar-collapse { background-color: #e8eaed; clear: both; position: static; - margin-left: -9999rem; + margin-left: -9998rem; margin-right: -9999rem; padding-left: 9999rem; padding-right: 9999rem; @@ -411,6 +417,7 @@ h6 { font-weight: bold; border-radius: 0; position: relative; + text-decoration: none; /* Overriden main.css */ } .masthead .nav > li > a:hover, .masthead .navigation .nav-pills > li > a:hover { @@ -448,6 +455,11 @@ h6 { border-top: solid 1px #dfe2e6; } } + +.homepage .featured-groups { + padding: 0; +} + .homepage .featured-groups li { background-color: #25374d; margin-bottom: 20px; @@ -461,7 +473,7 @@ h6 { } @media (min-width: 768px) { .homepage .featured-groups li a { - height: 114px; + height: 134px; overflow: hidden; } } diff --git a/ckanext/opendatani/fanstatic/css/theme.css b/ckanext/opendatani/fanstatic/css/theme.css index c529990..fceb1e6 100644 --- a/ckanext/opendatani/fanstatic/css/theme.css +++ b/ckanext/opendatani/fanstatic/css/theme.css @@ -29,12 +29,12 @@ body, background-image: none; background-color: inherit; } -.masthead .nav-collapse, +.masthead .navbar-collapse, .masthead .section, .navbar .nav { float: none; } -.masthead .nav-collapse { +.masthead .navbar-collapse { padding: 0; } html, @@ -330,7 +330,7 @@ h6 { width: 282px; margin: 22px 0; } -.masthead .nav-collapse { +.masthead .navbar-collapse { background-color: #e8eaed; clear: both; position: static; @@ -410,6 +410,7 @@ h6 { font-weight: bold; border-radius: 0; position: relative; + text-decoration: none; } .masthead .nav > li > a:hover, .masthead .navigation .nav-pills > li > a:hover { @@ -459,7 +460,7 @@ h6 { } @media (min-width: 768px) { .homepage .featured-groups li a { - height: 114px; + height: 134px; overflow: hidden; } } diff --git a/ckanext/opendatani/fanstatic/less/_reset.less b/ckanext/opendatani/fanstatic/less/_reset.less index 6e345a2..07b596a 100644 --- a/ckanext/opendatani/fanstatic/less/_reset.less +++ b/ckanext/opendatani/fanstatic/less/_reset.less @@ -8,13 +8,13 @@ body, background-color:inherit; } -.masthead .nav-collapse, +.masthead .navbar-collapse, .masthead .section, .navbar .nav { float:none; } -.masthead .nav-collapse { +.masthead .navbar-collapse { padding:0; } diff --git a/ckanext/opendatani/templates/header.html b/ckanext/opendatani/templates/header.html index c83b2d6..0e9a6bb 100644 --- a/ckanext/opendatani/templates/header.html +++ b/ckanext/opendatani/templates/header.html @@ -77,6 +77,7 @@
  • {{ _('Contact') }}
  • + {% endblock %} {% block header_site_search %} diff --git a/ckanext/opendatani/templates/home/index.html b/ckanext/opendatani/templates/home/index.html index 4fd78eb..0852cf9 100644 --- a/ckanext/opendatani/templates/home/index.html +++ b/ckanext/opendatani/templates/home/index.html @@ -3,25 +3,22 @@ {% block primary_content %}
    - -

    Latest

    {% trans %}More{% endtrans %} - {% set posts = h.get_recent_blog_posts(3) %} - -
    +
    From b74fb10a480144872229bcb5634a687e059fb9cf Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Sat, 31 Aug 2024 19:15:16 +0000 Subject: [PATCH 27/33] fix: news cards --- ckanext/opendatani/assets/css/theme.css | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index 3543150..1474107 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -1062,11 +1062,15 @@ h6 { /* Custom UI Homepage updates */ .latest-posts.thumbnails { margin-top: 30px; + display: flex; + gap: 17px; + padding-left: 0; } + .latest-posts.thumbnails li { border: 1px solid #eee; - margin-left: 17px; min-height: 501px; + padding: 0; } .latest-posts.thumbnails li .post { padding: 10px; @@ -1117,6 +1121,12 @@ h6 { height: 160px; overflow: hidden; } + +/* Overriden main.css */ +.thumb img { + width: 100%; + height: auto; +} @media screen and (max-width: 768px) { .latest-posts .span4 { width: 100%; From 77acc8a6607a2ea0812a543c3e511501dc6faa5e Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Sat, 31 Aug 2024 19:23:38 +0000 Subject: [PATCH 28/33] fix: mobile view --- ckanext/opendatani/assets/css/theme.css | 5 +++++ ckanext/opendatani/templates/header.html | 2 +- ckanext/opendatani/templates/home/index.html | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index 1474107..8c206fa 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -1134,4 +1134,9 @@ h6 { .mar-l-0 { margin-left: 0 !important; } + + .latest-posts.thumbnails { + flex-direction: column; + } + } diff --git a/ckanext/opendatani/templates/header.html b/ckanext/opendatani/templates/header.html index 0e9a6bb..91d36e6 100644 --- a/ckanext/opendatani/templates/header.html +++ b/ckanext/opendatani/templates/header.html @@ -72,7 +72,7 @@ ('datarequests.index', _('Suggest Data') + h.get_open_datarequests_badge()), ) }} {% endblock %} -
  • {{ _('News') }}aa
  • +
  • {{ _('News') }}
  • {{ _('About').format(g.site_title) }}
  • {{ _('Contact') }}
  • diff --git a/ckanext/opendatani/templates/home/index.html b/ckanext/opendatani/templates/home/index.html index 0852cf9..766c3de 100644 --- a/ckanext/opendatani/templates/home/index.html +++ b/ckanext/opendatani/templates/home/index.html @@ -16,7 +16,7 @@

    Latest

    {% for post in posts %} -
  • +
  • From d663e663515927e68274f4599b5fa7211ad54ab8 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Wed, 4 Sep 2024 18:22:45 +0000 Subject: [PATCH 29/33] fix: datasets header --- ckanext/opendatani/assets/css/theme.css | 27 ++++++++++++++++++++ ckanext/opendatani/templates/home/index.html | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index 8c206fa..b7b61b5 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -230,6 +230,11 @@ li { width: 223px; } @media (min-width: 768px) { + + .module.context-info > .module-content { + padding-top: 33px; + } + [role="main"] .wrapper .media-grid .media-item { width: 223px; } @@ -1140,3 +1145,25 @@ h6 { } } + +@media screen and (max-width: 1000px) { + .masthead .navbar-collapse, .toolbar { + padding: 5px 0; + margin: 0; + max-width: calc(100% + 30px); + width: calc(100% + 30px); + padding-left: 15px; + margin-left: -15px; + } + + .module-content.page-header { + margin: 0; + padding: 0; + } +} + +.module-content.page-header { + background: none; + border: none; +} + diff --git a/ckanext/opendatani/templates/home/index.html b/ckanext/opendatani/templates/home/index.html index 766c3de..618bf91 100644 --- a/ckanext/opendatani/templates/home/index.html +++ b/ckanext/opendatani/templates/home/index.html @@ -18,7 +18,7 @@

    Latest

    - +

    {{ post.title or post.name }}

    From 0aec2853ce2310fcd9e9fec137d7bea44e0741e6 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Wed, 4 Sep 2024 18:52:24 +0000 Subject: [PATCH 30/33] fix: hover dark color bug --- ckanext/opendatani/assets/css/theme.css | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index b7b61b5..25667ed 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -1167,3 +1167,10 @@ h6 { border: none; } +.masthead .navigation .nav-pills li a:hover { + background-color: #dfe2e6; +} + +.masthead .navigation .nav-pills li a:focus, .masthead .navigation .nav-pills li.active a { + background: transparent; +} \ No newline at end of file From 63919b19e692cf8fb0d2bbf8fab9bd5cf2e0b832 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Fri, 6 Sep 2024 13:52:41 +0000 Subject: [PATCH 31/33] fix: style news page --- ckanext/opendatani/assets/css/theme.css | 12 ++++++++++++ .../templates/ckanext_pages/snippets/pages_list.html | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index 25667ed..3d4c221 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -1173,4 +1173,16 @@ h6 { .masthead .navigation .nav-pills li a:focus, .masthead .navigation .nav-pills li.active a { background: transparent; +} + +/* Overriding main.css of dev here */ +.wrapper { + border: none; + -webkit-box-shadow: none; + box-shadow: none; + background-color: transparent; +} + +.module-content { + padding: 30px 0; } \ No newline at end of file diff --git a/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html b/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html index e97bb40..28a122b 100644 --- a/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html +++ b/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html @@ -20,11 +20,11 @@

    Pages

    {% endif %}
    {% if page.image %} -
    + -
    +

    {{ page.title }} {% if page.publish_date %} @@ -44,7 +44,7 @@

    {% endif %}

    {% else %} -
    +

    {{ page.title }} {% if page.publish_date %} From e838769b9ba3bac9c57a2a1c738e32262ee5ef99 Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Fri, 6 Sep 2024 14:28:48 +0000 Subject: [PATCH 32/33] fix: change old icons and add contacts icon and functionality --- ckanext/opendatani/assets/css/theme.css | 4 ++++ ckanext/opendatani/templates/header.html | 20 ++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index 3d4c221..ca81270 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -1185,4 +1185,8 @@ h6 { .module-content { padding: 30px 0; +} + +.form-horizontal .form-group { + margin-left: 0; } \ No newline at end of file diff --git a/ckanext/opendatani/templates/header.html b/ckanext/opendatani/templates/header.html index 91d36e6..b37a545 100644 --- a/ckanext/opendatani/templates/header.html +++ b/ckanext/opendatani/templates/header.html @@ -3,18 +3,18 @@ {% block header_account_logged %}
  • - +
  • - +
  • {% if c.userobj.sysadmin %}
  • - + {{ _('Admin') }}
  • @@ -30,7 +30,7 @@
  • {% set notifications_tooltip = ngettext('Dashboard (%(num)d new item)', 'Dashboard (%(num)d new items)', new_activities+stale_datasets|int) %} - + {{ _('Dashboard') }} {{ new_activities + stale_datasets|int }} @@ -38,7 +38,7 @@ {% block header_account_settings_link %}
  • - + {{ _('Settings') }}
  • @@ -46,11 +46,19 @@ {% block header_account_log_out_link %}
  • - + {{ _('Log out') }}
  • {% endblock %} +{% block header_contacts_link %} +
  • + + + {{ _('Contact') }} + +
  • +{% endblock %} {% endblock %} {% block header_account_notlogged %} From 499fe3ba8d4d4b9f002a47d548693eed5dd213cf Mon Sep 17 00:00:00 2001 From: AlmubdyMutaikhan Date: Fri, 6 Sep 2024 14:37:38 +0000 Subject: [PATCH 33/33] fix: remove glyphicons and add fontawesome --- ckanext/opendatani/templates/header.html | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ckanext/opendatani/templates/header.html b/ckanext/opendatani/templates/header.html index b37a545..adb8d65 100644 --- a/ckanext/opendatani/templates/header.html +++ b/ckanext/opendatani/templates/header.html @@ -3,18 +3,18 @@ {% block header_account_logged %}
  • - +
  • - +
  • {% if c.userobj.sysadmin %}
  • - + {{ _('Admin') }}
  • @@ -30,7 +30,7 @@
  • {% set notifications_tooltip = ngettext('Dashboard (%(num)d new item)', 'Dashboard (%(num)d new items)', new_activities+stale_datasets|int) %} - + {{ _('Dashboard') }} {{ new_activities + stale_datasets|int }} @@ -38,7 +38,7 @@ {% block header_account_settings_link %}
  • - + {{ _('Settings') }}
  • @@ -46,7 +46,7 @@ {% block header_account_log_out_link %}
  • - + {{ _('Log out') }}
  • @@ -54,7 +54,7 @@ {% block header_contacts_link %}
  • - + {{ _('Contact') }}
  • {{ _('Metadata last updated') }} {{ h.render_datetime(res.metadata_modified) or - h.render_datetime(res.created) or _('unknown') }} + h.render_datetime(res.metadata_modified) or _('unknown') }}