Get a report from CKAN about publisher usage.

CSV and JSON report on the publisher page.
datopian · Nov 25, 2019 · 32d4abc · 32d4abc
1 parent 1381bd4
commit 32d4abc
Show file tree

Hide file tree

Showing 3 changed files with 193 additions and 1 deletion.
diff --git a/ckanext/opendatani/helpers.py b/ckanext/opendatani/helpers.py
@@ -6,6 +6,16 @@
 import ckan.logic as logic
 import ckan.lib.helpers as h
 
+import logging
+from ckan.plugins import toolkit
+from ckan.common import config
+import csv
+import json
+import os
+
+log = logging.getLogger(__name__)
+
+
 def get_user_name(user):
     if not isinstance(user, model.User):
         user_name = text_type(user)
@@ -105,3 +115,93 @@ def activity_list_to_text(activity_stream):
                               'is_new': activity.get('is_new', False),
                               'dataset_url': get_dataset_url(activity)})
     return activity_list
+
+
+def _get_action(action, context_dict, data_dict):
+    """
+    Looks up a CKAN action by name and calls it right away.
+    :param action: name of the action to look up
+    :type action: string
+    :param context_dict: context handed to the action as first argument
+    :type context_dict: dict
+    :param data_dict: data handed to the action as second argument
+    :type data_dict: dict
+    :returns: whatever the called action returns
+    """
+    action_function = toolkit.get_action(action)
+    return action_function(context_dict, data_dict)
+
+
+def is_admin(user, org):
+    """
+    Tells whether the user has admin rights on the given organization.
+    The user's organizations are fetched with the
+    ``organization_list_for_user`` action. The result is True as soon as
+    one of them is named ``org`` and either has the 'admin' capacity or
+    carries a truthy 'sysadmin' value.
+    :param user: user name
+    :type user: string
+    :param org: organization name
+    :type org: string
+    :returns: True/False
+    :rtype: boolean
+    """
+
+    memberships = _get_action(
+        'organization_list_for_user', {'user': user}, {'user': user})
+
+    for membership in memberships:
+        # Entries for other organizations can never grant access.
+        if membership.get('name') != org:
+            continue
+
+        has_admin_rights = (membership.get('capacity') == 'admin'
+                            or membership.get('sysadmin'))
+        if has_admin_rights:
+            return True
+
+    return False
+
+
+def verify_datasets_exist(org):
+    """
+    Returns True if the number of datasets (including private) for a given
+    organization is greater than 0.
+    :param org: organization name
+    :type org: string
+    :returns: True if at least one dataset was found, False otherwise
+    :rtype: boolean
+    """
+
+    search_result = toolkit.get_action('package_search')({}, {
+        'q': 'organization:{0}'.format(org),
+        'include_private': True,
+        # Only the total is needed here, so no dataset has to be returned.
+        'rows': 0})
+
+    # Default to 0 so a missing count means "no datasets" instead of
+    # comparing None with an integer (a TypeError on Python 3).
+    return search_result.get('count', 0) > 0
+
+
+def prepare_reports(org):
+    """
+    Creates a CSV and JSON publisher report, and stores them under CKAN's
+    storage path in /storage/publisher-reports/.
+    The report rows come from the ``report_resources_by_organization``
+    action. Both files are always written; for an empty report the CSV
+    file is left empty and the JSON file contains an empty list.
+    :param org: organization
+    :type org: string
+    :return: a list containing the file_names of the created archives
+        (the CSV file first, then the JSON file)
+    :rtype: list
+    :raises Exception: any error raised while writing a file is logged
+        and re-raised
+    """
+
+    resource = toolkit.get_action(
+        'report_resources_by_organization')({}, {'org_name': org})
+    storage_path = config.get('ckan.storage_path')
+    file_path = os.path.join(storage_path, 'storage', 'publisher-reports')
+
+    if not os.path.isdir(file_path):
+        try:
+            os.makedirs(file_path)
+        except OSError:
+            # Another request may have created the directory between the
+            # check and the call; only fail if it is really missing.
+            if not os.path.isdir(file_path):
+                raise
+
+    report_writers = [
+        ('.csv', _write_csv_report),
+        ('.json', _write_json_report),
+    ]
+    file_names = []
+
+    for file_type, write_report in report_writers:
+        file_name = 'publisher-report-' + org + file_type
+
+        try:
+            write_report(os.path.join(file_path, file_name), resource)
+        except Exception as ex:
+            log.error(
+                'An error occured while preparing the {0} archive. Error: {1}'
+                .format(file_type, ex))
+            raise
+
+        file_names.append(file_name)
+
+    return file_names
+
+
+def _write_csv_report(target, rows):
+    """
+    Writes the report rows to the file ``target`` as CSV. The header is
+    taken from the keys of the first row.
+    """
+
+    with open(target, 'w') as csvfile:
+        if not rows:
+            # Without a first row there are no field names to derive a
+            # header from, so the file is intentionally left empty.
+            return
+
+        writer = csv.DictWriter(csvfile, fieldnames=list(rows[0].keys()),
+                                quoting=csv.QUOTE_MINIMAL)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def _write_json_report(target, rows):
+    """
+    Writes the report rows to the file ``target`` as a JSON document.
+    """
+
+    with open(target, 'w') as jsonfile:
+        json.dump(rows, jsonfile)
diff --git a/ckanext/opendatani/plugin.py b/ckanext/opendatani/plugin.py
@@ -13,6 +13,11 @@
 from ckanext.opendatani.controller import CustomUserController
 from ckanext.opendatani import helpers
 
+from ckan.common import OrderedDict
+import logging
+
+log = logging.getLogger(__name__)
+
 _ = toolkit._
 
 
@@ -60,6 +65,9 @@ def get_helpers(self):
             'group_list': group_list,
             'package_list': package_list,
             'ni_activity_list_to_text': helpers.activity_list_to_text,
+            'verify_datasets_exist': helpers.verify_datasets_exist,
+            'is_admin': helpers.is_admin,
+            'prepare_reports': helpers.prepare_reports,
         }
 
     # IRoutes
@@ -92,7 +100,6 @@ def before_map(self, map):
             m.connect('/dataset/{id}/resource/{resource_id}',
                       action='resource_read')
 
-
         return map
 
     def after_map(self, map):
@@ -111,6 +118,7 @@ def get_actions(self):
         return {
             'user_create': custom_user_create,
             'user_update': custom_user_update,
+            'report_resources_by_organization': report_resources_by_organization
         }
 
 
@@ -145,6 +153,74 @@ def custom_user_update(context, data_dict):
     return core_user_update(context, data_dict)
 
 
+@toolkit.side_effect_free
+def report_resources_by_organization(context, data_dict):
+    """
+    Returns a list of OrderedDicts (one for each resource of every dataset
+    in an organization), newest first. Entries are ordered by the last
+    modified date, falling back to the creation date
+    (if no modifications have been made yet).
+    Each OrderedDict contains the following keys:
+    dataset_name, dataset_url, resource_name, resource_url,
+    dataset_organization, dataset_organization_url, resource_created,
+    resource_last_modified, resource_view_count, resource_download_count
+    An empty list is returned when the organization has no datasets. The
+    request is aborted with a 403 when the user is not an admin of the
+    organization.
+    :param context: action context; 'user' (or 'name') and 'org' are used
+        as fallbacks for the user and the organization
+    :type context: dict
+    :param data_dict: 'org_name' selects the organization; the remaining
+        keys are passed on to package_search
+    :type data_dict: dict
+    :return: a sorted list of OrderedDicts
+    :rtype: list
+    """
+
+    # CKAN stores the acting user under 'user' in the context; 'name' is
+    # kept as a fallback for existing callers.
+    user = toolkit.c.user or context.get('user') or context.get('name')
+    org = data_dict.get('org_name') or context.get('org')
+
+    # NOTE(review): this check runs before the authorization check, so a
+    # caller without admin rights gets an empty list (not a 403) for an
+    # organization without datasets - confirm this is intended.
+    if not helpers.verify_datasets_exist(org):
+        return []
+
+    if not helpers.is_admin(user, org):
+        # Adjacent literals keep the message free of the indentation that
+        # a backslash continuation inside the string would embed.
+        toolkit.abort(403, _('You are not authorized to access this '
+                             'report or the organization does not exist.'))
+
+    # Work on a copy so the caller's data_dict is left untouched.
+    search_dict = dict(data_dict)
+    search_dict.pop('org_name', None)
+    search_dict['include_private'] = True
+    search_dict['q'] = 'organization:{0}'.format(org)
+
+    site_url = config.get('ckan.site_url')
+    report = []
+
+    for item in _search_all_datasets(search_dict):
+        organization = item['organization']
+        dataset_url = site_url + '/dataset/{0}'.format(item.get('name'))
+        organization_url = site_url + '/organization/{0}'.format(
+            organization.get('name'))
+
+        for resource in item['resources']:
+
+            # resource_view_count depends on tracking_summary, which
+            # doesn't seem to be enabled. Once it's enabled,
+            # resource_view_count will come from
+            # resource.get('tracking_summary').get('total')
+            # For now, there's a shortened version to avoid errors.
+
+            # resource_download_count will also need to be looked into
+            # when tracking_summary is enabled.
+
+            report.append(OrderedDict([
+                ('dataset_name', item.get('title')),
+                ('dataset_url', dataset_url),
+                ('resource_name', resource.get('name')),
+                ('resource_url', resource.get('url')),
+                ('dataset_organization', organization.get('name')),
+                ('dataset_organization_url', organization_url),
+                ('resource_created', resource.get('created')),
+                ('resource_last_modified', resource.get('last_modified')),
+                ('resource_view_count', resource.get('tracking_summary', 0)),
+                ('resource_download_count', resource.get('downloads', 0))]))
+
+    def _newest_first_key(row):
+        # Missing dates are replaced with '' so that None is never compared
+        # with a date value (a TypeError on Python 3).
+        # NOTE(review): assumes the dates are ISO formatted strings, which
+        # sort chronologically - confirm against package_search output.
+        created = row['resource_created'] or ''
+        return (row['resource_last_modified'] or created, created)
+
+    return sorted(report, key=_newest_first_key, reverse=True)
+
+
+def _search_all_datasets(search_dict):
+    """
+    Runs package_search with the given parameters and returns the matching
+    datasets. Unless the caller asked for a specific page ('rows' or
+    'start'), all result pages are fetched, because package_search only
+    returns a limited number of datasets per call.
+    """
+
+    package_search = toolkit.get_action('package_search')
+
+    if 'rows' in search_dict or 'start' in search_dict:
+        # Explicit paging requested: run the search exactly as given.
+        return package_search({}, search_dict)['results']
+
+    datasets = []
+
+    while True:
+        # The offset is the number of datasets received so far, so this
+        # also works when the site caps 'rows' to a smaller page size.
+        page = package_search(
+            {}, dict(search_dict, rows=1000, start=len(datasets)))
+        datasets.extend(page['results'])
+
+        if not page['results'] or len(datasets) >= page.get('count', 0):
+            return datasets
+
+
 # Custom schemas
 
 def custom_create_user_schema(form_schema=False):

diff --git a/ckanext/opendatani/templates/snippets/organization.html b/ckanext/opendatani/templates/snippets/organization.html
@@ -0,0 +1,16 @@
+{% ckan_extends %}
+
+{#
+  Extends the description block of the organization snippet with download
+  links for the publisher report (CSV and JSON).
+#}
+{% block description %}
+  {{ super() }}
+  {% block report %}
+  {# The organization name is derived from the request path. #}
+  {% set org = request.path.replace('/organization/', '') %}
+    {# Links are rendered only for admins of an organization with datasets. #}
+    {% if 'organization' in request.path and h.is_admin(c.user, org) and h.verify_datasets_exist(org) %}
+      {#
+        prepare_reports writes both report files and returns their file
+        names, CSV first.
+        NOTE(review): this runs on every render of the snippet for an admin,
+        so the files are regenerated on each page view.
+      #}
+      {% set csv, json = h.prepare_reports(org) %}
+      <dl>
+        <h1 class="heading">{{ _('Publisher Report') }}</h1>
+        {# NOTE(review): presumably /publisher-reports/ is mapped to the storage directory elsewhere - confirm. #}
+        <a class="btn btn-primary" href="{{ '/publisher-reports/' + csv }}" download="{{ csv }}">CSV</a>
+        <a class="btn btn-primary" href="{{ '/publisher-reports/' + json }}" download="{{ json }}">JSON</a>
+      </dl>
+    {% endif %}
+  {% endblock %}
+{% endblock %}