Skip to content

Commit

Permalink
Get a report from CKAN about publisher usage.
Browse files Browse the repository at this point in the history
CSV and JSON report on the publisher page.
  • Loading branch information
mpolidori authored and zelima committed Nov 25, 2019
1 parent 1381bd4 commit 32d4abc
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 1 deletion.
100 changes: 100 additions & 0 deletions ckanext/opendatani/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@
import ckan.logic as logic
import ckan.lib.helpers as h

import logging
from ckan.plugins import toolkit
from ckan.common import config
import csv
import json
import os

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)


def get_user_name(user):
if not isinstance(user, model.User):
user_name = text_type(user)
Expand Down Expand Up @@ -105,3 +115,93 @@ def activity_list_to_text(activity_stream):
'is_new': activity.get('is_new', False),
'dataset_url': get_dataset_url(activity)})
return activity_list


def _get_action(action, context_dict, data_dict):
    """
    Resolve the action named ``action`` through ``toolkit.get_action`` and
    call it.
    :param action: name of the action to look up
    :type action: string
    :param context_dict: passed to the action as its first argument
    :type context_dict: dict
    :param data_dict: passed to the action as its second argument
    :type data_dict: dict
    :returns: whatever the resolved action returns
    """
    action_fn = toolkit.get_action(action)
    return action_fn(context_dict, data_dict)


def is_admin(user, org):
    """
    Tell whether ``user`` has admin rights on the organization ``org``.

    The organizations of the user are fetched with the
    ``organization_list_for_user`` action. The answer is True when one of the
    returned entries is named ``org`` and either has the capacity ``admin``
    or carries a truthy ``sysadmin`` value; otherwise it is False (which is
    also the outcome when no returned entry is named ``org``).
    :param user: user name
    :type user: string
    :param org: organization name
    :type org: string
    :returns: True/False
    :rtype: boolean
    """
    memberships = _get_action(
        'organization_list_for_user', {'user': user}, {'user': user})

    for membership in memberships:
        # Entries for other organizations can never grant access to ``org``.
        if membership.get('name') != org:
            continue
        if membership.get('capacity') == 'admin' or membership.get('sysadmin'):
            return True

    return False


def verify_datasets_exist(org):
    """
    Check whether a given organization has at least one dataset
    (private datasets included).
    :param org: organization name
    :type org: string
    :returns: True if the search reports one or more datasets, else False
    :rtype: boolean
    """

    search = toolkit.get_action('package_search')({}, {
        'q': 'organization:{0}'.format(org),
        'include_private': True,
        # Only the total is needed, so do not fetch any dataset records.
        'rows': 0})

    # Treat a missing or None count as "no datasets" instead of comparing
    # None with an integer, which raises TypeError on Python 3.
    return (search.get('count') or 0) > 0


def prepare_reports(org):
    """
    Creates a CSV and JSON publisher report, and stores them under CKAN's
    storage path in /storage/publisher-reports/.

    The report rows come from the ``report_resources_by_organization``
    action. Both files are always written, even when the report is empty
    (the CSV is then an empty file and the JSON contains an empty list), so
    the returned list always holds two names: the CSV first, then the JSON.
    :param org: organization
    :type org: string
    :return: a list containing the file_names of the created archives
    :rtype: list
    :raises Exception: any error hit while writing a file is logged and
        re-raised unchanged
    """

    rows = toolkit.get_action(
        'report_resources_by_organization')({}, {'org_name': org})
    report_dir = os.path.join(
        config.get('ckan.storage_path'), 'storage', 'publisher-reports')

    # Create the directory without a check-then-create race: a concurrent
    # request may create it between an existence test and makedirs().
    try:
        os.makedirs(report_dir)
    except OSError:
        if not os.path.isdir(report_dir):
            raise

    file_names = []

    for file_type in ('.csv', '.json'):
        file_name = 'publisher-report-' + org + file_type

        try:
            with open(os.path.join(report_dir, file_name), 'w') as report:
                if file_type == '.csv':
                    # The header is taken from the first row; an empty
                    # report has no first row, so leave the file empty
                    # rather than failing with IndexError.
                    if rows:
                        writer = csv.DictWriter(
                            report, fieldnames=list(rows[0].keys()),
                            quoting=csv.QUOTE_MINIMAL)
                        writer.writeheader()
                        writer.writerows(rows)
                else:
                    json.dump(rows, report)

        except Exception as ex:
            # log.exception keeps the traceback alongside the message.
            log.exception(
                'An error occured while preparing the {0} archive. Error: {1}'
                .format(file_type, ex))
            raise

        file_names.append(file_name)

    return file_names
78 changes: 77 additions & 1 deletion ckanext/opendatani/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
from ckanext.opendatani.controller import CustomUserController
from ckanext.opendatani import helpers

from ckan.common import OrderedDict
import logging

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)

# Short alias for toolkit._; it wraps the user-facing abort message in
# report_resources_by_organization.
_ = toolkit._


Expand Down Expand Up @@ -60,6 +65,9 @@ def get_helpers(self):
'group_list': group_list,
'package_list': package_list,
'ni_activity_list_to_text': helpers.activity_list_to_text,
'verify_datasets_exist': helpers.verify_datasets_exist,
'is_admin': helpers.is_admin,
'prepare_reports': helpers.prepare_reports,
}

# IRoutes
Expand Down Expand Up @@ -92,7 +100,6 @@ def before_map(self, map):
m.connect('/dataset/{id}/resource/{resource_id}',
action='resource_read')


return map

def after_map(self, map):
Expand All @@ -111,6 +118,7 @@ def get_actions(self):
return {
'user_create': custom_user_create,
'user_update': custom_user_update,
'report_resources_by_organization': report_resources_by_organization
}


Expand Down Expand Up @@ -145,6 +153,74 @@ def custom_user_update(context, data_dict):
return core_user_update(context, data_dict)


@toolkit.side_effect_free
def report_resources_by_organization(context, data_dict):
    """
    Returns a list of OrderedDicts (one for each resource of each dataset
    in an organization) sorted newest first by the last modified date, then
    by the creation date (which decides the order of resources that have not
    been modified yet).
    Each OrderedDict contains the following keys:
    dataset_name, dataset_url, resource_name, resource_url,
    dataset_organization, dataset_organization_url, resource_created,
    resource_last_modified, resource_view_count, resource_download_count

    The organization is read from ``data_dict['org_name']`` (falling back to
    ``context['org']``). Any other key in ``data_dict`` is forwarded to
    ``package_search``. When the caller supplies neither ``rows`` nor
    ``start``, all matching datasets are fetched page by page; otherwise the
    caller's paging is used as given. ``data_dict`` itself is not modified.

    An empty list is returned when the organization has no datasets. The
    request is aborted with a 403 when the user is not an admin of the
    organization.
    :return: a sorted list of OrderedDicts
    :rtype: list
    """

    # 'user' is the conventional CKAN context key; 'name' is still honoured
    # for callers that relied on the previous lookup.
    user = toolkit.c.user or context.get('user') or context.get('name')
    org = data_dict.get('org_name') or context.get('org')
    report = []

    if not helpers.verify_datasets_exist(org):
        return report

    if not helpers.is_admin(user, org):
        toolkit.abort(403, _('You are not authorized to access this '
                             'report or the organization does not exist.'))

    # Work on a copy so the caller's dict is left untouched.
    search_params = dict(data_dict)
    search_params.pop('org_name', None)
    search_params['include_private'] = True
    search_params['q'] = 'organization:{0}'.format(org)
    package_search = toolkit.get_action('package_search')

    if 'rows' in search_params or 'start' in search_params:
        # The caller asked for a specific window; respect it.
        datasets = package_search({}, search_params)['results']
    else:
        # package_search returns only a small default page, so walk through
        # every page to avoid silently truncating the report.
        page_size = 1000
        datasets = []
        while True:
            page = package_search({}, dict(
                search_params, start=len(datasets), rows=page_size))
            batch = page['results']
            datasets.extend(batch)
            if not batch or len(datasets) >= page.get('count', 0):
                break

    site_url = config.get('ckan.site_url')

    for item in datasets:
        organization = item['organization']
        dataset_url = site_url + '/dataset/{0}'.format(item.get('name'))
        organization_url = site_url + '/organization/{0}'.format(
            organization.get('name'))

        for resource in item['resources']:

            # resource_view_count depends on tracking_summary, which
            # doesn't seem to be enabled. Once it's enabled,
            # resource_view_count will come from
            # resource.get('tracking_summary').get('total')
            # For now, there's a shortened version to avoid errors.

            # resource_download_count will also need to be looked into
            # when tracking_summary is enabled.

            report.append(OrderedDict([
                ('dataset_name', item.get('title')),
                ('dataset_url', dataset_url),
                ('resource_name', resource.get('name')),
                ('resource_url', resource.get('url')),
                ('dataset_organization', organization.get('name')),
                ('dataset_organization_url', organization_url),
                ('resource_created', resource.get('created')),
                ('resource_last_modified', resource.get('last_modified')),
                ('resource_view_count', resource.get('tracking_summary', 0)),
                ('resource_download_count', resource.get('downloads', 0))]))

    # A resource that was never modified has no last_modified value; map
    # missing dates to '' so they sort last (with reverse=True) and are never
    # compared as None against a string, which raises on Python 3.
    # NOTE(review): assumes both dates are strings - confirm.
    return sorted(
        report,
        key=lambda row: (row['resource_last_modified'] or '',
                         row['resource_created'] or ''),
        reverse=True)


# Custom schemas

def custom_create_user_schema(form_schema=False):
Expand Down
16 changes: 16 additions & 0 deletions ckanext/opendatani/templates/snippets/organization.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{% ckan_extends %}

{# Adds a "Publisher Report" section with CSV and JSON download links after the inherited description content. #}
{% block description %}
{{ super() }}
{% block report %}
{# The organization name is derived from the URL by removing every '/organization/' occurrence from request.path. #}
{% set org = request.path.replace('/organization/', '') %}
{# Rendered only when the path contains 'organization' and both h.is_admin(c.user, org) and h.verify_datasets_exist(org) are truthy. #}
{% if 'organization' in request.path and h.is_admin(c.user, org) and h.verify_datasets_exist(org) %}
{# h.prepare_reports(org) is called while rendering; its result is unpacked into exactly two names (csv, json) used for the links below. #}
{% set csv, json = h.prepare_reports(org) %}
<dl>
<h1 class="heading">{{ _('Publisher Report') }}</h1>
<a class="btn btn-primary" href="{{ '/publisher-reports/' + csv }}" download="{{ csv }}">CSV</a>
<a class="btn btn-primary" href="{{ '/publisher-reports/' + json }}" download="{{ json }}">JSON</a>
</dl>
{% endif %}
{% endblock %}
{% endblock %}

0 comments on commit 32d4abc

Please sign in to comment.