-
Notifications
You must be signed in to change notification settings - Fork 37
/
gold_prices_flow.py
99 lines (88 loc) · 3.1 KB
/
gold_prices_flow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
from dataflows import Flow, PackageWrapper, validate, delete_fields
from dataflows import add_metadata, load, set_type, update_resource
def readme(fpath='README.md'):
    """Return the text of the file at ``fpath``, or ``None`` if it is absent.

    Args:
        fpath: Path of the file to read. Defaults to ``'README.md'``.

    Returns:
        The file's contents as a string, or ``None`` when nothing exists at
        ``fpath``.
    """
    if not os.path.exists(fpath):
        return None
    # The context manager closes the handle deterministically; a bare
    # open(...).read() leaves that to the garbage collector.
    with open(fpath) as handle:
        return handle.read()
def extract_december_rows(rows):
    """Yield only the rows whose ``Date`` value contains ``-12``.

    Args:
        rows: Iterable of mappings, each exposing a string under ``'Date'``.

    Yields:
        The input rows, in their original order, for which ``'-12'`` occurs
        in ``row['Date']``.
    """
    yield from (record for record in rows if '-12' in record['Date'])
# Bundesbank CSV download for the gold price series. Shared by the package
# metadata and both load steps so the three references cannot drift apart.
_BUNDESBANK_CSV_URL = (
    'http://www.bundesbank.de/cae/servlet/StatisticDownload'
    '?tsId=BBEX3.M.XAU.USD.EA.AC.C06&its_csvFormat=en&its_fileFormat=csv&mode=its'
)

# Pipeline that publishes the same Bundesbank CSV as two resources:
# 'annual' (rows kept by extract_december_rows) and 'monthly' (all rows).
gold_price_flow = Flow(
    add_metadata(
        name="gold-prices",
        title="Gold Prices",
        homepage='http://www.bundesbank.de',
        licenses=[
            {
                "id": "odc-pddl",
                "name": "public_domain_dedication_and_license",
                "version": "1.0",
                "url": "http://opendatacommons.org/licenses/pddl/1.0/"
            }
        ],
        sources=[
            {
                "name": "bundesbank-gold-prices",
                # Previously this value carried literal single quotes inside
                # the string, which made the published source path an
                # invalid URL.
                "path": _BUNDESBANK_CSV_URL,
                "title": "Bundesbank gold prices"
            }
        ],
        views=[
            {
                "name": "graph",
                "title": "Gold Prices (Monthly in USD)",
                "specType": "simple",
                "spec": {
                    "type": "lines-and-points",
                    "group": "Date",
                    "series": [
                        "Price"
                    ]
                }
            }
        ],
        related=[
            {
                "title": "Oil prices",
                "path": "/core/oil-prices",
                "publisher": "core",
                "formats": ["CSV", "JSON"]
            },
            {
                "title": "Natural gas",
                "path": "/core/natural-gas",
                "publisher": "core",
                "formats": ["CSV", "JSON"]
            }
        ],
        version="0.2.0"
    ),
    # First copy of the CSV, registered as the 'annual' resource.
    load(
        load_source=_BUNDESBANK_CSV_URL,
        skip_rows=[1, 2, 3, 4, 5, -1],
        headers=['Date', 'Price', 'Empty column'],
        format='csv',
        name='annual'
    ),
    # NOTE(review): placed before the second load, presumably so that only
    # the 'annual' rows are filtered -- confirm against dataflows' step
    # ordering semantics.
    extract_december_rows,
    # Second copy of the same CSV, registered as the 'monthly' resource.
    load(
        load_source=_BUNDESBANK_CSV_URL,
        skip_rows=[1, 2, 3, 4, 5, -1],
        headers=['Date', 'Price', 'Empty column'],
        format='csv',
        name='monthly'
    ),
    update_resource('monthly', **{'path': 'data/monthly.csv', 'dpp:streaming': True}),
    update_resource('annual', **{'path': 'data/annual.csv', 'dpp:streaming': True}),
    set_type('Date', resources='annual', type='yearmonth'),
    set_type('Price', resources='annual', type='number'),
    set_type('Date', resources='monthly', type='yearmonth'),
    set_type('Price', resources='monthly', type='number'),
    validate(),
    # Remove the third column that both load steps named 'Empty column'.
    delete_fields(['Empty column'], resources=None)
)
def flow(parameters, datapackage, resources, stats):
    """Return the module-level ``gold_price_flow`` pipeline.

    All four arguments are accepted but not used by this function.
    NOTE(review): the signature presumably matches what an external pipeline
    runner passes in -- confirm against the caller.
    """
    pipeline = gold_price_flow
    return pipeline
# Running this file as a script executes the pipeline directly.
if __name__ == "__main__":
    gold_price_flow.process()