Skip to content

Commit

Permalink
feat: ⚡️ add Spain IPC dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Apr 1, 2024
1 parent 6a9b4b8 commit 6e0bd58
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 21 deletions.
1 change: 1 addition & 0 deletions datadex/assets/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def hf_asset(data: pd.DataFrame, hf: HuggingFaceResource) -> None:
"wikidata_asteroids",
"threatened_animal_species",
"country_year_indicators",
"spain_ipc",
]

assets = []
Expand Down
23 changes: 2 additions & 21 deletions datadex/assets/indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,7 @@
import pandas as pd
import requests
from dagster import asset


def sanitize_string(s: str) -> str:
    """
    Sanitize a string to be used as a column name in a pandas DataFrame.

    The input is lowercased, then:

    - spaces and hyphens become underscores,
    - the characters ``( ) , : '`` are removed,
    - ``$``, ``%`` and ``+`` are spelled out as ``dollar``, ``percent``
      and ``plus``.

    All other characters are left untouched.
    """

    # One translation table applies every substitution in a single pass
    # instead of allocating an intermediate string per chained
    # ``.replace()`` call. None of the replacement texts contains a
    # character that is itself a key, so the result matches applying the
    # replacements one after another.
    substitutions = str.maketrans(
        {
            " ": "_",
            "-": "_",
            "(": None,
            ")": None,
            ",": None,
            ":": None,
            "'": None,
            "$": "dollar",
            "%": "percent",
            "+": "plus",
        }
    )

    return s.lower().translate(substitutions)
from slugify import slugify


@asset
Expand Down Expand Up @@ -86,6 +67,6 @@ def world_bank_wdi() -> pd.DataFrame:
).reset_index()

# Clean column names
pivoted_data.columns = [sanitize_string(col) for col in pivoted_data.columns]
pivoted_data.columns = [slugify(col, separator="_") for col in pivoted_data.columns]

return pivoted_data
25 changes: 25 additions & 0 deletions datadex/assets/others.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas as pd
import requests
from dagster import AssetExecutionContext, asset
from slugify import slugify

from ..resources import IUCNRedListAPI

Expand Down Expand Up @@ -92,3 +93,27 @@ def spain_energy_demand(context: AssetExecutionContext) -> pd.DataFrame:
end_date_str = end_date.strftime("%Y-%m-%d")

return df


@asset
def spain_ipc() -> pd.DataFrame:
    """
    Spain IPC data from INE. Downloaded from datos.gob.es (https://datos.gob.es/es/apidata).

    Reads the semicolon-separated CSV published by INE, converts the
    ``Total`` column to numbers and the ``Periodo`` column to datetimes,
    and pivots the table so that every distinct ``Tipo de dato`` value
    becomes its own column, keyed by ``Periodo`` and ``Clases``. Column
    names are slugified with ``_`` as separator before returning.
    """

    raw = pd.read_csv("https://www.ine.es/jaxiT3/files/t/csv_bdsc/50904.csv", sep=";")

    # "Total" uses a decimal comma; swap it for a dot so it parses as a
    # number. Anything that still fails to parse becomes NaN.
    total_as_text = raw["Total"].str.replace(",", ".")
    raw["Total"] = pd.to_numeric(total_as_text, errors="coerce")

    # "Periodo" has "M" between year and month; replacing it with "-"
    # yields a "%Y-%m" string.
    period_as_text = raw["Periodo"].str.replace("M", "-")
    raw["Periodo"] = pd.to_datetime(period_as_text, format="%Y-%m")

    # Long -> wide: one row per (Periodo, Clases), one column per Tipo de dato.
    wide = raw.pivot_table(
        values="Total",
        index=["Periodo", "Clases"],
        columns=["Tipo de dato"],
        aggfunc="sum",
    )
    wide = wide.reset_index()

    wide.columns = [slugify(name, separator="_") for name in wide.columns]

    return wide
30 changes: 30 additions & 0 deletions datadex/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,33 @@ def get_species(self, page):
r.raise_for_status()

return r.json()["result"]


class REDataAPI(ConfigurableResource):
    """
    Small HTTP client for the data API served under ``endpoint``.

    Every request targets ``{endpoint}/{category}/{widget}`` with a date
    range and a time truncation passed as query-string parameters, and
    returns the decoded JSON response.
    """

    # Base URL that the category and widget path segments are appended to.
    endpoint: str = "https://apidatos.ree.es/en/datos"
    # Not read inside this class; presumably the earliest date callers
    # request data for -- verify against the assets using this resource.
    first_day: str = "2014-01-01"

    def query(
        self,
        category: str,
        widget: str,
        start_date: str,
        end_date: str,
        time_trunc: str,
        timeout: float = 30.0,
    ):
        """
        Fetch one widget of one category for the given date range.

        Parameters
        ----------
        category, widget:
            Path segments appended to ``endpoint``.
        start_date, end_date:
            Date strings; ``T00:00`` is appended to each, so they should
            not carry a time component (e.g. ``"2014-01-01"``).
        time_trunc:
            Value sent as the ``time_trunc`` query parameter.
        timeout:
            Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled
            connection would block the caller forever.

        Returns
        -------
        The JSON-decoded response body.

        Raises
        ------
        requests.HTTPError
            If the server answers with a 4xx/5xx status.
        requests.Timeout
            If the server does not respond within ``timeout`` seconds.
        """
        params = f"start_date={start_date}T00:00&end_date={end_date}T00:00&time_trunc={time_trunc}"
        url = f"{self.endpoint}/{category}/{widget}?{params}"
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()

        return r.json()

    def get_energy_demand(self, start_date: str, end_date: str, time_trunc: str = "hour"):
        """
        Query the ``demanda`` / ``demanda-tiempo-real`` widget.

        See ``query`` for the meaning of the arguments and the errors raised.
        """
        category = "demanda"
        widget = "demanda-tiempo-real"
        return self.query(category, widget, start_date, end_date, time_trunc)

    def get_market_prices(self, start_date: str, end_date: str, time_trunc: str = "hour"):
        """
        Query the ``mercados`` / ``precios-mercados-tiempo-real`` widget.

        See ``query`` for the meaning of the arguments and the errors raised.
        """
        category = "mercados"
        widget = "precios-mercados-tiempo-real"
        return self.query(category, widget, start_date, end_date, time_trunc)

0 comments on commit 6e0bd58

Please sign in to comment.