diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d0fe341..c75e56f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -11,10 +11,10 @@ "files.associations": { "*.sql": "jinja-sql" }, - "[jinja-sql]": { - "editor.defaultFormatter": "innoverio.vscode-dbt-power-user", - "editor.formatOnSave": true - }, + // "[jinja-sql]": { + // "editor.defaultFormatter": "innoverio.vscode-dbt-power-user", + // "editor.formatOnSave": true + // }, "[python]": { "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, @@ -30,16 +30,6 @@ }, "python.analysis.typeCheckingMode": "basic", "python.analysis.autoImportCompletions": true, - "sqltools.useNodeRuntime": true, - "sqltools.connections": [ - { - "name": "DuckDB", - "accessMode": "Read Only", - "previewLimit": 50, - "driver": "DuckDB", - "databaseFilePath": "data/local.duckdb" - } - ] }, "extensions": [ "charliermarsh.ruff", diff --git a/datadex/__init__.py b/datadex/__init__.py index 5c5db21..f8c2334 100644 --- a/datadex/__init__.py +++ b/datadex/__init__.py @@ -4,7 +4,7 @@ from dagster_dbt import DbtCliResource, load_assets_from_dbt_project from dagster_duckdb_pandas import DuckDBPandasIOManager -from .assets import energy, huggingface +from .assets import others, indicators, huggingface from .resources import IUCNRedListAPI, HuggingFaceResource DBT_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../dbt/" @@ -13,7 +13,7 @@ dbt = DbtCliResource(project_dir=DBT_PROJECT_DIR, profiles_dir=DBT_PROJECT_DIR) dbt_assets = load_assets_from_dbt_project(DBT_PROJECT_DIR, DBT_PROJECT_DIR) -all_assets = load_assets_from_modules([energy, huggingface]) +all_assets = load_assets_from_modules([indicators, huggingface, others]) resources = { "hf": HuggingFaceResource(token=EnvVar("HUGGINGFACE_TOKEN")), diff --git a/datadex/assets/huggingface.py b/datadex/assets/huggingface.py index bc37b42..27c83b8 100644 --- a/datadex/assets/huggingface.py +++ b/datadex/assets/huggingface.py @@ -1,5 +1,5 @@ import pandas as pd -from dagster import asset, AssetIn +from dagster import AssetIn, asset from ..resources import HuggingFaceResource @@ -16,13 +16,10 @@ def hf_asset(data: pd.DataFrame, hf: HuggingFaceResource) -> None: datasets = [ - "co2_global_trend", "spain_energy_demand", - "owid_energy_data", - "owid_co2_data", "wikidata_asteroids", "threatened_animal_species", - "climate", + "country_year_indicators", ] assets = [] diff --git a/datadex/assets/indicators.py b/datadex/assets/indicators.py new file mode 100644 index 0000000..4a0a04e --- /dev/null +++ b/datadex/assets/indicators.py @@ -0,0 +1,24 @@ +import pandas as pd +from dagster import asset + + +@asset +def owid_energy_data() -> pd.DataFrame: + """ + Raw Energy data from Our World in Data. + """ + energy_owid_url = ( + "https://raw.githubusercontent.com/owid/energy-data/master/owid-energy-data.csv" + ) + return pd.read_csv(energy_owid_url) + + +@asset +def owid_co2_data() -> pd.DataFrame: + """ + Raw CO2 data from Our World in Data. + """ + co2_owid_url = ( + "https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv" + ) + return pd.read_csv(co2_owid_url) diff --git a/datadex/assets/energy.py b/datadex/assets/others.py similarity index 77% rename from datadex/assets/energy.py rename to datadex/assets/others.py index 8c2c84d..4575ba8 100644 --- a/datadex/assets/energy.py +++ b/datadex/assets/others.py @@ -24,37 +24,6 @@ def threatened_animal_species(iucn_redlist_api: IUCNRedListAPI) -> pd.DataFrame: ) -@asset -def owid_energy_data() -> pd.DataFrame: - """ - Raw Energy data from Our World in Data. - """ - energy_owid_url = ( - "https://raw.githubusercontent.com/owid/energy-data/master/owid-energy-data.csv" - ) - return pd.read_csv(energy_owid_url) - - -@asset -def owid_co2_data() -> pd.DataFrame: - """ - Raw CO2 data from Our World in Data. - """ - co2_owid_url = ( - "https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv" - ) - return pd.read_csv(co2_owid_url) - - -@asset -def co2_global_trend() -> pd.DataFrame: - """ - Trends in Atmospheric Carbon Dioxide from NOAA/ESRL. - """ - co2_noaa_url = "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_trend_gl.csv" - return pd.read_csv(co2_noaa_url, skiprows=24) - - @asset def wikidata_asteroids() -> pd.DataFrame: """ diff --git a/dbt/models/climate.sql b/dbt/models/climate.sql deleted file mode 100644 index 87aff6e..0000000 --- a/dbt/models/climate.sql +++ /dev/null @@ -1,18 +0,0 @@ -with - energy_data as ( - select year, sum(solar_electricity) as solar_electricity - from {{ source("main", "owid_energy_data") }} - where iso_code is not null and solar_electricity is not null and year >= 2014 - group by year - ), - co2_global_trend as ( - select year, avg(trend) as co2_trend - from {{ source("main", "co2_global_trend") }} - group by year - ) -select - energy_data.year, - energy_data.solar_electricity, - co2_global_trend.co2_trend as co2_trend -from energy_data -left join co2_global_trend on energy_data.year = co2_global_trend.year diff --git a/dbt/models/country_year_indicators.sql b/dbt/models/country_year_indicators.sql new file mode 100644 index 0000000..5969ee7 --- /dev/null +++ b/dbt/models/country_year_indicators.sql @@ -0,0 +1,222 @@ +with + owid_energy_data as ( + select + country, + year, + iso_code, + population, + gdp, + biofuel_cons_change_pct, + biofuel_cons_change_twh, + biofuel_cons_per_capita, + biofuel_consumption, + biofuel_elec_per_capita, + biofuel_electricity, + biofuel_share_elec, + biofuel_share_energy, + carbon_intensity_elec, + coal_cons_change_pct, + coal_cons_change_twh, + coal_cons_per_capita, + coal_consumption, + coal_elec_per_capita, + coal_electricity, + coal_prod_change_pct, + coal_prod_change_twh, + coal_prod_per_capita, + coal_production, + coal_share_elec, + coal_share_energy, + electricity_demand, + electricity_generation, + electricity_share_energy, + energy_cons_change_pct, + energy_cons_change_twh, + energy_per_capita, + energy_per_gdp, + fossil_cons_change_pct, + fossil_cons_change_twh, + fossil_elec_per_capita, + fossil_electricity, + fossil_energy_per_capita, + fossil_fuel_consumption, + fossil_share_elec, + fossil_share_energy, + gas_cons_change_pct, + gas_cons_change_twh, + gas_consumption, + gas_elec_per_capita, + gas_electricity, + gas_energy_per_capita, + gas_prod_change_pct, + gas_prod_change_twh, + gas_prod_per_capita, + gas_production, + gas_share_elec, + gas_share_energy, + greenhouse_gas_emissions, + hydro_cons_change_pct, + hydro_cons_change_twh, + hydro_consumption, + hydro_elec_per_capita, + hydro_electricity, + hydro_energy_per_capita, + hydro_share_elec, + hydro_share_energy, + low_carbon_cons_change_pct, + low_carbon_cons_change_twh, + low_carbon_consumption, + low_carbon_elec_per_capita, + low_carbon_electricity, + low_carbon_energy_per_capita, + low_carbon_share_elec, + low_carbon_share_energy, + net_elec_imports, + net_elec_imports_share_demand, + nuclear_cons_change_pct, + nuclear_cons_change_twh, + nuclear_consumption, + nuclear_elec_per_capita, + nuclear_electricity, + nuclear_energy_per_capita, + nuclear_share_elec, + nuclear_share_energy, + oil_cons_change_pct, + oil_cons_change_twh, + oil_consumption, + oil_elec_per_capita, + oil_electricity, + oil_energy_per_capita, + oil_prod_change_pct, + oil_prod_change_twh, + oil_prod_per_capita, + oil_production, + oil_share_elec, + oil_share_energy, + other_renewable_consumption, + other_renewable_electricity, + other_renewable_exc_biofuel_electricity, + other_renewables_cons_change_pct, + other_renewables_cons_change_twh, + other_renewables_elec_per_capita, + other_renewables_elec_per_capita_exc_biofuel, + other_renewables_energy_per_capita, + other_renewables_share_elec, + other_renewables_share_elec_exc_biofuel, + other_renewables_share_energy, + per_capita_electricity, + primary_energy_consumption, + renewables_cons_change_pct, + renewables_cons_change_twh, + renewables_consumption, + renewables_elec_per_capita, + renewables_electricity, + renewables_energy_per_capita, + renewables_share_elec, + renewables_share_energy, + solar_cons_change_pct, + solar_cons_change_twh, + solar_consumption, + solar_elec_per_capita, + solar_electricity, + solar_energy_per_capita, + solar_share_elec, + solar_share_energy, + wind_cons_change_pct, + wind_cons_change_twh, + wind_consumption, + wind_elec_per_capita, + wind_electricity, + wind_energy_per_capita, + wind_share_elec, + wind_share_energy + from {{ source("main", "owid_energy_data") }} + ), + + owid_co2_data as ( + select + country, + year, + iso_code, + population, + gdp, + cement_co2, + cement_co2_per_capita, + co2, + co2_growth_abs, + co2_growth_prct, + co2_including_luc, + co2_including_luc_growth_abs, + co2_including_luc_growth_prct, + co2_including_luc_per_capita, + co2_including_luc_per_gdp, + co2_including_luc_per_unit_energy, + co2_per_capita, + co2_per_gdp, + co2_per_unit_energy, + coal_co2, + coal_co2_per_capita, + consumption_co2, + consumption_co2_per_capita, + consumption_co2_per_gdp, + cumulative_cement_co2, + cumulative_co2, + cumulative_co2_including_luc, + cumulative_coal_co2, + cumulative_flaring_co2, + cumulative_gas_co2, + cumulative_luc_co2, + cumulative_oil_co2, + cumulative_other_co2, + energy_per_capita, + energy_per_gdp, + flaring_co2, + flaring_co2_per_capita, + gas_co2, + gas_co2_per_capita, + ghg_excluding_lucf_per_capita, + ghg_per_capita, + land_use_change_co2, + land_use_change_co2_per_capita, + methane, + methane_per_capita, + nitrous_oxide, + nitrous_oxide_per_capita, + oil_co2, + oil_co2_per_capita, + other_co2_per_capita, + other_industry_co2, + primary_energy_consumption, + share_global_cement_co2, + share_global_co2, + share_global_co2_including_luc, + share_global_coal_co2, + share_global_cumulative_cement_co2, + share_global_cumulative_co2, + share_global_cumulative_co2_including_luc, + share_global_cumulative_coal_co2, + share_global_cumulative_flaring_co2, + share_global_cumulative_gas_co2, + share_global_cumulative_luc_co2, + share_global_cumulative_oil_co2, + share_global_cumulative_other_co2, + share_global_flaring_co2, + share_global_gas_co2, + share_global_luc_co2, + share_global_oil_co2, + share_global_other_co2, + share_of_temperature_change_from_ghg, + temperature_change_from_ch4, + temperature_change_from_co2, + temperature_change_from_ghg, + temperature_change_from_n2o, + total_ghg, + total_ghg_excluding_lucf, + trade_co2, + trade_co2_share + from {{ source("main", "owid_co2_data") }} + ) + +select e.*, c.* +from owid_energy_data as e +join owid_co2_data as c on e.iso_code = c.iso_code and e.year = c.year diff --git a/dbt/models/sources.yml b/dbt/models/sources.yml index 802962b..d0dfabb 100644 --- a/dbt/models/sources.yml +++ b/dbt/models/sources.yml @@ -3,10 +3,6 @@ version: 2 sources: - name: main tables: - - name: threatened_animal_species - meta: - dagster: - asset_key: ["threatened_animal_species"] - name: owid_co2_data meta: dagster: @@ -15,7 +11,3 @@ sources: meta: dagster: asset_key: ["owid_energy_data"] - - name: co2_global_trend - meta: - dagster: - asset_key: ["co2_global_trend"]