Skip to content

Commit

Permalink
Lint and format
Browse files Browse the repository at this point in the history
  • Loading branch information
evamaxfield committed Feb 23, 2024
1 parent bf47527 commit dbacea6
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 24 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ repos:
- id: mypy
additional_dependencies:
- "types-requests"
- "types-python-dateutil"

# Notebooks
# - repo: https://github.com/nbQA-dev/nbQA
Expand Down
15 changes: 9 additions & 6 deletions award_pynder/sources/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Data sources module for the award_pynder package."""

from abc import ABC, abstractmethod
from datetime import datetime
from typing import TYPE_CHECKING, Literal

from dateutil.parser import parse as dateutil_parse
Expand All @@ -13,9 +12,10 @@

###############################################################################


class DatasetFields:
institution = "institution"
pi= "pi"
pi = "pi"
year = "year"
start = "start"
end = "end"
Expand All @@ -27,13 +27,16 @@ class DatasetFields:
query = "query"
source = "source"


ALL_DATASET_FIELDS = [
field_value for field_name, field_value in vars(DatasetFields).items()
field_value
for field_name, field_value in vars(DatasetFields).items()
if not field_name.startswith("_")
]

###############################################################################


class DataSource(ABC):
"""Abstract base class for data sources."""

Expand All @@ -43,7 +46,7 @@ def _parse_datetime(dt: str | datetime) -> datetime:
return dateutil_parse(dt)

return dt

@staticmethod
def _format_date_for_pynder_standard(
dt: str,
Expand All @@ -53,7 +56,7 @@ def _format_date_for_pynder_standard(
return str(dateutil_parse(dt).year)

return dateutil_parse(dt).date().isoformat()

@staticmethod
@abstractmethod
def _format_dataframe(data: "pd.DataFrame") -> "pd.DataFrame":
Expand All @@ -64,4 +67,4 @@ def _format_dataframe(data: "pd.DataFrame") -> "pd.DataFrame":
@abstractmethod
def get_data() -> "pd.DataFrame":
"""Get data from the source."""
raise NotImplementedError()
raise NotImplementedError()
26 changes: 14 additions & 12 deletions award_pynder/sources/nsf.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/usr/bin/env python

from __future__ import annotations
from datetime import datetime

from .base import DataSource, DatasetFields, ALL_DATASET_FIELDS
import logging
from datetime import datetime

import requests
import pandas as pd
import requests
from tqdm import tqdm

import logging
from .base import ALL_DATASET_FIELDS, DatasetFields, DataSource

###############################################################################

Expand Down Expand Up @@ -46,6 +46,7 @@
###############################################################################
# LUTs


class NSFPrograms:
Biological_Sciences = "BIO"
Computer_and_Information_Science_and_Engineering = "CISE"
Expand All @@ -58,6 +59,7 @@ class NSFPrograms:
Social_Behavioral_and_Economic_Sciences = "SBE"
Technology_Innovation_and_Partnerships = "TIP"


CFDA_NUMBER_TO_NSF_PROGRAM_NAME_LUT = {
"47.041": NSFPrograms.Engineering,
"47.049": NSFPrograms.Mathematical_and_Physical_Sciences,
Expand All @@ -77,14 +79,15 @@ class NSFPrograms:

###############################################################################


class NSF(DataSource):
"""Data source for the National Science Foundation."""
"""Data source for the National Science Foundation."""

@staticmethod
def _format_datetime(dt: str | datetime) -> str:
"""Parse a datetime string or datetime and return it as an NSF-supported MM/DD/YYYY string."""
return DataSource._parse_datetime(dt).strftime("%m/%d/%Y")

@staticmethod
def _format_query(
query: str | None,
Expand Down Expand Up @@ -118,7 +121,7 @@ def _format_query(
api_str += f"&keyword={query}"

return api_str

@staticmethod
def _format_dataframe(
data: pd.DataFrame,
Expand Down Expand Up @@ -161,7 +164,7 @@ def _format_dataframe(

# Create new dataframe with only the columns we want
return data[ALL_DATASET_FIELDS]

@staticmethod
def _get_chunk(
query: str | None = None,
Expand Down Expand Up @@ -206,7 +209,7 @@ def _get_chunk(
f"Error while fetching NSF data: {e}; "
f"'raise_on_error' is False, ignoring..."
)

# Explicit default return so that every code path returns a value
return None

Expand All @@ -215,11 +218,10 @@ def get_data(
query: str | None = None,
from_datetime: str | datetime | None = None,
to_datetime: str | datetime | None = None,
cfda_number: str | int | None = None,
cfda_number: str | None = None,
project_outcomes_required: bool = False,
raise_on_error: bool = True,
tqdm_kwargs: dict | None = None,
**kwargs,
) -> pd.DataFrame:
"""Get data from the National Science Foundation."""
# Continuously get chunks of data
Expand Down Expand Up @@ -252,4 +254,4 @@ def get_data(
pd.concat(chunks, ignore_index=True)
.drop_duplicates(subset="id")
.reset_index(drop=True)
)
)
2 changes: 1 addition & 1 deletion award_pynder/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"""Tests package for award_pynder."""
"""Tests package for award_pynder."""
2 changes: 1 addition & 1 deletion award_pynder/tests/sources/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"""Sources sub-module for the tests package for award_pynder."""
"""Sources sub-module for the tests package for award_pynder."""
7 changes: 4 additions & 3 deletions award_pynder/tests/sources/test_nsf.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/usr/bin/env python

from ..utils import assert_dataset_basics
from award_pynder.sources.nsf import NSF, NSF_PROGRAM_TO_CFDA_NUMBER_LUT, NSFPrograms

from award_pynder.sources.nsf import NSF, NSFPrograms, NSF_PROGRAM_TO_CFDA_NUMBER_LUT
from ..utils import assert_dataset_basics

###############################################################################


def test_nsf() -> None:
# Get data
data = NSF.get_data(
Expand All @@ -16,4 +17,4 @@ def test_nsf() -> None:
)

# Run tests
assert_dataset_basics(data)
assert_dataset_basics(data)
4 changes: 3 additions & 1 deletion award_pynder/tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

from __future__ import annotations

from typing import TYPE_CHECKING

from award_pynder.sources.base import ALL_DATASET_FIELDS
Expand All @@ -10,6 +11,7 @@

###############################################################################


def assert_dataset_basics(df: pd.DataFrame) -> None:
# Assert that not only are all required fields present,
# but that no extraneous fields are present as well
Expand All @@ -22,4 +24,4 @@ def assert_dataset_basics(df: pd.DataFrame) -> None:
assert df.id.nunique() == len(df)

# Assert that there is at least some data
assert len(df) > 0
assert len(df) > 0

0 comments on commit dbacea6

Please sign in to comment.