Skip to content

Commit

Permalink
cleanup format_subject
Browse files Browse the repository at this point in the history
  • Loading branch information
aidencullo committed Feb 1, 2024
1 parent 7ac48f9 commit c256acb
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
21 changes: 10 additions & 11 deletions src/sanitize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,22 @@ def format_email(raw_email):

def format_date(date_str):
    """Parse a date string with ``parser.parse`` and return the result.

    Args:
        date_str: Date text to parse; accepted formats are whatever
            ``parser.parse`` supports.

    Returns:
        The object produced by ``parser.parse(date_str)``.
        NOTE(review): ``parser`` is imported outside this view and is
        presumably ``dateutil.parser``, which would make this a
        ``datetime`` -- confirm against the module imports.

    Raises:
        Any exception raised by ``parser.parse`` for unparseable input.
    """
    # Parse exactly once: a second call whose result is thrown away only
    # doubles the work without changing the outcome.
    return parser.parse(date_str)


def format_subject(subject_str):
    """Decode an RFC 2047 encoded email subject into plain text.

    Every fragment returned by ``decode_header`` is decoded and the pieces
    are concatenated, so subjects that mix raw text with encoded words (or
    use encoded words in several charsets) come back in full instead of
    being cut off after the first fragment.

    Args:
        subject_str: Raw subject header value, possibly containing
            ``=?charset?encoding?text?=`` encoded words.

    Returns:
        str: The decoded subject. Input without encoded words is returned
        unchanged.
    """
    fragments = []
    for data, charset in decode_header(subject_str):
        if isinstance(data, bytes):
            if charset is None and data.isspace():
                # Whitespace-only raw text next to an encoded word is
                # header padding; dropping it keeps a lone encoded word
                # followed by a space decoding to the word itself.
                continue
            data = _decode_fragment(data, charset)
        # decode_header hands back plain ``str`` when the header has no
        # encoded words at all; that needs no decoding.
        fragments.append(data)
    # Raw fragments keep their own separating whitespace, so the pieces are
    # concatenated without inserting extra spaces.
    return ''.join(fragments)


def _decode_fragment(data, charset):
    """Decode one header fragment, preferring its declared charset."""
    if charset is not None:
        try:
            return data.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or mislabelled charset: fall back to the best-effort
            # decoding used for fragments without a charset.
            pass
    return decode_bytes(data)


def decode_bytes(subject):
    """Turn raw subject bytes into text.

    UTF-8 is tried first; bytes that are not valid UTF-8 are decoded with
    the ``unicode_escape`` codec instead.

    Args:
        subject: The ``bytes`` to decode.

    Returns:
        str: The decoded text.

    Raises:
        UnicodeDecodeError: If the ``unicode_escape`` fallback also fails.
    """
    try:
        text = subject.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: use the escape-based codec as a second attempt.
        text = subject.decode('unicode_escape')
    return text


def format_uid(uid_str):
    """Convert a message UID to an integer.

    Args:
        uid_str: The UID in any form accepted by ``int()``.

    Returns:
        int: The numeric UID.

    Raises:
        ValueError: If ``uid_str`` is not a valid integer literal.
    """
    uid = int(uid_str)
    return uid
5 changes: 3 additions & 2 deletions tests/sanitize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class TestSanitize:
('Time for your appointment!','Time for your appointment!'),
('Added today: new Permanent \'Software Developer\' Jobs | Rochester Jobs & Vacancies...','Added today: new Permanent \'Software Developer\' Jobs | Rochester Jobs & Vacancies...'),
('[GitHub] Your Dependabot alerts for the week of Jan 23 - Jan 30','[GitHub] Your Dependabot alerts for the week of Jan 23 - Jan 30'),
('=?utf-8?B?SGVsbG8sIGdvbGRlbiBnb29kbmVzcyDinKg=?= ','Hello, golden goodness ✨ '),
('=?utf-8?B?SW50ZWxsaVNlYXJjaOKEoiBBbGVydCBmb3VuZCAxMzUgbmV3IGpv?==?utf-8?B?YnMsIGJhc2VkIG9uIHlvdXIgcHJvZmlsZSA=?=','IntelliSearch™ Alert found 135 new jobs, based on your profile '),
('=?utf-8?B?SGVsbG8sIGdvbGRlbiBnb29kbmVzcyDinKg=?= ','Hello, golden goodness ✨'),
('=?utf-8?B?SW50ZWxsaVNlYXJjaOKEoiBBbGVydCBmb3VuZCAxMzUgbmV3IGpv?==?utf-8?B?YnMsIGJhc2VkIG9uIHlvdXIgcHJvZmlsZSA=?=','IntelliSearch™ \
Alert found 135 new jobs, based on your profile '),
('Instantly get 3 free months by referring a friend','Instantly get 3 free months by referring a friend'),
('=?utf-8?Q?=C3=9ALTIMA=20PREVENTA=20PARA=20CAMDEN?=','ÚLTIMA PREVENTA PARA CAMDEN'),
('=?UTF-8?q?4-Unit_Vacation_Rental_in_New_York_=F0=9F=8C=87_26%_Initial_Ren?==?UTF-8?q?tal_Yield_|_Managed_by_Airbnb_Superhost?=','4-Unit Vacation Rental in New York 🌇 26% Initial Rental Yield | Managed by Airbnb Superhost'),
Expand Down

0 comments on commit c256acb

Please sign in to comment.