Skip to content

Commit

Permalink
Merge pull request #70 from galaxyproject/hunter/67-update-genomes-list
Browse files Browse the repository at this point in the history
feat: update organisms list (#67)
  • Loading branch information
NoopDog committed Sep 18, 2024
2 parents 220a816 + a5d12ff commit 04c7f94
Show file tree
Hide file tree
Showing 3 changed files with 1,068 additions and 5 deletions.
7 changes: 7 additions & 0 deletions files/build-genomes-files.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ def get_duplicate_ids(genomes_df):
counts = genomes_df["Genome Version/Assembly ID"].value_counts()
return list(counts.index.to_series().loc[counts > 1])

def get_num_unmatched_assemblies(assemblies_df, result_df):
    """Count the distinct assembly IDs that did not make it into the result.

    Both arguments are indexed with the ``"asmId"`` key and the values found
    there are collected into sets, so duplicate IDs are counted only once.

    Args:
        assemblies_df: Table whose ``"asmId"`` column lists the source
            assembly IDs.
        result_df: Table whose ``"asmId"`` column lists the assembly IDs
            present in the result.

    Returns:
        The number of distinct ``"asmId"`` values found in ``assemblies_df``
        but not in ``result_df``.
    """
    source_ids = set(assemblies_df["asmId"])
    matched_ids = set(result_df["asmId"])
    unmatched_ids = source_ids.difference(matched_ids)
    return len(unmatched_ids)

def build_genomes_files():
print("Building files")

Expand All @@ -27,6 +30,10 @@ def build_genomes_files():

result_df = gen_bank_merge_df.combine_first(ref_seq_merge_df).dropna(subset=["ucscBrowser"])

num_unmatched_assemblies = get_num_unmatched_assemblies(assemblies_df, result_df)
if (num_unmatched_assemblies != 0):
print(f"{num_unmatched_assemblies} assemblies had no matches and are omitted")

result_df.to_csv(OUTPUT_PATH, index=False, sep="\t")

print(f"Wrote to {OUTPUT_PATH}")
Expand Down
Loading

0 comments on commit 04c7f94

Please sign in to comment.