SlurmGCP. Fetch GCS blobs once
mr0re1 committed Aug 16, 2024
1 parent 9f10861 commit 6e5efae
Showing 1 changed file with 17 additions and 29 deletions.
@@ -276,14 +276,11 @@ def blob_get(file):
     return storage_client().get_bucket(bucket_name).blob(blob_name)
 
 
-def blob_list(prefix="", delimiter=None):
+@lru_cache(maxsize=1)
+def blob_list() -> List[storage.Blob]:
     bucket_name, path = _get_bucket_and_common_prefix()
-    blob_prefix = f"{path}/{prefix}"
-    # Note: The call returns a response only when the iterator is consumed.
-    blobs = storage_client().list_blobs(
-        bucket_name, prefix=blob_prefix, delimiter=delimiter
-    )
-    return [blob for blob in blobs]
+    return list(storage_client().list_blobs(bucket_name, prefix=path))
 
 
 def hash_file(fullpath: Path) -> str:
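
The heart of the change is in this first hunk: blob_list no longer takes a prefix, lists everything under the common prefix once, and memoizes the result with lru_cache so every later caller reuses the same GCS listing. A minimal sketch of the same pattern, assuming the standard google-cloud-storage client and a made-up bucket and prefix (the real code resolves both through _get_bucket_and_common_prefix and storage_client):

from functools import lru_cache
from typing import List

from google.cloud import storage  # requires the google-cloud-storage package


@lru_cache(maxsize=1)
def list_blobs_once(bucket_name: str, prefix: str) -> List[storage.Blob]:
    """List blobs under a prefix and cache the result.

    The GCS iterator only issues API requests when consumed, so consuming it
    into a list inside an lru_cache'd function means repeat callers get the
    cached objects instead of triggering another listing.
    """
    client = storage.Client()
    return list(client.list_blobs(bucket_name, prefix=prefix))


# Hypothetical usage: the second call is a cache hit, no extra GCS request.
# blobs = list_blobs_once("my-cluster-bucket", "my-cluster")
# blobs = list_blobs_once("my-cluster-bucket", "my-cluster")
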
@@ -301,30 +298,21 @@ def install_custom_scripts(check_hash=False):
 
     compute_tokens = ["compute", "prolog", "epilog"]
     if lookup().instance_role == "compute":
-        try:
-            compute_tokens.append(f"nodeset-{lookup().node_nodeset_name()}")
-        except Exception as e:
-            log.error(f"Failed to lookup nodeset: {e}")
-
-    prefix_tokens = dict.get(
-        {
-            "login": ["login"],
-            "compute": compute_tokens,
-            "controller": ["controller", "prolog", "epilog"],
-        },
-        lookup().instance_role,
-        [],
-    )
-    prefixes = [f"slurm-{tok}-script" for tok in prefix_tokens]
-    blobs = list(chain.from_iterable(blob_list(prefix=p) for p in prefixes))
-
-    script_pattern = re.compile(r"slurm-(?P<path>\S+)-script-(?P<name>\S+)")
-    for blob in blobs:
+        compute_tokens.append(f"nodeset-{lookup().node_nodeset_name()}")
+
+    tokens = {
+        "login": ["login"],
+        "compute": compute_tokens,
+        "controller": ["controller", "prolog", "epilog"],
+    }.get(lookup().instance_role, [])
+
+    script_pattern = re.compile(r"slurm-(?P<token>\S+)-script-(?P<name>\S+)")
+    for blob in blob_list():
         m = script_pattern.match(Path(blob.name).name)
-        if not m:
-            log.warning(f"found blob that doesn't match expected pattern: {blob.name}")
+        if not m or m["token"] not in tokens:
             continue
-        path_parts = m["path"].split("-")
+        # TODO: Don't use ".d", make it simpler
+        path_parts = m["token"].split("-")
         path_parts[0] += ".d"
         stem, _, ext = m["name"].rpartition("_")
         filename = ".".join((stem, ext))
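
Because there is now a single unfiltered listing, the role filtering that previously happened through per-prefix GCS queries moves client-side: the regex pulls a token out of each blob's basename, and any blob whose token is not in the role's set is skipped. A standalone sketch of that matching logic, using invented blob names and a compute node's token set purely for illustration:

import re
from pathlib import Path

script_pattern = re.compile(r"slurm-(?P<token>\S+)-script-(?P<name>\S+)")

# Hypothetical blob names as they might appear under the cluster prefix.
blob_names = [
    "my-cluster/slurm-controller-script-startup_sh",
    "my-cluster/slurm-nodeset-n1-script-custom_sh",
    "my-cluster/slurm-login-script-motd_sh",
    "my-cluster/config.yaml",  # no match: silently skipped
]

# Example token set for a compute node in a hypothetical nodeset "n1".
tokens = ["compute", "prolog", "epilog", "nodeset-n1"]

for name in blob_names:
    m = script_pattern.match(Path(name).name)
    if not m or m["token"] not in tokens:
        continue
    # "nodeset-n1" -> ["nodeset.d", "n1"], mirroring the ".d" directory layout.
    path_parts = m["token"].split("-")
    path_parts[0] += ".d"
    stem, _, ext = m["name"].rpartition("_")
    print(Path(*path_parts) / f"{stem}.{ext}")  # nodeset.d/n1/custom.sh
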
@@ -439,7 +427,7 @@ def _list_config_blobs() -> Tuple[Any, str]:
         "nodeset": [],
     }
     hash = hashlib.md5()
-    blobs = list(blob_list(prefix=""))
+    blobs = list(blob_list())
     # sort blobs so hash is consistent
     for blob in sorted(blobs, key=lambda b: b.name):
         if blob.name == f"{common_prefix}/config.yaml":
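
_list_config_blobs feeds the same cached listing into an MD5 digest, and the existing comment explains the sort: hashing in name order keeps the digest independent of whatever order the listing arrives in. A toy illustration of that property, hashing plain strings instead of real blob objects:

import hashlib


def listing_digest(names):
    """MD5 over names in sorted order, so the result is order-independent."""
    h = hashlib.md5()
    for name in sorted(names):
        h.update(name.encode())
    return h.hexdigest()


a = ["prefix/config.yaml", "prefix/nodeset_a.yaml", "prefix/nodeset_b.yaml"]
b = list(reversed(a))
assert listing_digest(a) == listing_digest(b)  # same contents, same digest
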