diff --git a/.ci.s3cfg b/.ci.s3cfg
index af99e1b6..6c22938b 100644
--- a/.ci.s3cfg
+++ b/.ci.s3cfg
@@ -40,6 +40,7 @@ limit = -1
 limitrate = 0
 list_md5 = False
 list_allow_unordered = False
+enable_list_objects_v2 = False
 log_target_prefix = 
 long_listing = False
 max_delete = -1
diff --git a/S3/Config.py b/S3/Config.py
index 6568a16e..b6ae17c9 100644
--- a/S3/Config.py
+++ b/S3/Config.py
@@ -255,6 +255,9 @@ class Config(object):
     # This may be faster when listing very large buckets.
     list_allow_unordered = False
+    # Use the ListObjectsV2 API ("list-type=2") for bucket listings.
+    # See https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
+    enable_list_objects_v2 = False
     # Maximum attempts of re-issuing failed requests
     max_retries = 5
 
     ## Creating a singleton
diff --git a/S3/FileLists.py b/S3/FileLists.py
index 40d49c58..246131a8 100644
--- a/S3/FileLists.py
+++ b/S3/FileLists.py
@@ -438,9 +438,13 @@ def _get_filelist_remote(remote_uri, recursive = True):
 
     total_size = 0
 
-    s3 = S3(Config())
-    response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(),
-        recursive = recursive, uri_params = uri_params)
+    cfg = Config()
+    s3 = S3(cfg)
+    response = s3.bucket_list(remote_uri.bucket(),
+                              prefix = remote_uri.object(),
+                              recursive = recursive,
+                              uri_params = uri_params,
+                              list_objects_v2 = cfg.enable_list_objects_v2)
 
     rem_base_original = rem_base = remote_uri.object()
     remote_uri_original = remote_uri
diff --git a/S3/S3.py b/S3/S3.py
index d4cac8f9..c328dd23 100644
--- a/S3/S3.py
+++ b/S3/S3.py
@@ -317,12 +317,32 @@ def list_all_buckets(self):
         response["list"] = getListFromXml(response["data"], "Bucket")
         return response
 
-    def bucket_list(self, bucket, prefix = None, recursive = None, uri_params = None, limit = -1):
+    def bucket_list(self, bucket, prefix = None, recursive = None, uri_params = None,
+                    limit = -1, list_objects_v2 = False):
+        """
+        List a bucket's content.
+
+        Returns a dict with 'list' (objects), 'common_prefixes' (dirs) and
+        'truncated'.  When list_objects_v2 is True the listing is performed
+        with the ListObjectsV2 API instead of the V1 marker-based listing.
+        """
+        # A caller may also request V2 by pre-setting the API parameter.
+        if uri_params and uri_params.get("list-type") == "2":
+            list_objects_v2 = True
+        # V1 and V2 paginate differently (marker vs continuation-token),
+        # so each API version has its own streaming generator.
+        if list_objects_v2:
+            streaming = self.bucket_list_v2_streaming
+        else:
+            streaming = self.bucket_list_streaming
         item_list = []
         prefixes = []
-        for truncated, dirs, objects in self.bucket_list_streaming(bucket, prefix, recursive, uri_params, limit):
+        for truncated, dirs, objects in streaming(bucket, prefix, recursive,
+                                                  uri_params, limit):
             item_list.extend(objects)
             prefixes.extend(dirs)
 
         response = {}
         response['list'] = item_list
@@ -330,6 +350,94 @@ def bucket_list(self, bucket, prefix = None, recursive = None, uri_params = None
         response['truncated'] = truncated
         return response
 
+    def bucket_list_v2_streaming(self, bucket, prefix = None, recursive = None,
+                                 uri_params = None, limit = -1):
+        """
+        Generator that produces <truncated>, <dirs>, <objects> triples of the
+        content of a specified bucket using the ListObjectsV2 API.
+        Pagination is driven by NextContinuationToken, not the V1 NextMarker.
+        """
+        def _list_truncated(data):
+            # can either be "true" or "false" or be missing completely
+            is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
+            return is_truncated.lower() != "false"
+
+        def _get_contents(data):
+            return getListFromXml(data, "Contents")
+
+        def _get_common_prefixes(data):
+            return getListFromXml(data, "CommonPrefixes")
+
+        def _get_next_continuation_token(data):
+            return getTextFromXml(data, "NextContinuationToken")
+
+        # Copy so the caller's dict is never mutated (and no mutable default).
+        uri_params = uri_params.copy() if uri_params else {}
+        truncated = True
+        num_objects = 0
+        num_prefixes = 0
+        max_keys = limit
+        next_continuation_token = None
+        while truncated:
+            response = self.bucket_list_v2_noparse(bucket, prefix, recursive,
+                                                   uri_params, max_keys,
+                                                   next_continuation_token)
+            current_list = _get_contents(response["data"])
+            current_prefixes = _get_common_prefixes(response["data"])
+            num_objects += len(current_list)
+            num_prefixes += len(current_prefixes)
+            if limit > num_objects + num_prefixes:
+                # Only ask for the number of keys still missing.
+                max_keys = limit - (num_objects + num_prefixes)
+            truncated = _list_truncated(response["data"])
+            if truncated:
+                if limit == -1 or num_objects + num_prefixes < limit:
+                    if current_list or current_prefixes:
+                        next_continuation_token = \
+                            _get_next_continuation_token(response["data"])
+                    else:
+                        # Unexpectedly, the server lied, and so the previous
+                        # response was not truncated. So, no new key to get.
+                        yield False, current_prefixes, current_list
+                        break
+                else:
+                    yield truncated, current_prefixes, current_list
+                    break
+
+            yield truncated, current_prefixes, current_list
+
+    def bucket_list_v2_noparse(self, bucket, prefix = None, recursive = None,
+                               uri_params = None, max_keys = -1,
+                               continuation_token = None):
+        """
+        Issue a single ListObjectsV2 request and return the raw response.
+        """
+        if uri_params is None:
+            uri_params = {}
+        # "list-type=2" is what actually selects the ListObjectsV2 API.
+        uri_params['list-type'] = "2"
+        if prefix:
+            uri_params['prefix'] = prefix
+        if not self.config.recursive and not recursive:
+            uri_params['delimiter'] = "/"
+        if max_keys != -1:
+            uri_params['max-keys'] = str(max_keys)
+        if self.config.list_allow_unordered:
+            uri_params['allow-unordered'] = "true"
+        if continuation_token:
+            uri_params['continuation-token'] = continuation_token
+        request = self.create_request("BUCKET_LIST", bucket = bucket,
+                                      uri_params = uri_params)
+        response = self.send_request(request)
+        return response
+
     def bucket_list_streaming(self, bucket, prefix = None, recursive = None, uri_params = None, limit = -1):
         """
         Generator that produces <truncated>, <dirs>, <objects> pairs of groups of content of a specified bucket.
         """
         def _list_truncated(data):
diff --git a/s3cmd b/s3cmd
index 65472d56..aa3576a5 100755
--- a/s3cmd
+++ b/s3cmd
@@ -202,7 +202,8 @@ def subcmd_bucket_list(s3, uri, limit):
     if prefix.endswith('*'):
         prefix = prefix[:-1]
     try:
-        response = s3.bucket_list(bucket, prefix = prefix, limit = limit)
+        response = s3.bucket_list(bucket, prefix = prefix, limit = limit,
+                                  list_objects_v2 = cfg.enable_list_objects_v2)
     except S3Error as e:
         if e.info["Code"] in S3.codes:
             error(S3.codes[e.info["Code"]] % bucket)
@@ -3234,6 +3235,7 @@ def main():
     optparser.add_option(   "--list-allow-unordered", dest="list_allow_unordered", action="store_true", help="Not an AWS standard. Allow the listing results to be returned in unsorted order. This may be faster when listing very large buckets.")
+    optparser.add_option(   "--enable-list-objects-v2", dest="enable_list_objects_v2", action="store_true", help="Use the ListObjectsV2 API when listing buckets")
     optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")
 
     optparser.add_option(   "--ws-index", dest="website_index", action="store", help="Name of index-document (only for [ws-create] command)")