Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ListObjectsV2 support #1380

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci.s3cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ limit = -1
limitrate = 0
list_md5 = False
list_allow_unordered = False
enable_list_objects_v2 = False
log_target_prefix =
long_listing = False
max_delete = -1
Expand Down
5 changes: 5 additions & 0 deletions S3/Config.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ class Config(object):
# This may be faster when listing very large buckets.
list_allow_unordered = False
# Maximum attempts of re-issuing failed requests

# See https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
enable_list_objects_v2 = False


max_retries = 5

## Creating a singleton
Expand Down
12 changes: 9 additions & 3 deletions S3/FileLists.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,9 +438,15 @@ def _get_filelist_remote(remote_uri, recursive = True):

total_size = 0

s3 = S3(Config())
response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(),
recursive = recursive, uri_params = uri_params)
cfg = Config()
s3 = S3(cfg)
response = s3.bucket_list(
remote_uri.bucket(),
prefix=remote_uri.object(),
recursive=recursive,
uri_params=uri_params,
list_objects_v2=cfg.enable_list_objects_v2
)

rem_base_original = rem_base = remote_uri.object()
remote_uri_original = remote_uri
Expand Down
141 changes: 134 additions & 7 deletions S3/S3.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,19 +317,148 @@ def list_all_buckets(self):
response["list"] = getListFromXml(response["data"], "Bucket")
return response

def bucket_list(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    limit=-1,
    list_objects_v2=False
):
    """Return the complete listing of a bucket as a single response dict.

    Every page produced by the selected streaming generator is collected
    into one result.

    bucket          -- name of the bucket to list
    prefix          -- forwarded unchanged to the streaming generator
    recursive       -- forwarded unchanged to the streaming generator
    uri_params      -- optional dict of extra request parameters; it is
                       copied, so the caller's dict is never modified
    limit           -- forwarded unchanged to the streaming generator
    list_objects_v2 -- when true (or when uri_params carries
                       "list_type" == "v2") the listing is driven by
                       bucket_list_v2_streaming() instead of
                       bucket_list_streaming()

    Returns a dict with the keys 'list' (all objects), 'common_prefixes'
    (all prefixes) and 'truncated' (the flag of the last page yielded).
    """
    # Work on a private copy: the "list_type" marker added below must not
    # leak into a dict owned (and possibly reused) by the caller.
    uri_params = dict(uri_params) if uri_params else {}
    if uri_params.get("list_type") == "v2":
        list_objects_v2 = True

    if list_objects_v2:
        uri_params["list_type"] = "v2"
        stream = self.bucket_list_v2_streaming
    else:
        stream = self.bucket_list_streaming

    item_list = []
    prefixes = []
    # Defined up front so the response can still be built should the
    # generator produce no page at all.
    truncated = False
    for truncated, dirs, objects in stream(
        bucket,
        prefix,
        recursive,
        uri_params,
        limit
    ):
        item_list.extend(objects)
        prefixes.extend(dirs)

    response = {}
    response['list'] = item_list
    response['common_prefixes'] = prefixes
    response['truncated'] = truncated
    return response

def bucket_list_v2_streaming(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    limit=-1,
):
    """ Generator that produces <truncated>, <dir_list>, <object_list> groups of
    content of a specified bucket, paging with continuation tokens.

    bucket     -- name of the bucket to list
    prefix     -- forwarded to bucket_list_v2_noparse()
    recursive  -- forwarded to bucket_list_v2_noparse()
    uri_params -- optional dict of extra request parameters; copied, so the
                  caller's dict is never modified
    limit      -- maximum number of objects plus prefixes to fetch,
                  -1 for no limit

    One tuple is yielded per page fetched. Iteration stops when the
    response is no longer truncated, when the limit has been reached, or
    when the listing cannot be resumed.
    """
    def _list_truncated(data):
        # <IsTruncated> can either be "true" or "false" or be missing completely
        is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
        return is_truncated.lower() != "false"

    def _get_contents(data):
        return getListFromXml(data, "Contents")

    def _get_common_prefixes(data):
        return getListFromXml(data, "CommonPrefixes")

    def _get_next_continuation_token(data):
        return getTextFromXml(data, "NextContinuationToken")

    uri_params = dict(uri_params) if uri_params else {}
    truncated = True

    num_objects = 0
    num_prefixes = 0
    max_keys = limit
    next_continuation_token = None
    while truncated:
        # A falsy token is ignored by bucket_list_v2_noparse(), so the
        # first request and the follow-up requests share one call site.
        response = self.bucket_list_v2_noparse(
            bucket,
            prefix,
            recursive,
            uri_params,
            max_keys,
            next_continuation_token
        )
        data = response["data"]
        current_list = _get_contents(data)
        current_prefixes = _get_common_prefixes(data)
        num_objects += len(current_list)
        num_prefixes += len(current_prefixes)
        if limit > num_objects + num_prefixes:
            # Only ask for what is still missing to reach the limit.
            max_keys = limit - (num_objects + num_prefixes)
        truncated = _list_truncated(data)
        if truncated:
            if limit == -1 or num_objects + num_prefixes < limit:
                if current_list or current_prefixes:
                    next_continuation_token = _get_next_continuation_token(data)
                    if not next_continuation_token:
                        # Truncated but no token to resume from: stop here
                        # instead of requesting the same page forever. The
                        # page is still reported as truncated.
                        yield truncated, current_prefixes, current_list
                        break
                else:
                    # Unexpectedly, the server lied, and so the previous
                    # response was not truncated. So, no new key to get.
                    yield False, current_prefixes, current_list
                    break
            else:
                # Limit reached while the server still has more entries.
                yield truncated, current_prefixes, current_list
                break

        yield truncated, current_prefixes, current_list

def bucket_list_v2_noparse(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    max_keys=-1,
    continuation_token=None
):
    """Send one ListObjectsV2 request and return the unparsed response.

    bucket             -- name of the bucket to list
    prefix             -- sent as 'prefix' when non-empty
    recursive          -- unless this or self.config.recursive is set,
                          a '/' delimiter is requested
    uri_params         -- optional dict of extra request parameters; copied,
                          so neither the caller's dict nor a shared default
                          is ever modified
    max_keys           -- sent as 'max-keys' unless it is -1
    continuation_token -- sent as 'continuation-token' when non-empty

    Returns whatever self.send_request() returns for the request.
    """
    uri_params = dict(uri_params) if uri_params else {}
    # "list_type" is only the marker bucket_list() uses to pick the V2 code
    # path; the parameter S3 itself understands is "list-type=2".
    uri_params.pop("list_type", None)
    uri_params['list-type'] = "2"
    if prefix:
        uri_params['prefix'] = prefix
    if not self.config.recursive and not recursive:
        uri_params['delimiter'] = "/"
    if max_keys != -1:
        uri_params['max-keys'] = str(max_keys)
    if self.config.list_allow_unordered:
        uri_params['allow-unordered'] = "true"
    if continuation_token:
        uri_params["continuation-token"] = continuation_token
    request = self.create_request(
        "BUCKET_LIST",
        bucket=bucket,
        uri_params=uri_params
    )
    response = self.send_request(request)
    return response

def bucket_list_streaming(self, bucket, prefix = None, recursive = None, uri_params = None, limit = -1):
""" Generator that produces <dir_list>, <object_list> pairs of groups of content of a specified bucket. """
def _list_truncated(data):
Expand Down Expand Up @@ -383,9 +512,7 @@ def _get_next_marker(data, current_elts, key):

yield truncated, current_prefixes, current_list

def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = None, max_keys = -1):
if uri_params is None:
uri_params = {}
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}, max_keys = -1):
if prefix:
uri_params['prefix'] = prefix
if not self.config.recursive and not recursive:
Expand Down
9 changes: 8 additions & 1 deletion s3cmd
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,12 @@ def subcmd_bucket_list(s3, uri, limit):
if prefix.endswith('*'):
prefix = prefix[:-1]
try:
response = s3.bucket_list(bucket, prefix = prefix, limit = limit)
response = s3.bucket_list(
bucket,
prefix=prefix,
limit=limit,
list_objects_v2=cfg.enable_list_objects_v2
)
except S3Error as e:
if e.info["Code"] in S3.codes:
error(S3.codes[e.info["Code"]] % bucket)
Expand Down Expand Up @@ -3234,6 +3239,8 @@ def main():

optparser.add_option( "--list-allow-unordered", dest="list_allow_unordered", action="store_true", help="Not an AWS standard. Allow the listing results to be returned in unsorted order. This may be faster when listing very large buckets.")

optparser.add_option( "--enable_list_objects_v2", dest="enable_list_objects_v2", action="store_true", help="Switches list API to ListObjectsV2")

optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")

optparser.add_option( "--ws-index", dest="website_index", action="store", help="Name of index-document (only for [ws-create] command)")
Expand Down
Loading