From 32fdf11de5a0fa09cf88accac60ead2b3ac72088 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 3 Aug 2022 23:47:55 -0700 Subject: [PATCH 1/5] Shared namespaces --- dill/_dill.py | 22 ++++++++++++++++------ dill/tests/_globals_dummy.py | 10 ++++++++++ dill/tests/test_functions.py | 20 ++++++++++++++++++++ 3 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 dill/tests/_globals_dummy.py diff --git a/dill/_dill.py b/dill/_dill.py index be0aeba2..b81f62a1 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -366,6 +366,7 @@ def __init__(self, file, *args, **kwds): self._recurse = settings['recurse'] if _recurse is None else _recurse self._postproc = OrderedDict() self._file = file + self._globals_cache = {} def dump(self, obj): #NOTE: if settings change, need to update attributes # register if the object is a numpy ufunc @@ -1792,17 +1793,14 @@ def save_function(pickler, obj): _postproc = getattr(pickler, '_postproc', None) _main_modified = getattr(pickler, '_main_modified', None) _original_main = getattr(pickler, '_original_main', __builtin__)#'None' + _globals_cache = getattr(pickler, '_globals_cache', None) postproc_list = [] + + globs = None if _recurse: # recurse to get all globals referred to by obj from .detect import globalvars globs_copy = globalvars(obj, recurse=True, builtin=True) - - # Add the name of the module to the globs dictionary to prevent - # the duplication of the dictionary. Pickle the unpopulated - # globals dictionary and set the remaining items after the function - # is created to correctly handle recursion. - globs = {'__name__': obj.__module__} else: globs_copy = obj.__globals__ @@ -1815,6 +1813,18 @@ def save_function(pickler, obj): elif globs_copy is not None and obj.__module__ is not None and \ getattr(_import_module(obj.__module__, True), '__dict__', None) is globs_copy: globs = globs_copy + + if globs is None: + # Add the name of the module to the globs dictionary and prevent + # the duplication of the dictionary. Pickle the unpopulated + # globals dictionary and set the remaining items after the function + # is created to correctly handle recursion. + if _globals_cache is not None and obj.__globals__ is not None: + if id(obj.__globals__) not in _globals_cache: + globs = {'__name__': obj.__module__} + _globals_cache[id(obj.__globals__)] = globs + else: + globs = _globals_cache[id(obj.__globals__)] else: globs = {'__name__': obj.__module__} diff --git a/dill/tests/_globals_dummy.py b/dill/tests/_globals_dummy.py new file mode 100644 index 00000000..128a1ddc --- /dev/null +++ b/dill/tests/_globals_dummy.py @@ -0,0 +1,10 @@ +# This file is used by test_shared_globals in test_functions.py + +x = 3 + +def h(): + print(x) + +def g(): + h() + diff --git a/dill/tests/test_functions.py b/dill/tests/test_functions.py index d8c73396..36a0e052 100644 --- a/dill/tests/test_functions.py +++ b/dill/tests/test_functions.py @@ -132,7 +132,27 @@ def test_code_object(): except Exception as error: raise Exception("failed to construct code object with format version {}".format(version)) from error +def test_shared_globals(): + import dill, _globals_dummy as f, sys + + for recurse in False, True: + g, h = dill.copy((f.g, f.h), recurse=recurse) + assert f.g.__globals__ is f.h.__globals__ + assert g.__globals__ is h.__globals__ + assert f.g.__globals__ is g.__globals__ + + del sys.modules['_globals_dummy'] + + g, h = dill.copy((f.g, f.h), recurse=recurse) + assert f.g.__globals__ is f.h.__globals__ + assert g.__globals__ is h.__globals__ + assert f.g.__globals__ is not g.__globals__ + + sys.modules['_globals_dummy'] = f + + if __name__ == '__main__': test_functions() test_issue_510() test_code_object() + test_shared_globals() From 7fe9bb55d48fde49e296d8d0f1aecc48a3ff1387 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 4 Aug 2022 19:53:27 -0700 Subject: [PATCH 2/5] Fix a minor bug --- dill/_dill.py | 42 ++++++++++++++++++++++++++++++------ dill/tests/_globals_dummy.py | 4 ++-- dill/tests/test_functions.py | 8 ++++++- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index b81f62a1..955bff1e 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -199,7 +199,7 @@ def get_file_type(*args, **kwargs): import dataclasses import typing -from pickle import GLOBAL +from pickle import GLOBAL, EMPTY_DICT, MARK, DICT, SETITEM ### Shims for different versions of Python and dill @@ -1184,12 +1184,13 @@ def _repr_dict(obj): @register(dict) def save_module_dict(pickler, obj): - if is_dill(pickler, child=False) and obj == pickler._main.__dict__ and \ + _is_dill = is_dill(pickler, child=False) + if _is_dill and obj == pickler._main.__dict__ and \ not (pickler._session and pickler._first_pass): logger.trace(pickler, "D1: %s", _repr_dict(obj)) # obj pickler.write(bytes('c__builtin__\n__main__\n', 'UTF-8')) logger.trace(pickler, "# D1") - elif (not is_dill(pickler, child=False)) and (obj == _main_module.__dict__): + elif (not _is_dill) and (obj == _main_module.__dict__): logger.trace(pickler, "D3: %s", _repr_dict(obj)) # obj pickler.write(bytes('c__main__\n__dict__\n', 'UTF-8')) #XXX: works in general? logger.trace(pickler, "# D3") @@ -1199,12 +1200,37 @@ def save_module_dict(pickler, obj): logger.trace(pickler, "D4: %s", _repr_dict(obj)) # obj pickler.write(bytes('c%s\n__dict__\n' % obj['__name__'], 'UTF-8')) logger.trace(pickler, "# D4") + elif _is_dill and id(obj) in pickler._globals_cache: + logger.trace(pickler, "D5: %s", _repr_dict(obj)) # obj + # This is a globals dictionary that was partially copied, but not fully saved. + # Save the dictionary again to ensure that everything is there. + globs_copy = pickler._globals_cache[id(obj)] + pickler.write(pickler.get(pickler.memo[id(globs_copy)][0])) + pickler._batch_setitems(iter(obj.items())) + del pickler._globals_cache[id(obj)] + pickler.memo[id(obj)] = (pickler.memo.pop(id(globs_copy))[0], obj) + logger.trace(pickler, "# D5") else: logger.trace(pickler, "D2: %s", _repr_dict(obj)) # obj - if is_dill(pickler, child=False) and pickler._session: + if _is_dill and pickler._session: # we only care about session the first pass thru pickler._first_pass = False - StockPickler.save_dict(pickler, obj) + + # IMPORTANT: update the following code whenever save_dict is changed in pickle.py + # StockPickler.save_dict(pickler, obj) + if pickler.bin: + pickler.write(EMPTY_DICT) + else: # proto 0 -- can't use EMPTY_DICT + pickler.write(MARK + DICT) + + pickler.memoize(obj) + # add __name__ first + if '__name__' in obj: + pickler.save('__name__') + pickler.save(obj['__name__']) + pickler.write(SETITEM) + pickler._batch_setitems(obj.items()) + logger.trace(pickler, "# D2") return @@ -1797,7 +1823,11 @@ def save_function(pickler, obj): postproc_list = [] globs = None - if _recurse: + if id(obj.__globals__) in pickler.memo: + # It is possible that the globals dictionary itself is also being + # pickled directly. + globs = globs_copy = obj.__globals__ + elif _recurse: # recurse to get all globals referred to by obj from .detect import globalvars globs_copy = globalvars(obj, recurse=True, builtin=True) diff --git a/dill/tests/_globals_dummy.py b/dill/tests/_globals_dummy.py index 128a1ddc..27b86127 100644 --- a/dill/tests/_globals_dummy.py +++ b/dill/tests/_globals_dummy.py @@ -3,8 +3,8 @@ x = 3 def h(): - print(x) + return x def g(): - h() + return h() diff --git a/dill/tests/test_functions.py b/dill/tests/test_functions.py index 36a0e052..f5bc5a7f 100644 --- a/dill/tests/test_functions.py +++ b/dill/tests/test_functions.py @@ -132,6 +132,7 @@ def test_code_object(): except Exception as error: raise Exception("failed to construct code object with format version {}".format(version)) from error + def test_shared_globals(): import dill, _globals_dummy as f, sys @@ -140,6 +141,7 @@ def test_shared_globals(): assert f.g.__globals__ is f.h.__globals__ assert g.__globals__ is h.__globals__ assert f.g.__globals__ is g.__globals__ + assert g() == h() == 3 del sys.modules['_globals_dummy'] @@ -147,10 +149,14 @@ def test_shared_globals(): assert f.g.__globals__ is f.h.__globals__ assert g.__globals__ is h.__globals__ assert f.g.__globals__ is not g.__globals__ + assert g() == h() == 3 + g1, g, g2 = dill.copy((f.__dict__, f.g, f.g.__globals__), recurse=recurse) + assert g1 is g.__globals__ + assert g1 is g2 sys.modules['_globals_dummy'] = f - + if __name__ == '__main__': test_functions() test_issue_510() From a52d2198334592a3a36e69b241e2b8155b8ac3ca Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Thu, 11 Aug 2022 11:25:52 -0300 Subject: [PATCH 3/5] reorganize and simplify if-else clauses in save_function() --- dill/_dill.py | 48 ++++++++++++++++++++++++++---------------------- dill/session.py | 9 +++++---- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 955bff1e..f4a3171d 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1817,39 +1817,44 @@ def save_function(pickler, obj): logger.trace(pickler, "F1: %s", obj) _recurse = getattr(pickler, '_recurse', None) _postproc = getattr(pickler, '_postproc', None) - _main_modified = getattr(pickler, '_main_modified', None) - _original_main = getattr(pickler, '_original_main', __builtin__)#'None' + _original_main = getattr(pickler, '_original_main', None) _globals_cache = getattr(pickler, '_globals_cache', None) postproc_list = [] - globs = None - if id(obj.__globals__) in pickler.memo: - # It is possible that the globals dictionary itself is also being - # pickled directly. - globs = globs_copy = obj.__globals__ - elif _recurse: - # recurse to get all globals referred to by obj - from .detect import globalvars - globs_copy = globalvars(obj, recurse=True, builtin=True) - else: - globs_copy = obj.__globals__ + is_memoized = id(obj.__globals__) in pickler.memo + is_modified_main_dict = ( + _original_main is not None + and obj.__globals__ is _original_main.__dict__ + ) + is_module_dict = ( + not (_recurse or is_memoized or is_modified_main_dict) + and obj.__module__ is not None + and obj.__globals__ is getattr(_import_module(obj.__module__, safe=True), '__dict__', None) + ) + if is_modified_main_dict: # If the globals is the __dict__ from the module being saved as a # session, substitute it by the dictionary being actually saved. - if _main_modified and globs_copy is _original_main.__dict__: - globs_copy = getattr(pickler, '_main', _original_main).__dict__ - globs = globs_copy + globs = pickler._main.__dict__ + elif is_memoized or is_module_dict: # If the globals is a module __dict__, do not save it in the pickle. - elif globs_copy is not None and obj.__module__ is not None and \ - getattr(_import_module(obj.__module__, True), '__dict__', None) is globs_copy: - globs = globs_copy + # It is possible that the globals dictionary itself is also being + # pickled directly. + globs = obj.__globals__ + else: + if _recurse: + # recurse to get all globals referred to by obj + from .detect import globalvars + globs_copy = globalvars(obj, recurse=True, builtin=True) + else: + # function not bound to an importable module + globs_copy = obj.__globals__ - if globs is None: # Add the name of the module to the globs dictionary and prevent # the duplication of the dictionary. Pickle the unpopulated # globals dictionary and set the remaining items after the function # is created to correctly handle recursion. - if _globals_cache is not None and obj.__globals__ is not None: + if _globals_cache is not None: if id(obj.__globals__) not in _globals_cache: globs = {'__name__': obj.__module__} _globals_cache[id(obj.__globals__)] = globs @@ -1858,7 +1863,6 @@ def save_function(pickler, obj): else: globs = {'__name__': obj.__module__} - if globs_copy is not None and globs is not globs_copy: # In the case that the globals are copied, we need to ensure that # the globals dictionary is updated when all objects in the # dictionary are already created. diff --git a/dill/session.py b/dill/session.py index 6a037d61..5fc1877a 100644 --- a/dill/session.py +++ b/dill/session.py @@ -224,17 +224,18 @@ def dump_module( file = filename else: file = open(filename, 'wb') + original_main = main + if refimported: + main = _stash_modules(main) try: pickler = Pickler(file, protocol, **kwds) - pickler._original_main = main - if refimported: - main = _stash_modules(main) pickler._main = main #FIXME: dill.settings are disabled pickler._byref = False # disable pickling by name reference pickler._recurse = False # disable pickling recursion for globals pickler._session = True # is best indicator of when pickling a session pickler._first_pass = True - pickler._main_modified = main is not pickler._original_main + if main is not original_main: + pickler._original_main = original_main pickler.dump(main) finally: if file is not filename: # if newly opened file From 6874754ba6f3f387178b6f693033ff372230d657 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 13 Feb 2023 16:15:17 -0800 Subject: [PATCH 4/5] Merge test_functors into test_functions --- dill/tests/_globals_dummy.py | 10 ---------- dill/tests/test_functions.py | 14 +++++++------- dill/tests/test_functors.py | 5 +++-- 3 files changed, 10 insertions(+), 19 deletions(-) delete mode 100644 dill/tests/_globals_dummy.py diff --git a/dill/tests/_globals_dummy.py b/dill/tests/_globals_dummy.py deleted file mode 100644 index 27b86127..00000000 --- a/dill/tests/_globals_dummy.py +++ /dev/null @@ -1,10 +0,0 @@ -# This file is used by test_shared_globals in test_functions.py - -x = 3 - -def h(): - return x - -def g(): - return h() - diff --git a/dill/tests/test_functions.py b/dill/tests/test_functions.py index f5bc5a7f..b587473f 100644 --- a/dill/tests/test_functions.py +++ b/dill/tests/test_functions.py @@ -134,27 +134,27 @@ def test_code_object(): def test_shared_globals(): - import dill, _globals_dummy as f, sys + import dill, test_functors as f, sys for recurse in False, True: g, h = dill.copy((f.g, f.h), recurse=recurse) assert f.g.__globals__ is f.h.__globals__ assert g.__globals__ is h.__globals__ assert f.g.__globals__ is g.__globals__ - assert g() == h() == 3 - - del sys.modules['_globals_dummy'] + assert g(1, 2) == h() == 3 + + del sys.modules['test_functors'] g, h = dill.copy((f.g, f.h), recurse=recurse) assert f.g.__globals__ is f.h.__globals__ assert g.__globals__ is h.__globals__ assert f.g.__globals__ is not g.__globals__ - assert g() == h() == 3 + assert g(1, 2) == h() == 3 g1, g, g2 = dill.copy((f.__dict__, f.g, f.g.__globals__), recurse=recurse) assert g1 is g.__globals__ assert g1 is g2 - - sys.modules['_globals_dummy'] = f + + sys.modules['test_functors'] = f if __name__ == '__main__': diff --git a/dill/tests/test_functors.py b/dill/tests/test_functors.py index 952a546c..88d83d89 100644 --- a/dill/tests/test_functors.py +++ b/dill/tests/test_functors.py @@ -10,17 +10,18 @@ import dill dill.settings['recurse'] = True +x = 3 def f(a, b, c): # without keywords pass def g(a, b, c=2): # with keywords - pass + return h(a=a, b=b, c=c) def h(a=1, b=2, c=3): # without args - pass + return x def test_functools(): From 527bfb48d4bc7cfc4ec976491cdd6635a7c7ceaa Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 13 Feb 2023 16:26:24 -0800 Subject: [PATCH 5/5] Add back if statements removed by leogama --- dill/_dill.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index f4a3171d..7c294063 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1854,7 +1854,7 @@ def save_function(pickler, obj): # the duplication of the dictionary. Pickle the unpopulated # globals dictionary and set the remaining items after the function # is created to correctly handle recursion. - if _globals_cache is not None: + if _globals_cache is not None and obj.__globals__ is not None: if id(obj.__globals__) not in _globals_cache: globs = {'__name__': obj.__module__} _globals_cache[id(obj.__globals__)] = globs @@ -1863,16 +1863,17 @@ def save_function(pickler, obj): else: globs = {'__name__': obj.__module__} - # In the case that the globals are copied, we need to ensure that - # the globals dictionary is updated when all objects in the - # dictionary are already created. - glob_ids = {id(g) for g in globs_copy.values()} - for stack_element in _postproc: - if stack_element in glob_ids: - _postproc[stack_element].append((_setitems, (globs, globs_copy))) - break - else: - postproc_list.append((_setitems, (globs, globs_copy))) + if globs_copy is not None and globs is not globs_copy: + # In the case that the globals are copied, we need to ensure that + # the globals dictionary is updated when all objects in the + # dictionary are already created. + glob_ids = {id(g) for g in globs_copy.values()} + for stack_element in _postproc: + if stack_element in glob_ids: + _postproc[stack_element].append((_setitems, (globs, globs_copy))) + break + else: + postproc_list.append((_setitems, (globs, globs_copy))) closure = obj.__closure__ state_dict = {}