From 875ddfeecfaffad9eee24813301637cba310337d Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 9 Oct 2022 17:58:43 +0300 Subject: added guard for torch.load to prevent loading pickles with unknown content --- modules/safe.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 modules/safe.py (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py new file mode 100644 index 00000000..2d2c1371 --- /dev/null +++ b/modules/safe.py @@ -0,0 +1,89 @@ +# this code is adapted from the script contributed by anon from /h/ + +import io +import pickle +import collections +import sys +import traceback + +import torch +import numpy +import _codecs +import zipfile + + +def encode(*args): + out = _codecs.encode(*args) + return out + + +class RestrictedUnpickler(pickle.Unpickler): + def persistent_load(self, saved_id): + assert saved_id[0] == 'storage' + return torch.storage._TypedStorage() + + def find_class(self, module, name): + if module == 'collections' and name == 'OrderedDict': + return getattr(collections, name) + if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']: + return getattr(torch._utils, name) + if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage']: + return getattr(torch, name) + if module == 'torch.nn.modules.container' and name in ['ParameterDict']: + return getattr(torch.nn.modules.container, name) + if module == 'numpy.core.multiarray' and name == 'scalar': + return numpy.core.multiarray.scalar + if module == 'numpy' and name == 'dtype': + return numpy.dtype + if module == '_codecs' and name == 'encode': + return encode + if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint': + import pytorch_lightning.callbacks + return pytorch_lightning.callbacks.model_checkpoint + if module == "pytorch_lightning.callbacks.model_checkpoint" and name == 'ModelCheckpoint': + import pytorch_lightning.callbacks.model_checkpoint + return pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint + if module == "__builtin__" and name == 'set': + return set + + # Forbid everything else. + raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden") + + +def check_pt(filename): + try: + + # new pytorch format is a zip file + with zipfile.ZipFile(filename) as z: + with z.open('archive/data.pkl') as file: + unpickler = RestrictedUnpickler(file) + unpickler.load() + + except zipfile.BadZipfile: + + # if it's not a zip file, it's an olf pytorch format, with five objects written to pickle + with open(filename, "rb") as file: + unpickler = RestrictedUnpickler(file) + for i in range(5): + unpickler.load() + + +def load(filename, *args, **kwargs): + from modules import shared + + try: + if not shared.cmd_opts.disable_safe_unpickle: + check_pt(filename) + + except Exception: + print(f"Error verifying pickled file from {filename}:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + print(f"\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr) + print(f"You can skip this check with --disable-safe-unpickle commandline argument.", file=sys.stderr) + return None + + return unsafe_torch_load(filename, *args, **kwargs) + + +unsafe_torch_load = torch.load +torch.load = load -- cgit v1.2.3 From a65476718f08a35f527b973ef731e6f488bace5e Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 9 Oct 2022 23:38:49 +0300 Subject: add DoubleStorage to list of allowed classes for pickle --- modules/safe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 2d2c1371..4d06f2a5 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -27,7 +27,7 @@ class RestrictedUnpickler(pickle.Unpickler): return getattr(collections, name) if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']: return getattr(torch._utils, name) - if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage']: + if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage']: return getattr(torch, name) if module == 'torch.nn.modules.container' and name in ['ParameterDict']: return getattr(torch.nn.modules.container, name) -- cgit v1.2.3 From 8acc901ba3a252dc6ab4fabcb41644cf64d1774c Mon Sep 17 00:00:00 2001 From: brkirch Date: Mon, 10 Oct 2022 00:38:55 -0400 Subject: Newer versions of PyTorch use TypedStorage instead Pytorch 1.13 and later will rename _TypedStorage to TypedStorage, so check for TypedStorage and use _TypedStorage if it is not available. Currently this is needed so that nightly builds of PyTorch work correctly. --- modules/safe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 4d06f2a5..05917463 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -12,6 +12,10 @@ import _codecs import zipfile +# PyTorch 1.13 and later have _TypedStorage renamed to TypedStorage +TypedStorage = torch.storage.TypedStorage if hasattr(torch.storage, 'TypedStorage') else torch.storage._TypedStorage + + def encode(*args): out = _codecs.encode(*args) return out @@ -20,7 +24,7 @@ def encode(*args): class RestrictedUnpickler(pickle.Unpickler): def persistent_load(self, saved_id): assert saved_id[0] == 'storage' - return torch.storage._TypedStorage() + return TypedStorage() def find_class(self, module, name): if module == 'collections' and name == 'OrderedDict': -- cgit v1.2.3 From 66b7d7584f0b44ce1316425808c27ca7df38293c Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 17:03:00 +0300 Subject: become even stricter with pickles no pickle shall pass thank you again, RyotaK --- modules/safe.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 05917463..20be16a5 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -10,6 +10,7 @@ import torch import numpy import _codecs import zipfile +import re # PyTorch 1.13 and later have _TypedStorage renamed to TypedStorage @@ -54,11 +55,27 @@ class RestrictedUnpickler(pickle.Unpickler): raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden") +allowed_zip_names = ["archive/data.pkl", "archive/version"] +allowed_zip_names_re = re.compile(r"^archive/data/\d+$") + + +def check_zip_filenames(filename, names): + for name in names: + if name in allowed_zip_names: + continue + if allowed_zip_names_re.match(name): + continue + + raise Exception(f"bad file inside {filename}: {name}") + + def check_pt(filename): try: # new pytorch format is a zip file with zipfile.ZipFile(filename) as z: + check_zip_filenames(filename, z.namelist()) + with z.open('archive/data.pkl') as file: unpickler = RestrictedUnpickler(file) unpickler.load() -- cgit v1.2.3 From 9e5ca5077f43bb3ec1a0ec41b47964cb38d544a6 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 14 Oct 2022 16:37:32 +0300 Subject: extra message for unpicking fails --- modules/safe.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 20be16a5..399165a1 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -96,11 +96,18 @@ def load(filename, *args, **kwargs): if not shared.cmd_opts.disable_safe_unpickle: check_pt(filename) + except pickle.UnpicklingError: + print(f"Error verifying pickled file from {filename}:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + print(f"-----> !!!! The file is most likely corrupted !!!! <-----", file=sys.stderr) + print(f"You can skip this check with --disable-safe-unpickle commandline argument, but that is not going to help you.\n\n", file=sys.stderr) + return None + except Exception: print(f"Error verifying pickled file from {filename}:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) print(f"\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr) - print(f"You can skip this check with --disable-safe-unpickle commandline argument.", file=sys.stderr) + print(f"You can skip this check with --disable-safe-unpickle commandline argument.\n\n", file=sys.stderr) return None return unsafe_torch_load(filename, *args, **kwargs) -- cgit v1.2.3 From d35bf649456da2558cbb6f2ea16fa1606022b7e7 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 1 Nov 2022 14:19:24 +0300 Subject: make launch.py run installers for extensions that have ones add some more classes to safety module for an extension --- modules/safe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 399165a1..348a24fc 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -32,7 +32,7 @@ class RestrictedUnpickler(pickle.Unpickler): return getattr(collections, name) if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']: return getattr(torch._utils, name) - if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage']: + if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage', 'ByteStorage']: return getattr(torch, name) if module == 'torch.nn.modules.container' and name in ['ParameterDict']: return getattr(torch.nn.modules.container, name) -- cgit v1.2.3 From 6e4de5b4422dfc0d45063b2c8c78b19f00321615 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 6 Nov 2022 11:20:23 +0300 Subject: add load_with_extra function for modules to load checkpoints with extended whitelist --- modules/safe.py | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 348a24fc..a9209e38 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -23,11 +23,18 @@ def encode(*args): class RestrictedUnpickler(pickle.Unpickler): + extra_handler = None + def persistent_load(self, saved_id): assert saved_id[0] == 'storage' return TypedStorage() def find_class(self, module, name): + if self.extra_handler is not None: + res = self.extra_handler(module, name) + if res is not None: + return res + if module == 'collections' and name == 'OrderedDict': return getattr(collections, name) if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']: @@ -52,7 +59,7 @@ class RestrictedUnpickler(pickle.Unpickler): return set # Forbid everything else. - raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden") + raise Exception(f"global '{module}/{name}' is forbidden") allowed_zip_names = ["archive/data.pkl", "archive/version"] @@ -69,7 +76,7 @@ def check_zip_filenames(filename, names): raise Exception(f"bad file inside {filename}: {name}") -def check_pt(filename): +def check_pt(filename, extra_handler): try: # new pytorch format is a zip file @@ -78,6 +85,7 @@ def check_pt(filename): with z.open('archive/data.pkl') as file: unpickler = RestrictedUnpickler(file) + unpickler.extra_handler = extra_handler unpickler.load() except zipfile.BadZipfile: @@ -85,16 +93,42 @@ def check_pt(filename): # if it's not a zip file, it's an olf pytorch format, with five objects written to pickle with open(filename, "rb") as file: unpickler = RestrictedUnpickler(file) + unpickler.extra_handler = extra_handler for i in range(5): unpickler.load() def load(filename, *args, **kwargs): + return load_with_extra(filename, *args, **kwargs) + + +def load_with_extra(filename, extra_handler=None, *args, **kwargs): + """ + this functon is intended to be used by extensions that want to load models with + some extra classes in them that the usual unpickler would find suspicious. + + Use the extra_handler argument to specify a function that takes module and field name as text, + and returns that field's value: + + ```python + def extra(module, name): + if module == 'collections' and name == 'OrderedDict': + return collections.OrderedDict + + return None + + safe.load_with_extra('model.pt', extra_handler=extra) + ``` + + The alternative to this is just to use safe.unsafe_torch_load('model.pt'), which as the name implies is + definitely unsafe. + """ + from modules import shared try: if not shared.cmd_opts.disable_safe_unpickle: - check_pt(filename) + check_pt(filename, extra_handler) except pickle.UnpicklingError: print(f"Error verifying pickled file from {filename}:", file=sys.stderr) -- cgit v1.2.3 From e46147786914484b422899ee7154ae1685d96ae5 Mon Sep 17 00:00:00 2001 From: SmirkingFace <116507648+smirkingface@users.noreply.github.com> Date: Fri, 2 Dec 2022 11:12:13 +0100 Subject: Fixed safe.py for pytorch 1.13 ckpt files --- modules/safe.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index a9209e38..10460ad0 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -62,14 +62,12 @@ class RestrictedUnpickler(pickle.Unpickler): raise Exception(f"global '{module}/{name}' is forbidden") -allowed_zip_names = ["archive/data.pkl", "archive/version"] -allowed_zip_names_re = re.compile(r"^archive/data/\d+$") - +# Regular expression that accepts 'dirname/version', 'dirname/data.pkl', and 'dirname/data/' +allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|version|(data\.pkl))$") +data_pkl_re = re.compile(r"^([^/]+)/data\.pkl$") def check_zip_filenames(filename, names): for name in names: - if name in allowed_zip_names: - continue if allowed_zip_names_re.match(name): continue @@ -82,8 +80,14 @@ def check_pt(filename, extra_handler): # new pytorch format is a zip file with zipfile.ZipFile(filename) as z: check_zip_filenames(filename, z.namelist()) - - with z.open('archive/data.pkl') as file: + + # find filename of data.pkl in zip file: '/data.pkl' + data_pkl_filenames = [f for f in z.namelist() if data_pkl_re.match(f)] + if len(data_pkl_filenames) == 0: + raise Exception(f"data.pkl not found in {filename}") + if len(data_pkl_filenames) > 1: + raise Exception(f"Multiple data.pkl found in {filename}") + with z.open(data_pkl_filenames[0]) as file: unpickler = RestrictedUnpickler(file) unpickler.extra_handler = extra_handler unpickler.load() -- cgit v1.2.3 From c0355caefe3d82e304e6d832699d581fc8f9fbf9 Mon Sep 17 00:00:00 2001 From: Jim Hays Date: Wed, 14 Dec 2022 21:01:32 -0500 Subject: Fix various typos --- modules/safe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 10460ad0..20e9d2fa 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -80,7 +80,7 @@ def check_pt(filename, extra_handler): # new pytorch format is a zip file with zipfile.ZipFile(filename) as z: check_zip_filenames(filename, z.namelist()) - + # find filename of data.pkl in zip file: '/data.pkl' data_pkl_filenames = [f for f in z.namelist() if data_pkl_re.match(f)] if len(data_pkl_filenames) == 0: @@ -108,7 +108,7 @@ def load(filename, *args, **kwargs): def load_with_extra(filename, extra_handler=None, *args, **kwargs): """ - this functon is intended to be used by extensions that want to load models with + this function is intended to be used by extensions that want to load models with some extra classes in them that the usual unpickler would find suspicious. Use the extra_handler argument to specify a function that takes module and field name as text, -- cgit v1.2.3 From cca16373def60bfc6d159a3c2dca91d0ba48112a Mon Sep 17 00:00:00 2001 From: brkirch Date: Sat, 17 Dec 2022 03:24:54 -0500 Subject: Add attributes used by MPS --- modules/safe.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 10460ad0..7c89c4c2 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -37,16 +37,16 @@ class RestrictedUnpickler(pickle.Unpickler): if module == 'collections' and name == 'OrderedDict': return getattr(collections, name) - if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']: + if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter', '_rebuild_device_tensor_from_numpy']: return getattr(torch._utils, name) - if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage', 'ByteStorage']: + if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage', 'ByteStorage', 'float32']: return getattr(torch, name) if module == 'torch.nn.modules.container' and name in ['ParameterDict']: return getattr(torch.nn.modules.container, name) - if module == 'numpy.core.multiarray' and name == 'scalar': - return numpy.core.multiarray.scalar - if module == 'numpy' and name == 'dtype': - return numpy.dtype + if module == 'numpy.core.multiarray' and name in ['scalar', '_reconstruct']: + return getattr(numpy.core.multiarray, name) + if module == 'numpy' and name in ['dtype', 'ndarray']: + return getattr(numpy, name) if module == '_codecs' and name == 'encode': return encode if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint': -- cgit v1.2.3 From 3bf5591efe9a9f219c6088be322a87adc4f48f95 Mon Sep 17 00:00:00 2001 From: Yuval Aboulafia Date: Sat, 24 Dec 2022 21:35:29 +0200 Subject: fix F541 f-string without any placeholders --- modules/safe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 479c8b86..1d4c20b9 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -137,15 +137,15 @@ def load_with_extra(filename, extra_handler=None, *args, **kwargs): except pickle.UnpicklingError: print(f"Error verifying pickled file from {filename}:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) - print(f"-----> !!!! The file is most likely corrupted !!!! <-----", file=sys.stderr) - print(f"You can skip this check with --disable-safe-unpickle commandline argument, but that is not going to help you.\n\n", file=sys.stderr) + print("-----> !!!! The file is most likely corrupted !!!! <-----", file=sys.stderr) + print("You can skip this check with --disable-safe-unpickle commandline argument, but that is not going to help you.\n\n", file=sys.stderr) return None except Exception: print(f"Error verifying pickled file from {filename}:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) - print(f"\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr) - print(f"You can skip this check with --disable-safe-unpickle commandline argument.\n\n", file=sys.stderr) + print("\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr) + print("You can skip this check with --disable-safe-unpickle commandline argument.\n\n", file=sys.stderr) return None return unsafe_torch_load(filename, *args, **kwargs) -- cgit v1.2.3 From 8eef9d8e782aa0655241e43f67059aa7bef3bdca Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 25 Dec 2022 09:03:56 +0300 Subject: a way to add an exception to unpickler without explicitly calling load_with_extra --- modules/safe.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'modules/safe.py') diff --git a/modules/safe.py b/modules/safe.py index 479c8b86..ec23a53c 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -103,7 +103,7 @@ def check_pt(filename, extra_handler): def load(filename, *args, **kwargs): - return load_with_extra(filename, *args, **kwargs) + return load_with_extra(filename, extra_handler=global_extra_handler, *args, **kwargs) def load_with_extra(filename, extra_handler=None, *args, **kwargs): @@ -151,5 +151,42 @@ def load_with_extra(filename, extra_handler=None, *args, **kwargs): return unsafe_torch_load(filename, *args, **kwargs) +class Extra: + """ + A class for temporarily setting the global handler for when you can't explicitly call load_with_extra + (because it's not your code making the torch.load call). The intended use is like this: + +``` +import torch +from modules import safe + +def handler(module, name): + if module == 'torch' and name in ['float64', 'float16']: + return getattr(torch, name) + + return None + +with safe.Extra(handler): + x = torch.load('model.pt') +``` + """ + + def __init__(self, handler): + self.handler = handler + + def __enter__(self): + global global_extra_handler + + assert global_extra_handler is None, 'already inside an Extra() block' + global_extra_handler = self.handler + + def __exit__(self, exc_type, exc_val, exc_tb): + global global_extra_handler + + global_extra_handler = None + + unsafe_torch_load = torch.load torch.load = load +global_extra_handler = None + -- cgit v1.2.3