diff options
author | brkirch <brkirch@users.noreply.github.com> | 2023-01-12 13:00:38 +0000 |
---|---|---|
committer | brkirch <brkirch@users.noreply.github.com> | 2023-01-18 01:54:18 +0000 |
commit | a255dac4f8c5ee11c15b634563d3df513f1834b4 (patch) | |
tree | a2ea43a4e9175312f0781a0e24b97a9639d9e862 | |
parent | 0b8911d883118daa54f7735c5b753b5575d9f943 (diff) | |
download | stable-diffusion-webui-gfx803-a255dac4f8c5ee11c15b634563d3df513f1834b4.tar.gz stable-diffusion-webui-gfx803-a255dac4f8c5ee11c15b634563d3df513f1834b4.tar.bz2 stable-diffusion-webui-gfx803-a255dac4f8c5ee11c15b634563d3df513f1834b4.zip |
Fix cumsum for MPS in newer torch
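The prior fix assumed that testing int16 alone was enough to determine whether the cumsum workaround is needed, but with a recent fix for cumsum in newer torch, int16 now works while bool still does not. The int and bool cases are therefore tested separately.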
-rw-r--r-- | modules/devices.py | 11 |
1 file changed, 7 insertions, 4 deletions
```diff
diff --git a/modules/devices.py b/modules/devices.py
index caeb0276..ac3ae0c9 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -139,8 +139,10 @@ orig_Tensor_cumsum = torch.Tensor.cumsum
 def cumsum_fix(input, cumsum_func, *args, **kwargs):
     if input.device.type == 'mps':
         output_dtype = kwargs.get('dtype', input.dtype)
-        if any(output_dtype == broken_dtype for broken_dtype in [torch.bool, torch.int8, torch.int16, torch.int64]):
+        if output_dtype == torch.int64:
             return cumsum_func(input.cpu(), *args, **kwargs).to(input.device)
+        elif cumsum_needs_bool_fix and output_dtype == torch.bool or cumsum_needs_int_fix and (output_dtype == torch.int8 or output_dtype == torch.int16):
+            return cumsum_func(input.to(torch.int32), *args, **kwargs).to(torch.int64)
     return cumsum_func(input, *args, **kwargs)
 
 
@@ -151,8 +153,9 @@ if has_mps():
         torch.nn.functional.layer_norm = layer_norm_fix
         torch.Tensor.numpy = numpy_fix
     elif version.parse(torch.__version__) > version.parse("1.13.1"):
-        if not torch.Tensor([1,2]).to(torch.device("mps")).equal(torch.Tensor([1,1]).to(torch.device("mps")).cumsum(0, dtype=torch.int16)):
-            torch.cumsum = lambda input, *args, **kwargs: ( cumsum_fix(input, orig_cumsum, *args, **kwargs) )
-            torch.Tensor.cumsum = lambda self, *args, **kwargs: ( cumsum_fix(self, orig_Tensor_cumsum, *args, **kwargs) )
+        cumsum_needs_int_fix = not torch.Tensor([1,2]).to(torch.device("mps")).equal(torch.ShortTensor([1,1]).to(torch.device("mps")).cumsum(0))
+        cumsum_needs_bool_fix = not torch.BoolTensor([True,True]).to(device=torch.device("mps"), dtype=torch.int64).equal(torch.BoolTensor([True,False]).to(torch.device("mps")).cumsum(0))
+        torch.cumsum = lambda input, *args, **kwargs: ( cumsum_fix(input, orig_cumsum, *args, **kwargs) )
+        torch.Tensor.cumsum = lambda self, *args, **kwargs: ( cumsum_fix(self, orig_Tensor_cumsum, *args, **kwargs) )
         orig_narrow = torch.narrow
         torch.narrow = lambda *args, **kwargs: ( orig_narrow(*args, **kwargs).clone() )
```