From 84b6fcd02ca6d6ab48c4b6be4bb8724b1c2e7014 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Thu, 3 Aug 2023 00:00:23 +0300 Subject: add NV option for Random number generator source setting, which allows to generate same pictures on CPU/AMD/Mac as on NVidia videocards. --- modules/rng_philox.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 modules/rng_philox.py (limited to 'modules/rng_philox.py') diff --git a/modules/rng_philox.py b/modules/rng_philox.py new file mode 100644 index 00000000..b5c02483 --- /dev/null +++ b/modules/rng_philox.py @@ -0,0 +1,100 @@ +"""RNG imitiating torch cuda randn on CPU. You are welcome. + +Usage: + +``` +g = Generator(seed=0) +print(g.randn(shape=(3, 4))) +``` + +Expected output: +``` +[[-0.92466259 -0.42534415 -2.6438457 0.14518388] + [-0.12086647 -0.57972564 -0.62285122 -0.32838709] + [-1.07454231 -0.36314407 -1.67105067 2.26550497]] +``` +""" + +import numpy as np + +philox_m = [0xD2511F53, 0xCD9E8D57] +philox_w = [0x9E3779B9, 0xBB67AE85] + +two_pow32_inv = np.array([2.3283064e-10], dtype=np.float32) +two_pow32_inv_2pi = np.array([2.3283064e-10 * 6.2831855], dtype=np.float32) + + +def uint32(x): + """Converts (N,) np.uint64 array into (2, N) np.unit32 array.""" + return np.moveaxis(x.view(np.uint32).reshape(-1, 2), 0, 1) + + +def philox4_round(counter, key): + """A single round of the Philox 4x32 random number generator.""" + + v1 = uint32(counter[0].astype(np.uint64) * philox_m[0]) + v2 = uint32(counter[2].astype(np.uint64) * philox_m[1]) + + counter[0] = v2[1] ^ counter[1] ^ key[0] + counter[1] = v2[0] + counter[2] = v1[1] ^ counter[3] ^ key[1] + counter[3] = v1[0] + + +def philox4_32(counter, key, rounds=10): + """Generates 32-bit random numbers using the Philox 4x32 random number generator. + + Parameters: + counter (numpy.ndarray): A 4xN array of 32-bit integers representing the counter values (offset into generation). + key (numpy.ndarray): A 2xN array of 32-bit integers representing the key values (seed). + rounds (int): The number of rounds to perform. + + Returns: + numpy.ndarray: A 4xN array of 32-bit integers containing the generated random numbers. + """ + + for _ in range(rounds - 1): + philox4_round(counter, key) + + key[0] = key[0] + philox_w[0] + key[1] = key[1] + philox_w[1] + + philox4_round(counter, key) + return counter + + +def box_muller(x, y): + """Returns just the first out of two numbers generated by Box–Muller transform algorithm.""" + u = x.astype(np.float32) * two_pow32_inv + two_pow32_inv / 2 + v = y.astype(np.float32) * two_pow32_inv_2pi + two_pow32_inv_2pi / 2 + + s = np.sqrt(-2.0 * np.log(u)) + + r1 = s * np.sin(v) + return r1.astype(np.float32) + + +class Generator: + """RNG that produces same outputs as torch.randn(..., device='cuda') on CPU""" + + def __init__(self, seed): + self.seed = seed + self.offset = 0 + + def randn(self, shape): + """Generate a sequence of n standard normal random variables using the Philox 4x32 random number generator and the Box-Muller transform.""" + + n = 1 + for x in shape: + n *= x + + counter = np.zeros((4, n), dtype=np.uint32) + counter[0] = self.offset + counter[2] = np.arange(n, dtype=np.uint32) # up to 2^32 numbers can be generated - if you want more you'd need to spill into counter[3] + self.offset += 1 + + key = uint32(np.array([[self.seed] * n], dtype=np.uint64)) + + g = philox4_32(counter, key) + + return box_muller(g[0], g[1]).reshape(shape) # discard g[2] and g[3] -- cgit v1.2.3 From 0904df84e296c10fe0ccb37a3b689e6b96d87e41 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Thu, 3 Aug 2023 07:53:03 +0300 Subject: minor performance improvements for philox --- modules/rng_philox.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'modules/rng_philox.py') diff --git a/modules/rng_philox.py b/modules/rng_philox.py index b5c02483..5532cf9d 100644 --- a/modules/rng_philox.py +++ b/modules/rng_philox.py @@ -26,7 +26,7 @@ two_pow32_inv_2pi = np.array([2.3283064e-10 * 6.2831855], dtype=np.float32) def uint32(x): """Converts (N,) np.uint64 array into (2, N) np.unit32 array.""" - return np.moveaxis(x.view(np.uint32).reshape(-1, 2), 0, 1) + return x.view(np.uint32).reshape(-1, 2).transpose(1, 0) def philox4_round(counter, key): @@ -65,8 +65,8 @@ def philox4_32(counter, key, rounds=10): def box_muller(x, y): """Returns just the first out of two numbers generated by Box–Muller transform algorithm.""" - u = x.astype(np.float32) * two_pow32_inv + two_pow32_inv / 2 - v = y.astype(np.float32) * two_pow32_inv_2pi + two_pow32_inv_2pi / 2 + u = x * two_pow32_inv + two_pow32_inv / 2 + v = y * two_pow32_inv_2pi + two_pow32_inv_2pi / 2 s = np.sqrt(-2.0 * np.log(u)) @@ -93,7 +93,9 @@ class Generator: counter[2] = np.arange(n, dtype=np.uint32) # up to 2^32 numbers can be generated - if you want more you'd need to spill into counter[3] self.offset += 1 - key = uint32(np.array([[self.seed] * n], dtype=np.uint64)) + key = np.empty(n, dtype=np.uint64) + key.fill(self.seed) + key = uint32(key) g = philox4_32(counter, key) -- cgit v1.2.3