1 files changed, 20 insertions, 11 deletions
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 635e266e..65b05d34 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -28,9 +28,11 @@ class InterrogateModels:
     clip_preprocess = None
     categories = None
     dtype = None
+    running_on_cpu = None
 
     def __init__(self, content_dir):
         self.categories = []
+        self.running_on_cpu = devices.device_interrogate == torch.device("cpu")
 
         if os.path.exists(content_dir):
             for filename in os.listdir(content_dir):
@@ -53,26 +55,30 @@ class InterrogateModels:
     def load_clip_model(self):
         import clip
 
-        model, preprocess = clip.load(clip_model_name)
+        if self.running_on_cpu:
+            model, preprocess = clip.load(clip_model_name, device="cpu")
+        else:
+            model, preprocess = clip.load(clip_model_name)
+
         model.eval()
-        model = model.to(shared.device)
+        model = model.to(devices.device_interrogate)
 
         return model, preprocess
 
     def load(self):
         if self.blip_model is None:
             self.blip_model = self.load_blip_model()
-            if not shared.cmd_opts.no_half:
+            if not shared.cmd_opts.no_half and not self.running_on_cpu:
                 self.blip_model = self.blip_model.half()
 
-        self.blip_model = self.blip_model.to(shared.device)
+        self.blip_model = self.blip_model.to(devices.device_interrogate)
 
         if self.clip_model is None:
             self.clip_model, self.clip_preprocess = self.load_clip_model()
-            if not shared.cmd_opts.no_half:
+            if not shared.cmd_opts.no_half and not self.running_on_cpu:
                 self.clip_model = self.clip_model.half()
 
-        self.clip_model = self.clip_model.to(shared.device)
+        self.clip_model = self.clip_model.to(devices.device_interrogate)
 
         self.dtype = next(self.clip_model.parameters()).dtype
 
@@ -99,11 +105,11 @@ class InterrogateModels:
             text_array = text_array[0:int(shared.opts.interrogate_clip_dict_limit)]
 
         top_count = min(top_count, len(text_array))
-        text_tokens = clip.tokenize([text for text in text_array], truncate=True).to(shared.device)
+        text_tokens = clip.tokenize([text for text in text_array], truncate=True).to(devices.device_interrogate)
         text_features = self.clip_model.encode_text(text_tokens).type(self.dtype)
         text_features /= text_features.norm(dim=-1, keepdim=True)
 
-        similarity = torch.zeros((1, len(text_array))).to(shared.device)
+        similarity = torch.zeros((1, len(text_array))).to(devices.device_interrogate)
         for i in range(image_features.shape[0]):
             similarity += (100.0 * image_features[i].unsqueeze(0) @ text_features.T).softmax(dim=-1)
         similarity /= image_features.shape[0]
@@ -116,7 +122,7 @@ class InterrogateModels:
             transforms.Resize((blip_image_eval_size, blip_image_eval_size), interpolation=InterpolationMode.BICUBIC),
             transforms.ToTensor(),
             transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
-        ])(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
+        ])(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)
 
         with torch.no_grad():
             caption = self.blip_model.generate(gpu_image, sample=False, num_beams=shared.opts.interrogate_clip_num_beams, min_length=shared.opts.interrogate_clip_min_length, max_length=shared.opts.interrogate_clip_max_length)
@@ -140,7 +146,7 @@ class InterrogateModels:
 
             res = caption
 
-            clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
+            clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)
 
             precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext
             with torch.no_grad(), precision_scope("cuda"):
@@ -156,7 +162,10 @@ class InterrogateModels:
                 for name, topn, items in self.categories:
                     matches = self.rank(image_features, items, top_count=topn)
                     for match, score in matches:
-                        res += ", " + match
+                        if shared.opts.interrogate_return_ranks:
+                            res += f", ({match}:{score/100:.3f})"
+                        else:
+                            res += ", " + match
 
         except Exception:
             print(f"Error interrogating", file=sys.stderr)