12 files changed, 72 insertions, 24 deletions
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 0974056d..de905caa 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -62,7 +62,7 @@ parser.add_argument("--opt-split-attention-invokeai", action='store_true', help=
 parser.add_argument("--opt-split-attention-v1", action='store_true', help="prefer older version of split attention optimization for automatic choice of optimization")
 parser.add_argument("--opt-sdp-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization for automatic choice of optimization; requires PyTorch 2.*")
 parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization without memory efficient attention for automatic choice of optimization, makes image generation deterministic; requires PyTorch 2.*")
-parser.add_argument("--disable-opt-split-attention", action='store_true', help="does not do anything")
+parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization")
 parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
 parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
 parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
diff --git a/modules/images.py b/modules/images.py
index 30e9ffc5..a12d252b 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -503,10 +503,6 @@ def save_image_with_geninfo(image, geninfo, filename, extension=None, existing_p
 
     image_format = Image.registered_extensions()[extension]
 
-    existing_pnginfo = existing_pnginfo or {}
-    if opts.enable_pnginfo:
-        existing_pnginfo['parameters'] = geninfo
-
     if extension.lower() == '.png':
         if opts.enable_pnginfo:
             pnginfo_data = PngImagePlugin.PngInfo()
diff --git a/modules/launch_utils.py b/modules/launch_utils.py
index 6e9bb770..0c8c4db0 100644
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@@ -68,7 +68,13 @@ def git_tag():
     try:
         return subprocess.check_output([git, "describe", "--tags"], shell=False, encoding='utf8').strip()
     except Exception:
-        return "<none>"
+        try:
+            from pathlib import Path
+            changelog_md = Path(__file__).parent.parent / "CHANGELOG.md"
+            with changelog_md.open(encoding="utf-8") as file:
+                return next((line.strip() for line in file if line.strip()), "<none>")
+        except Exception:
+            return "<none>"
 
 
 def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str:
diff --git a/modules/processing.py b/modules/processing.py
index baa9b278..362ab4c2 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -595,8 +595,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
     try:
         # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint
-        override_checkpoint = p.override_settings.get('sd_model_checkpoint')
-        if override_checkpoint is not None and sd_models.checkpoint_alisases.get(override_checkpoint) is None:
+        if sd_models.checkpoint_alisases.get(p.override_settings.get('sd_model_checkpoint')) is None:
             p.override_settings.pop('sd_model_checkpoint', None)
             sd_models.reload_model_weights()
 
diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py
index ec1469d0..f755283c 100644
--- a/modules/script_callbacks.py
+++ b/modules/script_callbacks.py
@@ -1,11 +1,12 @@
 import inspect
+import os
 from collections import namedtuple
 from typing import Optional, Dict, Any
 
 from fastapi import FastAPI
 from gradio import Blocks
 
-from modules import errors
+from modules import errors, timer
 
 
 def report_exception(c, job):
@@ -123,6 +124,7 @@ def app_started_callback(demo: Optional[Blocks], app: FastAPI):
     for c in callback_map['callbacks_app_started']:
         try:
             c.callback(demo, app)
+            timer.startup_timer.record(os.path.basename(c.script))
         except Exception:
             report_exception(c, 'app_started_callback')
 
diff --git a/modules/scripts.py b/modules/scripts.py
index b901862d..99bf836a 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -5,7 +5,7 @@ from collections import namedtuple
 
 import gradio as gr
 
-from modules import shared, paths, script_callbacks, extensions, script_loading, scripts_postprocessing, errors
+from modules import shared, paths, script_callbacks, extensions, script_loading, scripts_postprocessing, errors, timer
 
 AlwaysVisible = object()
 
@@ -280,6 +280,7 @@ def load_scripts():
         finally:
             sys.path = syspath
             current_basedir = paths.script_path
+            timer.startup_timer.record(scriptfile.filename)
 
     global scripts_txt2img, scripts_img2img, scripts_postproc
 
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 487dfd60..3b6f95ce 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -68,6 +68,8 @@ def apply_optimizations(option=None):
 
     if selection == "None":
         matching_optimizer = None
+    elif selection == "Automatic" and shared.cmd_opts.disable_opt_split_attention:
+        matching_optimizer = None
     elif matching_optimizer is None:
         matching_optimizer = optimizers[0]
 
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 5f0ff513..b41aa419 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -57,7 +57,7 @@ class SdOptimizationSdpNoMem(SdOptimization):
     name = "sdp-no-mem"
     label = "scaled dot product without memory efficient attention"
     cmd_opt = "opt_sdp_no_mem_attention"
-    priority = 90
+    priority = 80
 
     def is_available(self):
         return hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(torch.nn.functional.scaled_dot_product_attention)
@@ -71,7 +71,7 @@ class SdOptimizationSdp(SdOptimizationSdpNoMem):
     name = "sdp"
     label = "scaled dot product"
     cmd_opt = "opt_sdp_attention"
-    priority = 80
+    priority = 70
 
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward
@@ -114,7 +114,7 @@ class SdOptimizationInvokeAI(SdOptimization):
 class SdOptimizationDoggettx(SdOptimization):
     name = "Doggettx"
     cmd_opt = "opt_split_attention"
-    priority = 20
+    priority = 90
 
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 232eb9c4..918f6fd6 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -314,8 +314,6 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
 
         timer.record("apply half()")
 
-    devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16
-    devices.dtype_vae = torch.float32 if shared.cmd_opts.no_half or shared.cmd_opts.no_half_vae else torch.float16
     devices.dtype_unet = model.model.diffusion_model.dtype
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
diff --git a/modules/shared.py b/modules/shared.py
index 3c7ae654..7025a754 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -6,6 +6,7 @@ import threading
 import time
 
 import gradio as gr
+import torch
 import tqdm
 
 import modules.interrogate
@@ -63,6 +64,9 @@ cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_op
 devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_esrgan, devices.device_codeformer = \
     (devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'esrgan', 'codeformer'])
 
+devices.dtype = torch.float32 if cmd_opts.no_half else torch.float16
+devices.dtype_vae = torch.float32 if cmd_opts.no_half or cmd_opts.no_half_vae else torch.float16
+
 device = devices.device
 weight_load_location = None if cmd_opts.lowram else "cpu"
 
@@ -474,7 +478,7 @@ options_templates.update(options_section(('ui', "User interface"), {
     "quicksettings_list": OptionInfo(["sd_model_checkpoint"], "Quicksettings list", ui_components.DropdownMulti, lambda: {"choices": list(opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that appear at the top of page rather than in settings tab").needs_restart(),
     "ui_tab_order": OptionInfo([], "UI tab order", ui_components.DropdownMulti, lambda: {"choices": list(tab_names)}).needs_restart(),
     "hidden_tabs": OptionInfo([], "Hidden UI tabs", ui_components.DropdownMulti, lambda: {"choices": list(tab_names)}).needs_restart(),
-    "ui_reorder": OptionInfo([], "txt2img/img2img UI item order", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_restart(),
+    "ui_reorder_list": OptionInfo([], "txt2img/img2img UI item order", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_restart(),
     "hires_fix_show_sampler": OptionInfo(False, "Hires fix: show hires sampler selection").needs_restart(),
     "hires_fix_show_prompts": OptionInfo(False, "Hires fix: show hires prompt and negative prompt").needs_restart(),
     "disable_token_counters": OptionInfo(False, "Disable prompt token counters").needs_restart(),
@@ -626,8 +630,8 @@ class Options:
             self.data['quicksettings_list'] = [i.strip() for i in self.data.get('quicksettings').split(',')]
 
         # 1.4.0 ui_reorder
-        if isinstance(self.data.get('ui_reorder'), str):
-            self.data['ui_reorder'] = [i.strip() for i in self.data.get('ui_reorder').split(',')]
+        if isinstance(self.data.get('ui_reorder'), str) and self.data.get('ui_reorder') and "ui_reorder_list" not in self.data:
+            self.data['ui_reorder_list'] = [i.strip() for i in self.data.get('ui_reorder').split(',')]
 
         bad_settings = 0
         for k, v in self.data.items():
diff --git a/modules/timer.py b/modules/timer.py
index ba92be33..da99e49f 100644
--- a/modules/timer.py
+++ b/modules/timer.py
@@ -1,11 +1,30 @@
 import time
 
 
+class TimerSubcategory:  # context manager built by Timer.subcategory(); records made inside the `with` are keyed under "<base><category>/"
+    def __init__(self, timer, category):  # timer: the Timer being annotated; category: name of this subcategory
+        self.timer = timer
+        self.category = category
+        self.start = None  # wall-clock start time, filled in by __enter__
+        self.original_base_category = timer.base_category  # prefix captured at construction time; restored in __exit__
+
+    def __enter__(self):  # no explicit return, so `with ... as x` binds None
+        self.start = time.time()
+        self.timer.base_category = self.original_base_category + self.category + "/"  # nested records get this prefix
+
+    def __exit__(self, exc_type, exc_val, exc_tb):  # no explicit return, so an exception raised in the block propagates
+        elapsed_for_subcategroy = time.time() - self.start  # NOTE(review): local name is misspelled ("subcategroy")
+        self.timer.base_category = self.original_base_category  # restore the prefix before recording the subcategory itself
+        self.timer.add_time_to_record(self.original_base_category + self.category, elapsed_for_subcategroy)  # wall time of the whole block
+        self.timer.record(self.category)  # Timer.record also adds its elapsed() reading to this same key and to the total
+
+
 class Timer:
     def __init__(self):
         self.start = time.time()
         self.records = {}
         self.total = 0
+        self.base_category = ''
 
     def elapsed(self):
         end = time.time()
@@ -13,18 +32,29 @@ class Timer:
         self.start = end
         return res
 
-    def record(self, category, extra_time=0):
-        e = self.elapsed()
+    def add_time_to_record(self, category, amount):  # add `amount` to records[category], creating the entry at 0 first; does not touch self.total
         if category not in self.records:
             self.records[category] = 0
 
-        self.records[category] += e + extra_time
+        self.records[category] += amount  # accumulates, so repeated calls with the same key sum up
+
+    def record(self, category, extra_time=0):  # credit the elapsed() reading plus extra_time to `category` and to the running total
+        e = self.elapsed()  # elapsed() also moves self.start forward to now
+
+        self.add_time_to_record(self.base_category + category, e + extra_time)  # key carries the active subcategory prefix ('' when none is active)
+
         self.total += e + extra_time
 
+    def subcategory(self, name):  # returns a TimerSubcategory for `name`, meant to be used in a `with` block
+        self.elapsed()  # result discarded; called for its side effect of moving self.start to now
+
+        subcat = TimerSubcategory(self, name)  # snapshots the current base_category as the parent prefix
+        return subcat
+
     def summary(self):
         res = f"{self.total:.1f}s"
 
-        additions = [x for x in self.records.items() if x[1] >= 0.1]
+        additions = [(category, time_taken) for category, time_taken in self.records.items() if time_taken >= 0.1 and '/' not in category]
         if not additions:
             return res
 
@@ -34,5 +64,13 @@ class Timer:
 
         return res
 
+    def dump(self):  # raw timing data as a dict; 'records' is the live self.records dict, not a copy
+        return {'total': self.total, 'records': self.records}
+
     def reset(self):
         self.__init__()
+
+
+startup_timer = Timer()
+
+startup_record = None
diff --git a/modules/ui.py b/modules/ui.py
index 4e0cf776..b7459f08 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -11,7 +11,7 @@ import numpy as np
 from PIL import Image, PngImagePlugin  # noqa: F401
 from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call
 
-from modules import sd_hijack, sd_models, script_callbacks, ui_extensions, deepbooru, sd_vae, extra_networks, ui_common, ui_postprocessing, progress, ui_loadsave, errors, shared_items, ui_settings
+from modules import sd_hijack, sd_models, script_callbacks, ui_extensions, deepbooru, sd_vae, extra_networks, ui_common, ui_postprocessing, progress, ui_loadsave, errors, shared_items, ui_settings, timer
 from modules.ui_components import FormRow, FormGroup, ToolButton, FormHTML
 from modules.paths import script_path
 from modules.ui_common import create_refresh_button
@@ -388,7 +388,7 @@ def create_sampler_and_steps_selection(choices, tabname):
 
 
 def ordered_ui_categories():
-    user_order = {x.strip(): i * 2 + 1 for i, x in enumerate(shared.opts.ui_reorder)}
+    user_order = {x.strip(): i * 2 + 1 for i, x in enumerate(shared.opts.ui_reorder_list)}
 
     for _, category in sorted(enumerate(shared_items.ui_reorder_categories()), key=lambda x: user_order.get(x[1], x[0] * 2 + 0)):
         yield category
@@ -1595,3 +1595,5 @@ def setup_ui_api(app):
     app.add_api_route("/internal/quicksettings-hint", quicksettings_hint, methods=["GET"], response_model=List[QuicksettingsHint])
 
     app.add_api_route("/internal/ping", lambda: {}, methods=["GET"])
+
+    app.add_api_route("/internal/profile-startup", lambda: timer.startup_record, methods=["GET"])