18 files changed, 263 insertions, 32 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c6ab5e3..c3c57fe0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,17 @@
+## 1.3.1
+
+### Features:
+ * revert default cross attention optimization to Doggettx
+
+### Bug Fixes:
+ * fix bug: LoRA doesn't apply on dropdown list sd_lora
+ * fix png info always added even if setting is not enabled
+ * fix some fields not applying in xyz plot
+ * fix "hires. fix" prompt sharing same labels with txt2img_prompt
+ * fix lora hashes not being added properly to infotext if there is only one lora
+ * fix --use-cpu failing to work properly at startup
+ * make --disable-opt-split-attention command line option work again
+
 ## 1.3.0
 
 ### Features:
diff --git a/html/footer.html b/html/footer.html
index bad87ff6..1ce13295 100644
--- a/html/footer.html
+++ b/html/footer.html
@@ -5,6 +5,8 @@
          • 
         <a href="https://gradio.app">Gradio</a>
          • 
+        <a href="#" onclick="showProfile('./internal/profile-startup'); return false;">Startup profile</a>
+         • 
         <a href="/" onclick="javascript:gradioApp().getElementById('settings_restart_gradio').click(); return false">Reload UI</a>
 </div>
 <br />
diff --git a/javascript/profilerVisualization.js b/javascript/profilerVisualization.js
new file mode 100644
index 00000000..9d8e5f42
--- /dev/null
+++ b/javascript/profilerVisualization.js
@@ -0,0 +1,153 @@
+// Append a <tr> of `cellName` cells to `table`; an undefined item gets no cell and widens the preceding cell instead.
+function createRow(table, cellName, items) {
+    var tr = document.createElement('tr');
+    var res = [];
+    // res gets one entry per item: the created cell, or null where the item was undefined.
+    items.forEach(function(x, i) {
+        if (x === undefined) {
+            res.push(null);
+            return;
+        }
+
+        var td = document.createElement(cellName);
+        td.textContent = x;
+        tr.appendChild(td);
+        res.push(td);
+        // Count the run of undefined items right after this one so the cell can span them.
+        var colspan = 1;
+        for (var n = i + 1; n < items.length; n++) {
+            if (items[n] !== undefined) {
+                break;
+            }
+
+            colspan += 1;
+        }
+
+        if (colspan > 1) {
+            td.colSpan = colspan;
+        }
+    });
+
+    table.appendChild(tr);
+
+    return res;
+}
+// Fetch timing data from `path` and show it in a popup as an expandable table; entries under `cutoff` are folded into "others" rows.
+function showProfile(path, cutoff = 0.05) {
+    requestGet(path, {}, function(data) {
+        var table = document.createElement('table');
+        table.className = 'popup-table';
+        // Treat the overall total as one more record, then order the record keys by time, largest first.
+        data.records['total'] = data.total;
+        var keys = Object.keys(data.records).sort(function(a, b) {
+            return data.records[b] - data.records[a];
+        });
+        var items = keys.map(function(x) {
+            return {key: x, parts: x.split('/'), time: data.records[x]};
+        });
+        var maxLength = items.reduce(function(a, b) {
+            return Math.max(a, b.parts.length);
+        }, 0);
+        // Header row: the 'record' cell spans as many columns as the deepest '/'-separated key has parts.
+        var cols = createRow(table, 'th', ['record', 'seconds']);
+        cols[0].colSpan = maxLength;
+        // Relational operators compare arrays by their string form, so this is true when neither stringifies lower than the other.
+        function arraysEqual(a, b) {
+            return !(a < b || b < a);
+        }
+        // Appends rows for the records exactly `level + 1` parts deep whose leading parts equal `parent`; returns the rows a parent click should reveal.
+        var addLevel = function(level, parent, hide) {
+            var matching = items.filter(function(x) {
+                return x.parts[level] && !x.parts[level + 1] && arraysEqual(x.parts.slice(0, level), parent);
+            });
+            var sorted = matching.sort(function(a, b) {
+                return b.time - a.time;
+            });
+            var othersTime = 0;
+            var othersList = [];
+            var othersRows = [];
+            var childrenRows = [];
+            sorted.forEach(function(x) {
+                var visible = x.time >= cutoff && !hide;
+                // One cell per path part (missing parts stay undefined), followed by the formatted time.
+                var cells = [];
+                for (var i = 0; i < maxLength; i++) {
+                    cells.push(x.parts[i]);
+                }
+                cells.push(x.time.toFixed(3));
+                var cols = createRow(table, 'td', cells);
+                for (i = 0; i < level; i++) {
+                    cols[i].className = 'muted';
+                }
+                // createRow returns only the cells, so recover the <tr> from the first one.
+                var tr = cols[0].parentNode;
+                if (!visible) {
+                    tr.classList.add("hidden");
+                }
+                // Entries under the cutoff are summed into a single "others" row built after the loop.
+                if (x.time >= cutoff) {
+                    childrenRows.push(tr);
+                } else {
+                    othersTime += x.time;
+                    othersList.push(x.parts[level]);
+                    othersRows.push(tr);
+                }
+                // Nested rows are created hidden; clicking this entry's own cell reveals the returned ones, once.
+                var children = addLevel(level + 1, parent.concat([x.parts[level]]), true);
+                if (children.length > 0) {
+                    var cell = cols[level];
+                    var onclick = function() {
+                        cell.classList.remove("link");
+                        cell.removeEventListener("click", onclick);
+                        children.forEach(function(x) {
+                            x.classList.remove("hidden");
+                        });
+                    };
+                    cell.classList.add("link");
+                    cell.addEventListener("click", onclick);
+                }
+            });
+            // Summary row standing in for every entry at this level that fell under the cutoff.
+            if (othersTime > 0) {
+                var cells = [];
+                for (var i = 0; i < maxLength; i++) {
+                    cells.push(parent[i]);
+                }
+                cells.push(othersTime.toFixed(3));
+                cells[level] = 'others';
+                var cols = createRow(table, 'td', cells);
+                for (i = 0; i < level; i++) {
+                    cols[i].className = 'muted';
+                }
+                // Clicking "others" hides the summary row and reveals the individual entries it stood for.
+                var cell = cols[level];
+                var tr = cell.parentNode;
+                var onclick = function() {
+                    tr.classList.add("hidden");
+                    cell.classList.remove("link");
+                    cell.removeEventListener("click", onclick);
+                    othersRows.forEach(function(x) {
+                        x.classList.remove("hidden");
+                    });
+                };
+                // The tooltip lists the names folded into this row.
+                cell.title = othersList.join(", ");
+                cell.classList.add("link");
+                cell.addEventListener("click", onclick);
+
+                if (hide) {
+                    tr.classList.add("hidden");
+                }
+                // Returned with the other rows so that expanding the parent reveals the summary row too.
+                childrenRows.push(tr);
+            }
+
+            return childrenRows;
+        };
+
+        addLevel(0, []);
+
+        popup(table);
+    });
+}
+
diff --git a/javascript/ui_settings_hints.js b/javascript/ui_settings_hints.js
index e216852b..d088f949 100644
--- a/javascript/ui_settings_hints.js
+++ b/javascript/ui_settings_hints.js
@@ -42,7 +42,7 @@ onOptionsChanged(function() {
 function settingsHintsShowQuicksettings() {
     requestGet("./internal/quicksettings-hint", {}, function(data) {
         var table = document.createElement('table');
-        table.className = 'settings-value-table';
+        table.className = 'popup-table';
 
         data.forEach(function(obj) {
             var tr = document.createElement('tr');
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 0974056d..de905caa 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -62,7 +62,7 @@ parser.add_argument("--opt-split-attention-invokeai", action='store_true', help=
 parser.add_argument("--opt-split-attention-v1", action='store_true', help="prefer older version of split attention optimization for automatic choice of optimization")
 parser.add_argument("--opt-sdp-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization for automatic choice of optimization; requires PyTorch 2.*")
 parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization without memory efficient attention for automatic choice of optimization, makes image generation deterministic; requires PyTorch 2.*")
-parser.add_argument("--disable-opt-split-attention", action='store_true', help="does not do anything")
+parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization")
 parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
 parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
 parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
diff --git a/modules/images.py b/modules/images.py
index 30e9ffc5..a12d252b 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -503,10 +503,6 @@ def save_image_with_geninfo(image, geninfo, filename, extension=None, existing_p
 
     image_format = Image.registered_extensions()[extension]
 
-    existing_pnginfo = existing_pnginfo or {}
-    if opts.enable_pnginfo:
-        existing_pnginfo['parameters'] = geninfo
-
     if extension.lower() == '.png':
         if opts.enable_pnginfo:
             pnginfo_data = PngImagePlugin.PngInfo()
diff --git a/modules/launch_utils.py b/modules/launch_utils.py
index 6e9bb770..0c8c4db0 100644
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@@ -68,7 +68,13 @@ def git_tag():
     try:
         return subprocess.check_output([git, "describe", "--tags"], shell=False, encoding='utf8').strip()
     except Exception:
-        return "<none>"
+        try:
+            from pathlib import Path
+            changelog_md = Path(__file__).parent.parent / "CHANGELOG.md"
+            with changelog_md.open(encoding="utf-8") as file:
+                return next((line.strip() for line in file if line.strip()), "<none>")
+        except Exception:
+            return "<none>"
 
 
 def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str:
diff --git a/modules/processing.py b/modules/processing.py
index baa9b278..362ab4c2 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -595,8 +595,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
     try:
         # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint
-        override_checkpoint = p.override_settings.get('sd_model_checkpoint')
-        if override_checkpoint is not None and sd_models.checkpoint_alisases.get(override_checkpoint) is None:
+        if sd_models.checkpoint_alisases.get(p.override_settings.get('sd_model_checkpoint')) is None:
             p.override_settings.pop('sd_model_checkpoint', None)
             sd_models.reload_model_weights()
 
diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py
index ec1469d0..f755283c 100644
--- a/modules/script_callbacks.py
+++ b/modules/script_callbacks.py
@@ -1,11 +1,12 @@
 import inspect
+import os
 from collections import namedtuple
 from typing import Optional, Dict, Any
 
 from fastapi import FastAPI
 from gradio import Blocks
 
-from modules import errors
+from modules import errors, timer
 
 
 def report_exception(c, job):
@@ -123,6 +124,7 @@ def app_started_callback(demo: Optional[Blocks], app: FastAPI):
     for c in callback_map['callbacks_app_started']:
         try:
             c.callback(demo, app)
+            timer.startup_timer.record(os.path.basename(c.script))
         except Exception:
             report_exception(c, 'app_started_callback')
 
diff --git a/modules/scripts.py b/modules/scripts.py
index b901862d..99bf836a 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -5,7 +5,7 @@ from collections import namedtuple
 
 import gradio as gr
 
-from modules import shared, paths, script_callbacks, extensions, script_loading, scripts_postprocessing, errors
+from modules import shared, paths, script_callbacks, extensions, script_loading, scripts_postprocessing, errors, timer
 
 AlwaysVisible = object()
 
@@ -280,6 +280,7 @@ def load_scripts():
         finally:
             sys.path = syspath
             current_basedir = paths.script_path
+            timer.startup_timer.record(scriptfile.filename)
 
     global scripts_txt2img, scripts_img2img, scripts_postproc
 
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 487dfd60..3b6f95ce 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -68,6 +68,8 @@ def apply_optimizations(option=None):
 
     if selection == "None":
         matching_optimizer = None
+    elif selection == "Automatic" and shared.cmd_opts.disable_opt_split_attention:
+        matching_optimizer = None
     elif matching_optimizer is None:
         matching_optimizer = optimizers[0]
 
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 5f0ff513..b41aa419 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -57,7 +57,7 @@ class SdOptimizationSdpNoMem(SdOptimization):
     name = "sdp-no-mem"
     label = "scaled dot product without memory efficient attention"
     cmd_opt = "opt_sdp_no_mem_attention"
-    priority = 90
+    priority = 80
 
     def is_available(self):
         return hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(torch.nn.functional.scaled_dot_product_attention)
@@ -71,7 +71,7 @@ class SdOptimizationSdp(SdOptimizationSdpNoMem):
     name = "sdp"
     label = "scaled dot product"
     cmd_opt = "opt_sdp_attention"
-    priority = 80
+    priority = 70
 
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward
@@ -114,7 +114,7 @@ class SdOptimizationInvokeAI(SdOptimization):
 class SdOptimizationDoggettx(SdOptimization):
     name = "Doggettx"
     cmd_opt = "opt_split_attention"
-    priority = 20
+    priority = 90
 
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 232eb9c4..918f6fd6 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -314,8 +314,6 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
 
         timer.record("apply half()")
 
-    devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16
-    devices.dtype_vae = torch.float32 if shared.cmd_opts.no_half or shared.cmd_opts.no_half_vae else torch.float16
     devices.dtype_unet = model.model.diffusion_model.dtype
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
diff --git a/modules/shared.py b/modules/shared.py
index 3c7ae654..7025a754 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -6,6 +6,7 @@ import threading
 import time
 
 import gradio as gr
+import torch
 import tqdm
 
 import modules.interrogate
@@ -63,6 +64,9 @@ cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_op
 devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_esrgan, devices.device_codeformer = \
     (devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'esrgan', 'codeformer'])
 
+devices.dtype = torch.float32 if cmd_opts.no_half else torch.float16
+devices.dtype_vae = torch.float32 if cmd_opts.no_half or cmd_opts.no_half_vae else torch.float16
+
 device = devices.device
 weight_load_location = None if cmd_opts.lowram else "cpu"
 
@@ -474,7 +478,7 @@ options_templates.update(options_section(('ui', "User interface"), {
     "quicksettings_list": OptionInfo(["sd_model_checkpoint"], "Quicksettings list", ui_components.DropdownMulti, lambda: {"choices": list(opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that appear at the top of page rather than in settings tab").needs_restart(),
     "ui_tab_order": OptionInfo([], "UI tab order", ui_components.DropdownMulti, lambda: {"choices": list(tab_names)}).needs_restart(),
     "hidden_tabs": OptionInfo([], "Hidden UI tabs", ui_components.DropdownMulti, lambda: {"choices": list(tab_names)}).needs_restart(),
-    "ui_reorder": OptionInfo([], "txt2img/img2img UI item order", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_restart(),
+    "ui_reorder_list": OptionInfo([], "txt2img/img2img UI item order", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_restart(),
     "hires_fix_show_sampler": OptionInfo(False, "Hires fix: show hires sampler selection").needs_restart(),
     "hires_fix_show_prompts": OptionInfo(False, "Hires fix: show hires prompt and negative prompt").needs_restart(),
     "disable_token_counters": OptionInfo(False, "Disable prompt token counters").needs_restart(),
@@ -626,8 +630,8 @@ class Options:
             self.data['quicksettings_list'] = [i.strip() for i in self.data.get('quicksettings').split(',')]
 
         # 1.4.0 ui_reorder
-        if isinstance(self.data.get('ui_reorder'), str):
-            self.data['ui_reorder'] = [i.strip() for i in self.data.get('ui_reorder').split(',')]
+        if isinstance(self.data.get('ui_reorder'), str) and self.data.get('ui_reorder') and "ui_reorder_list" not in self.data:
+            self.data['ui_reorder_list'] = [i.strip() for i in self.data.get('ui_reorder').split(',')]
 
         bad_settings = 0
         for k, v in self.data.items():
diff --git a/modules/timer.py b/modules/timer.py
index ba92be33..da99e49f 100644
--- a/modules/timer.py
+++ b/modules/timer.py
@@ -1,11 +1,30 @@
 import time
 
 
+class TimerSubcategory:  # context manager: records made inside the block are keyed under "<base><category>/"
+    def __init__(self, timer, category):
+        self.timer = timer
+        self.category = category
+        self.start = None  # set by __enter__
+        self.original_base_category = timer.base_category  # prefix restored by __exit__
+
+    def __enter__(self):
+        self.start = time.time()
+        self.timer.base_category = self.original_base_category + self.category + "/"
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        elapsed_for_subcategory = time.time() - self.start
+        self.timer.base_category = self.original_base_category  # restore before recording under the parent prefix
+        self.timer.add_time_to_record(self.original_base_category + self.category, elapsed_for_subcategory)
+        self.timer.record(self.category)  # adds the time since the timer's last elapsed() call to this category and to total
+
+
 class Timer:
     def __init__(self):
         self.start = time.time()
         self.records = {}
         self.total = 0
+        self.base_category = ''
 
     def elapsed(self):
         end = time.time()
@@ -13,18 +32,29 @@ class Timer:
         self.start = end
         return res
 
-    def record(self, category, extra_time=0):
-        e = self.elapsed()
+    def add_time_to_record(self, category, amount):
         if category not in self.records:
             self.records[category] = 0
 
-        self.records[category] += e + extra_time
+        self.records[category] += amount
+
+    def record(self, category, extra_time=0):
+        e = self.elapsed()
+
+        self.add_time_to_record(self.base_category + category, e + extra_time)
+
         self.total += e + extra_time
 
+    def subcategory(self, name):
+        self.elapsed()
+
+        subcat = TimerSubcategory(self, name)
+        return subcat
+
     def summary(self):
         res = f"{self.total:.1f}s"
 
-        additions = [x for x in self.records.items() if x[1] >= 0.1]
+        additions = [(category, time_taken) for category, time_taken in self.records.items() if time_taken >= 0.1 and '/' not in category]
         if not additions:
             return res
 
@@ -34,5 +64,13 @@ class Timer:
 
         return res
 
+    def dump(self):
+        return {'total': self.total, 'records': self.records}
+
     def reset(self):
         self.__init__()
+
+
+startup_timer = Timer()
+
+startup_record = None
diff --git a/modules/ui.py b/modules/ui.py
index 4e0cf776..b7459f08 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -11,7 +11,7 @@ import numpy as np
 from PIL import Image, PngImagePlugin  # noqa: F401
 from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call
 
-from modules import sd_hijack, sd_models, script_callbacks, ui_extensions, deepbooru, sd_vae, extra_networks, ui_common, ui_postprocessing, progress, ui_loadsave, errors, shared_items, ui_settings
+from modules import sd_hijack, sd_models, script_callbacks, ui_extensions, deepbooru, sd_vae, extra_networks, ui_common, ui_postprocessing, progress, ui_loadsave, errors, shared_items, ui_settings, timer
 from modules.ui_components import FormRow, FormGroup, ToolButton, FormHTML
 from modules.paths import script_path
 from modules.ui_common import create_refresh_button
@@ -388,7 +388,7 @@ def create_sampler_and_steps_selection(choices, tabname):
 
 
 def ordered_ui_categories():
-    user_order = {x.strip(): i * 2 + 1 for i, x in enumerate(shared.opts.ui_reorder)}
+    user_order = {x.strip(): i * 2 + 1 for i, x in enumerate(shared.opts.ui_reorder_list)}
 
     for _, category in sorted(enumerate(shared_items.ui_reorder_categories()), key=lambda x: user_order.get(x[1], x[0] * 2 + 0)):
         yield category
@@ -1595,3 +1595,5 @@ def setup_ui_api(app):
     app.add_api_route("/internal/quicksettings-hint", quicksettings_hint, methods=["GET"], response_model=List[QuicksettingsHint])
 
     app.add_api_route("/internal/ping", lambda: {}, methods=["GET"])
+
+    app.add_api_route("/internal/profile-startup", lambda: timer.startup_record, methods=["GET"])
diff --git a/style.css b/style.css
index 571f4cf4..34b85b80 100644
--- a/style.css
+++ b/style.css
@@ -403,19 +403,29 @@ div#extras_scale_to_tab div.form{
     margin: 0 1.2em;
 }
 
-table.settings-value-table{
+table.popup-table{
     background: white;
     border-collapse: collapse;
     margin: 1em;
     border: 4px solid white;
 }
 
-table.settings-value-table td{
+table.popup-table td{
     padding: 0.4em;
     border: 1px solid #ccc;
     max-width: 36em;
 }
 
+table.popup-table .muted{
+    color: #aaa;
+}
+
+table.popup-table .link{
+    text-decoration: underline;
+    cursor: pointer;
+    font-weight: bold;
+}
+
 .ui-defaults-none{
     color: #aaa !important;
 }
diff --git a/webui.py b/webui.py
index 3df2cd1a..828259b8 100644
--- a/webui.py
+++ b/webui.py
@@ -22,7 +22,7 @@ logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not
 
 from modules import paths, timer, import_hook, errors, devices  # noqa: F401
 
-startup_timer = timer.Timer()
+startup_timer = timer.startup_timer
 
 import torch
 import pytorch_lightning   # noqa: F401 # pytorch_lightning should be imported after torch, but it re-enables warnings on import so import once to disable them
@@ -272,8 +272,8 @@ def initialize_rest(*, reload_script_modules=False):
 
     localization.list_localizations(cmd_opts.localizations_dir)
 
-    modules.scripts.load_scripts()
-    startup_timer.record("load scripts")
+    with startup_timer.subcategory("load scripts"):
+        modules.scripts.load_scripts()
 
     if reload_script_modules:
         for module in [module for name, module in sys.modules.items() if name.startswith("modules.ui")]:
@@ -428,9 +428,12 @@ def webui():
 
         ui_extra_networks.add_pages_to_demo(app)
 
-        modules.script_callbacks.app_started_callback(shared.demo, app)
-        startup_timer.record("scripts app_started_callback")
+        startup_timer.record("add APIs")
+
+        with startup_timer.subcategory("app_started_callback"):
+            modules.script_callbacks.app_started_callback(shared.demo, app)
 
+        timer.startup_record = startup_timer.dump()
         print(f"Startup time: {startup_timer.summary()}.")
 
         if cmd_opts.subpath:
@@ -455,6 +458,7 @@ def webui():
             # If we catch a keyboard interrupt, we want to stop the server and exit.
             shared.demo.close()
             break
+
         print('Restarting UI...')
         shared.demo.close()
         time.sleep(0.5)