diff options
| author | Leonard Kugis <leonard@kug.is> | 2025-12-23 00:08:47 +0100 |
|---|---|---|
| committer | Leonard Kugis <leonard@kug.is> | 2025-12-23 00:08:47 +0100 |
| commit | ec7598f568ff59ecc1eb51572f84d866b0180501 (patch) | |
| tree | 775944e30a140cc20857a316397d9538e9d1eff6 | |
| parent | 78f4448a21614ed01b7c4e60eb496889bc58076d (diff) | |
| download | xembu-ec7598f568ff59ecc1eb51572f84d866b0180501.tar.gz | |
Removed unnecessary overhead
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | modules/base.py | 9 | ||||
| -rw-r--r-- | modules/general.py | 214 | ||||
| -rw-r--r-- | xembu.py | 131 | ||||
| -rw-r--r-- | xembu_testdata/testdata.csv | 2 |
5 files changed, 129 insertions, 229 deletions
@@ -1,3 +1,5 @@ +work +out.pdf # Created by https://www.toptal.com/developers/gitignore/api/python,vim,linux,windows,macos # Edit at https://www.toptal.com/developers/gitignore?templates=python,vim,linux,windows,macos diff --git a/modules/base.py b/modules/base.py index 7d04644..6b32562 100644 --- a/modules/base.py +++ b/modules/base.py @@ -15,9 +15,6 @@ class Frame: @dataclass class BigFrame: - """ - Nimmt eine halbe PDF-Seite ein (Renderer packt 2 BigFrames pro Seite). - """ title: str def render(self, ax: Axes, mono_font: FontProperties) -> None: @@ -25,10 +22,10 @@ class BigFrame: @dataclass class ModuleResult: - summary_text: str # NEU: wird im Hauptprogramm in die Konsole gedruckt - frames: List[Frame] # Kacheln (optional) + summary_text: str + frames: List[Frame] bigframes: List[BigFrame] - pages: List[plt.Figure] # Vollseiten (optional) + pages: List[plt.Figure] class Module(Protocol): diff --git a/modules/general.py b/modules/general.py index f7eca38..3ad587a 100644 --- a/modules/general.py +++ b/modules/general.py @@ -12,57 +12,42 @@ from typing import Optional from .base import Frame, BigFrame, ModuleResult - MONEY_UNITS = {"€", "eur", "EUR", "euro", "EURO"} - def _is_money_unit(u: str) -> bool: return str(u).strip() in MONEY_UNITS - def compute_group_distribution(df: pd.DataFrame): - """ - Liefert: - group_summary: dict group -> info - per_person: DataFrame columns [person, contributed, share, balance] - per_group_person: DataFrame detail columns [group, person, contributed, usage, share, balance] - """ - # Explode Gruppen work = df.copy() work = work.explode("dist_groups") work["group"] = work["dist_groups"].fillna("").astype(str).str.strip() work = work[work["group"] != ""] - # C/U Normalisierung - work["flag"] = work["Distributionsflag"].astype(str).str.strip().str.upper() - work["person"] = work["Nutzer"].astype(str).str.strip() + work["flag"] = work["group_flag"].astype(str).str.strip().str.upper() + work["debitor"] = 
work["debitor"].astype(str).str.strip() - # Contributions (Geld) contrib = work[work["flag"] == "C"].copy() if len(contrib) > 0: bad_units = contrib[~contrib["unit"].apply(_is_money_unit)] if len(bad_units) > 0: raise ValueError( "Contribution (C) muss Geld-Einheit haben (z.B. € / EUR). " - f"Problemzeilen:\n{bad_units[['Datum','Nutzer','group','Positionsbezeichnung','Positionswert','unit']]}" + f"Problemzeilen:\n{bad_units[['date','debitor','group','position','val','unit']]}" ) - # Usage (Beliebige Einheit, pro Gruppe sollte es sinnvoll einheitlich sein) usage = work[work["flag"] == "U"].copy() - # Summen - contrib_by_gp = contrib.groupby(["group", "person"])["value"].sum().rename("contributed").reset_index() - contrib_tot = contrib.groupby("group")["value"].sum().rename("total_contrib").reset_index() + contrib_by_gp = contrib.groupby(["group", "debitor"])["val"].sum().rename("contributed").reset_index() + contrib_tot = contrib.groupby("group")["val"].sum().rename("total_contrib").reset_index() - usage_by_gp = usage.groupby(["group", "person"])["value"].sum().rename("usage").reset_index() - usage_tot = usage.groupby("group")["value"].sum().rename("total_usage").reset_index() + usage_by_gp = usage.groupby(["group", "debitor"])["val"].sum().rename("usage").reset_index() + usage_tot = usage.groupby("group")["val"].sum().rename("total_usage").reset_index() usage_unit = usage.groupby("group")["unit"].agg(lambda s: s.dropna().astype(str).unique().tolist()).reset_index() usage_unit = usage_unit.rename(columns={"unit": "usage_units"}) - participants = work.groupby("group")["person"].agg(lambda s: sorted(set(s.tolist()))).reset_index() - participants = participants.rename(columns={"person": "participants"}) + participants = work.groupby("group")["debitor"].agg(lambda s: sorted(set(s.tolist()))).reset_index() + participants = participants.rename(columns={"debitor": "participants"}) - # group_summary summary = ( participants.merge(contrib_tot, on="group", how="left") 
.merge(usage_tot, on="group", how="left") @@ -73,22 +58,19 @@ def compute_group_distribution(df: pd.DataFrame): summary["has_usage"] = summary["total_usage"].apply(lambda x: x > 0) summary["mode"] = summary.apply(lambda r: "usage" if r["has_usage"] else "equal", axis=1) - # Detail pro (group, person) detail = ( pd.DataFrame({"group": work["group"].unique()}) .assign(key=1) - .merge(pd.DataFrame({"person": work["person"].unique()}).assign(key=1), on="key") + .merge(pd.DataFrame({"debitor": work["debitor"].unique()}).assign(key=1), on="key") .drop(columns=["key"]) ) - # Nur relevante Paare, die in der Gruppe vorkommen - gp_person = work[["group", "person"]].drop_duplicates() - detail = detail.merge(gp_person, on=["group", "person"], how="inner") + gp_debitor = work[["group", "debitor"]].drop_duplicates() + detail = detail.merge(gp_debitor, on=["group", "debitor"], how="inner") - detail = detail.merge(contrib_by_gp, on=["group", "person"], how="left").merge(usage_by_gp, on=["group", "person"], how="left") + detail = detail.merge(contrib_by_gp, on=["group", "debitor"], how="left").merge(usage_by_gp, on=["group", "debitor"], how="left") detail["contributed"] = detail["contributed"].fillna(0.0) detail["usage"] = detail["usage"].fillna(0.0) - # Shares berechnen pro Gruppe shares = [] for _, row in summary.iterrows(): g = row["group"] @@ -97,7 +79,6 @@ def compute_group_distribution(df: pd.DataFrame): n = len(parts) if parts else 0 g_detail = detail[detail["group"] == g].copy() - # usage-mode, sobald es irgendeine U-Position gibt (auch wenn total_usage==0 → fallback) g_has_any_u = (usage["group"] == g).any() if g_has_any_u: @@ -114,25 +95,22 @@ def compute_group_distribution(df: pd.DataFrame): mode = "equal" g_detail["mode"] = mode - shares.append(g_detail[["group", "person", "share", "mode"]]) + shares.append(g_detail[["group", "debitor", "share", "mode"]]) - shares_df = pd.concat(shares, ignore_index=True) if shares else 
pd.DataFrame(columns=["group","person","share","mode"]) - detail = detail.merge(shares_df, on=["group", "person"], how="left") + shares_df = pd.concat(shares, ignore_index=True) if shares else pd.DataFrame(columns=["group","debitor","share","mode"]) + detail = detail.merge(shares_df, on=["group", "debitor"], how="left") detail["share"] = detail["share"].fillna(0.0) detail["balance"] = detail["contributed"] - detail["share"] - # per_person totals - per_person = detail.groupby("person")[["contributed", "share", "balance"]].sum().reset_index() - per_person = per_person.sort_values("person") + per_debitor = detail.groupby("debitor")[["contributed", "share", "balance"]].sum().reset_index() + per_debitor = per_debitor.sort_values("debitor") - # summary erweitern - # "Sobald es eine Position mit U gibt" zählt, auch wenn total_usage==0 (fallback) has_any_u = usage.groupby("group").size().rename("u_count").reset_index() summary = summary.merge(has_any_u, on="group", how="left") summary["u_count"] = summary["u_count"].fillna(0).astype(int) summary["mode"] = summary["u_count"].apply(lambda c: "usage" if c > 0 else "equal") - return summary, per_person, detail + return summary, per_debitor, detail @dataclass class GroupTimeSeries: @@ -142,14 +120,13 @@ class GroupTimeSeries: usage_units: List[str] xlim_start: pd.Timestamp xlim_end: pd.Timestamp - contrib_cum: Dict[str, pd.Series] # € kumulativ - usage_cum: Dict[str, pd.Series] # unit kumulativ (z.B. 
km, stk) - share_cum: Dict[str, pd.Series] # € kumulativ (Anteil) - ratio: Dict[str, pd.Series] # Anteil/Ausgelegt + contrib_cum: Dict[str, pd.Series] + usage_cum: Dict[str, pd.Series] + share_cum: Dict[str, pd.Series] + ratio: Dict[str, pd.Series] def _auto_time_limits(tmin: pd.Timestamp, tmax: pd.Timestamp) -> tuple[pd.Timestamp, pd.Timestamp]: - # +/- 5% Intervall, bei 0 Intervall fallback 30 Minuten dt = tmax - tmin if dt <= pd.Timedelta(0): margin = pd.Timedelta(minutes=30) @@ -159,28 +136,25 @@ def _auto_time_limits(tmin: pd.Timestamp, tmax: pd.Timestamp) -> tuple[pd.Timest def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTimeSeries]: - # explode Gruppen und filtere work = df.copy().explode("dist_groups") work["group"] = work["dist_groups"].fillna("").astype(str).str.strip() work = work[work["group"] == group].copy() - work = work[pd.notna(work["Datum"])] + work = work[pd.notna(work["date"])] if work.empty: return None - work["person"] = work["Nutzer"].astype(str).str.strip() - work["flag"] = work["Distributionsflag"].astype(str).str.strip().str.upper() + work["debitor"] = work["debitor"].astype(str).str.strip() + work["flag"] = work["group_flag"].astype(str).str.strip().str.upper() - participants = sorted(work["person"].unique().tolist()) + participants = sorted(work["debitor"].unique().tolist()) - # timeline: alle Zeitpunkte der Gruppe (unique, sortiert) - times = pd.DatetimeIndex(sorted(work["Datum"].unique())) + times = pd.DatetimeIndex(sorted(work["date"].unique())) tmin, tmax = times.min(), times.max() x0, x1 = _auto_time_limits(tmin, tmax) times = times.union(pd.DatetimeIndex([x0, x1])).sort_values() - # usage units (kann leer sein, oder mehrere – wir zeigen dann z.B. 
"km/stk") usage_units = sorted( work.loc[work["flag"] == "U", "unit"] .dropna() @@ -190,25 +164,21 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim .tolist() ) - # pro Person: Beiträge (C) und Nutzung (U) als kumulatives step-series auf timeline contrib_cum: Dict[str, pd.Series] = {} usage_cum: Dict[str, pd.Series] = {} for p in participants: - c = work[(work["person"] == p) & (work["flag"] == "C")].copy() - u = work[(work["person"] == p) & (work["flag"] == "U")].copy() + c = work[(work["debitor"] == p) & (work["flag"] == "C")].copy() + u = work[(work["debitor"] == p) & (work["flag"] == "U")].copy() - # Beiträge: nach Datum aggregieren, reindex auf timeline, kumulieren - c_by_t = c.groupby("Datum")["value"].sum() if not c.empty else pd.Series(dtype=float) + c_by_t = c.groupby("date")["val"].sum() if not c.empty else pd.Series(dtype=float) c_by_t = c_by_t.reindex(times, fill_value=0.0) contrib_cum[p] = c_by_t.cumsum() - # Nutzung: nach Datum aggregieren, reindex auf timeline, kumulieren - u_by_t = u.groupby("Datum")["value"].sum() if not u.empty else pd.Series(dtype=float) + u_by_t = u.groupby("date")["val"].sum() if not u.empty else pd.Series(dtype=float) u_by_t = u_by_t.reindex(times, fill_value=0.0) usage_cum[p] = u_by_t.cumsum() - # share über Zeit: kumulative total contributions verteilt total_contrib = sum((contrib_cum[p] for p in participants), start=pd.Series(0.0, index=times)) total_usage = sum((usage_cum[p] for p in participants), start=pd.Series(0.0, index=times)) @@ -217,9 +187,7 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim share_cum: Dict[str, pd.Series] = {} if has_any_u: - # usage-mode sobald U existiert; solange total_usage==0 => equal fallback for p in participants: - # share = total_contrib * usage_p / total_usage, sonst total_contrib/n usage_p = usage_cum[p] with np.errstate(divide="ignore", invalid="ignore"): share_usage = total_contrib * (usage_p / total_usage.replace(0.0, 
np.nan)) @@ -227,7 +195,6 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim share = share_usage.where(total_usage > 0, share_equal) share_cum[p] = share.fillna(0.0) else: - # equal-mode immer equal = total_contrib / float(n) for p in participants: share_cum[p] = equal @@ -254,13 +221,6 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim @dataclass class GroupChartBigFrame(BigFrame): - """ - kind: - - 'usage_cum' - - 'contrib_cum' - - 'share_cum' - - 'ratio' - """ gts: GroupTimeSeries kind: str @@ -271,48 +231,45 @@ class GroupChartBigFrame(BigFrame): formatter = mdates.ConciseDateFormatter(locator) ax.xaxis.set_major_locator(locator) ax.xaxis.set_major_formatter(formatter) - ax.xaxis.get_offset_text().set_visible(False) # <-- "2025-Dec" weg + ax.xaxis.get_offset_text().set_visible(False) ax.set_xlim(self.gts.xlim_start, self.gts.xlim_end) if self.kind == "usage_cum": series_map = self.gts.usage_cum unit = "/".join(self.gts.usage_units) if self.gts.usage_units else "" - ax.set_ylabel(f"Verbrauch kumulativ {unit}".strip(), fontproperties=mono_font) + ax.set_ylabel(f"Usage cumulative {unit}".strip(), fontproperties=mono_font) elif self.kind == "contrib_cum": series_map = self.gts.contrib_cum - ax.set_ylabel("Contributions kumulativ €", fontproperties=mono_font) + ax.set_ylabel("Contribution cumulative €", fontproperties=mono_font) elif self.kind == "share_cum": series_map = self.gts.share_cum - ax.set_ylabel("Anteil kumulativ €", fontproperties=mono_font) + ax.set_ylabel("Share cumulative €", fontproperties=mono_font) elif self.kind == "ratio": series_map = self.gts.ratio - ax.set_ylabel("Anteil / Ausgelegt", fontproperties=mono_font) - ax.set_yscale("log") # <-- LOG + ax.set_ylabel("Share / Contribution ratio (logarithmic)", fontproperties=mono_font) + ax.set_yscale("log") else: raise ValueError(f"Unknown kind: {self.kind}") - # Plot + Sammeln für robuste y-Limits all_vals = [] - min_ratio = 1e-3 # 
„quasi 0“ für log, damit Kurven am Anfang nicht "mittendrin" starten + min_ratio = 1e-3 for p in self.gts.participants: y = series_map[p].copy() if self.kind == "ratio": - # NaN/0/Inf behandeln, damit die Kurve von Anfang an existiert y = y.replace([np.inf, -np.inf], np.nan) y = y.fillna(min_ratio) y = y.clip(lower=min_ratio) else: y = y.replace([np.inf, -np.inf], np.nan).fillna(0.0) - # Steps für kumulative Kurven ist meist sauberer ax.plot(self.gts.times, y.values, label=p, linewidth=1, drawstyle="steps-post") v = y.values @@ -320,7 +277,6 @@ class GroupChartBigFrame(BigFrame): if v.size: all_vals.append(v) - # y-Limits so setzen, dass wirklich ALLE Werte sichtbar sind if all_vals: vv = np.concatenate(all_vals) @@ -329,14 +285,14 @@ class GroupChartBigFrame(BigFrame): if vmax <= 0: ax.set_ylim(0, 1) else: - ax.set_ylim(0, vmax * 1.08) # kleiner Puffer + ax.set_ylim(0, vmax * 1.08) elif self.kind == "ratio": vpos = vv[vv > 0] if vpos.size: vmin = float(np.nanmin(vpos)) vmax = float(np.nanmax(vpos)) - ax.set_ylim(vmin / 1.5, vmax * 1.5) # log: multiplicative padding + ax.set_ylim(vmin / 1.5, vmax * 1.5) ax.grid(True, alpha=0.2) @@ -345,7 +301,6 @@ class GroupChartBigFrame(BigFrame): for t in leg.get_texts(): t.set_fontproperties(mono_font) - # Tick-Fonts monospace for tick in ax.get_xticklabels() + ax.get_yticklabels(): tick.set_fontproperties(mono_font) @@ -358,12 +313,11 @@ class TextFrame(Frame): @dataclass class PlotBigFrame(BigFrame): - per_person: pd.DataFrame # erwartet Spalten: person, contributed, share + per_debitor: pd.DataFrame def render(self, ax: Axes, mono_font: FontProperties) -> None: - # Axes ist schon da, wir zeichnen direkt hinein ax.axis("on") - plot_df = self.per_person.set_index("person")[["contributed", "share"]] + plot_df = self.per_debitor.set_index("debitor")[["contributed", "share"]] plot_df.plot.bar(ax=ax) ax.tick_params(axis="x", rotation=0) leg = ax.legend(prop=mono_font) @@ -386,38 +340,37 @@ class GeneralModule: mono_font = 
context.get("mono_font") or FontProperties(family="DejaVu Sans Mono", size=8) - group_summary, per_person, detail = compute_group_distribution(df) + group_summary, per_debitor, detail = compute_group_distribution(df) - balance = {r["person"]: float(r["balance"]) for _, r in per_person.iterrows()} + balance = {r["debitor"]: float(r["balance"]) for _, r in per_debitor.iterrows()} payments = self._minimize_payments(balance) - # ---- NEU: Textauswertung für Konsole summary_lines = [] - summary_lines.append("General – Verteilung über Distributionsgruppen") + summary_lines.append("General") summary_lines.append("") - summary_lines.append("Gruppen:") + summary_lines.append("Goups:") for _, r in group_summary.sort_values("group").iterrows(): g = r["group"] total_c = float(r.get("total_contrib", 0.0)) u_count = int(r.get("u_count", 0)) mode = "usage" if u_count > 0 else "equal" participants = r.get("participants", []) or [] - summary_lines.append(f" - {g}: {total_c:.2f} €; mode={mode}; teilnehmer={len(participants)}") + summary_lines.append(f" - {g}: {total_c:.2f} €; mode={mode}; participants={len(participants)}") summary_lines.append("") - summary_lines.append("Personen (Summe über alle Gruppen):") - for _, r in per_person.sort_values("person").iterrows(): + summary_lines.append("Debitors (total):") + for _, r in per_debitor.sort_values("debitor").iterrows(): summary_lines.append( - f" - {r['person']}: ausgelegt={r['contributed']:.2f} €; anteil={r['share']:.2f} €; saldo={r['balance']:.2f} €" + f" - {r['debitor']}: contributed={r['contributed']:.2f} €; share={r['share']:.2f} €; balance={r['balance']:.2f} €" ) summary_lines.append("") - summary_lines.append("Ausgleich (minimiert):") + summary_lines.append("Compensation (minimized):") if payments: for p, r, a in payments: summary_lines.append(f" - {p} → {r}: {a:.2f} €") else: - summary_lines.append(" (keine Zahlungen nötig)") + summary_lines.append(" (No compensation required)") summary_text = "\n".join(summary_lines) @@ 
-426,45 +379,42 @@ class GeneralModule: pages: List[plt.Figure] = [] if want_pdf: - frames.extend(self._make_frames(group_summary, per_person, payments)) + frames.extend(self._make_frames(group_summary, per_debitor, payments)) - # BigFrame: Gesamt-Balkenplot bleibt (wie vorher) bigframes.append( PlotBigFrame( - title="General – Ausgelegt vs Anteil (Summe über Gruppen)", - per_person=per_person.copy(), + title="General – Shares vs. Contributions (total)", + per_debitor=per_debitor.copy(), ) ) - # NEU: pro Distributionsgruppe 4 BigFrame-Charts for g in sorted(group_summary["group"].unique().tolist()): gts = _prepare_group_timeseries(df, g) if not gts: continue bigframes.append(GroupChartBigFrame( - title=f"{g} – Kumulativer Verbrauch pro Person", + title=f"{g} – Cumulative usage per debitor", gts=gts, kind="usage_cum", )) bigframes.append(GroupChartBigFrame( - title=f"{g} – Kumulative Contributions pro Person", + title=f"{g} – Cumulative contributions per debitor", gts=gts, kind="contrib_cum", )) bigframes.append(GroupChartBigFrame( - title=f"{g} – Anteil pro Person (zeitlicher Verlauf)", + title=f"{g} – Share per debitor", gts=gts, kind="share_cum", )) bigframes.append(GroupChartBigFrame( - title=f"{g} – Verhältnis Anteil/Ausgelegt (zeitlicher Verlauf)", + title=f"{g} – Share / Contribution ratio (logarithmic)", gts=gts, kind="ratio", )) - # Pages: nur noch Detailseiten, keine Balkenplot-Seite mehr - pages.extend(self._make_pages(group_summary, per_person, detail, mono_font)) + pages.extend(self._make_pages(group_summary, per_debitor, detail, mono_font)) return ModuleResult(summary_text=summary_text, frames=frames, bigframes=bigframes, pages=pages) @@ -493,57 +443,52 @@ class GeneralModule: j += 1 return out - def _make_frames(self, group_summary: pd.DataFrame, per_person: pd.DataFrame, payments: List[Tuple[str,str,float]]) -> List[Frame]: - # Frame 1: Gruppen-Übersicht - lines = ["Gruppenübersicht:"] + def _make_frames(self, group_summary: pd.DataFrame, 
per_debitor: pd.DataFrame, payments: List[Tuple[str,str,float]]) -> List[Frame]: + lines = ["Groups:"] for _, r in group_summary.sort_values("group").iterrows(): g = r["group"] total_c = float(r.get("total_contrib", 0.0)) u_count = int(r.get("u_count", 0)) parts = r.get("participants", []) mode = "usage" if u_count > 0 else "equal" - lines.append(f"- {g}: {total_c:.2f} €; mode={mode}; teilnehmer={len(parts)}") + lines.append(f"- {g}: {total_c:.2f} €; mode={mode}; participants={len(parts)}") - f1 = TextFrame(title="General: Gruppen", text="\n".join(lines)) + f1 = TextFrame(title="General: Groups", text="\n".join(lines)) - # Frame 2: Personen-Totale - lines = ["Personen (Summe über alle Gruppen):", "Person | contributed | share | balance"] - for _, r in per_person.iterrows(): - lines.append(f"{r['person']}: {r['contributed']:.2f} €; {r['share']:.2f} €; {r['balance']:.2f} €") - f2 = TextFrame(title="General: Personen", text="\n".join(lines)) + lines = ["Debitor total:", "debitor | contributed | share | balance"] + for _, r in per_debitor.iterrows(): + lines.append(f"{r['debitor']} | {r['contributed']:.2f} € | {r['share']:.2f} € | {r['balance']:.2f} €") + f2 = TextFrame(title="General: Debitors", text="\n".join(lines)) - # Frame 3: Ausgleich - lines = ["Ausgleich (minimiert):"] + lines = ["Compensation (minimized):"] if payments: for p, r, a in payments: lines.append(f"{p} → {r}: {a:.2f} €") else: - lines.append("(keine Zahlungen nötig)") - f3 = TextFrame(title="General: Ausgleich", text="\n".join(lines)) + lines.append("(No compensation required)") + f3 = TextFrame(title="General: Compensation", text="\n".join(lines)) return [f1, f2, f3] - def _make_pages(self, group_summary, per_person, detail, mono_font) -> List[plt.Figure]: + def _make_pages(self, group_summary, per_debitor, detail, mono_font) -> List[plt.Figure]: pages: List[plt.Figure] = [] - # Textseiten: pro Gruppe Detail (ggf. 
mehrere) - # Wir machen je Gruppe eine Seite, wenn es nicht zu viele sind for g in sorted(detail["group"].unique().tolist()): - gdet = detail[detail["group"] == g].sort_values("person") + gdet = detail[detail["group"] == g].sort_values("debitor") total_c = float(group_summary[group_summary["group"] == g]["total_contrib"].iloc[0]) if (group_summary["group"] == g).any() else 0.0 u_count = int(group_summary[group_summary["group"] == g]["u_count"].iloc[0]) if (group_summary["group"] == g).any() else 0 mode = "usage" if u_count > 0 else "equal" lines = [ - f"Gruppe: {g}", + f"Group: {g}", f"Total Contribution: {total_c:.2f} €", f"Mode: {mode}", "", - "Person | contributed | usage | share | balance", + "debitor | contributed | usage | share | balance", ] for _, r in gdet.iterrows(): lines.append( - f"{r['person']}: {r['contributed']:.2f} €; {r['usage']:.4f}; {r['share']:.2f} €; {r['balance']:.2f} €" + f"{r['debitor']} | {r['contributed']:.2f} € | {r['usage']:.4f} | {r['share']:.2f} € | {r['balance']:.2f} €" ) fig, ax = plt.subplots(figsize=(8.27, 11.69)) @@ -551,10 +496,5 @@ class GeneralModule: ax.text(0, 1, "\n".join(lines), va="top", ha="left", fontproperties=mono_font) pages.append(fig) - # Optional: Nutzungsverläufe für Gruppen mit unit "km" - # (nur wenn U vorhanden und unit in den U-rows km ist) - # Dafür brauchen wir zeitliche Daten → aus detail nicht möglich, also direkt aus df wäre besser. - # Wenn du willst, ergänze ich das als eigene Seite pro km-Gruppe auf Basis der Original-DF. 
- return pages @@ -16,18 +16,17 @@ from modules.general import GeneralModule from datetime import datetime CSV_COLUMNS = [ - "Datum", - "Nutzer", - "Distributionsgruppe", - "Distributionsflag", - "Positionsbezeichnung", - "Positionswert", - "Modules", - "Parameters", - "Beleg", + "date", + "debitor", + "group", + "group_flag", + "position", + "value", + "modules", + "parameters", + "receipt", ] - def _pick_mono_font(size: int = 8) -> font_manager.FontProperties: for fam in ["Inconsolata", "DejaVu Sans Mono", "monospace"]: try: @@ -37,13 +36,11 @@ def _pick_mono_font(size: int = 8) -> font_manager.FontProperties: return font_manager.FontProperties(size=size) def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, total_pages: int): - # Margins: links/rechts 2cm, oben/unten 1cm margin_lr_cm = 2.0 margin_tb_cm = 1.0 - # Zusätzlicher Abstand (Bänder) zwischen Header/Footer und Content - header_gap_cm = 1.3 # mehr Abstand nach unten - footer_gap_cm = 2.0 # mehr Abstand nach oben (2-zeiliger Footer) + header_gap_cm = 1.3 + footer_gap_cm = 2.0 cm_to_in = 1 / 2.54 margin_lr_in = margin_lr_cm * cm_to_in @@ -58,27 +55,22 @@ def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, t header_gap = header_gap_in / h_in footer_gap = footer_gap_in / h_in - # Content-Bereich: innerhalb der Margins + zusätzlich Platz für Header/Footer top = 1 - my - header_gap bottom = my + footer_gap if top <= bottom: - # Fallback, falls es zu eng wird top = 1 - my bottom = my fig.subplots_adjust(left=mx, right=1 - mx, top=top, bottom=bottom) - # Header/Footer Positionen: jeweils an der inneren Kante der Margins left_x = mx right_x = 1 - mx header_y = 1 - my footer_y = my - # Kopfzeile fig.text(left_x, header_y, title, ha="left", va="top", fontproperties=mono_font, fontsize=9) fig.text(right_x, header_y, generated_at, ha="right", va="top", fontproperties=mono_font, fontsize=9) - # Fußzeile links (zweizeilig) footer_left = ( "xembu - eXtensible 
Event-based Multiuser Bookkeeping Utility\n" "Copyright (C) 2024 Leonard Kugis\n" @@ -87,7 +79,6 @@ def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, t fig.text(left_x, footer_y, footer_left, ha="left", va="bottom", fontproperties=mono_font, fontsize=7, linespacing=1.1) - # Fußzeile rechts fig.text(right_x, footer_y, f"{page} / {total_pages}", ha="right", va="bottom", fontproperties=mono_font, fontsize=8) @@ -112,21 +103,18 @@ def parse_value_unit(s: str): num_str = " ".join(parts[:-1]).strip().replace(",", ".").replace("€", "").strip() return float(num_str), unit - def parse_modules_list(s: str) -> List[str]: if s is None or (isinstance(s, float) and pd.isna(s)): return [] mods = [m.strip() for m in str(s).split(",")] return [m for m in mods if m] - def parse_groups_list(s: str) -> List[str]: if s is None or (isinstance(s, float) and pd.isna(s)): return [] gs = [g.strip() for g in str(s).split(",")] return [g for g in gs if g] - def parse_parameters_list(s: str) -> List[tuple]: if s is None or (isinstance(s, float) and pd.isna(s)): return [] @@ -157,24 +145,23 @@ def parse_parameters_list(s: str) -> List[tuple]: tuples.append(tuple(vals)) return tuples - def parse_csv(path: str) -> pd.DataFrame: df = _read_csv_flexible(path) - df["Datum"] = pd.to_datetime(df["Datum"], format="%Y-%m-%d-%H-%M-%S", errors="coerce") - df["Nutzer"] = df["Nutzer"].astype(str).str.strip() - df["Distributionsflag"] = df["Distributionsflag"].astype(str).str.strip().str.upper() - df["Positionsbezeichnung"] = df["Positionsbezeichnung"].astype(str).str.strip() + df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d-%H-%M-%S", errors="coerce") + df["debitor"] = df["debitor"].astype(str).str.strip() + df["group_flag"] = df["group_flag"].astype(str).str.strip().str.upper() + df["position"] = df["position"].astype(str).str.strip() - df["dist_groups"] = df["Distributionsgruppe"].apply(parse_groups_list) - df["modules_list"] = df["Modules"].apply(parse_modules_list) 
- df["params_list"] = df["Parameters"].apply(parse_parameters_list) + df["dist_groups"] = df["group"].apply(parse_groups_list) + df["modules_list"] = df["modules"].apply(parse_modules_list) + df["params_list"] = df["parameters"].apply(parse_parameters_list) - vals_units = df["Positionswert"].apply(parse_value_unit) - df["value"] = vals_units.apply(lambda x: x[0]) + vals_units = df["value"].apply(parse_value_unit) + df["val"] = vals_units.apply(lambda x: x[0]) df["unit"] = vals_units.apply(lambda x: x[1]) - df["Beleg"] = df["Beleg"].where(df["Beleg"].notna(), "") + df["receipt"] = df["receipt"].where(df["receipt"].notna(), "") return df @@ -193,32 +180,32 @@ def _build_positions_table_figs(df: pd.DataFrame, base_dir: str, mono_font): figures = [] columns = [ - "Datum", "Nutzer", "Distributionsgruppe", "Flag", - "Positionsbezeichnung", "Positionswert", - "Modules", "Parameters", "Beleg", "SHA1", + "Date", "Debitor", "Group", "Flag", + "Position", "Value", + "Modules", "Parameters", "Receipt", "SHA1", ] table_data = [] - for _, row in df.sort_values("Datum").iterrows(): - sha1 = compute_hash(str(row["Beleg"]), base_dir=base_dir) if row["Beleg"] else None + for _, row in df.sort_values("date").iterrows(): + sha1 = compute_hash(str(row["receipt"]), base_dir=base_dir) if row["receipt"] else None sha1_fmt = "" if sha1: sha1_fmt = sha1[: len(sha1) // 2] + "\n" + sha1[len(sha1) // 2 :] - groups_str = ", ".join(row["dist_groups"]) if isinstance(row["dist_groups"], list) else str(row["Distributionsgruppe"]) - mods_str = ", ".join(row["modules_list"]) if isinstance(row["modules_list"], list) else str(row["Modules"]) - params_str = str(row["params_list"]) if isinstance(row["params_list"], list) else str(row["Parameters"]) + groups_str = ", ".join(row["dist_groups"]) if isinstance(row["dist_groups"], list) else str(row["group"]) + mods_str = ", ".join(row["modules_list"]) if isinstance(row["modules_list"], list) else str(row["modules"]) + params_str = str(row["params_list"]) if 
isinstance(row["params_list"], list) else str(row["parameters"]) table_data.append([ - row["Datum"].strftime("%Y-%m-%d %H:%M:%S") if pd.notna(row["Datum"]) else "INVALID", - row["Nutzer"], + row["date"].strftime("%Y-%m-%d %H:%M:%S") if pd.notna(row["date"]) else "INVALID", + row["debitor"], groups_str, - row["Distributionsflag"], - row["Positionsbezeichnung"], - f"{row['value']:.4f} {row['unit']}".strip(), + row["group_flag"], + row["position"], + f"{row['val']:.4f} {row['unit']}".strip(), mods_str, params_str, - str(row["Beleg"]) if row["Beleg"] else "", + str(row["receipt"]) if row["receipt"] else "", sha1_fmt, ]) @@ -265,7 +252,6 @@ def _build_positions_table_figs(df: pd.DataFrame, base_dir: str, mono_font): return figures - def _separator_page(pdf: PdfPages, title: str, mono_font): fig, ax = plt.subplots(figsize=(8.27, 11.69)) ax.axis("off") @@ -338,16 +324,14 @@ def create_pdf( generated_at = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") - # 1) Alle Seiten als Figures sammeln (damit wir total_pages kennen) figs: List[plt.Figure] = [] figs.extend(_build_positions_table_figs(df, base_dir=base_dir, mono_font=mono_font)) figs.extend(_build_frame_figs(module_frames, mono_font=mono_font)) figs.extend(_build_bigframe_figs(module_bigframes, mono_font=mono_font)) - figs.extend(module_pages) # bereits fertige Figures aus Modulen + figs.extend(module_pages) total_pages = len(figs) - # 2) Speichern mit Header/Footer + Seitenzählung with PdfPages(pdf_path) as pdf: for i, fig in enumerate(figs, start=1): _decorate_figure(fig, mono_font, title=title, generated_at=generated_at, page=i, total_pages=total_pages) @@ -355,50 +339,37 @@ def create_pdf( plt.close(fig) def create_bundle(archive_path: str, csv_path: str, df: pd.DataFrame, base_dir: str, pdf_path: Optional[str] = None): - """ - Bundle enthält: CSV, optional PDF, und alle Belege (relative Pfade aus 'Beleg' relativ zu base_dir). - Ausgabe: .tar.zst (über externes zstd). 
- """ os.makedirs(os.path.dirname(os.path.abspath(archive_path)) or ".", exist_ok=True) - # Wir bauen ein temporäres .tar daneben und komprimieren danach. tar_path = archive_path if tar_path.endswith(".zst"): - tar_path = tar_path[:-4] # strip ".zst" + tar_path = tar_path[:-4] if not tar_path.endswith(".tar"): tar_path = tar_path + ".tar" - # Sammle Belege beleg_paths = [] - for p in df["Beleg"].astype(str).tolist(): + for p in df["receipt"].astype(str).tolist(): p = p.strip() if p: beleg_paths.append(p) with tarfile.open(tar_path, "w") as tar: - # CSV tar.add(csv_path, arcname=os.path.basename(csv_path)) - # PDF optional if pdf_path and os.path.exists(pdf_path): tar.add(pdf_path, arcname=os.path.basename(pdf_path)) - # Belege missing = [] for rel in sorted(set(beleg_paths)): abs_path = rel if os.path.isabs(rel) else os.path.join(base_dir, rel) if os.path.exists(abs_path): - # arcname: möglichst den relativen Pfad behalten arcname = os.path.basename(rel) if os.path.isabs(rel) else rel tar.add(abs_path, arcname=arcname) else: missing.append(rel) - # zstd komprimieren → archive_path - # zstd -o <archive> <tar> subprocess.run(["zstd", "-T0", "-o", archive_path, tar_path], check=True) - # tar löschen (zstd bekommt eine Kopie) try: os.remove(tar_path) except Exception: @@ -409,13 +380,12 @@ def create_bundle(archive_path: str, csv_path: str, df: pd.DataFrame, base_dir: for m in missing: print(f" - {m}") - def main(): parser = argparse.ArgumentParser() - parser.add_argument("csv", help="Pfad zur CSV-Datei") - parser.add_argument("--title", "-t", help="Titel für PDF-Kopfzeile (optional)") - parser.add_argument("--pdf", "-p", help="Pfad zur Ziel-PDF (optional)") - parser.add_argument("--bundle", "-b", help="Pfad zum Bundle (.tar.zst), enthält CSV, PDF (falls erzeugt) und Belege (optional)") + parser.add_argument("csv", help="CSV path") + parser.add_argument("--title", "-t", help="PDF header title (optional)") + parser.add_argument("--pdf", "-p", help="PDF path (optional)") 
+ parser.add_argument("--bundle", "-b", help="Path to bundle (.tar.zst), containing CSV, PDF and receipts (optional)") args = parser.parse_args() csv_path = os.path.abspath(args.csv) @@ -424,20 +394,17 @@ def main(): title = args.title if args.title else os.path.basename(csv_path) df = parse_csv(csv_path) - if df["Datum"].isna().any(): - bad = df[df["Datum"].isna()][CSV_COLUMNS] + if df["date"].isna().any(): + bad = df[df["date"].isna()][CSV_COLUMNS] raise ValueError(f"Ungültige Datumsangaben in folgenden Zeilen:\n{bad}") want_pdf = bool(args.pdf) mono_font = _pick_mono_font(size=8) - # Module-Registry modules: Dict[str, Module] = { "general": GeneralModule(), - # weitere Module später hier registrieren } - # Modulzuordnung aus CSV rows_for_module: Dict[str, List[int]] = {} for idx, row in df.iterrows(): for m in row["modules_list"]: @@ -445,10 +412,8 @@ def main(): results: List[ModuleResult] = [] - # General immer results.append(modules["general"].process(df, context={"base_dir": base_dir, "want_pdf": want_pdf, "mono_font": mono_font})) - # weitere Module optional for mod_name, indices in rows_for_module.items(): if mod_name == "general": continue @@ -459,31 +424,27 @@ def main(): subdf = df.loc[indices].copy() results.append(mod.process(subdf, context={"base_dir": base_dir, "want_pdf": want_pdf, "mono_font": mono_font})) - # ---- NEU: Konsolen-Auswertung je Modul print("\n===== Auswertung =====") for r in results: print(r.summary_text) print("") - # PDF optional if args.pdf: module_frames: List[Frame] = [] - module_bigframes: List[BigFrame] = [] # NEU + module_bigframes: List[BigFrame] = [] module_pages: List[plt.Figure] = [] for r in results: module_frames.extend(r.frames) - module_bigframes.extend(r.bigframes) # NEU + module_bigframes.extend(r.bigframes) module_pages.extend(r.pages) create_pdf(df, module_frames, module_bigframes, module_pages, args.pdf, mono_font, base_dir=base_dir, title=title) print(f"[OK] PDF geschrieben: {args.pdf}") - # Bundle optional 
(enthält CSV + ggf. PDF + Belege) if args.bundle: create_bundle(args.bundle, csv_path, df, base_dir=base_dir, pdf_path=args.pdf if args.pdf else None) print(f"[OK] Bundle geschrieben: {args.bundle}") - if __name__ == "__main__": main() diff --git a/xembu_testdata/testdata.csv b/xembu_testdata/testdata.csv index c110972..90ec3bb 100644 --- a/xembu_testdata/testdata.csv +++ b/xembu_testdata/testdata.csv @@ -1,4 +1,4 @@ -Datum;Nutzer;Distributionsgruppe;Distributionsflag;Positionsbezeichnung;Positionswert;Modules;Parameters;Beleg +date;debitor;group;group_flag;position;value;modules;parameters;receipt 2025-12-02-02-17-57;Dana;KFZ;U;Autofahrt;20.2 km;;;belege/beleg058.txt 2025-12-02-02-30-53;Leo;General;C;Putzzeug;14.68 €;;;belege/beleg011.txt 2025-12-02-11-26-50;Bene;General;C;Miete Küche;38.39 €;;;belege/beleg036.txt |
