diff options
| author | Leonard Kugis <leonard@kug.is> | 2025-12-23 00:08:47 +0100 |
|---|---|---|
| committer | Leonard Kugis <leonard@kug.is> | 2025-12-23 00:08:47 +0100 |
| commit | ec7598f568ff59ecc1eb51572f84d866b0180501 (patch) | |
| tree | 775944e30a140cc20857a316397d9538e9d1eff6 | |
| parent | 78f4448a21614ed01b7c4e60eb496889bc58076d (diff) | |
| download | xembu-ec7598f568ff59ecc1eb51572f84d866b0180501.tar.gz | |
Removed unnecessary overhead
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | modules/base.py | 9 | ||||
| -rw-r--r-- | modules/general.py | 214 | ||||
| -rw-r--r-- | xembu.py | 131 | ||||
| -rw-r--r-- | xembu_testdata/testdata.csv | 2 |
5 files changed, 129 insertions, 229 deletions
@@ -1,3 +1,5 @@ +work +out.pdf # Created by https://www.toptal.com/developers/gitignore/api/python,vim,linux,windows,macos # Edit at https://www.toptal.com/developers/gitignore?templates=python,vim,linux,windows,macos diff --git a/modules/base.py b/modules/base.py index 7d04644..6b32562 100644 --- a/modules/base.py +++ b/modules/base.py @@ -15,9 +15,6 @@ class Frame: @dataclass class BigFrame: - """ - Nimmt eine halbe PDF-Seite ein (Renderer packt 2 BigFrames pro Seite). - """ title: str def render(self, ax: Axes, mono_font: FontProperties) -> None: @@ -25,10 +22,10 @@ class BigFrame: @dataclass class ModuleResult: - summary_text: str # NEU: wird im Hauptprogramm in die Konsole gedruckt - frames: List[Frame] # Kacheln (optional) + summary_text: str + frames: List[Frame] bigframes: List[BigFrame] - pages: List[plt.Figure] # Vollseiten (optional) + pages: List[plt.Figure] class Module(Protocol): diff --git a/modules/general.py b/modules/general.py index f7eca38..3ad587a 100644 --- a/modules/general.py +++ b/modules/general.py @@ -12,57 +12,42 @@ from typing import Optional from .base import Frame, BigFrame, ModuleResult - MONEY_UNITS = {"€", "eur", "EUR", "euro", "EURO"} - def _is_money_unit(u: str) -> bool: return str(u).strip() in MONEY_UNITS - def compute_group_distribution(df: pd.DataFrame): - """ - Liefert: - group_summary: dict group -> info - per_person: DataFrame columns [person, contributed, share, balance] - per_group_person: DataFrame detail columns [group, person, contributed, usage, share, balance] - """ - # Explode Gruppen work = df.copy() work = work.explode("dist_groups") work["group"] = work["dist_groups"].fillna("").astype(str).str.strip() work = work[work["group"] != ""] - # C/U Normalisierung - work["flag"] = work["Distributionsflag"].astype(str).str.strip().str.upper() - work["person"] = work["Nutzer"].astype(str).str.strip() + work["flag"] = work["group_flag"].astype(str).str.strip().str.upper() + work["debitor"] = 
work["debitor"].astype(str).str.strip() - # Contributions (Geld) contrib = work[work["flag"] == "C"].copy() if len(contrib) > 0: bad_units = contrib[~contrib["unit"].apply(_is_money_unit)] if len(bad_units) > 0: raise ValueError( "Contribution (C) muss Geld-Einheit haben (z.B. € / EUR). " - f"Problemzeilen:\n{bad_units[['Datum','Nutzer','group','Positionsbezeichnung','Positionswert','unit']]}" + f"Problemzeilen:\n{bad_units[['date','debitor','group','position','val','unit']]}" ) - # Usage (Beliebige Einheit, pro Gruppe sollte es sinnvoll einheitlich sein) usage = work[work["flag"] == "U"].copy() - # Summen - contrib_by_gp = contrib.groupby(["group", "person"])["value"].sum().rename("contributed").reset_index() - contrib_tot = contrib.groupby("group")["value"].sum().rename("total_contrib").reset_index() + contrib_by_gp = contrib.groupby(["group", "debitor"])["val"].sum().rename("contributed").reset_index() + contrib_tot = contrib.groupby("group")["val"].sum().rename("total_contrib").reset_index() - usage_by_gp = usage.groupby(["group", "person"])["value"].sum().rename("usage").reset_index() - usage_tot = usage.groupby("group")["value"].sum().rename("total_usage").reset_index() + usage_by_gp = usage.groupby(["group", "debitor"])["val"].sum().rename("usage").reset_index() + usage_tot = usage.groupby("group")["val"].sum().rename("total_usage").reset_index() usage_unit = usage.groupby("group")["unit"].agg(lambda s: s.dropna().astype(str).unique().tolist()).reset_index() usage_unit = usage_unit.rename(columns={"unit": "usage_units"}) - participants = work.groupby("group")["person"].agg(lambda s: sorted(set(s.tolist()))).reset_index() - participants = participants.rename(columns={"person": "participants"}) + participants = work.groupby("group")["debitor"].agg(lambda s: sorted(set(s.tolist()))).reset_index() + participants = participants.rename(columns={"debitor": "participants"}) - # group_summary summary = ( participants.merge(contrib_tot, on="group", how="left") 
.merge(usage_tot, on="group", how="left") @@ -73,22 +58,19 @@ def compute_group_distribution(df: pd.DataFrame): summary["has_usage"] = summary["total_usage"].apply(lambda x: x > 0) summary["mode"] = summary.apply(lambda r: "usage" if r["has_usage"] else "equal", axis=1) - # Detail pro (group, person) detail = ( pd.DataFrame({"group": work["group"].unique()}) .assign(key=1) - .merge(pd.DataFrame({"person": work["person"].unique()}).assign(key=1), on="key") + .merge(pd.DataFrame({"debitor": work["debitor"].unique()}).assign(key=1), on="key") .drop(columns=["key"]) ) - # Nur relevante Paare, die in der Gruppe vorkommen - gp_person = work[["group", "person"]].drop_duplicates() - detail = detail.merge(gp_person, on=["group", "person"], how="inner") + gp_debitor = work[["group", "debitor"]].drop_duplicates() + detail = detail.merge(gp_debitor, on=["group", "debitor"], how="inner") - detail = detail.merge(contrib_by_gp, on=["group", "person"], how="left").merge(usage_by_gp, on=["group", "person"], how="left") + detail = detail.merge(contrib_by_gp, on=["group", "debitor"], how="left").merge(usage_by_gp, on=["group", "debitor"], how="left") detail["contributed"] = detail["contributed"].fillna(0.0) detail["usage"] = detail["usage"].fillna(0.0) - # Shares berechnen pro Gruppe shares = [] for _, row in summary.iterrows(): g = row["group"] @@ -97,7 +79,6 @@ def compute_group_distribution(df: pd.DataFrame): n = len(parts) if parts else 0 g_detail = detail[detail["group"] == g].copy() - # usage-mode, sobald es irgendeine U-Position gibt (auch wenn total_usage==0 → fallback) g_has_any_u = (usage["group"] == g).any() if g_has_any_u: @@ -114,25 +95,22 @@ def compute_group_distribution(df: pd.DataFrame): mode = "equal" g_detail["mode"] = mode - shares.append(g_detail[["group", "person", "share", "mode"]]) + shares.append(g_detail[["group", "debitor", "share", "mode"]]) - shares_df = pd.concat(shares, ignore_index=True) if shares else 
pd.DataFrame(columns=["group","person","share","mode"]) - detail = detail.merge(shares_df, on=["group", "person"], how="left") + shares_df = pd.concat(shares, ignore_index=True) if shares else pd.DataFrame(columns=["group","debitor","share","mode"]) + detail = detail.merge(shares_df, on=["group", "debitor"], how="left") detail["share"] = detail["share"].fillna(0.0) detail["balance"] = detail["contributed"] - detail["share"] - # per_person totals - per_person = detail.groupby("person")[["contributed", "share", "balance"]].sum().reset_index() - per_person = per_person.sort_values("person") + per_debitor = detail.groupby("debitor")[["contributed", "share", "balance"]].sum().reset_index() + per_debitor = per_debitor.sort_values("debitor") - # summary erweitern - # "Sobald es eine Position mit U gibt" zählt, auch wenn total_usage==0 (fallback) has_any_u = usage.groupby("group").size().rename("u_count").reset_index() summary = summary.merge(has_any_u, on="group", how="left") summary["u_count"] = summary["u_count"].fillna(0).astype(int) summary["mode"] = summary["u_count"].apply(lambda c: "usage" if c > 0 else "equal") - return summary, per_person, detail + return summary, per_debitor, detail @dataclass class GroupTimeSeries: @@ -142,14 +120,13 @@ class GroupTimeSeries: usage_units: List[str] xlim_start: pd.Timestamp xlim_end: pd.Timestamp - contrib_cum: Dict[str, pd.Series] # € kumulativ - usage_cum: Dict[str, pd.Series] # unit kumulativ (z.B. 
km, stk) - share_cum: Dict[str, pd.Series] # € kumulativ (Anteil) - ratio: Dict[str, pd.Series] # Anteil/Ausgelegt + contrib_cum: Dict[str, pd.Series] + usage_cum: Dict[str, pd.Series] + share_cum: Dict[str, pd.Series] + ratio: Dict[str, pd.Series] def _auto_time_limits(tmin: pd.Timestamp, tmax: pd.Timestamp) -> tuple[pd.Timestamp, pd.Timestamp]: - # +/- 5% Intervall, bei 0 Intervall fallback 30 Minuten dt = tmax - tmin if dt <= pd.Timedelta(0): margin = pd.Timedelta(minutes=30) @@ -159,28 +136,25 @@ def _auto_time_limits(tmin: pd.Timestamp, tmax: pd.Timestamp) -> tuple[pd.Timest def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTimeSeries]: - # explode Gruppen und filtere work = df.copy().explode("dist_groups") work["group"] = work["dist_groups"].fillna("").astype(str).str.strip() work = work[work["group"] == group].copy() - work = work[pd.notna(work["Datum"])] + work = work[pd.notna(work["date"])] if work.empty: return None - work["person"] = work["Nutzer"].astype(str).str.strip() - work["flag"] = work["Distributionsflag"].astype(str).str.strip().str.upper() + work["debitor"] = work["debitor"].astype(str).str.strip() + work["flag"] = work["group_flag"].astype(str).str.strip().str.upper() - participants = sorted(work["person"].unique().tolist()) + participants = sorted(work["debitor"].unique().tolist()) - # timeline: alle Zeitpunkte der Gruppe (unique, sortiert) - times = pd.DatetimeIndex(sorted(work["Datum"].unique())) + times = pd.DatetimeIndex(sorted(work["date"].unique())) tmin, tmax = times.min(), times.max() x0, x1 = _auto_time_limits(tmin, tmax) times = times.union(pd.DatetimeIndex([x0, x1])).sort_values() - # usage units (kann leer sein, oder mehrere – wir zeigen dann z.B. 
"km/stk") usage_units = sorted( work.loc[work["flag"] == "U", "unit"] .dropna() @@ -190,25 +164,21 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim .tolist() ) - # pro Person: Beiträge (C) und Nutzung (U) als kumulatives step-series auf timeline contrib_cum: Dict[str, pd.Series] = {} usage_cum: Dict[str, pd.Series] = {} for p in participants: - c = work[(work["person"] == p) & (work["flag"] == "C")].copy() - u = work[(work["person"] == p) & (work["flag"] == "U")].copy() + c = work[(work["debitor"] == p) & (work["flag"] == "C")].copy() + u = work[(work["debitor"] == p) & (work["flag"] == "U")].copy() - # Beiträge: nach Datum aggregieren, reindex auf timeline, kumulieren - c_by_t = c.groupby("Datum")["value"].sum() if not c.empty else pd.Series(dtype=float) + c_by_t = c.groupby("date")["val"].sum() if not c.empty else pd.Series(dtype=float) c_by_t = c_by_t.reindex(times, fill_value=0.0) contrib_cum[p] = c_by_t.cumsum() - # Nutzung: nach Datum aggregieren, reindex auf timeline, kumulieren - u_by_t = u.groupby("Datum")["value"].sum() if not u.empty else pd.Series(dtype=float) + u_by_t = u.groupby("date")["val"].sum() if not u.empty else pd.Series(dtype=float) u_by_t = u_by_t.reindex(times, fill_value=0.0) usage_cum[p] = u_by_t.cumsum() - # share über Zeit: kumulative total contributions verteilt total_contrib = sum((contrib_cum[p] for p in participants), start=pd.Series(0.0, index=times)) total_usage = sum((usage_cum[p] for p in participants), start=pd.Series(0.0, index=times)) @@ -217,9 +187,7 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim share_cum: Dict[str, pd.Series] = {} if has_any_u: - # usage-mode sobald U existiert; solange total_usage==0 => equal fallback for p in participants: - # share = total_contrib * usage_p / total_usage, sonst total_contrib/n usage_p = usage_cum[p] with np.errstate(divide="ignore", invalid="ignore"): share_usage = total_contrib * (usage_p / total_usage.replace(0.0, 
np.nan)) @@ -227,7 +195,6 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim share = share_usage.where(total_usage > 0, share_equal) share_cum[p] = share.fillna(0.0) else: - # equal-mode immer equal = total_contrib / float(n) for p in participants: share_cum[p] = equal @@ -254,13 +221,6 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim @dataclass class GroupChartBigFrame(BigFrame): - """ - kind: - - 'usage_cum' - - 'contrib_cum' - - 'share_cum' - - 'ratio' - """ gts: GroupTimeSeries kind: str @@ -271,48 +231,45 @@ class GroupChartBigFrame(BigFrame): formatter = mdates.ConciseDateFormatter(locator) ax.xaxis.set_major_locator(locator) ax.xaxis.set_major_formatter(formatter) - ax.xaxis.get_offset_text().set_visible(False) # <-- "2025-Dec" weg + ax.xaxis.get_offset_text().set_visible(False) ax.set_xlim(self.gts.xlim_start, self.gts.xlim_end) if self.kind == "usage_cum": series_map = self.gts.usage_cum unit = "/".join(self.gts.usage_units) if self.gts.usage_units else "" - ax.set_ylabel(f"Verbrauch kumulativ {unit}".strip(), fontproperties=mono_font) + ax.set_ylabel(f"Usage cumulative {unit}".strip(), fontproperties=mono_font) elif self.kind == "contrib_cum": series_map = self.gts.contrib_cum - ax.set_ylabel("Contributions kumulativ €", fontproperties=mono_font) + ax.set_ylabel("Contribution cumulative €", fontproperties=mono_font) elif self.kind == "share_cum": series_map = self.gts.share_cum - ax.set_ylabel("Anteil kumulativ €", fontproperties=mono_font) + ax.set_ylabel("Share cumulative €", fontproperties=mono_font) elif self.kind == "ratio": series_map = self.gts.ratio - ax.set_ylabel("Anteil / Ausgelegt", fontproperties=mono_font) - ax.set_yscale("log") # <-- LOG + ax.set_ylabel("Share / Contribution ratio (logarithmic)", fontproperties=mono_font) + ax.set_yscale("log") else: raise ValueError(f"Unknown kind: {self.kind}") - # Plot + Sammeln für robuste y-Limits all_vals = [] - min_ratio = 1e-3 # 
„quasi 0“ für log, damit Kurven am Anfang nicht "mittendrin" starten + min_ratio = 1e-3 for p in self.gts.participants: y = series_map[p].copy() if self.kind == "ratio": - # NaN/0/Inf behandeln, damit die Kurve von Anfang an existiert y = y.replace([np.inf, -np.inf], np.nan) y = y.fillna(min_ratio) y = y.clip(lower=min_ratio) else: y = y.replace([np.inf, -np.inf], np.nan).fillna(0.0) - # Steps für kumulative Kurven ist meist sauberer ax.plot(self.gts.times, y.values, label=p, linewidth=1, drawstyle="steps-post") v = y.values @@ -320,7 +277,6 @@ class GroupChartBigFrame(BigFrame): if v.size: all_vals.append(v) - # y-Limits so setzen, dass wirklich ALLE Werte sichtbar sind if all_vals: vv = np.concatenate(all_vals) @@ -329,14 +285,14 @@ class GroupChartBigFrame(BigFrame): if vmax <= 0: ax.set_ylim(0, 1) else: - ax.set_ylim(0, vmax * 1.08) # kleiner Puffer + ax.set_ylim(0, vmax * 1.08) elif self.kind == "ratio": vpos = vv[vv > 0] if vpos.size: vmin = float(np.nanmin(vpos)) vmax = float(np.nanmax(vpos)) - ax.set_ylim(vmin / 1.5, vmax * 1.5) # log: multiplicative padding + ax.set_ylim(vmin / 1.5, vmax * 1.5) ax.grid(True, alpha=0.2) @@ -345,7 +301,6 @@ class GroupChartBigFrame(BigFrame): for t in leg.get_texts(): t.set_fontproperties(mono_font) - # Tick-Fonts monospace for tick in ax.get_xticklabels() + ax.get_yticklabels(): tick.set_fontproperties(mono_font) @@ -358,12 +313,11 @@ class TextFrame(Frame): @dataclass class PlotBigFrame(BigFrame): - per_person: pd.DataFrame # erwartet Spalten: person, contributed, share + per_debitor: pd.DataFrame def render(self, ax: Axes, mono_font: FontProperties) -> None: - # Axes ist schon da, wir zeichnen direkt hinein ax.axis("on") - plot_df = self.per_person.set_index("person")[["contributed", "share"]] + plot_df = self.per_debitor.set_index("debitor")[["contributed", "share"]] plot_df.plot.bar(ax=ax) ax.tick_params(axis="x", rotation=0) leg = ax.legend(prop=mono_font) @@ -386,38 +340,37 @@ class GeneralModule: mono_font = 
context.get("mono_font") or FontProperties(family="DejaVu Sans Mono", size=8) - group_summary, per_person, detail = compute_group_distribution(df) + group_summary, per_debitor, detail = compute_group_distribution(df) - balance = {r["person"]: float(r["balance"]) for _, r in per_person.iterrows()} + balance = {r["debitor"]: float(r["balance"]) for _, r in per_debitor.iterrows()} payments = self._minimize_payments(balance) - # ---- NEU: Textauswertung für Konsole summary_lines = [] - summary_lines.append("General – Verteilung über Distributionsgruppen") + summary_lines.append("General") summary_lines.append("") - summary_lines.append("Gruppen:") + summary_lines.append("Goups:") for _, r in group_summary.sort_values("group").iterrows(): g = r["group"] total_c = float(r.get("total_contrib", 0.0)) u_count = int(r.get("u_count", 0)) mode = "usage" if u_count > 0 else "equal" participants = r.get("participants", []) or [] - summary_lines.append(f" - {g}: {total_c:.2f} €; mode={mode}; teilnehmer={len(participants)}") + summary_lines.append(f" - {g}: {total_c:.2f} €; mode={mode}; participants={len(participants)}") summary_lines.append("") - summary_lines.append("Personen (Summe über alle Gruppen):") - for _, r in per_person.sort_values("person").iterrows(): + summary_lines.append("Debitors (total):") + for _, r in per_debitor.sort_values("debitor").iterrows(): summary_lines.append( - f" - {r['person']}: ausgelegt={r['contributed']:.2f} €; anteil={r['share']:.2f} €; saldo={r['balance']:.2f} €" + f" - {r['debitor']}: contributed={r['contributed']:.2f} €; share={r['share']:.2f} €; balance={r['balance']:.2f} €" ) summary_lines.append("") - summary_lines.append("Ausgleich (minimiert):") + summary_lines.append("Compensation (minimized):") if payments: for p, r, a in payments: summary_lines.append(f" - {p} → {r}: {a:.2f} €") else: - summary_lines.append(" (keine Zahlungen nötig)") + summary_lines.append(" (No compensation required)") summary_text = "\n".join(summary_lines) @@ 
-426,45 +379,42 @@ class GeneralModule: pages: List[plt.Figure] = [] if want_pdf: - frames.extend(self._make_frames(group_summary, per_person, payments)) + frames.extend(self._make_frames(group_summary, per_debitor, payments)) - # BigFrame: Gesamt-Balkenplot bleibt (wie vorher) bigframes.append( PlotBigFrame( - title="General – Ausgelegt vs Anteil (Summe über Gruppen)", - per_person=per_person.copy(), + title="General – Shares vs. Contributions (total)", + per_debitor=per_debitor.copy(), ) ) - # NEU: pro Distributionsgruppe 4 BigFrame-Charts for g in sorted(group_summary["group"].unique().tolist()): gts = _prepare_group_timeseries(df, g) if not gts: continue bigframes.append(GroupChartBigFrame( - title=f"{g} – Kumulativer Verbrauch pro Person", + title=f"{g} – Cumulative usage per debitor", gts=gts, kind="usage_cum", )) bigframes.append(GroupChartBigFrame( - title=f"{g} – Kumulative Contributions pro Person", + title=f"{g} – Cumulative contributions per debitor", gts=gts, kind="contrib_cum", )) bigframes.append(GroupChartBigFrame( - title=f"{g} – Anteil pro Person (zeitlicher Verlauf)", + title=f"{g} – Share per debitor", gts=gts, kind="share_cum", )) bigframes.append(GroupChartBigFrame( - title=f"{g} – Verhältnis Anteil/Ausgelegt (zeitlicher Verlauf)", + title=f"{g} – Share / Contribution ratio (logarithmic)", gts=gts, kind="ratio", )) - # Pages: nur noch Detailseiten, keine Balkenplot-Seite mehr - pages.extend(self._make_pages(group_summary, per_person, detail, mono_font)) + pages.extend(self._make_pages(group_summary, per_debitor, detail, mono_font)) return ModuleResult(summary_text=summary_text, frames=frames, bigframes=bigframes, pages=pages) @@ -493,57 +443,52 @@ class GeneralModule: j += 1 return out - def _make_frames(self, group_summary: pd.DataFrame, per_person: pd.DataFrame, payments: List[Tuple[str,str,float]]) -> List[Frame]: - # Frame 1: Gruppen-Übersicht - lines = ["Gruppenübersicht:"] + def _make_frames(self, group_summary: pd.DataFrame, 
per_debitor: pd.DataFrame, payments: List[Tuple[str,str,float]]) -> List[Frame]: + lines = ["Groups:"] for _, r in group_summary.sort_values("group").iterrows(): g = r["group"] total_c = float(r.get("total_contrib", 0.0)) u_count = int(r.get("u_count", 0)) parts = r.get("participants", []) mode = "usage" if u_count > 0 else "equal" - lines.append(f"- {g}: {total_c:.2f} €; mode={mode}; teilnehmer={len(parts)}") + lines.append(f"- {g}: {total_c:.2f} €; mode={mode}; participants={len(parts)}") - f1 = TextFrame(title="General: Gruppen", text="\n".join(lines)) + f1 = TextFrame(title="General: Groups", text="\n".join(lines)) - # Frame 2: Personen-Totale - lines = ["Personen (Summe über alle Gruppen):", "Person | contributed | share | balance"] - for _, r in per_person.iterrows(): - lines.append(f"{r['person']}: {r['contributed']:.2f} €; {r['share']:.2f} €; {r['balance']:.2f} €") - f2 = TextFrame(title="General: Personen", text="\n".join(lines)) + lines = ["Debitor total:", "debitor | contributed | share | balance"] + for _, r in per_debitor.iterrows(): + lines.append(f"{r['debitor']} | {r['contributed']:.2f} € | {r['share']:.2f} € | {r['balance']:.2f} €") + f2 = TextFrame(title="General: Debitors", text="\n".join(lines)) - # Frame 3: Ausgleich - lines = ["Ausgleich (minimiert):"] + lines = ["Compensation (minimized):"] if payments: for p, r, a in payments: lines.append(f"{p} → {r}: {a:.2f} €") else: - lines.append("(keine Zahlungen nötig)") - f3 = TextFrame(title="General: Ausgleich", text="\n".join(lines)) + lines.append("(No compensation required)") + f3 = TextFrame(title="General: Compensation", text="\n".join(lines)) return [f1, f2, f3] - def _make_pages(self, group_summary, per_person, detail, mono_font) -> List[plt.Figure]: + def _make_pages(self, group_summary, per_debitor, detail, mono_font) -> List[plt.Figure]: pages: List[plt.Figure] = [] - # Textseiten: pro Gruppe Detail (ggf. 
mehrere) - # Wir machen je Gruppe eine Seite, wenn es nicht zu viele sind for g in sorted(detail["group"].unique().tolist()): - gdet = detail[detail["group"] == g].sort_values("person") + gdet = detail[detail["group"] == g].sort_values("debitor") total_c = float(group_summary[group_summary["group"] == g]["total_contrib"].iloc[0]) if (group_summary["group"] == g).any() else 0.0 u_count = int(group_summary[group_summary["group"] == g]["u_count"].iloc[0]) if (group_summary["group"] == g).any() else 0 mode = "usage" if u_count > 0 else "equal" lines = [ - f"Gruppe: {g}", + f"Group: {g}", f"Total Contribution: {total_c:.2f} €", f"Mode: {mode}", "", - "Person | contributed | usage | share | balance", + "debitor | contributed | usage | share | balance", ] for _, r in gdet.iterrows(): lines.append( - f"{r['person']}: {r['contributed']:.2f} €; {r['usage']:.4f}; {r['share']:.2f} €; {r['balance']:.2f} €" + f"{r['debitor']} | {r['contributed']:.2f} € | {r['usage']:.4f} | {r['share']:.2f} € | {r['balance']:.2f} €" ) fig, ax = plt.subplots(figsize=(8.27, 11.69)) @@ -551,10 +496,5 @@ class GeneralModule: ax.text(0, 1, "\n".join(lines), va="top", ha="left", fontproperties=mono_font) pages.append(fig) - # Optional: Nutzungsverläufe für Gruppen mit unit "km" - # (nur wenn U vorhanden und unit in den U-rows km ist) - # Dafür brauchen wir zeitliche Daten → aus detail nicht möglich, also direkt aus df wäre besser. - # Wenn du willst, ergänze ich das als eigene Seite pro km-Gruppe auf Basis der Original-DF. 
- return pages @@ -16,18 +16,17 @@ from modules.general import GeneralModule from datetime import datetime CSV_COLUMNS = [ - "Datum", - "Nutzer", - "Distributionsgruppe", - "Distributionsflag", - "Positionsbezeichnung", - "Positionswert", - "Modules", - "Parameters", - "Beleg", + "date", + "debitor", + "group", + "group_flag", + "position", + "value", + "modules", + "parameters", + "receipt", ] - def _pick_mono_font(size: int = 8) -> font_manager.FontProperties: for fam in ["Inconsolata", "DejaVu Sans Mono", "monospace"]: try: @@ -37,13 +36,11 @@ def _pick_mono_font(size: int = 8) -> font_manager.FontProperties: return font_manager.FontProperties(size=size) def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, total_pages: int): - # Margins: links/rechts 2cm, oben/unten 1cm margin_lr_cm = 2.0 margin_tb_cm = 1.0 - # Zusätzlicher Abstand (Bänder) zwischen Header/Footer und Content - header_gap_cm = 1.3 # mehr Abstand nach unten - footer_gap_cm = 2.0 # mehr Abstand nach oben (2-zeiliger Footer) + header_gap_cm = 1.3 + footer_gap_cm = 2.0 cm_to_in = 1 / 2.54 margin_lr_in = margin_lr_cm * cm_to_in @@ -58,27 +55,22 @@ def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, t header_gap = header_gap_in / h_in footer_gap = footer_gap_in / h_in - # Content-Bereich: innerhalb der Margins + zusätzlich Platz für Header/Footer top = 1 - my - header_gap bottom = my + footer_gap if top <= bottom: - # Fallback, falls es zu eng wird top = 1 - my bottom = my fig.subplots_adjust(left=mx, right=1 - mx, top=top, bottom=bottom) - # Header/Footer Positionen: jeweils an der inneren Kante der Margins left_x = mx right_x = 1 - mx header_y = 1 - my footer_y = my - # Kopfzeile fig.text(left_x, header_y, title, ha="left", va="top", fontproperties=mono_font, fontsize=9) fig.text(right_x, header_y, generated_at, ha="right", va="top", fontproperties=mono_font, fontsize=9) - # Fußzeile links (zweizeilig) footer_left = ( "xembu - eXtensible 
Event-based Multiuser Bookkeeping Utility\n" "Copyright (C) 2024 Leonard Kugis\n" @@ -87,7 +79,6 @@ def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, t fig.text(left_x, footer_y, footer_left, ha="left", va="bottom", fontproperties=mono_font, fontsize=7, linespacing=1.1) - # Fußzeile rechts fig.text(right_x, footer_y, f"{page} / {total_pages}", ha="right", va="bottom", fontproperties=mono_font, fontsize=8) @@ -112,21 +103,18 @@ def parse_value_unit(s: str): num_str = " ".join(parts[:-1]).strip().replace(",", ".").replace("€", "").strip() return float(num_str), unit - def parse_modules_list(s: str) -> List[str]: if s is None or (isinstance(s, float) and pd.isna(s)): return [] mods = [m.strip() for m in str(s).split(",")] return [m for m in mods if m] - def parse_groups_list(s: str) -> List[str]: if s is None or (isinstance(s, float) and pd.isna(s)): return [] gs = [g.strip() for g in str(s).split(",")] return [g for g in gs if g] - def parse_parameters_list(s: str) -> List[tuple]: if s is None or (isinstance(s, float) and pd.isna(s)): return [] @@ -157,24 +145,23 @@ def parse_parameters_list(s: str) -> List[tuple]: tuples.append(tuple(vals)) return tuples - def parse_csv(path: str) -> pd.DataFrame: df = _read_csv_flexible(path) - df["Datum"] = pd.to_datetime(df["Datum"], format="%Y-%m-%d-%H-%M-%S", errors="coerce") - df["Nutzer"] = df["Nutzer"].astype(str).str.strip() - df["Distributionsflag"] = df["Distributionsflag"].astype(str).str.strip().str.upper() - df["Positionsbezeichnung"] = df["Positionsbezeichnung"].astype(str).str.strip() + df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d-%H-%M-%S", errors="coerce") + df["debitor"] = df["debitor"].astype(str).str.strip() + df["group_flag"] = df["group_flag"].astype(str).str.strip().str.upper() + df["position"] = df["position"].astype(str).str.strip() - df["dist_groups"] = df["Distributionsgruppe"].apply(parse_groups_list) - df["modules_list"] = df["Modules"].apply(parse_modules_list) 
- df["params_list"] = df["Parameters"].apply(parse_parameters_list) + df["dist_groups"] = df["group"].apply(parse_groups_list) + df["modules_list"] = df["modules"].apply(parse_modules_list) + df["params_list"] = df["parameters"].apply(parse_parameters_list) - vals_units = df["Positionswert"].apply(parse_value_unit) - df["value"] = vals_units.apply(lambda x: x[0]) + vals_units = df["value"].apply(parse_value_unit) + df["val"] = vals_units.apply(lambda x: x[0]) df["unit"] = vals_units.apply(lambda x: x[1]) - df["Beleg"] = df["Beleg"].where(df["Beleg"].notna(), "") + df["receipt"] = df["receipt"].where(df["receipt"].notna(), "") return df @@ -193,32 +180,32 @@ def _build_positions_table_figs(df: pd.DataFrame, base_dir: str, mono_font): figures = [] columns = [ - "Datum", "Nutzer", "Distributionsgruppe", "Flag", - "Positionsbezeichnung", "Positionswert", - "Modules", "Parameters", "Beleg", "SHA1", + "Date", "Debitor", "Group", "Flag", + "Position", "Value", + "Modules", "Parameters", "Receipt", "SHA1", ] table_data = [] - for _, row in df.sort_values("Datum").iterrows(): - sha1 = compute_hash(str(row["Beleg"]), base_dir=base_dir) if row["Beleg"] else None + for _, row in df.sort_values("date").iterrows(): + sha1 = compute_hash(str(row["receipt"]), base_dir=base_dir) if row["receipt"] else None sha1_fmt = "" if sha1: sha1_fmt = sha1[: len(sha1) // 2] + "\n" + sha1[len(sha1) // 2 :] - groups_str = ", ".join(row["dist_groups"]) if isinstance(row["dist_groups"], list) else str(row["Distributionsgruppe"]) - mods_str = ", ".join(row["modules_list"]) if isinstance(row["modules_list"], list) else str(row["Modules"]) - params_str = str(row["params_list"]) if isinstance(row["params_list"], list) else str(row["Parameters"]) + groups_str = ", ".join(row["dist_groups"]) if isinstance(row["dist_groups"], list) else str(row["group"]) + mods_str = ", ".join(row["modules_list"]) if isinstance(row["modules_list"], list) else str(row["modules"]) + params_str = str(row["params_list"]) if 
isinstance(row["params_list"], list) else str(row["parameters"]) table_data.append([ - row["Datum"].strftime("%Y-%m-%d %H:%M:%S") if pd.notna(row["Datum"]) else "INVALID", - row["Nutzer"], + row["date"].strftime("%Y-%m-%d %H:%M:%S") if pd.notna(row["date"]) else "INVALID", + row["debitor"], groups_str, - row["Distributionsflag"], - row["Positionsbezeichnung"], - f"{row['value']:.4f} {row['unit']}".strip(), + row["group_flag"], + row["position"], + f"{row['val']:.4f} {row['unit']}".strip(), mods_str, params_str, - str(row["Beleg"]) if row["Beleg"] else "", + str(row["receipt"]) if row["receipt"] else "", sha1_fmt, ]) @@ -265,7 +252,6 @@ def _build_positions_table_figs(df: pd.DataFrame, base_dir: str, mono_font): return figures - def _separator_page(pdf: PdfPages, title: str, mono_font): fig, ax = plt.subplots(figsize=(8.27, 11.69)) ax.axis("off") @@ -338,16 +324,14 @@ def create_pdf( generated_at = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") - # 1) Alle Seiten als Figures sammeln (damit wir total_pages kennen) figs: List[plt.Figure] = [] figs.extend(_build_positions_table_figs(df, base_dir=base_dir, mono_font=mono_font)) figs.extend(_build_frame_figs(module_frames, mono_font=mono_font)) figs.extend(_build_bigframe_figs(module_bigframes, mono_font=mono_font)) - figs.extend(module_pages) # bereits fertige Figures aus Modulen + figs.extend(module_pages) total_pages = len(figs) - # 2) Speichern mit Header/Footer + Seitenzählung with PdfPages(pdf_path) as pdf: for i, fig in enumerate(figs, start=1): _decorate_figure(fig, mono_font, title=title, generated_at=generated_at, page=i, total_pages=total_pages) @@ -355,50 +339,37 @@ def create_pdf( plt.close(fig) def create_bundle(archive_path: str, csv_path: str, df: pd.DataFrame, base_dir: str, pdf_path: Optional[str] = None): - """ - Bundle enthält: CSV, optional PDF, und alle Belege (relative Pfade aus 'Beleg' relativ zu base_dir). - Ausgabe: .tar.zst (über externes zstd). 
- """ os.makedirs(os.path.dirname(os.path.abspath(archive_path)) or ".", exist_ok=True) - # Wir bauen ein temporäres .tar daneben und komprimieren danach. tar_path = archive_path if tar_path.endswith(".zst"): - tar_path = tar_path[:-4] # strip ".zst" + tar_path = tar_path[:-4] if not tar_path.endswith(".tar"): tar_path = tar_path + ".tar" - # Sammle Belege beleg_paths = [] - for p in df["Beleg"].astype(str).tolist(): + for p in df["receipt"].astype(str).tolist(): p = p.strip() if p: beleg_paths.append(p) with tarfile.open(tar_path, "w") as tar: - # CSV tar.add(csv_path, arcname=os.path.basename(csv_path)) - # PDF optional if pdf_path and os.path.exists(pdf_path): tar.add(pdf_path, arcname=os.path.basename(pdf_path)) - # Belege missing = [] for rel in sorted(set(beleg_paths)): abs_path = rel if os.path.isabs(rel) else os.path.join(base_dir, rel) if os.path.exists(abs_path): - # arcname: möglichst den relativen Pfad behalten arcname = os.path.basename(rel) if os.path.isabs(rel) else rel tar.add(abs_path, arcname=arcname) else: missing.append(rel) - # zstd komprimieren → archive_path - # zstd -o <archive> <tar> subprocess.run(["zstd", "-T0", "-o", archive_path, tar_path], check=True) - # tar löschen (zstd bekommt eine Kopie) try: os.remove(tar_path) except Exception: @@ -409,13 +380,12 @@ def create_bundle(archive_path: str, csv_path: str, df: pd.DataFrame, base_dir: for m in missing: print(f" - {m}") - def main(): parser = argparse.ArgumentParser() - parser.add_argument("csv", help="Pfad zur CSV-Datei") - parser.add_argument("--title", "-t", help="Titel für PDF-Kopfzeile (optional)") - parser.add_argument("--pdf", "-p", help="Pfad zur Ziel-PDF (optional)") - parser.add_argument("--bundle", "-b", help="Pfad zum Bundle (.tar.zst), enthält CSV, PDF (falls erzeugt) und Belege (optional)") + parser.add_argument("csv", help="CSV path") + parser.add_argument("--title", "-t", help="PDF header title (optional)") + parser.add_argument("--pdf", "-p", help="PDF path (optional)") 
+ parser.add_argument("--bundle", "-b", help="Path to bundle (.tar.zst), containing CSV, PDF and receipts (optional)") args = parser.parse_args() csv_path = os.path.abspath(args.csv) @@ -424,20 +394,17 @@ def main(): title = args.title if args.title else os.path.basename(csv_path) df = parse_csv(csv_path) - if df["Datum"].isna().any(): - bad = df[df["Datum"].isna()][CSV_COLUMNS] + if df["date"].isna().any(): + bad = df[df["date"].isna()][CSV_COLUMNS] raise ValueError(f"Ungültige Datumsangaben in folgenden Zeilen:\n{bad}") want_pdf = bool(args.pdf) mono_font = _pick_mono_font(size=8) - # Module-Registry modules: Dict[str, Module] = { "general": GeneralModule(), - # weitere Module später hier registrieren } - # Modulzuordnung aus CSV rows_for_module: Dict[str, List[int]] = {} for idx, row in df.iterrows(): for m in row["modules_list"]: @@ -445,10 +412,8 @@ def main(): results: List[ModuleResult] = [] - # General immer results.append(modules["general"].process(df, context={"base_dir": base_dir, "want_pdf": want_pdf, "mono_font": mono_font})) - # weitere Module optional for mod_name, indices in rows_for_module.items(): if mod_name == "general": continue @@ -459,31 +424,27 @@ def main(): subdf = df.loc[indices].copy() results.append(mod.process(subdf, context={"base_dir": base_dir, "want_pdf": want_pdf, "mono_font": mono_font})) - # ---- NEU: Konsolen-Auswertung je Modul print("\n===== Auswertung =====") for r in results: print(r.summary_text) print("") - # PDF optional if args.pdf: module_frames: List[Frame] = [] - module_bigframes: List[BigFrame] = [] # NEU + module_bigframes: List[BigFrame] = [] module_pages: List[plt.Figure] = [] for r in results: module_frames.extend(r.frames) - module_bigframes.extend(r.bigframes) # NEU + module_bigframes.extend(r.bigframes) module_pages.extend(r.pages) create_pdf(df, module_frames, module_bigframes, module_pages, args.pdf, mono_font, base_dir=base_dir, title=title) print(f"[OK] PDF geschrieben: {args.pdf}") - # Bundle optional 
(enthält CSV + ggf. PDF + Belege) if args.bundle: create_bundle(args.bundle, csv_path, df, base_dir=base_dir, pdf_path=args.pdf if args.pdf else None) print(f"[OK] Bundle geschrieben: {args.bundle}") - if __name__ == "__main__": main() diff --git a/xembu_testdata/testdata.csv b/xembu_testdata/testdata.csv index c110972..90ec3bb 100644 --- a/xembu_testdata/testdata.csv +++ b/xembu_testdata/testdata.csv @@ -1,4 +1,4 @@ -Datum;Nutzer;Distributionsgruppe;Distributionsflag;Positionsbezeichnung;Positionswert;Modules;Parameters;Beleg +date;debitor;group;group_flag;position;value;modules;parameters;receipt 2025-12-02-02-17-57;Dana;KFZ;U;Autofahrt;20.2 km;;;belege/beleg058.txt 2025-12-02-02-30-53;Leo;General;C;Putzzeug;14.68 €;;;belege/beleg011.txt 2025-12-02-11-26-50;Bene;General;C;Miete Küche;38.39 €;;;belege/beleg036.txt |
