aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLeonard Kugis <leonard@kug.is>2025-12-23 00:08:47 +0100
committerLeonard Kugis <leonard@kug.is>2025-12-23 00:08:47 +0100
commitec7598f568ff59ecc1eb51572f84d866b0180501 (patch)
tree775944e30a140cc20857a316397d9538e9d1eff6
parent78f4448a21614ed01b7c4e60eb496889bc58076d (diff)
downloadxembu-ec7598f568ff59ecc1eb51572f84d866b0180501.tar.gz
Removed unnecessary overhead
-rw-r--r--.gitignore2
-rw-r--r--modules/base.py9
-rw-r--r--modules/general.py214
-rw-r--r--xembu.py131
-rw-r--r--xembu_testdata/testdata.csv2
5 files changed, 129 insertions, 229 deletions
diff --git a/.gitignore b/.gitignore
index 9f4c102..b82580a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+work
+out.pdf
# Created by https://www.toptal.com/developers/gitignore/api/python,vim,linux,windows,macos
# Edit at https://www.toptal.com/developers/gitignore?templates=python,vim,linux,windows,macos
diff --git a/modules/base.py b/modules/base.py
index 7d04644..6b32562 100644
--- a/modules/base.py
+++ b/modules/base.py
@@ -15,9 +15,6 @@ class Frame:
@dataclass
class BigFrame:
- """
- Nimmt eine halbe PDF-Seite ein (Renderer packt 2 BigFrames pro Seite).
- """
title: str
def render(self, ax: Axes, mono_font: FontProperties) -> None:
@@ -25,10 +22,10 @@ class BigFrame:
@dataclass
class ModuleResult:
- summary_text: str # NEU: wird im Hauptprogramm in die Konsole gedruckt
- frames: List[Frame] # Kacheln (optional)
+ summary_text: str
+ frames: List[Frame]
bigframes: List[BigFrame]
- pages: List[plt.Figure] # Vollseiten (optional)
+ pages: List[plt.Figure]
class Module(Protocol):
diff --git a/modules/general.py b/modules/general.py
index f7eca38..3ad587a 100644
--- a/modules/general.py
+++ b/modules/general.py
@@ -12,57 +12,42 @@ from typing import Optional
from .base import Frame, BigFrame, ModuleResult
-
MONEY_UNITS = {"€", "eur", "EUR", "euro", "EURO"}
-
def _is_money_unit(u: str) -> bool:
return str(u).strip() in MONEY_UNITS
-
def compute_group_distribution(df: pd.DataFrame):
- """
- Liefert:
- group_summary: dict group -> info
- per_person: DataFrame columns [person, contributed, share, balance]
- per_group_person: DataFrame detail columns [group, person, contributed, usage, share, balance]
- """
- # Explode Gruppen
work = df.copy()
work = work.explode("dist_groups")
work["group"] = work["dist_groups"].fillna("").astype(str).str.strip()
work = work[work["group"] != ""]
- # C/U Normalisierung
- work["flag"] = work["Distributionsflag"].astype(str).str.strip().str.upper()
- work["person"] = work["Nutzer"].astype(str).str.strip()
+ work["flag"] = work["group_flag"].astype(str).str.strip().str.upper()
+ work["debitor"] = work["debitor"].astype(str).str.strip()
- # Contributions (Geld)
contrib = work[work["flag"] == "C"].copy()
if len(contrib) > 0:
bad_units = contrib[~contrib["unit"].apply(_is_money_unit)]
if len(bad_units) > 0:
raise ValueError(
"Contribution (C) muss Geld-Einheit haben (z.B. € / EUR). "
- f"Problemzeilen:\n{bad_units[['Datum','Nutzer','group','Positionsbezeichnung','Positionswert','unit']]}"
+ f"Problemzeilen:\n{bad_units[['date','debitor','group','position','val','unit']]}"
)
- # Usage (Beliebige Einheit, pro Gruppe sollte es sinnvoll einheitlich sein)
usage = work[work["flag"] == "U"].copy()
- # Summen
- contrib_by_gp = contrib.groupby(["group", "person"])["value"].sum().rename("contributed").reset_index()
- contrib_tot = contrib.groupby("group")["value"].sum().rename("total_contrib").reset_index()
+ contrib_by_gp = contrib.groupby(["group", "debitor"])["val"].sum().rename("contributed").reset_index()
+ contrib_tot = contrib.groupby("group")["val"].sum().rename("total_contrib").reset_index()
- usage_by_gp = usage.groupby(["group", "person"])["value"].sum().rename("usage").reset_index()
- usage_tot = usage.groupby("group")["value"].sum().rename("total_usage").reset_index()
+ usage_by_gp = usage.groupby(["group", "debitor"])["val"].sum().rename("usage").reset_index()
+ usage_tot = usage.groupby("group")["val"].sum().rename("total_usage").reset_index()
usage_unit = usage.groupby("group")["unit"].agg(lambda s: s.dropna().astype(str).unique().tolist()).reset_index()
usage_unit = usage_unit.rename(columns={"unit": "usage_units"})
- participants = work.groupby("group")["person"].agg(lambda s: sorted(set(s.tolist()))).reset_index()
- participants = participants.rename(columns={"person": "participants"})
+ participants = work.groupby("group")["debitor"].agg(lambda s: sorted(set(s.tolist()))).reset_index()
+ participants = participants.rename(columns={"debitor": "participants"})
- # group_summary
summary = (
participants.merge(contrib_tot, on="group", how="left")
.merge(usage_tot, on="group", how="left")
@@ -73,22 +58,19 @@ def compute_group_distribution(df: pd.DataFrame):
summary["has_usage"] = summary["total_usage"].apply(lambda x: x > 0)
summary["mode"] = summary.apply(lambda r: "usage" if r["has_usage"] else "equal", axis=1)
- # Detail pro (group, person)
detail = (
pd.DataFrame({"group": work["group"].unique()})
.assign(key=1)
- .merge(pd.DataFrame({"person": work["person"].unique()}).assign(key=1), on="key")
+ .merge(pd.DataFrame({"debitor": work["debitor"].unique()}).assign(key=1), on="key")
.drop(columns=["key"])
)
- # Nur relevante Paare, die in der Gruppe vorkommen
- gp_person = work[["group", "person"]].drop_duplicates()
- detail = detail.merge(gp_person, on=["group", "person"], how="inner")
+ gp_debitor = work[["group", "debitor"]].drop_duplicates()
+ detail = detail.merge(gp_debitor, on=["group", "debitor"], how="inner")
- detail = detail.merge(contrib_by_gp, on=["group", "person"], how="left").merge(usage_by_gp, on=["group", "person"], how="left")
+ detail = detail.merge(contrib_by_gp, on=["group", "debitor"], how="left").merge(usage_by_gp, on=["group", "debitor"], how="left")
detail["contributed"] = detail["contributed"].fillna(0.0)
detail["usage"] = detail["usage"].fillna(0.0)
- # Shares berechnen pro Gruppe
shares = []
for _, row in summary.iterrows():
g = row["group"]
@@ -97,7 +79,6 @@ def compute_group_distribution(df: pd.DataFrame):
n = len(parts) if parts else 0
g_detail = detail[detail["group"] == g].copy()
- # usage-mode, sobald es irgendeine U-Position gibt (auch wenn total_usage==0 → fallback)
g_has_any_u = (usage["group"] == g).any()
if g_has_any_u:
@@ -114,25 +95,22 @@ def compute_group_distribution(df: pd.DataFrame):
mode = "equal"
g_detail["mode"] = mode
- shares.append(g_detail[["group", "person", "share", "mode"]])
+ shares.append(g_detail[["group", "debitor", "share", "mode"]])
- shares_df = pd.concat(shares, ignore_index=True) if shares else pd.DataFrame(columns=["group","person","share","mode"])
- detail = detail.merge(shares_df, on=["group", "person"], how="left")
+ shares_df = pd.concat(shares, ignore_index=True) if shares else pd.DataFrame(columns=["group","debitor","share","mode"])
+ detail = detail.merge(shares_df, on=["group", "debitor"], how="left")
detail["share"] = detail["share"].fillna(0.0)
detail["balance"] = detail["contributed"] - detail["share"]
- # per_person totals
- per_person = detail.groupby("person")[["contributed", "share", "balance"]].sum().reset_index()
- per_person = per_person.sort_values("person")
+ per_debitor = detail.groupby("debitor")[["contributed", "share", "balance"]].sum().reset_index()
+ per_debitor = per_debitor.sort_values("debitor")
- # summary erweitern
- # "Sobald es eine Position mit U gibt" zählt, auch wenn total_usage==0 (fallback)
has_any_u = usage.groupby("group").size().rename("u_count").reset_index()
summary = summary.merge(has_any_u, on="group", how="left")
summary["u_count"] = summary["u_count"].fillna(0).astype(int)
summary["mode"] = summary["u_count"].apply(lambda c: "usage" if c > 0 else "equal")
- return summary, per_person, detail
+ return summary, per_debitor, detail
@dataclass
class GroupTimeSeries:
@@ -142,14 +120,13 @@ class GroupTimeSeries:
usage_units: List[str]
xlim_start: pd.Timestamp
xlim_end: pd.Timestamp
- contrib_cum: Dict[str, pd.Series] # € kumulativ
- usage_cum: Dict[str, pd.Series] # unit kumulativ (z.B. km, stk)
- share_cum: Dict[str, pd.Series] # € kumulativ (Anteil)
- ratio: Dict[str, pd.Series] # Anteil/Ausgelegt
+ contrib_cum: Dict[str, pd.Series]
+ usage_cum: Dict[str, pd.Series]
+ share_cum: Dict[str, pd.Series]
+ ratio: Dict[str, pd.Series]
def _auto_time_limits(tmin: pd.Timestamp, tmax: pd.Timestamp) -> tuple[pd.Timestamp, pd.Timestamp]:
- # +/- 5% Intervall, bei 0 Intervall fallback 30 Minuten
dt = tmax - tmin
if dt <= pd.Timedelta(0):
margin = pd.Timedelta(minutes=30)
@@ -159,28 +136,25 @@ def _auto_time_limits(tmin: pd.Timestamp, tmax: pd.Timestamp) -> tuple[pd.Timest
def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTimeSeries]:
- # explode Gruppen und filtere
work = df.copy().explode("dist_groups")
work["group"] = work["dist_groups"].fillna("").astype(str).str.strip()
work = work[work["group"] == group].copy()
- work = work[pd.notna(work["Datum"])]
+ work = work[pd.notna(work["date"])]
if work.empty:
return None
- work["person"] = work["Nutzer"].astype(str).str.strip()
- work["flag"] = work["Distributionsflag"].astype(str).str.strip().str.upper()
+ work["debitor"] = work["debitor"].astype(str).str.strip()
+ work["flag"] = work["group_flag"].astype(str).str.strip().str.upper()
- participants = sorted(work["person"].unique().tolist())
+ participants = sorted(work["debitor"].unique().tolist())
- # timeline: alle Zeitpunkte der Gruppe (unique, sortiert)
- times = pd.DatetimeIndex(sorted(work["Datum"].unique()))
+ times = pd.DatetimeIndex(sorted(work["date"].unique()))
tmin, tmax = times.min(), times.max()
x0, x1 = _auto_time_limits(tmin, tmax)
times = times.union(pd.DatetimeIndex([x0, x1])).sort_values()
- # usage units (kann leer sein, oder mehrere – wir zeigen dann z.B. "km/stk")
usage_units = sorted(
work.loc[work["flag"] == "U", "unit"]
.dropna()
@@ -190,25 +164,21 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim
.tolist()
)
- # pro Person: Beiträge (C) und Nutzung (U) als kumulatives step-series auf timeline
contrib_cum: Dict[str, pd.Series] = {}
usage_cum: Dict[str, pd.Series] = {}
for p in participants:
- c = work[(work["person"] == p) & (work["flag"] == "C")].copy()
- u = work[(work["person"] == p) & (work["flag"] == "U")].copy()
+ c = work[(work["debitor"] == p) & (work["flag"] == "C")].copy()
+ u = work[(work["debitor"] == p) & (work["flag"] == "U")].copy()
- # Beiträge: nach Datum aggregieren, reindex auf timeline, kumulieren
- c_by_t = c.groupby("Datum")["value"].sum() if not c.empty else pd.Series(dtype=float)
+ c_by_t = c.groupby("date")["val"].sum() if not c.empty else pd.Series(dtype=float)
c_by_t = c_by_t.reindex(times, fill_value=0.0)
contrib_cum[p] = c_by_t.cumsum()
- # Nutzung: nach Datum aggregieren, reindex auf timeline, kumulieren
- u_by_t = u.groupby("Datum")["value"].sum() if not u.empty else pd.Series(dtype=float)
+ u_by_t = u.groupby("date")["val"].sum() if not u.empty else pd.Series(dtype=float)
u_by_t = u_by_t.reindex(times, fill_value=0.0)
usage_cum[p] = u_by_t.cumsum()
- # share über Zeit: kumulative total contributions verteilt
total_contrib = sum((contrib_cum[p] for p in participants), start=pd.Series(0.0, index=times))
total_usage = sum((usage_cum[p] for p in participants), start=pd.Series(0.0, index=times))
@@ -217,9 +187,7 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim
share_cum: Dict[str, pd.Series] = {}
if has_any_u:
- # usage-mode sobald U existiert; solange total_usage==0 => equal fallback
for p in participants:
- # share = total_contrib * usage_p / total_usage, sonst total_contrib/n
usage_p = usage_cum[p]
with np.errstate(divide="ignore", invalid="ignore"):
share_usage = total_contrib * (usage_p / total_usage.replace(0.0, np.nan))
@@ -227,7 +195,6 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim
share = share_usage.where(total_usage > 0, share_equal)
share_cum[p] = share.fillna(0.0)
else:
- # equal-mode immer
equal = total_contrib / float(n)
for p in participants:
share_cum[p] = equal
@@ -254,13 +221,6 @@ def _prepare_group_timeseries(df: pd.DataFrame, group: str) -> Optional[GroupTim
@dataclass
class GroupChartBigFrame(BigFrame):
- """
- kind:
- - 'usage_cum'
- - 'contrib_cum'
- - 'share_cum'
- - 'ratio'
- """
gts: GroupTimeSeries
kind: str
@@ -271,48 +231,45 @@ class GroupChartBigFrame(BigFrame):
formatter = mdates.ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)
- ax.xaxis.get_offset_text().set_visible(False) # <-- "2025-Dec" weg
+ ax.xaxis.get_offset_text().set_visible(False)
ax.set_xlim(self.gts.xlim_start, self.gts.xlim_end)
if self.kind == "usage_cum":
series_map = self.gts.usage_cum
unit = "/".join(self.gts.usage_units) if self.gts.usage_units else ""
- ax.set_ylabel(f"Verbrauch kumulativ {unit}".strip(), fontproperties=mono_font)
+ ax.set_ylabel(f"Usage cumulative {unit}".strip(), fontproperties=mono_font)
elif self.kind == "contrib_cum":
series_map = self.gts.contrib_cum
- ax.set_ylabel("Contributions kumulativ €", fontproperties=mono_font)
+ ax.set_ylabel("Contribution cumulative €", fontproperties=mono_font)
elif self.kind == "share_cum":
series_map = self.gts.share_cum
- ax.set_ylabel("Anteil kumulativ €", fontproperties=mono_font)
+ ax.set_ylabel("Share cumulative €", fontproperties=mono_font)
elif self.kind == "ratio":
series_map = self.gts.ratio
- ax.set_ylabel("Anteil / Ausgelegt", fontproperties=mono_font)
- ax.set_yscale("log") # <-- LOG
+ ax.set_ylabel("Share / Contribution ratio (logarithmic)", fontproperties=mono_font)
+ ax.set_yscale("log")
else:
raise ValueError(f"Unknown kind: {self.kind}")
- # Plot + Sammeln für robuste y-Limits
all_vals = []
- min_ratio = 1e-3 # „quasi 0“ für log, damit Kurven am Anfang nicht "mittendrin" starten
+ min_ratio = 1e-3
for p in self.gts.participants:
y = series_map[p].copy()
if self.kind == "ratio":
- # NaN/0/Inf behandeln, damit die Kurve von Anfang an existiert
y = y.replace([np.inf, -np.inf], np.nan)
y = y.fillna(min_ratio)
y = y.clip(lower=min_ratio)
else:
y = y.replace([np.inf, -np.inf], np.nan).fillna(0.0)
- # Steps für kumulative Kurven ist meist sauberer
ax.plot(self.gts.times, y.values, label=p, linewidth=1, drawstyle="steps-post")
v = y.values
@@ -320,7 +277,6 @@ class GroupChartBigFrame(BigFrame):
if v.size:
all_vals.append(v)
- # y-Limits so setzen, dass wirklich ALLE Werte sichtbar sind
if all_vals:
vv = np.concatenate(all_vals)
@@ -329,14 +285,14 @@ class GroupChartBigFrame(BigFrame):
if vmax <= 0:
ax.set_ylim(0, 1)
else:
- ax.set_ylim(0, vmax * 1.08) # kleiner Puffer
+ ax.set_ylim(0, vmax * 1.08)
elif self.kind == "ratio":
vpos = vv[vv > 0]
if vpos.size:
vmin = float(np.nanmin(vpos))
vmax = float(np.nanmax(vpos))
- ax.set_ylim(vmin / 1.5, vmax * 1.5) # log: multiplicative padding
+ ax.set_ylim(vmin / 1.5, vmax * 1.5)
ax.grid(True, alpha=0.2)
@@ -345,7 +301,6 @@ class GroupChartBigFrame(BigFrame):
for t in leg.get_texts():
t.set_fontproperties(mono_font)
- # Tick-Fonts monospace
for tick in ax.get_xticklabels() + ax.get_yticklabels():
tick.set_fontproperties(mono_font)
@@ -358,12 +313,11 @@ class TextFrame(Frame):
@dataclass
class PlotBigFrame(BigFrame):
- per_person: pd.DataFrame # erwartet Spalten: person, contributed, share
+ per_debitor: pd.DataFrame
def render(self, ax: Axes, mono_font: FontProperties) -> None:
- # Axes ist schon da, wir zeichnen direkt hinein
ax.axis("on")
- plot_df = self.per_person.set_index("person")[["contributed", "share"]]
+ plot_df = self.per_debitor.set_index("debitor")[["contributed", "share"]]
plot_df.plot.bar(ax=ax)
ax.tick_params(axis="x", rotation=0)
leg = ax.legend(prop=mono_font)
@@ -386,38 +340,37 @@ class GeneralModule:
mono_font = context.get("mono_font") or FontProperties(family="DejaVu Sans Mono", size=8)
- group_summary, per_person, detail = compute_group_distribution(df)
+ group_summary, per_debitor, detail = compute_group_distribution(df)
- balance = {r["person"]: float(r["balance"]) for _, r in per_person.iterrows()}
+ balance = {r["debitor"]: float(r["balance"]) for _, r in per_debitor.iterrows()}
payments = self._minimize_payments(balance)
- # ---- NEU: Textauswertung für Konsole
summary_lines = []
- summary_lines.append("General – Verteilung über Distributionsgruppen")
+ summary_lines.append("General")
summary_lines.append("")
- summary_lines.append("Gruppen:")
+ summary_lines.append("Groups:")
for _, r in group_summary.sort_values("group").iterrows():
g = r["group"]
total_c = float(r.get("total_contrib", 0.0))
u_count = int(r.get("u_count", 0))
mode = "usage" if u_count > 0 else "equal"
participants = r.get("participants", []) or []
- summary_lines.append(f" - {g}: {total_c:.2f} €; mode={mode}; teilnehmer={len(participants)}")
+ summary_lines.append(f" - {g}: {total_c:.2f} €; mode={mode}; participants={len(participants)}")
summary_lines.append("")
- summary_lines.append("Personen (Summe über alle Gruppen):")
- for _, r in per_person.sort_values("person").iterrows():
+ summary_lines.append("Debitors (total):")
+ for _, r in per_debitor.sort_values("debitor").iterrows():
summary_lines.append(
- f" - {r['person']}: ausgelegt={r['contributed']:.2f} €; anteil={r['share']:.2f} €; saldo={r['balance']:.2f} €"
+ f" - {r['debitor']}: contributed={r['contributed']:.2f} €; share={r['share']:.2f} €; balance={r['balance']:.2f} €"
)
summary_lines.append("")
- summary_lines.append("Ausgleich (minimiert):")
+ summary_lines.append("Compensation (minimized):")
if payments:
for p, r, a in payments:
summary_lines.append(f" - {p} → {r}: {a:.2f} €")
else:
- summary_lines.append(" (keine Zahlungen nötig)")
+ summary_lines.append(" (No compensation required)")
summary_text = "\n".join(summary_lines)
@@ -426,45 +379,42 @@ class GeneralModule:
pages: List[plt.Figure] = []
if want_pdf:
- frames.extend(self._make_frames(group_summary, per_person, payments))
+ frames.extend(self._make_frames(group_summary, per_debitor, payments))
- # BigFrame: Gesamt-Balkenplot bleibt (wie vorher)
bigframes.append(
PlotBigFrame(
- title="General – Ausgelegt vs Anteil (Summe über Gruppen)",
- per_person=per_person.copy(),
+ title="General – Shares vs. Contributions (total)",
+ per_debitor=per_debitor.copy(),
)
)
- # NEU: pro Distributionsgruppe 4 BigFrame-Charts
for g in sorted(group_summary["group"].unique().tolist()):
gts = _prepare_group_timeseries(df, g)
if not gts:
continue
bigframes.append(GroupChartBigFrame(
- title=f"{g} – Kumulativer Verbrauch pro Person",
+ title=f"{g} – Cumulative usage per debitor",
gts=gts,
kind="usage_cum",
))
bigframes.append(GroupChartBigFrame(
- title=f"{g} – Kumulative Contributions pro Person",
+ title=f"{g} – Cumulative contributions per debitor",
gts=gts,
kind="contrib_cum",
))
bigframes.append(GroupChartBigFrame(
- title=f"{g} – Anteil pro Person (zeitlicher Verlauf)",
+ title=f"{g} – Share per debitor",
gts=gts,
kind="share_cum",
))
bigframes.append(GroupChartBigFrame(
- title=f"{g} – Verhältnis Anteil/Ausgelegt (zeitlicher Verlauf)",
+ title=f"{g} – Share / Contribution ratio (logarithmic)",
gts=gts,
kind="ratio",
))
- # Pages: nur noch Detailseiten, keine Balkenplot-Seite mehr
- pages.extend(self._make_pages(group_summary, per_person, detail, mono_font))
+ pages.extend(self._make_pages(group_summary, per_debitor, detail, mono_font))
return ModuleResult(summary_text=summary_text, frames=frames, bigframes=bigframes, pages=pages)
@@ -493,57 +443,52 @@ class GeneralModule:
j += 1
return out
- def _make_frames(self, group_summary: pd.DataFrame, per_person: pd.DataFrame, payments: List[Tuple[str,str,float]]) -> List[Frame]:
- # Frame 1: Gruppen-Übersicht
- lines = ["Gruppenübersicht:"]
+ def _make_frames(self, group_summary: pd.DataFrame, per_debitor: pd.DataFrame, payments: List[Tuple[str,str,float]]) -> List[Frame]:
+ lines = ["Groups:"]
for _, r in group_summary.sort_values("group").iterrows():
g = r["group"]
total_c = float(r.get("total_contrib", 0.0))
u_count = int(r.get("u_count", 0))
parts = r.get("participants", [])
mode = "usage" if u_count > 0 else "equal"
- lines.append(f"- {g}: {total_c:.2f} €; mode={mode}; teilnehmer={len(parts)}")
+ lines.append(f"- {g}: {total_c:.2f} €; mode={mode}; participants={len(parts)}")
- f1 = TextFrame(title="General: Gruppen", text="\n".join(lines))
+ f1 = TextFrame(title="General: Groups", text="\n".join(lines))
- # Frame 2: Personen-Totale
- lines = ["Personen (Summe über alle Gruppen):", "Person | contributed | share | balance"]
- for _, r in per_person.iterrows():
- lines.append(f"{r['person']}: {r['contributed']:.2f} €; {r['share']:.2f} €; {r['balance']:.2f} €")
- f2 = TextFrame(title="General: Personen", text="\n".join(lines))
+ lines = ["Debitor total:", "debitor | contributed | share | balance"]
+ for _, r in per_debitor.iterrows():
+ lines.append(f"{r['debitor']} | {r['contributed']:.2f} € | {r['share']:.2f} € | {r['balance']:.2f} €")
+ f2 = TextFrame(title="General: Debitors", text="\n".join(lines))
- # Frame 3: Ausgleich
- lines = ["Ausgleich (minimiert):"]
+ lines = ["Compensation (minimized):"]
if payments:
for p, r, a in payments:
lines.append(f"{p} → {r}: {a:.2f} €")
else:
- lines.append("(keine Zahlungen nötig)")
- f3 = TextFrame(title="General: Ausgleich", text="\n".join(lines))
+ lines.append("(No compensation required)")
+ f3 = TextFrame(title="General: Compensation", text="\n".join(lines))
return [f1, f2, f3]
- def _make_pages(self, group_summary, per_person, detail, mono_font) -> List[plt.Figure]:
+ def _make_pages(self, group_summary, per_debitor, detail, mono_font) -> List[plt.Figure]:
pages: List[plt.Figure] = []
- # Textseiten: pro Gruppe Detail (ggf. mehrere)
- # Wir machen je Gruppe eine Seite, wenn es nicht zu viele sind
for g in sorted(detail["group"].unique().tolist()):
- gdet = detail[detail["group"] == g].sort_values("person")
+ gdet = detail[detail["group"] == g].sort_values("debitor")
total_c = float(group_summary[group_summary["group"] == g]["total_contrib"].iloc[0]) if (group_summary["group"] == g).any() else 0.0
u_count = int(group_summary[group_summary["group"] == g]["u_count"].iloc[0]) if (group_summary["group"] == g).any() else 0
mode = "usage" if u_count > 0 else "equal"
lines = [
- f"Gruppe: {g}",
+ f"Group: {g}",
f"Total Contribution: {total_c:.2f} €",
f"Mode: {mode}",
"",
- "Person | contributed | usage | share | balance",
+ "debitor | contributed | usage | share | balance",
]
for _, r in gdet.iterrows():
lines.append(
- f"{r['person']}: {r['contributed']:.2f} €; {r['usage']:.4f}; {r['share']:.2f} €; {r['balance']:.2f} €"
+ f"{r['debitor']} | {r['contributed']:.2f} € | {r['usage']:.4f} | {r['share']:.2f} € | {r['balance']:.2f} €"
)
fig, ax = plt.subplots(figsize=(8.27, 11.69))
@@ -551,10 +496,5 @@ class GeneralModule:
ax.text(0, 1, "\n".join(lines), va="top", ha="left", fontproperties=mono_font)
pages.append(fig)
- # Optional: Nutzungsverläufe für Gruppen mit unit "km"
- # (nur wenn U vorhanden und unit in den U-rows km ist)
- # Dafür brauchen wir zeitliche Daten → aus detail nicht möglich, also direkt aus df wäre besser.
- # Wenn du willst, ergänze ich das als eigene Seite pro km-Gruppe auf Basis der Original-DF.
-
return pages
diff --git a/xembu.py b/xembu.py
index 0ae60e6..02f1132 100644
--- a/xembu.py
+++ b/xembu.py
@@ -16,18 +16,17 @@ from modules.general import GeneralModule
from datetime import datetime
CSV_COLUMNS = [
- "Datum",
- "Nutzer",
- "Distributionsgruppe",
- "Distributionsflag",
- "Positionsbezeichnung",
- "Positionswert",
- "Modules",
- "Parameters",
- "Beleg",
+ "date",
+ "debitor",
+ "group",
+ "group_flag",
+ "position",
+ "value",
+ "modules",
+ "parameters",
+ "receipt",
]
-
def _pick_mono_font(size: int = 8) -> font_manager.FontProperties:
for fam in ["Inconsolata", "DejaVu Sans Mono", "monospace"]:
try:
@@ -37,13 +36,11 @@ def _pick_mono_font(size: int = 8) -> font_manager.FontProperties:
return font_manager.FontProperties(size=size)
def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, total_pages: int):
- # Margins: links/rechts 2cm, oben/unten 1cm
margin_lr_cm = 2.0
margin_tb_cm = 1.0
- # Zusätzlicher Abstand (Bänder) zwischen Header/Footer und Content
- header_gap_cm = 1.3 # mehr Abstand nach unten
- footer_gap_cm = 2.0 # mehr Abstand nach oben (2-zeiliger Footer)
+ header_gap_cm = 1.3
+ footer_gap_cm = 2.0
cm_to_in = 1 / 2.54
margin_lr_in = margin_lr_cm * cm_to_in
@@ -58,27 +55,22 @@ def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, t
header_gap = header_gap_in / h_in
footer_gap = footer_gap_in / h_in
- # Content-Bereich: innerhalb der Margins + zusätzlich Platz für Header/Footer
top = 1 - my - header_gap
bottom = my + footer_gap
if top <= bottom:
- # Fallback, falls es zu eng wird
top = 1 - my
bottom = my
fig.subplots_adjust(left=mx, right=1 - mx, top=top, bottom=bottom)
- # Header/Footer Positionen: jeweils an der inneren Kante der Margins
left_x = mx
right_x = 1 - mx
header_y = 1 - my
footer_y = my
- # Kopfzeile
fig.text(left_x, header_y, title, ha="left", va="top", fontproperties=mono_font, fontsize=9)
fig.text(right_x, header_y, generated_at, ha="right", va="top", fontproperties=mono_font, fontsize=9)
- # Fußzeile links (zweizeilig)
footer_left = (
"xembu - eXtensible Event-based Multiuser Bookkeeping Utility\n"
"Copyright (C) 2024 Leonard Kugis\n"
@@ -87,7 +79,6 @@ def _decorate_figure(fig, mono_font, title: str, generated_at: str, page: int, t
fig.text(left_x, footer_y, footer_left, ha="left", va="bottom",
fontproperties=mono_font, fontsize=7, linespacing=1.1)
- # Fußzeile rechts
fig.text(right_x, footer_y, f"{page} / {total_pages}", ha="right", va="bottom",
fontproperties=mono_font, fontsize=8)
@@ -112,21 +103,18 @@ def parse_value_unit(s: str):
num_str = " ".join(parts[:-1]).strip().replace(",", ".").replace("€", "").strip()
return float(num_str), unit
-
def parse_modules_list(s: str) -> List[str]:
if s is None or (isinstance(s, float) and pd.isna(s)):
return []
mods = [m.strip() for m in str(s).split(",")]
return [m for m in mods if m]
-
def parse_groups_list(s: str) -> List[str]:
if s is None or (isinstance(s, float) and pd.isna(s)):
return []
gs = [g.strip() for g in str(s).split(",")]
return [g for g in gs if g]
-
def parse_parameters_list(s: str) -> List[tuple]:
if s is None or (isinstance(s, float) and pd.isna(s)):
return []
@@ -157,24 +145,23 @@ def parse_parameters_list(s: str) -> List[tuple]:
tuples.append(tuple(vals))
return tuples
-
def parse_csv(path: str) -> pd.DataFrame:
df = _read_csv_flexible(path)
- df["Datum"] = pd.to_datetime(df["Datum"], format="%Y-%m-%d-%H-%M-%S", errors="coerce")
- df["Nutzer"] = df["Nutzer"].astype(str).str.strip()
- df["Distributionsflag"] = df["Distributionsflag"].astype(str).str.strip().str.upper()
- df["Positionsbezeichnung"] = df["Positionsbezeichnung"].astype(str).str.strip()
+ df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d-%H-%M-%S", errors="coerce")
+ df["debitor"] = df["debitor"].astype(str).str.strip()
+ df["group_flag"] = df["group_flag"].astype(str).str.strip().str.upper()
+ df["position"] = df["position"].astype(str).str.strip()
- df["dist_groups"] = df["Distributionsgruppe"].apply(parse_groups_list)
- df["modules_list"] = df["Modules"].apply(parse_modules_list)
- df["params_list"] = df["Parameters"].apply(parse_parameters_list)
+ df["dist_groups"] = df["group"].apply(parse_groups_list)
+ df["modules_list"] = df["modules"].apply(parse_modules_list)
+ df["params_list"] = df["parameters"].apply(parse_parameters_list)
- vals_units = df["Positionswert"].apply(parse_value_unit)
- df["value"] = vals_units.apply(lambda x: x[0])
+ vals_units = df["value"].apply(parse_value_unit)
+ df["val"] = vals_units.apply(lambda x: x[0])
df["unit"] = vals_units.apply(lambda x: x[1])
- df["Beleg"] = df["Beleg"].where(df["Beleg"].notna(), "")
+ df["receipt"] = df["receipt"].where(df["receipt"].notna(), "")
return df
@@ -193,32 +180,32 @@ def _build_positions_table_figs(df: pd.DataFrame, base_dir: str, mono_font):
figures = []
columns = [
- "Datum", "Nutzer", "Distributionsgruppe", "Flag",
- "Positionsbezeichnung", "Positionswert",
- "Modules", "Parameters", "Beleg", "SHA1",
+ "Date", "Debitor", "Group", "Flag",
+ "Position", "Value",
+ "Modules", "Parameters", "Receipt", "SHA1",
]
table_data = []
- for _, row in df.sort_values("Datum").iterrows():
- sha1 = compute_hash(str(row["Beleg"]), base_dir=base_dir) if row["Beleg"] else None
+ for _, row in df.sort_values("date").iterrows():
+ sha1 = compute_hash(str(row["receipt"]), base_dir=base_dir) if row["receipt"] else None
sha1_fmt = ""
if sha1:
sha1_fmt = sha1[: len(sha1) // 2] + "\n" + sha1[len(sha1) // 2 :]
- groups_str = ", ".join(row["dist_groups"]) if isinstance(row["dist_groups"], list) else str(row["Distributionsgruppe"])
- mods_str = ", ".join(row["modules_list"]) if isinstance(row["modules_list"], list) else str(row["Modules"])
- params_str = str(row["params_list"]) if isinstance(row["params_list"], list) else str(row["Parameters"])
+ groups_str = ", ".join(row["dist_groups"]) if isinstance(row["dist_groups"], list) else str(row["group"])
+ mods_str = ", ".join(row["modules_list"]) if isinstance(row["modules_list"], list) else str(row["modules"])
+ params_str = str(row["params_list"]) if isinstance(row["params_list"], list) else str(row["parameters"])
table_data.append([
- row["Datum"].strftime("%Y-%m-%d %H:%M:%S") if pd.notna(row["Datum"]) else "INVALID",
- row["Nutzer"],
+ row["date"].strftime("%Y-%m-%d %H:%M:%S") if pd.notna(row["date"]) else "INVALID",
+ row["debitor"],
groups_str,
- row["Distributionsflag"],
- row["Positionsbezeichnung"],
- f"{row['value']:.4f} {row['unit']}".strip(),
+ row["group_flag"],
+ row["position"],
+ f"{row['val']:.4f} {row['unit']}".strip(),
mods_str,
params_str,
- str(row["Beleg"]) if row["Beleg"] else "",
+ str(row["receipt"]) if row["receipt"] else "",
sha1_fmt,
])
@@ -265,7 +252,6 @@ def _build_positions_table_figs(df: pd.DataFrame, base_dir: str, mono_font):
return figures
-
def _separator_page(pdf: PdfPages, title: str, mono_font):
fig, ax = plt.subplots(figsize=(8.27, 11.69))
ax.axis("off")
@@ -338,16 +324,14 @@ def create_pdf(
generated_at = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
- # 1) Alle Seiten als Figures sammeln (damit wir total_pages kennen)
figs: List[plt.Figure] = []
figs.extend(_build_positions_table_figs(df, base_dir=base_dir, mono_font=mono_font))
figs.extend(_build_frame_figs(module_frames, mono_font=mono_font))
figs.extend(_build_bigframe_figs(module_bigframes, mono_font=mono_font))
- figs.extend(module_pages) # bereits fertige Figures aus Modulen
+ figs.extend(module_pages)
total_pages = len(figs)
- # 2) Speichern mit Header/Footer + Seitenzählung
with PdfPages(pdf_path) as pdf:
for i, fig in enumerate(figs, start=1):
_decorate_figure(fig, mono_font, title=title, generated_at=generated_at, page=i, total_pages=total_pages)
@@ -355,50 +339,37 @@ def create_pdf(
plt.close(fig)
def create_bundle(archive_path: str, csv_path: str, df: pd.DataFrame, base_dir: str, pdf_path: Optional[str] = None):
- """
- Bundle enthält: CSV, optional PDF, und alle Belege (relative Pfade aus 'Beleg' relativ zu base_dir).
- Ausgabe: .tar.zst (über externes zstd).
- """
os.makedirs(os.path.dirname(os.path.abspath(archive_path)) or ".", exist_ok=True)
- # Wir bauen ein temporäres .tar daneben und komprimieren danach.
tar_path = archive_path
if tar_path.endswith(".zst"):
- tar_path = tar_path[:-4] # strip ".zst"
+ tar_path = tar_path[:-4]
if not tar_path.endswith(".tar"):
tar_path = tar_path + ".tar"
- # Sammle Belege
beleg_paths = []
- for p in df["Beleg"].astype(str).tolist():
+ for p in df["receipt"].astype(str).tolist():
p = p.strip()
if p:
beleg_paths.append(p)
with tarfile.open(tar_path, "w") as tar:
- # CSV
tar.add(csv_path, arcname=os.path.basename(csv_path))
- # PDF optional
if pdf_path and os.path.exists(pdf_path):
tar.add(pdf_path, arcname=os.path.basename(pdf_path))
- # Belege
missing = []
for rel in sorted(set(beleg_paths)):
abs_path = rel if os.path.isabs(rel) else os.path.join(base_dir, rel)
if os.path.exists(abs_path):
- # arcname: möglichst den relativen Pfad behalten
arcname = os.path.basename(rel) if os.path.isabs(rel) else rel
tar.add(abs_path, arcname=arcname)
else:
missing.append(rel)
- # zstd komprimieren → archive_path
- # zstd -o <archive> <tar>
subprocess.run(["zstd", "-T0", "-o", archive_path, tar_path], check=True)
- # tar löschen (zstd bekommt eine Kopie)
try:
os.remove(tar_path)
except Exception:
@@ -409,13 +380,12 @@ def create_bundle(archive_path: str, csv_path: str, df: pd.DataFrame, base_dir:
for m in missing:
print(f" - {m}")
-
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("csv", help="Pfad zur CSV-Datei")
- parser.add_argument("--title", "-t", help="Titel für PDF-Kopfzeile (optional)")
- parser.add_argument("--pdf", "-p", help="Pfad zur Ziel-PDF (optional)")
- parser.add_argument("--bundle", "-b", help="Pfad zum Bundle (.tar.zst), enthält CSV, PDF (falls erzeugt) und Belege (optional)")
+ parser.add_argument("csv", help="CSV path")
+ parser.add_argument("--title", "-t", help="PDF header title (optional)")
+ parser.add_argument("--pdf", "-p", help="PDF path (optional)")
+ parser.add_argument("--bundle", "-b", help="Path to bundle (.tar.zst), containing CSV, PDF and receipts (optional)")
args = parser.parse_args()
csv_path = os.path.abspath(args.csv)
@@ -424,20 +394,17 @@ def main():
title = args.title if args.title else os.path.basename(csv_path)
df = parse_csv(csv_path)
- if df["Datum"].isna().any():
- bad = df[df["Datum"].isna()][CSV_COLUMNS]
+ if df["date"].isna().any():
+ bad = df[df["date"].isna()][CSV_COLUMNS]
raise ValueError(f"Ungültige Datumsangaben in folgenden Zeilen:\n{bad}")
want_pdf = bool(args.pdf)
mono_font = _pick_mono_font(size=8)
- # Module-Registry
modules: Dict[str, Module] = {
"general": GeneralModule(),
- # weitere Module später hier registrieren
}
- # Modulzuordnung aus CSV
rows_for_module: Dict[str, List[int]] = {}
for idx, row in df.iterrows():
for m in row["modules_list"]:
@@ -445,10 +412,8 @@ def main():
results: List[ModuleResult] = []
- # General immer
results.append(modules["general"].process(df, context={"base_dir": base_dir, "want_pdf": want_pdf, "mono_font": mono_font}))
- # weitere Module optional
for mod_name, indices in rows_for_module.items():
if mod_name == "general":
continue
@@ -459,31 +424,27 @@ def main():
subdf = df.loc[indices].copy()
results.append(mod.process(subdf, context={"base_dir": base_dir, "want_pdf": want_pdf, "mono_font": mono_font}))
- # ---- NEU: Konsolen-Auswertung je Modul
print("\n===== Auswertung =====")
for r in results:
print(r.summary_text)
print("")
- # PDF optional
if args.pdf:
module_frames: List[Frame] = []
- module_bigframes: List[BigFrame] = [] # NEU
+ module_bigframes: List[BigFrame] = []
module_pages: List[plt.Figure] = []
for r in results:
module_frames.extend(r.frames)
- module_bigframes.extend(r.bigframes) # NEU
+ module_bigframes.extend(r.bigframes)
module_pages.extend(r.pages)
create_pdf(df, module_frames, module_bigframes, module_pages, args.pdf, mono_font, base_dir=base_dir, title=title)
print(f"[OK] PDF geschrieben: {args.pdf}")
- # Bundle optional (enthält CSV + ggf. PDF + Belege)
if args.bundle:
create_bundle(args.bundle, csv_path, df, base_dir=base_dir, pdf_path=args.pdf if args.pdf else None)
print(f"[OK] Bundle geschrieben: {args.bundle}")
-
if __name__ == "__main__":
main()
diff --git a/xembu_testdata/testdata.csv b/xembu_testdata/testdata.csv
index c110972..90ec3bb 100644
--- a/xembu_testdata/testdata.csv
+++ b/xembu_testdata/testdata.csv
@@ -1,4 +1,4 @@
-Datum;Nutzer;Distributionsgruppe;Distributionsflag;Positionsbezeichnung;Positionswert;Modules;Parameters;Beleg
+date;debitor;group;group_flag;position;value;modules;parameters;receipt
2025-12-02-02-17-57;Dana;KFZ;U;Autofahrt;20.2 km;;;belege/beleg058.txt
2025-12-02-02-30-53;Leo;General;C;Putzzeug;14.68 €;;;belege/beleg011.txt
2025-12-02-11-26-50;Bene;General;C;Miete Küche;38.39 €;;;belege/beleg036.txt