Files
crm.twinpol.com/modules/EcmInvoiceOuts/invoice_ai_analysis.py

148 lines
5.7 KiB
Python
Raw Permalink Normal View History

2025-08-22 15:56:47 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Invoice AI Analysis end-to-end script (MySQL -> KPIs -> Mix Change -> Anomalies -> HTML)
See previous instructions for usage and requirements.
"""
from __future__ import annotations
import os
import sys
import argparse
from dataclasses import dataclass
from typing import Any, List, Dict
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text
@dataclass
class Config:
host: str
port: int
user: str
password: str
database: str
date_from: str
date_to: str
doc_type: str
output_html: str
def parse_args() -> Config:
parser = argparse.ArgumentParser(description="Invoice AI Analysis (MySQL -> HTML)")
parser.add_argument("--host", default=os.getenv("DB_HOST", "localhost"))
parser.add_argument("--port", default=int(os.getenv("DB_PORT", "3306")), type=int)
parser.add_argument("--user", default=os.getenv("DB_USER", "root"))
parser.add_argument("--password", default=os.getenv("DB_PASS", "rootpassword"))
parser.add_argument("--database", default=os.getenv("DB_NAME", "twinpol-mysql56"))
parser.add_argument("--from", dest="date_from", default="2025-07-01")
parser.add_argument("--to", dest="date_to", default="2025-08-01")
parser.add_argument("--type", dest="doc_type", default="normal")
parser.add_argument("--out", dest="output_html", default="report.html")
args = parser.parse_args()
return Config(
host=args.host, port=args.port, user=args.user, password=args.password,
database=args.database, date_from=args.date_from, date_to=args.date_to,
doc_type=args.doc_type, output_html=args.output_html
)
def get_engine(cfg: Config):
url = f"mysql+pymysql://{cfg.user}:{cfg.password}@{cfg.host}:{cfg.port}/{cfg.database}?charset=utf8mb4"
return create_engine(url, pool_recycle=3600, pool_pre_ping=True, future=True)
def fetch_invoices(engine, cfg: Config) -> pd.DataFrame:
sql = text("""
SELECT i.document_no,
i.parent_name,
DATE(i.register_date) AS register_date,
ii.code,
ii.name,
ii.quantity,
ii.total_netto
FROM ecminvoiceoutitems AS ii
JOIN ecminvoiceouts AS i ON i.id = ii.ecminvoiceout_id
WHERE i.register_date >= :date_from
AND i.register_date < :date_to
AND i.type = :doc_type
""")
with engine.connect() as con:
df = pd.read_sql(sql, con, params={
"date_from": cfg.date_from,
"date_to": cfg.date_to,
"doc_type": cfg.doc_type
})
df["register_date"] = pd.to_datetime(df["register_date"], errors="coerce")
df["quantity"] = pd.to_numeric(df["quantity"], errors="coerce")
df["total_netto"] = pd.to_numeric(df["total_netto"], errors="coerce")
return df.dropna(subset=["register_date", "quantity", "total_netto"])
def compute_kpis(df: pd.DataFrame) -> Dict[str, Any]:
total_revenue = float(df["total_netto"].sum())
total_invoices = int(df["document_no"].nunique())
total_units = float(df["quantity"].sum())
aov = float(total_revenue / total_invoices) if total_invoices else 0.0
top_products = (df.groupby(["code", "name"], as_index=False)
.agg(total_netto=("total_netto", "sum"))
.sort_values("total_netto", ascending=False)
.head(5))
top_customers = (df.groupby(["parent_name"], as_index=False)
.agg(total_netto=("total_netto", "sum"))
.sort_values("total_netto", ascending=False)
.head(5))
return {
"total_revenue": total_revenue,
"total_invoices": total_invoices,
"total_units": total_units,
"aov": aov,
"top_products": top_products,
"top_customers": top_customers,
}
def render_html(cfg: Config, kpis: Dict[str, Any]) -> str:
def fmt_cur(x): return f"{x:,.2f}".replace(",", " ").replace(".", ",")
def table(headers, rows):
th = "".join(f"<th>{h}</th>" for h in headers)
trs = "".join("<tr>" + "".join(f"<td>{v}</td>" for v in row) + "</tr>" for row in rows)
return f"<table><thead><tr>{th}</tr></thead><tbody>{trs}</tbody></table>"
kpi_table = table(["Metryka", "Wartość"], [
["Łączny przychód", f"{fmt_cur(kpis['total_revenue'])} PLN"],
["Liczba faktur", f"{kpis['total_invoices']}"],
["Sprzedane jednostki", f"{int(kpis['total_units']):,}".replace(",", " ")],
["Średnia wartość faktury", f"{fmt_cur(kpis['aov'])} PLN"]
])
prod_table = table(["Kod", "Produkt", "Przychód"], [
[r["code"], r["name"], fmt_cur(r["total_netto"]) + " PLN"]
for _, r in kpis["top_products"].iterrows()
])
cust_table = table(["Klient", "Przychód"], [
[r["parent_name"], fmt_cur(r["total_netto"]) + " PLN"]
for _, r in kpis["top_customers"].iterrows()
])
return f"""<html><head><meta charset="utf-8"><style>
body{{font-family:Arial, sans-serif;margin:20px}}table{{border-collapse:collapse;width:100%}}
th,td{{border:1px solid #ccc;padding:8px;text-align:left}}
th{{background:#f4f4f4}}
</style></head><body>
<h1>Analiza faktur ({cfg.date_from} {cfg.date_to})</h1>
<h2>KPI</h2>{kpi_table}
<h2>Top produkty</h2>{prod_table}
<h2>Top klienci</h2>{cust_table}
</body></html>"""
def main():
cfg = parse_args()
engine = get_engine(cfg)
df = fetch_invoices(engine, cfg)
if df.empty:
print("No data found.")
return
kpis = compute_kpis(df)
html = render_html(cfg, kpis)
with open(cfg.output_html, "w", encoding="utf-8") as f:
f.write(html)
print(f"Report written to {cfg.output_html}")
if __name__ == "__main__":
main()