further prototyping, added first DB interactions

2026-04-23 15:57:39 +02:00
parent e4ebb1ee7f
commit c5aadd502d
12 changed files with 1196 additions and 283 deletions
--- a/prototypes/db_access.py
+++ b/prototypes/db_access.py
@@ -0,0 +1,142 @@
+# %%
+import importlib
+from pathlib import Path
+
+import polars as pl
+import sqlalchemy as sql
+
+from wce_crm import db
+
+# Re-import the project's `db` module so that edits made to it are picked up
+# without restarting the (presumably interactive, cell-based) session.
+importlib.reload(db)
+
+# %%
+# Database directory: "data/db" below the parent of the current working directory.
+PTH_DATA_DB = Path.cwd().parent.joinpath("data/db")
+assert PTH_DATA_DB.exists()
+assert PTH_DATA_DB.is_dir()
+# %%
+# Both SQLite files are expected to exist already inside that directory.
+DB_KL, DB_CRM = (PTH_DATA_DB / name for name in ("wce_kontaktliste.db", "wce_crm.db"))
+assert DB_KL.exists()
+assert DB_CRM.exists()
+# %%
+# Engine bound to the CRM database file.
+crm_url = f"sqlite:///{DB_CRM}"
+engine = sql.create_engine(crm_url)
+# %%
+db.df_crm_master
+# %%
+# Select everything from `db.ext_crm_master` and read it into a Polars frame.
+stmt = sql.select(db.ext_crm_master)
+# Renders the SQL for the engine; the string is discarded when run as a script.
+str(stmt.compile(engine))
+df = pl.read_database(
+    query=stmt,
+    connection=engine,
+    schema_overrides=db.ext_crm_master_schema,
+)
+# df = pl.concat([df, df[:2]])
+# %%
+# Count the rows whose `ma_unternehmensname` value occurs more than once.
+df.select(pl.col("ma_unternehmensname")).is_duplicated().sum()
+# %%
+# Make the names unique: the first occurrence of a name is kept unchanged,
+# every later occurrence gets its running index within the group appended.
+occurrence_idx = pl.int_range(0, pl.len()).over("ma_unternehmensname")
+company_name = pl.col("ma_unternehmensname")
+dedupl_name = (
+    pl.when(occurrence_idx == 0)
+    .then(company_name)
+    .otherwise(pl.format("{} ({})", company_name, occurrence_idx))
+    .alias("ma_unternehmensname_dedupl")
+)
+q = df.lazy().with_columns(dedupl_name)
+df = q.collect()
+# Count the duplicates that remain in the new column.
+df.select(pl.col("ma_unternehmensname_dedupl")).is_duplicated().sum()
+
+# %%
+# Map de-duplicated text to index.
+# Preview the frame; the commented-out line below sketches a lookup from the
+# de-duplicated name to `ma_id`.
+df.head()

+# dict(zip(df["ma_unternehmensname_dedupl"], df["ma_id"]))

+# %%
+# Take row 0 of the frame as a small sample and display it.
+sub = df[0]
+sub
+# Commented-out experiment: render Boolean columns as "Ja"/"Nein".
+# sub.with_columns(
+#     # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein"))
+#     pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep()
+# )
+# %%
+# Render the sample as text: Datetime/Date columns as DD.MM.YYYY, Boolean
+# columns as "Ja"/"Nein", then cast every column to String.
+date_fmt = "%d.%m.%Y"
+datetimes_as_text = pl.col(pl.Datetime).dt.to_string(date_fmt)
+dates_as_text = pl.col(pl.Date).dt.to_string(date_fmt)
+bools_as_text = (
+    pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep()
+)
+q = (
+    sub.lazy()
+    .with_columns(datetimes_as_text, dates_as_text, bools_as_text)
+    .with_columns(pl.all().cast(pl.String))
+)
+sub = q.collect()
+sub
+# %%
+# First row as a dict keyed by column name.
+df.row(index=0, named=True)

+# %%
+db.df_crm_master.estimated_size(unit="mb")


+# %%
+# CRM users: read the first 20 rows of `db.ext_crm_nutzer`.
+stmt = sql.select(db.ext_crm_nutzer).limit(20)
+str(stmt.compile(engine))
+df = pl.read_database(
+    query=stmt,
+    connection=engine,
+    schema_overrides=db.ext_crm_nutzer_schema,
+)
+# %%
+# Ad-hoc check on the raw `Master` table: up to 10 rows whose
+# `ma_ersteintrag_datum` text ends in "ff".
+stmt = sql.text("""SELECT ma_unternehmensname, ma_ersteintrag_datum, ma_aktualisierung_datum 
+FROM Master 
+WHERE ma_ersteintrag_datum LIKE '%ff' 
+LIMIT 10;""")

+# Fetch the rows while the connection is still open: once the `with` block
+# exits, the connection is released and reading from the cursor afterwards is
+# not reliable. The fetched rows stay usable after the connection is closed.
+with engine.connect() as con:
+    res = con.execute(stmt)
+    rows = res.fetchall()

+print(rows)
+
+# %%
+# ----------------------------------------------------------------
+# Rebind `engine` to the contact-list database file and select the first
+# 20 `u_firmenname` values.
+engine = sql.create_engine(f"sqlite:///{DB_KL}")
+stmt = sql.select(db.ext_kl_unternehmen.c.u_firmenname).limit(20)

+# Materialise the rows once, inside the connection scope. A result can only be
+# consumed a single time and should not be read after its connection has been
+# closed, so both views below are derived from the fetched `rows`.
+with engine.connect() as con:
+    res = con.execute(stmt)
+    rows = res.all()

+# Scalar view: the single selected column of every row.
+[row[0] for row in rows]
+# %%
+# Mapping view: each row as a column-name -> value mapping.
+for row in rows:
+    print(row._mapping)
+# %%
+# %%
+# Read all of `db.ext_kl_unternehmen` through the currently bound engine.
+stmt = sql.select(db.ext_kl_unternehmen)
+df = pl.read_database(
+    query=stmt,
+    connection=engine,
+    schema_overrides=db.ext_kl_unternehmen_schema,
+)
+# %%
+df
+# %%
+# Estimated in-memory size in megabytes.
+df.estimated_size(unit="mb")
+# %%
+# Row count.
+df.shape[0]
+# %%
+db.df_kontaktliste
+# %%
+# Work lazily on just the id and name columns of the contact list.
+sub = db.df_kontaktliste.select("u_id", "u_firmenname").lazy()
+# %%
+# Running index of each row within its group of identical `u_firmenname` values.
+occurrence_idx = pl.int_range(0, pl.len()).over("u_firmenname")
+firm_name = pl.col("u_firmenname")
+# Column `t`: the name itself for the first occurrence, "<name> (<index>)" otherwise.
+sub = sub.with_columns(
+    pl.when(occurrence_idx == 0)
+    .then(firm_name)
+    .otherwise(pl.format("{} ({})", firm_name, occurrence_idx))
+    .alias("t")
+)
+# %%
+sub.collect()
+
+# %%
+# Toy frame with repeated strings to demonstrate the de-duplication pattern.
+df = pl.DataFrame({"text_col": ["TEST", "APPLE", "TEST", "TEST", "BANANA", "APPLE"]})

+# Running index 0, 1, 2, ... of each row within its group of equal strings.
+seen_before = pl.int_range(0, pl.len()).over(pl.col("text_col"))
+text = pl.col("text_col")

+# First occurrence keeps the original text; later ones become "<text> (<index>)".
+df = df.with_columns(
+    pl.when(seen_before == 0)
+    .then(text)
+    .otherwise(pl.format("{} ({})", text, seen_before))
+    .alias("updated_col")
+)
+# %%
+df