# NAFKA-crm-gui/prototypes/db_access.py

# %%
import importlib
from pathlib import Path

import polars as pl
import sqlalchemy as sql

from wce_crm import db

# Re-execute the project module so that edits made to it since the first
# import take effect in this interactive session.
importlib.reload(db)

# %%
# Directory holding the SQLite files, resolved relative to the parent of the
# current working directory.
PTH_DATA_DB = Path.cwd().parent.joinpath("data", "db")
assert PTH_DATA_DB.exists()
assert PTH_DATA_DB.is_dir()
# %%
# The two database files used by the cells below; fail early if one is missing.
DB_KL = PTH_DATA_DB.joinpath("wce_kontaktliste.db")
DB_CRM = PTH_DATA_DB.joinpath("wce_crm.db")
assert DB_KL.exists()
assert DB_CRM.exists()
# %%
# Engine pointing at the CRM database file.
engine = sql.create_engine("sqlite:///" + str(DB_CRM))
# %%
db.df_crm_master
# %%
# Read the whole CRM master table into a polars DataFrame.
stmt = sql.select(db.ext_crm_master)
str(stmt.compile(engine))  # rendered SQL; only visible when run on its own
df = pl.read_database(
    query=stmt,
    connection=engine,
    schema_overrides=db.ext_crm_master_schema,
)
# df = pl.concat([df, df[:2]])
# %%
# Number of rows whose company name occurs more than once.
df.select("ma_unternehmensname").is_duplicated().sum()
# %%
q = df.lazy()
# Running index 0, 1, 2, ... within each group of identical company names.
counter = pl.int_range(0, pl.len()).over("ma_unternehmensname")
# First occurrence keeps the plain name, later ones become "<name> (<index>)".
q = q.with_columns(
    pl.when(counter == 0)
    .then(pl.col("ma_unternehmensname"))
    .otherwise(pl.format("{} ({})", pl.col("ma_unternehmensname"), counter))
    .alias("ma_unternehmensname_dedupl")
)
df = q.collect()
# Same duplicate count, now on the de-duplicated column.
df.select("ma_unternehmensname_dedupl").is_duplicated().sum()

# %%
# Mapping from de-duplicated text to its index.
df.head()

# dict(zip(df["ma_unternehmensname_dedupl"], df["ma_id"]))

# %%
# Take the first entry of the frame as a small working sample.
sub = df[0]
sub
# sub.with_columns(
#     # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein"))
#     pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep()
# )
# %%
# Step 1: render temporal columns as day.month.year text and boolean columns
# as "Ja"/"Nein"; `.name.keep()` preserves the original column names.
q = sub.lazy().with_columns(
    pl.col(pl.Datetime).dt.to_string("%d.%m.%Y"),
    pl.col(pl.Date).dt.to_string("%d.%m.%Y"),
    pl.when(pl.col(pl.Boolean))
    .then(pl.lit("Ja"))
    .otherwise(pl.lit("Nein"))
    .name.keep(),
)
# Step 2: cast every column to String.
q = q.with_columns(pl.all().cast(pl.String))
sub = q.collect()
sub
# %%
# First row of the full frame as a {column name: value} dict.
df.row(0, named=True)

# %%
# Estimated size of the master frame in megabytes.
db.df_crm_master.estimated_size("mb")


# %%
# // CRM users
# Read the first 20 rows of the CRM user table.
stmt = sql.select(db.ext_crm_nutzer).limit(20)
str(stmt.compile(engine))  # rendered SQL; only visible when run on its own
df = pl.read_database(
    query=stmt,
    connection=engine,
    schema_overrides=db.ext_crm_nutzer_schema,
)
# %%
# Raw SQL probe: master rows whose first-entry date string ends in "ff".
stmt = sql.text("""SELECT ma_unternehmensname, ma_ersteintrag_datum, ma_aktualisierung_datum
FROM Master
WHERE ma_ersteintrag_datum LIKE '%ff'
LIMIT 10;""")

with engine.connect() as con:
    # Materialise the rows while the connection is still open. A result must
    # be consumed inside the `connect()` block; fetching after the block has
    # exited reads from a connection that was already released.
    rows = con.execute(stmt).fetchall()

print(rows)

# %%
# ----------------------------------------------------------------
# Switch to the contact-list database.
# NOTE: this rebinds the module-level `engine`; cells executed after this one
# talk to the contact-list file, not the CRM file.
engine = sql.create_engine(f"sqlite:///{DB_KL}")
stmt = sql.select(db.ext_kl_unternehmen.c.u_firmenname).limit(20)

with engine.connect() as con:
    # Fetch once, inside the connection block. A result can be consumed only
    # a single time and only while its connection is open, so both views
    # below are derived from this list.
    rows = con.execute(stmt).all()

# Scalar view: the first (and only) selected column of every row.
[row[0] for row in rows]
# %%
# Mapping view: every row as a column-name -> value mapping.
# `Row._mapping` is SQLAlchemy's public accessor despite the underscore.
for row in rows:
    print(row._mapping)
# %%
# %%
# Read the whole company table through the current `engine`.
stmt = sql.select(db.ext_kl_unternehmen)
df = pl.read_database(
    query=stmt,
    connection=engine,
    schema_overrides=db.ext_kl_unternehmen_schema,
)
# %%
df
# %%
# Estimated size of the frame in megabytes.
df.estimated_size("mb")
# %%
df.height
# %%
db.df_kontaktliste
# %%
# Lazy two-column view: id plus company name.
sub = db.df_kontaktliste.select("u_id", "u_firmenname").lazy()
# %%
# Running index 0, 1, 2, ... within each group of identical company names.
counter = pl.int_range(0, pl.len()).over("u_firmenname")
# First occurrence keeps the plain name, later ones become "<name> (<index>)".
sub = sub.with_columns(
    pl.when(counter == 0)
    .then(pl.col("u_firmenname"))
    .otherwise(pl.format("{} ({})", pl.col("u_firmenname"), counter))
    .alias("t")
)
# %%
sub.collect()

# %%
# Minimal, self-contained demonstration of the de-duplication technique.
# Step 1: toy input with repeated strings.
df = pl.DataFrame(
    {"text_col": ["TEST", "APPLE", "TEST", "TEST", "BANANA", "APPLE"]}
)

# Step 2: window expression numbering the occurrences of each distinct string
# as 0, 1, 2, ...
counter = pl.int_range(0, pl.len()).over(pl.col.text_col)

# Step 3: the first occurrence stays untouched; every later occurrence gets
# its occurrence index appended in parentheses.
df = df.with_columns(
    pl.when(counter == 0)
    .then(pl.col.text_col)
    .otherwise(pl.format("{} ({})", pl.col.text_col, counter))
    .alias("updated_col")
)
# %%
df