NAFKA-crm-gui/prototypes/db_access.py

143 lines
3.5 KiB
Python

# %%
import importlib
from pathlib import Path
import polars as pl
import sqlalchemy as sql
from wce_crm import db
importlib.reload(db)
# %%
PTH_DATA_DB = Path.cwd().parent / "data/db"
assert PTH_DATA_DB.exists()
assert PTH_DATA_DB.is_dir()
# %%
DB_KL = PTH_DATA_DB / "wce_kontaktliste.db"
DB_CRM = PTH_DATA_DB / "wce_crm.db"
assert DB_KL.exists()
assert DB_CRM.exists()
# %%
engine = sql.create_engine(f"sqlite:///{DB_CRM}")
# %%
db.df_crm_master
# %%
stmt = sql.select(db.ext_crm_master)
str(stmt.compile(engine))
df = pl.read_database(stmt, engine, schema_overrides=db.ext_crm_master_schema)
# df = pl.concat([df, df[:2]])
# %%
df.select("ma_unternehmensname").is_duplicated().sum()
# %%
q = df.lazy()
counter = pl.int_range(0, pl.len()).over(pl.col.ma_unternehmensname)
q = q.with_columns(
ma_unternehmensname_dedupl=pl.when(counter == 0)
.then(pl.col.ma_unternehmensname)
.otherwise(pl.format("{} ({})", pl.col.ma_unternehmensname, counter))
)
df = q.collect()
df.select("ma_unternehmensname_dedupl").is_duplicated().sum()
# %%
# mapping dedupl text to idx
df.head()
# dict(zip(df["ma_unternehmensname_dedupl"], df["ma_id"]))
# %%
sub = df[0]
sub
# sub.with_columns(
# # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein"))
# pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep()
# )
# %%
q = (
sub.lazy()
.with_columns(
pl.col(pl.Datetime).dt.to_string("%d.%m.%Y"),
pl.col(pl.Date).dt.to_string("%d.%m.%Y"),
pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep(),
)
.with_columns(pl.all().cast(pl.String))
)
sub = q.collect()
sub
# %%
df.row(0, named=True)
# %%
db.df_crm_master.estimated_size("mb")
# %%
# // CRM Nutzer
stmt = sql.select(db.ext_crm_nutzer).limit(20)
str(stmt.compile(engine))
df = pl.read_database(stmt, engine, schema_overrides=db.ext_crm_nutzer_schema)
# %%
stmt = sql.text("""SELECT ma_unternehmensname, ma_ersteintrag_datum, ma_aktualisierung_datum
FROM Master
WHERE ma_ersteintrag_datum LIKE '%ff'
LIMIT 10;""")
with engine.connect() as con:
res = con.execute(stmt)
print(res.fetchall())
# %%
# ----------------------------------------------------------------
engine = sql.create_engine(f"sqlite:///{DB_KL}")
stmt = sql.select(db.ext_kl_unternehmen.c.u_firmenname).limit(20)
with engine.connect() as con:
res = con.execute(stmt)
res.scalars().all()
# %%
for _ in res.mappings():
print(_)
# %%
# %%
stmt = sql.select(db.ext_kl_unternehmen)
df = pl.read_database(stmt, engine, schema_overrides=db.ext_kl_unternehmen_schema)
# %%
df
# %%
df.estimated_size("mb")
# %%
df.height
# %%
db.df_kontaktliste
# %%
sub = db.df_kontaktliste.select(["u_id", "u_firmenname"]).lazy()
# %%
counter = pl.int_range(0, pl.len()).over(pl.col.u_firmenname)
sub = sub.with_columns(
t=pl.when(counter == 0)
.then(pl.col.u_firmenname)
.otherwise(pl.format("{} ({})", pl.col.u_firmenname, counter))
)
# %%
sub.collect()
# %%
# 1. Create a sample DataFrame
df = pl.DataFrame({"text_col": ["TEST", "APPLE", "TEST", "TEST", "BANANA", "APPLE"]})
# 2. Define the window function to count occurrences
# This generates a sequence [0, 1, 2...] for each unique string
counter = pl.int_range(0, pl.len()).over("text_col")
# 3. Apply the conditional formatting
df = df.with_columns(
updated_col=pl.when(counter == 0)
.then(pl.col("text_col")) # Keep original for the first occurrence
.otherwise(pl.format("{} ({})", pl.col("text_col"), counter)) # Format duplicates
)
# %%
df