# %% import importlib from pathlib import Path import polars as pl import sqlalchemy as sql from wce_crm import db importlib.reload(db) # %% PTH_DATA_DB = Path.cwd().parent / "data/db" assert PTH_DATA_DB.exists() assert PTH_DATA_DB.is_dir() # %% DB_KL = PTH_DATA_DB / "wce_kontaktliste.db" DB_CRM = PTH_DATA_DB / "wce_crm.db" assert DB_KL.exists() assert DB_CRM.exists() # %% engine = sql.create_engine(f"sqlite:///{DB_CRM}") # %% db.df_crm_master # %% stmt = sql.select(db.ext_crm_master) str(stmt.compile(engine)) df = pl.read_database(stmt, engine, schema_overrides=db.ext_crm_master_schema) # df = pl.concat([df, df[:2]]) # %% df.select("ma_unternehmensname").is_duplicated().sum() # %% q = df.lazy() counter = pl.int_range(0, pl.len()).over(pl.col.ma_unternehmensname) q = q.with_columns( ma_unternehmensname_dedupl=pl.when(counter == 0) .then(pl.col.ma_unternehmensname) .otherwise(pl.format("{} ({})", pl.col.ma_unternehmensname, counter)) ) df = q.collect() df.select("ma_unternehmensname_dedupl").is_duplicated().sum() # %% # mapping dedupl text to idx df.head() # dict(zip(df["ma_unternehmensname_dedupl"], df["ma_id"])) # %% sub = df[0] sub # sub.with_columns( # # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")) # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep() # ) # %% q = ( sub.lazy() .with_columns( pl.col(pl.Datetime).dt.to_string("%d.%m.%Y"), pl.col(pl.Date).dt.to_string("%d.%m.%Y"), pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep(), ) .with_columns(pl.all().cast(pl.String)) ) sub = q.collect() sub # %% df.row(0, named=True) # %% db.df_crm_master.estimated_size("mb") # %% # // CRM Nutzer stmt = sql.select(db.ext_crm_nutzer).limit(20) str(stmt.compile(engine)) df = pl.read_database(stmt, engine, schema_overrides=db.ext_crm_nutzer_schema) # %% stmt = sql.text("""SELECT ma_unternehmensname, ma_ersteintrag_datum, ma_aktualisierung_datum FROM Master WHERE ma_ersteintrag_datum LIKE '%ff' LIMIT 10;""") with engine.connect() as con: res = con.execute(stmt) print(res.fetchall()) # %% # ---------------------------------------------------------------- engine = sql.create_engine(f"sqlite:///{DB_KL}") stmt = sql.select(db.ext_kl_unternehmen.c.u_firmenname).limit(20) with engine.connect() as con: res = con.execute(stmt) res.scalars().all() # %% for _ in res.mappings(): print(_) # %% # %% stmt = sql.select(db.ext_kl_unternehmen) df = pl.read_database(stmt, engine, schema_overrides=db.ext_kl_unternehmen_schema) # %% df # %% df.estimated_size("mb") # %% df.height # %% db.df_kontaktliste # %% sub = db.df_kontaktliste.select(["u_id", "u_firmenname"]).lazy() # %% counter = pl.int_range(0, pl.len()).over(pl.col.u_firmenname) sub = sub.with_columns( t=pl.when(counter == 0) .then(pl.col.u_firmenname) .otherwise(pl.format("{} ({})", pl.col.u_firmenname, counter)) ) # %% sub.collect() # %% # 1. Create a sample DataFrame df = pl.DataFrame({"text_col": ["TEST", "APPLE", "TEST", "TEST", "BANANA", "APPLE"]}) # 2. Define the window function to count occurrences # This generates a sequence [0, 1, 2...] for each unique string counter = pl.int_range(0, pl.len()).over("text_col") # 3. Apply the conditional formatting df = df.with_columns( updated_col=pl.when(counter == 0) .then(pl.col("text_col")) # Keep original for the first occurrence .otherwise(pl.format("{} ({})", pl.col("text_col"), counter)) # Format duplicates ) # %% df