generated from dopt-python/py311
154 lines
3.7 KiB
Python
154 lines
3.7 KiB
Python
# %%
|
|
import importlib
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
import sqlalchemy as sql
|
|
|
|
from wce_crm import db
|
|
|
|
importlib.reload(db)
|
|
|
|
# %%
|
|
PTH_DATA_DB = Path.cwd().parent / "data/db"
|
|
assert PTH_DATA_DB.exists()
|
|
assert PTH_DATA_DB.is_dir()
|
|
# %%
|
|
DB_KL = PTH_DATA_DB / "wce_kontaktliste.db"
|
|
DB_CRM = PTH_DATA_DB / "wce_crm.db"
|
|
assert DB_KL.exists()
|
|
assert DB_CRM.exists()
|
|
# %%
|
|
engine = sql.create_engine(f"sqlite:///{DB_CRM}")
|
|
# %%
|
|
db.df_crm_master
|
|
# %%
|
|
stmt = sql.select(db.ext_crm_master)
|
|
str(stmt.compile(engine))
|
|
df = pl.read_database(stmt, engine, schema_overrides=db.ext_crm_master_schema)
|
|
# df = pl.concat([df, df[:2]])
|
|
# %%
|
|
df.select("ma_unternehmensname").is_duplicated().sum()
|
|
# %%
|
|
q = df.lazy()
|
|
counter = pl.int_range(0, pl.len()).over(pl.col.ma_unternehmensname)
|
|
q = q.with_columns(
|
|
ma_unternehmensname_dedupl=pl.when(counter == 0)
|
|
.then(pl.col.ma_unternehmensname)
|
|
.otherwise(pl.format("{} ({})", pl.col.ma_unternehmensname, counter))
|
|
)
|
|
df = q.collect()
|
|
df.select("ma_unternehmensname_dedupl").is_duplicated().sum()
|
|
|
|
# %%
|
|
# mapping dedupl text to idx
|
|
df.head()
|
|
|
|
# dict(zip(df["ma_unternehmensname_dedupl"], df["ma_id"]))
|
|
|
|
# %%
|
|
sub = df[0]
|
|
sub
|
|
# sub.with_columns(
|
|
# # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein"))
|
|
# pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep()
|
|
# )
|
|
# %%
|
|
q = (
|
|
sub.lazy()
|
|
.with_columns(
|
|
pl.col(pl.Datetime).dt.to_string("%d.%m.%Y"),
|
|
pl.col(pl.Date).dt.to_string("%d.%m.%Y"),
|
|
pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep(),
|
|
)
|
|
.with_columns(pl.all().cast(pl.String))
|
|
)
|
|
sub = q.collect()
|
|
sub
|
|
# %%
|
|
df.row(0, named=True)
|
|
|
|
# %%
|
|
db.df_crm_master.estimated_size("mb")
|
|
|
|
|
|
# %%
|
|
# // CRM Ansprechpartner
|
|
df = db.df_contact_person.filter(pl.col.ma_id == 410)
|
|
df = df.with_columns(pl.col(pl.String).str.replace_all(r"[\r\t\n]", " ").str.strip_chars(" "))
|
|
df
|
|
|
|
# %%
|
|
tuple(zip(df["ma_id"], df["an_id"]))
|
|
|
|
# %%
|
|
db.df_crm_master
|
|
# %%
|
|
# // CRM Nutzer
|
|
stmt = sql.select(db.ext_crm_nutzer).limit(20)
|
|
str(stmt.compile(engine))
|
|
df = pl.read_database(stmt, engine, schema_overrides=db.ext_crm_nutzer_schema)
|
|
# %%
|
|
stmt = sql.text("""SELECT ma_unternehmensname, ma_ersteintrag_datum, ma_aktualisierung_datum
|
|
FROM Master
|
|
WHERE ma_ersteintrag_datum LIKE '%ff'
|
|
LIMIT 10;""")
|
|
|
|
with engine.connect() as con:
|
|
res = con.execute(stmt)
|
|
|
|
print(res.fetchall())
|
|
|
|
# %%
|
|
# ----------------------------------------------------------------
|
|
engine = sql.create_engine(f"sqlite:///{DB_KL}")
|
|
stmt = sql.select(db.ext_kl_unternehmen.c.u_firmenname).limit(20)
|
|
|
|
with engine.connect() as con:
|
|
res = con.execute(stmt)
|
|
|
|
res.scalars().all()
|
|
# %%
|
|
for _ in res.mappings():
|
|
print(_)
|
|
# %%
|
|
# %%
|
|
stmt = sql.select(db.ext_kl_unternehmen)
|
|
df = pl.read_database(stmt, engine, schema_overrides=db.ext_kl_unternehmen_schema)
|
|
# %%
|
|
df
|
|
# %%
|
|
df.estimated_size("mb")
|
|
# %%
|
|
df.height
|
|
# %%
|
|
db.df_kontaktliste
|
|
# %%
|
|
sub = db.df_kontaktliste.select(["u_id", "u_firmenname"]).lazy()
|
|
# %%
|
|
counter = pl.int_range(0, pl.len()).over(pl.col.u_firmenname)
|
|
sub = sub.with_columns(
|
|
t=pl.when(counter == 0)
|
|
.then(pl.col.u_firmenname)
|
|
.otherwise(pl.format("{} ({})", pl.col.u_firmenname, counter))
|
|
)
|
|
# %%
|
|
sub.collect()
|
|
|
|
# %%
|
|
# 1. Create a sample DataFrame
|
|
df = pl.DataFrame({"text_col": ["TEST", "APPLE", "TEST", "TEST", "BANANA", "APPLE"]})
|
|
|
|
# 2. Define the window function to count occurrences
|
|
# This generates a sequence [0, 1, 2...] for each unique string
|
|
counter = pl.int_range(0, pl.len()).over("text_col")
|
|
|
|
# 3. Apply the conditional formatting
|
|
df = df.with_columns(
|
|
updated_col=pl.when(counter == 0)
|
|
.then(pl.col("text_col")) # Keep original for the first occurrence
|
|
.otherwise(pl.format("{} ({})", pl.col("text_col"), counter)) # Format duplicates
|
|
)
|
|
# %%
|
|
df
|