generated from dopt-python/py311
further prototyping, added first DB interactions
This commit is contained in:
142
prototypes/db_access.py
Normal file
142
prototypes/db_access.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# %%
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
import sqlalchemy as sql
|
||||
|
||||
from wce_crm import db
|
||||
|
||||
importlib.reload(db)
|
||||
|
||||
# %%
# Directory holding the SQLite files, resolved relative to the parent of the
# current working directory.
PTH_DATA_DB = Path.cwd().parent / "data" / "db"
assert PTH_DATA_DB.exists() and PTH_DATA_DB.is_dir()

# %%
# Both database files have to exist before they are used below.
DB_KL = PTH_DATA_DB.joinpath("wce_kontaktliste.db")
DB_CRM = PTH_DATA_DB.joinpath("wce_crm.db")
assert all(db_file.exists() for db_file in (DB_KL, DB_CRM))

# %%
# Engine bound to the CRM database file.
engine = sql.create_engine("sqlite:///" + str(DB_CRM))

# %%
db.df_crm_master
|
||||
# %%
# Build a SELECT over `db.ext_crm_master`, render its SQL text against the
# current engine, then load the result into a polars DataFrame.
stmt = sql.select(db.ext_crm_master)
compiled_stmt = stmt.compile(engine)
str(compiled_stmt)
df = pl.read_database(
    query=stmt,
    connection=engine,
    schema_overrides=db.ext_crm_master_schema,
)
# df = pl.concat([df, df[:2]])
|
||||
# %%
# Number of rows whose company name occurs more than once.
df.select("ma_unternehmensname").is_duplicated().sum()

# %%
# `counter` is a 0-based running index of each row within its group of
# identical names; the first occurrence keeps the plain name, later ones get
# a " (n)" suffix.
name_col = pl.col("ma_unternehmensname")
counter = pl.int_range(0, pl.len()).over(name_col)
is_first_occurrence = counter == 0
numbered_name = pl.format("{} ({})", name_col, counter)

q = df.lazy().with_columns(
    pl.when(is_first_occurrence)
    .then(name_col)
    .otherwise(numbered_name)
    .alias("ma_unternehmensname_dedupl")
)
df = q.collect()

# Expected to be 0 now, unless a suffixed name collides with an existing one.
df.select("ma_unternehmensname_dedupl").is_duplicated().sum()
|
||||
|
||||
# %%
# mapping dedupl text to idx
df.head()

# dict(zip(df["ma_unternehmensname_dedupl"], df["ma_id"]))

# %%
# Work on the first row of `df` only.
sub = df[0]
sub
# sub.with_columns(
#     # pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein"))
#     pl.when(pl.col(pl.Boolean)).then(pl.lit("Ja")).otherwise(pl.lit("Nein")).name.keep()
# )

# %%
# Turn every value into text: dates/datetimes as DD.MM.YYYY, booleans as
# "Ja"/"Nein" (anything that is not true falls into the "Nein" branch), and
# finally cast all remaining columns to strings.
date_fmt = "%d.%m.%Y"
temporal_as_text = [
    pl.col(pl.Datetime).dt.to_string(date_fmt),
    pl.col(pl.Date).dt.to_string(date_fmt),
]
bool_as_text = (
    pl.when(pl.col(pl.Boolean))
    .then(pl.lit("Ja"))
    .otherwise(pl.lit("Nein"))
    .name.keep()  # keep the original column names instead of "literal"
)
q = (
    sub.lazy()
    .with_columns(*temporal_as_text, bool_as_text)
    .with_columns(pl.all().cast(pl.String))
)
sub = q.collect()
sub

# %%
df.row(0, named=True)
|
||||
|
||||
# %%
db.df_crm_master.estimated_size("mb")

# %%
# // CRM users
# Load at most 20 rows from `db.ext_crm_nutzer` into a DataFrame.
stmt = sql.select(db.ext_crm_nutzer).limit(20)
compiled_stmt = stmt.compile(engine)
str(compiled_stmt)
df = pl.read_database(
    query=stmt,
    connection=engine,
    schema_overrides=db.ext_crm_nutzer_schema,
)
|
||||
# %%
# Raw SQL probe: up to 10 rows of table `Master` whose `ma_ersteintrag_datum`
# value ends with the text "ff".
stmt = sql.text(
    """SELECT ma_unternehmensname, ma_ersteintrag_datum, ma_aktualisierung_datum
FROM Master
WHERE ma_ersteintrag_datum LIKE '%ff'
LIMIT 10;"""
)

with engine.connect() as con:
    res = con.execute(stmt)
    # Materialize the rows while the connection is still open; the result
    # object must not be read after the `with` block has released it.
    rows = res.fetchall()

print(rows)
|
||||
|
||||
# %%
# ----------------------------------------------------------------
# Switch to the Kontaktliste database and read the first 20 company names.
engine = sql.create_engine(f"sqlite:///{DB_KL}")
stmt = sql.select(db.ext_kl_unternehmen.c.u_firmenname).limit(20)

with engine.connect() as con:
    # A result can be consumed only once, and only while its connection is
    # open. Each view of the rows therefore gets its own execution inside the
    # block and is materialized into a list before the connection is released.
    firmennamen = con.execute(stmt).scalars().all()
    rows = con.execute(stmt).mappings().all()

firmennamen

# %%
# Dict-like view of the same rows, fetched above while the connection was open.
for row in rows:
    print(row)
|
||||
# %%
|
||||
# %%
# Load everything selected from `db.ext_kl_unternehmen` through the current
# engine into a DataFrame.
stmt = sql.select(db.ext_kl_unternehmen)
df = pl.read_database(
    query=stmt,
    connection=engine,
    schema_overrides=db.ext_kl_unternehmen_schema,
)

# %%
df

# %%
# Approximate in-memory size in megabytes.
df.estimated_size("mb")

# %%
# Row count.
df.height

# %%
db.df_kontaktliste
|
||||
# %%
# Only the id and the company name are needed; continue lazily from here.
sub = db.df_kontaktliste.select("u_id", "u_firmenname").lazy()

# %%
# Same scheme as for the CRM master names: the first occurrence keeps its
# name, repeats get " (n)" with n being the 0-based index within the group.
firmenname = pl.col("u_firmenname")
counter = pl.int_range(0, pl.len()).over(firmenname)
sub = sub.with_columns(
    pl.when(counter == 0)
    .then(firmenname)
    .otherwise(pl.format("{} ({})", firmenname, counter))
    .alias("t")
)

# %%
sub.collect()
|
||||
|
||||
# %%
# Minimal, self-contained illustration of the de-duplication technique.

# 1. Sample data containing repeated strings
df = pl.DataFrame({"text_col": ["TEST", "APPLE", "TEST", "TEST", "BANANA", "APPLE"]})

# 2. Window expression: a running index [0, 1, 2, ...] restarted for every
#    distinct string
text = pl.col("text_col")
counter = pl.int_range(0, pl.len()).over(text)

# 3. First occurrence stays untouched, later ones are suffixed with their
#    index, e.g. the second "TEST" becomes "TEST (1)"
df = df.with_columns(
    pl.when(counter == 0)
    .then(text)
    .otherwise(pl.format("{} ({})", text, counter))
    .alias("updated_col")
)

# %%
df
|
||||
Reference in New Issue
Block a user