generated from dopt-python/py311
further data analysis
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
# %%
|
||||
import json
|
||||
import time
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
@@ -165,8 +166,7 @@ print(stmt.compile(engine))
|
||||
|
||||
# %%
|
||||
# raw data query
|
||||
# TODO: change to a left join; otherwise requests may be missed
|
||||
# TODO after that: look for entries which do not have an associated title number
|
||||
# TODO look for entries which do not have an associated title number
|
||||
|
||||
print("--------------- ext_bedpbed --------------")
|
||||
t1 = time.perf_counter()
|
||||
@@ -199,7 +199,50 @@ elapsed = t2 - t1
|
||||
print(f"Query duration: {elapsed:.4f} sec")
|
||||
print("Number of entries: ", len(df))
|
||||
print(f"Estimated size in memory: {df.estimated_size(unit='mb')} MB")
|
||||
# %%
|
||||
# SAVING/LOADING
|
||||
p_save = Path.cwd() / "raw_data_from_sql_query_20251202-2.arrow"
|
||||
# df.write_ipc(p_save)
|
||||
df = pl.read_ipc(p_save)
|
||||
# %%
|
||||
len(df)
|
||||
df.head()
|
||||
# 4591588: present in the title database, but with a different MANDANT (are MANDANTFUEHR and BEDP_MAN suitable for matching?)
|
||||
# %%
|
||||
df.filter(pl.col("BEDP_MAN").is_in((1, 90))).filter(pl.col("MELDENUMMER"))
|
||||
# %%
|
||||
# !! CHECK: null values set in the query with CASE statement
|
||||
print(len(df.filter(pl.col("MELDENUMMER") == 18)))
|
||||
# df.filter(pl.col("MELDENUMMER") == 18).filter((pl.col("BEDP_MENGE_BEDARF_VM").is_not_null()) & (pl.col("BEDP_MENGE_BEDARF_VM") > 0))
|
||||
df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER"))
|
||||
# %%
|
||||
# !! CHECK: titles with requests for which no title information is found
|
||||
# not_in_title_table = df.filter(pl.col("BEDP_MAN").is_in((1, 90))).filter(
|
||||
# pl.col("MELDENUMMER").is_null()
|
||||
# )
|
||||
# EXPORT_FEAT = "BEDP_TITELNR"
|
||||
# to_save = {EXPORT_FEAT: not_in_title_table.select(EXPORT_FEAT).to_series().to_list()}
|
||||
# p_save_not_in_title_table = Path.cwd() / "not_in_title_table.json"
|
||||
|
||||
# with open(p_save_not_in_title_table, "w") as file:
|
||||
# json.dump(to_save, file, indent=4)
|
||||
# %%
|
||||
# !! CHECK: different MANDANTEN
|
||||
# check for valid entries for unknown MANDANTEN
|
||||
# MANDANT = 80
|
||||
|
||||
# print(f"Mandant: {MANDANT}")
|
||||
# print(
|
||||
# df.filter(pl.col("BEDP_MAN") == MANDANT).select(
|
||||
# ["BEDP_MENGE_BEDARF_VM", "MELDENUMMER", "MENGE_VORMERKER"]
|
||||
# )
|
||||
# )
|
||||
# print(
|
||||
# df.filter(pl.col("BEDP_MAN") == MANDANT).select(
|
||||
# ["BEDP_MENGE_BEDARF_VM", "MELDENUMMER", "MENGE_VORMERKER"]
|
||||
# ).null_count()
|
||||
# )
|
||||
# print("Unique value counts: ", df.select(pl.col("BEDP_MAN").value_counts()))
|
||||
# %%
|
||||
# VM_CRITERION = "MENGE_VORMERKER"
|
||||
VM_CRITERION = "BEDP_MENGE_BEDARF_VM"
|
||||
@@ -221,7 +264,7 @@ def get_raw_data() -> pl.DataFrame:
|
||||
).label("BEDP_MENGE_BEDARF_VM"),
|
||||
db.ext_titel_info.c.MELDENUMMER,
|
||||
db.ext_titel_info.c.MENGE_VORMERKER,
|
||||
).select_from(db.ext_bedpbed.join(db.ext_titel_info, join_condition))
|
||||
).select_from(db.ext_bedpbed.join(db.ext_titel_info, join_condition, isouter=True))
|
||||
|
||||
return pl.read_database(
|
||||
stmt,
|
||||
|
||||
Reference in New Issue
Block a user