generated from dopt-python/py311
further data analysis
This commit is contained in:
parent
547d924f31
commit
b51f372a9d
@ -1,4 +1,5 @@
|
|||||||
# %%
|
# %%
|
||||||
|
import json
|
||||||
import time
|
import time
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -165,8 +166,7 @@ print(stmt.compile(engine))
|
|||||||
|
|
||||||
# %%
|
# %%
|
||||||
# raw data query
|
# raw data query
|
||||||
# TODO change to left join, otherwise possible that requests are missed
|
# TODO look for entries which do not have an associated title number
|
||||||
# TODO after that: look for entries which do not have an associated title number
|
|
||||||
|
|
||||||
print("--------------- ext_bedpbed --------------")
|
print("--------------- ext_bedpbed --------------")
|
||||||
t1 = time.perf_counter()
|
t1 = time.perf_counter()
|
||||||
@ -199,7 +199,50 @@ elapsed = t2 - t1
|
|||||||
print(f"Query duration: {elapsed:.4f} sec")
|
print(f"Query duration: {elapsed:.4f} sec")
|
||||||
print("Number of entries: ", len(df))
|
print("Number of entries: ", len(df))
|
||||||
print(f"Estimated size in memory: {df.estimated_size(unit='mb')} MB")
|
print(f"Estimated size in memory: {df.estimated_size(unit='mb')} MB")
|
||||||
|
# %%
|
||||||
|
# SAVING/LOADING
|
||||||
|
p_save = Path.cwd() / "raw_data_from_sql_query_20251202-2.arrow"
|
||||||
|
# df.write_ipc(p_save)
|
||||||
|
df = pl.read_ipc(p_save)
|
||||||
|
# %%
|
||||||
|
len(df)
|
||||||
|
df.head()
|
||||||
|
# 4591588: present in the title database, but with a different MANDANT (are MANDFUEHR and BEDP_MAN suitable for matching?)
|
||||||
|
# %%
|
||||||
|
df.filter(pl.col("BEDP_MAN").is_in((1, 90))).filter(pl.col("MELDENUMMER"))
|
||||||
|
# %%
|
||||||
|
# !! CHECK: null values set in the query with CASE statement
|
||||||
|
print(len(df.filter(pl.col("MELDENUMMER") == 18)))
|
||||||
|
# df.filter(pl.col("MELDENUMMER") == 18).filter((pl.col("BEDP_MENGE_BEDARF_VM").is_not_null()) & (pl.col("BEDP_MENGE_BEDARF_VM") > 0))
|
||||||
|
df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER"))
|
||||||
|
# %%
|
||||||
|
# !! CHECK: titles with request where no title information is found
|
||||||
|
# not_in_title_table = df.filter(pl.col("BEDP_MAN").is_in((1, 90))).filter(
|
||||||
|
# pl.col("MELDENUMMER").is_null()
|
||||||
|
# )
|
||||||
|
# EXPORT_FEAT = "BEDP_TITELNR"
|
||||||
|
# to_save = {EXPORT_FEAT: not_in_title_table.select(EXPORT_FEAT).to_series().to_list()}
|
||||||
|
# p_save_not_in_title_table = Path.cwd() / "not_in_title_table.json"
|
||||||
|
|
||||||
|
# with open(p_save_not_in_title_table, "w") as file:
|
||||||
|
# json.dump(to_save, file, indent=4)
|
||||||
|
# %%
|
||||||
|
# !! CHECK: different MANDANTEN
|
||||||
|
# check for valid entries for unknown MANDANTEN
|
||||||
|
# MANDANT = 80
|
||||||
|
|
||||||
|
# print(f"Mandant: {MANDANT}")
|
||||||
|
# print(
|
||||||
|
# df.filter(pl.col("BEDP_MAN") == MANDANT).select(
|
||||||
|
# ["BEDP_MENGE_BEDARF_VM", "MELDENUMMER", "MENGE_VORMERKER"]
|
||||||
|
# )
|
||||||
|
# )
|
||||||
|
# print(
|
||||||
|
# df.filter(pl.col("BEDP_MAN") == MANDANT).select(
|
||||||
|
# ["BEDP_MENGE_BEDARF_VM", "MELDENUMMER", "MENGE_VORMERKER"]
|
||||||
|
# ).null_count()
|
||||||
|
# )
|
||||||
|
# print("Unique value counts: ", df.select(pl.col("BEDP_MAN").value_counts()))
|
||||||
# %%
|
# %%
|
||||||
# VM_CRITERION = "MENGE_VORMERKER"
|
# VM_CRITERION = "MENGE_VORMERKER"
|
||||||
VM_CRITERION = "BEDP_MENGE_BEDARF_VM"
|
VM_CRITERION = "BEDP_MENGE_BEDARF_VM"
|
||||||
@ -221,7 +264,7 @@ def get_raw_data() -> pl.DataFrame:
|
|||||||
).label("BEDP_MENGE_BEDARF_VM"),
|
).label("BEDP_MENGE_BEDARF_VM"),
|
||||||
db.ext_titel_info.c.MELDENUMMER,
|
db.ext_titel_info.c.MELDENUMMER,
|
||||||
db.ext_titel_info.c.MENGE_VORMERKER,
|
db.ext_titel_info.c.MENGE_VORMERKER,
|
||||||
).select_from(db.ext_bedpbed.join(db.ext_titel_info, join_condition))
|
).select_from(db.ext_bedpbed.join(db.ext_titel_info, join_condition, isouter=True))
|
||||||
|
|
||||||
return pl.read_database(
|
return pl.read_database(
|
||||||
stmt,
|
stmt,
|
||||||
|
|||||||
49
data_analysis/not_in_title_table.json
Normal file
49
data_analysis/not_in_title_table.json
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
{
|
||||||
|
"BEDP_TITELNR": [
|
||||||
|
6132326,
|
||||||
|
4836777,
|
||||||
|
4836779,
|
||||||
|
3370676,
|
||||||
|
3370678,
|
||||||
|
6261428,
|
||||||
|
6261430,
|
||||||
|
8254295,
|
||||||
|
8139588,
|
||||||
|
6178366,
|
||||||
|
6178367,
|
||||||
|
8139587,
|
||||||
|
6178370,
|
||||||
|
6178371,
|
||||||
|
8139586,
|
||||||
|
8139585,
|
||||||
|
4837536,
|
||||||
|
3369003,
|
||||||
|
6132318,
|
||||||
|
6132319,
|
||||||
|
8254301,
|
||||||
|
6132322,
|
||||||
|
6132323,
|
||||||
|
4838000,
|
||||||
|
4838001,
|
||||||
|
4836769,
|
||||||
|
4836770,
|
||||||
|
8139590,
|
||||||
|
8139591,
|
||||||
|
3369002,
|
||||||
|
4837537,
|
||||||
|
3408130,
|
||||||
|
3408132,
|
||||||
|
5227666,
|
||||||
|
5227665,
|
||||||
|
5227663,
|
||||||
|
5227661,
|
||||||
|
139058,
|
||||||
|
9126790,
|
||||||
|
5917263,
|
||||||
|
7112355,
|
||||||
|
1462793,
|
||||||
|
1216207,
|
||||||
|
507075,
|
||||||
|
8254294
|
||||||
|
]
|
||||||
|
}
|
||||||
39
data_analysis/queries.sql
Normal file
39
data_analysis/queries.sql
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
-- SELECT count(*) FROM EXT_TITEL_INFO
|
||||||
|
-- SELECT * FROM EXT_TITEL_INFO
|
||||||
|
set timing on
|
||||||
|
|
||||||
|
SELECT count(*) FROM EXT_BEDPBED;
|
||||||
|
|
||||||
|
-- PROMPT No Hashing allowed...
|
||||||
|
-- SELECT /*+ NO_USE_HASH(bedp t_info) */
|
||||||
|
-- bedp.BEDARFNR,
|
||||||
|
-- bedp.BEDP_SEQUENZ,
|
||||||
|
-- bedp.BEDP_TITELNR,
|
||||||
|
-- bedp.BEDP_MAN,
|
||||||
|
-- bedp.BEDP_MENGE_BEDARF_VM,
|
||||||
|
-- t_info.MELDENUMMER,
|
||||||
|
-- t_info.MENGE_VORMERKER
|
||||||
|
-- FROM EXT_BEDPBED bedp
|
||||||
|
-- LEFT JOIN EXT_TITEL_INFO t_info
|
||||||
|
-- ON bedp.BEDP_TITELNR = t_info.TI_NUMMER
|
||||||
|
-- AND bedp.BEDP_MAN = t_info.MANDFUEHR;
|
||||||
|
-- PROMPT ####################################
|
||||||
|
|
||||||
|
-- PROMPT All allowed
|
||||||
|
-- SELECT
|
||||||
|
-- bedp.BEDARFNR,
|
||||||
|
-- bedp.BEDP_SEQUENZ,
|
||||||
|
-- bedp.BEDP_TITELNR,
|
||||||
|
-- bedp.BEDP_MAN,
|
||||||
|
-- bedp.BEDP_MENGE_BEDARF_VM,
|
||||||
|
-- t_info.MELDENUMMER,
|
||||||
|
-- t_info.MENGE_VORMERKER
|
||||||
|
-- FROM EXT_BEDPBED bedp
|
||||||
|
-- LEFT JOIN EXT_TITEL_INFO t_info
|
||||||
|
-- ON bedp.BEDP_TITELNR = t_info.TI_NUMMER
|
||||||
|
-- AND bedp.BEDP_MAN = t_info.MANDFUEHR;
|
||||||
|
-- -- WHERE bedp.BEDP_MAN IN (1, 90) AND t_info.MELDENUMMER != 26;
|
||||||
|
-- PROMPT ######################################
|
||||||
|
|
||||||
|
-- SELECT * FROM EXT_TITEL_INFO t_info WHERE t_info.TI_NUMMER = 6132326;
|
||||||
|
SELECT * FROM EXT_TITEL_INFO t_info WHERE t_info.TI_NUMMER = 4591588;
|
||||||
BIN
data_analysis/raw_data_from_sql_query_20251202-1.arrow
Normal file
BIN
data_analysis/raw_data_from_sql_query_20251202-1.arrow
Normal file
Binary file not shown.
BIN
data_analysis/raw_data_from_sql_query_20251202-2.arrow
Normal file
BIN
data_analysis/raw_data_from_sql_query_20251202-2.arrow
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user