generated from dopt-python/py311
Drop the temporary "null_count" column instead of selecting the saved base columns
This commit is contained in:
@@ -73,7 +73,6 @@ def preprocess_psm(
|
|||||||
|
|
||||||
# drop duplicates
|
# drop duplicates
|
||||||
# use null count as information measure, least amount of nulls should be contained
|
# use null count as information measure, least amount of nulls should be contained
|
||||||
base_columns = data.columns
|
|
||||||
data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count"))
|
data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count"))
|
||||||
data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False)
|
data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False)
|
||||||
filtered_data = pl.concat(
|
filtered_data = pl.concat(
|
||||||
@@ -81,7 +80,7 @@ def preprocess_psm(
|
|||||||
filtered_data,
|
filtered_data,
|
||||||
data.filter(
|
data.filter(
|
||||||
~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()
|
~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()
|
||||||
).select(base_columns),
|
).drop("null_count"),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
data = data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct())
|
data = data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct())
|
||||||
|
|||||||
Reference in New Issue
Block a user