generated from dopt-python/py311
drop instead of select
This commit is contained in:
@@ -73,7 +73,6 @@ def preprocess_psm(
|
||||
|
||||
# drop duplicates
|
||||
# use the null count as an information measure: among duplicates, keep the row with the fewest nulls
|
||||
base_columns = data.columns
|
||||
data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count"))
|
||||
data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False)
|
||||
filtered_data = pl.concat(
|
||||
@@ -81,7 +80,7 @@ def preprocess_psm(
|
||||
filtered_data,
|
||||
data.filter(
|
||||
~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()
|
||||
).select(base_columns),
|
||||
).drop("null_count"),
|
||||
]
|
||||
)
|
||||
data = data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct())
|
||||
|
||||
Reference in New Issue
Block a user