add date plausibility filter, fixes #31

This commit is contained in:
Florian Förster 2025-05-14 09:13:49 +02:00
parent 33760bd764
commit 3e14a8660e
2 changed files with 26 additions and 3 deletions

View File

@ -193,8 +193,8 @@ def _process_sales(
DATE_FEAT: Final[str] = "buchungs_datum"
SALES_FEAT: Final[str] = "betrag"
# TODO: add pre-filter to filter out non-relevant dates, see issue #31
data[DATE_FEAT] = pd.to_datetime(data[DATE_FEAT], errors="coerce")
data = data.dropna(subset=["buchungs_datum"])
df_filter = data[(data["betrag"] > 0)]
df_cust = df_filter.copy()
df_cust = df_cust.sort_values(by=DATE_FEAT).reset_index()

View File

@ -279,6 +279,28 @@ def test_process_sales_Success(sales_data_real_preproc):
assert pipe.statistics.xgb_params is not None
@pytest.mark.forecast
def test_process_sales_InvalidDates(sales_data_real_preproc):
    """Run ``fc._process_sales`` on a sample containing an implausible date.

    The first row of a 20-row sample gets a booking date in the year 2519,
    held in an object-dtype column. With 36 data points required, the
    pipeline must end in the ``TOO_FEW_POINTS`` state, carry neither data
    nor results, and still expose statistics.
    """
    date_column = "buchungs_datum"
    required_points = 36
    implausible_date = Datetime(2519, 6, 30)

    sample = sales_data_real_preproc.copy().iloc[:20, :]
    # Presumably cast to object so the far-future date is stored unchanged
    # instead of being converted on assignment -- confirm against fixture dtype.
    sample[date_column] = sample[date_column].astype(object)
    sample.at[0, date_column] = implausible_date

    # Sanity checks on the prepared input before handing it to the pipeline.
    assert sample[date_column].dtype.char == "O"
    assert len(sample) == 20

    outcome = fc._process_sales(
        PipeResult(sample, STATUS_HANDLER.SUCCESS),
        min_num_data_points=required_points,
        base_num_data_points_months=required_points,
    )

    assert outcome.status != STATUS_HANDLER.SUCCESS
    assert outcome.status == STATUS_HANDLER.pipe_states.TOO_FEW_POINTS
    assert outcome.data is None
    assert outcome.results is None
    assert outcome.statistics is not None
@pytest.mark.forecast
def test_process_sales_FailTooFewPoints(sales_data_real_preproc):
data = sales_data_real_preproc.copy()
@ -432,6 +454,7 @@ def test_export_on_fail():
@patch("delta_barth.session.CFG_HOT_RELOAD", False)
def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp, session):
assert session.cfg.forecast.threshold_month_data_points is not None
date = Datetime(2023, 8, 15)
company_ids = [5661, 1027, 1024]
with (
@ -442,7 +465,7 @@ def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp,
):
get_mock.return_value = exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS
sess_mock.cfg.forecast.threshold_month_data_points = 1
result = fc.pipeline_sales_forecast(None, company_ids, date) # type: ignore
result = fc.pipeline_sales_forecast(session, company_ids, date) # type: ignore
assert result.status == STATUS_HANDLER.SUCCESS
assert len(result.response.daten) > 0