From 3e14a8660ed5b07f1ea0965287c8beec1803f039 Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 14 May 2025 09:13:49 +0200 Subject: [PATCH] add date plausibility filter, fixes #31 --- src/delta_barth/analysis/forecast.py | 4 ++-- tests/analysis/test_forecast.py | 25 ++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/delta_barth/analysis/forecast.py b/src/delta_barth/analysis/forecast.py index 5262261..ece1656 100644 --- a/src/delta_barth/analysis/forecast.py +++ b/src/delta_barth/analysis/forecast.py @@ -193,8 +193,8 @@ def _process_sales( DATE_FEAT: Final[str] = "buchungs_datum" SALES_FEAT: Final[str] = "betrag" - # TODO: add pre-filter to filter out non-relevant dates, see issue #31 - + data[DATE_FEAT] = pd.to_datetime(data[DATE_FEAT], errors="coerce") + data = data.dropna(subset=["buchungs_datum"]) df_filter = data[(data["betrag"] > 0)] df_cust = df_filter.copy() df_cust = df_cust.sort_values(by=DATE_FEAT).reset_index() diff --git a/tests/analysis/test_forecast.py b/tests/analysis/test_forecast.py index 4e316a1..f2b8acf 100644 --- a/tests/analysis/test_forecast.py +++ b/tests/analysis/test_forecast.py @@ -279,6 +279,28 @@ def test_process_sales_Success(sales_data_real_preproc): assert pipe.statistics.xgb_params is not None +@pytest.mark.forecast +def test_process_sales_InvalidDates(sales_data_real_preproc): + false_date = Datetime(2519, 6, 30) + data = sales_data_real_preproc.copy() + data = data.iloc[:20, :] + data["buchungs_datum"] = data["buchungs_datum"].astype(object) + data.at[0, "buchungs_datum"] = false_date + assert data["buchungs_datum"].dtype.char == "O" + assert len(data) == 20 + pipe = PipeResult(data, STATUS_HANDLER.SUCCESS) + pipe = fc._process_sales( + pipe, + min_num_data_points=36, + base_num_data_points_months=36, + ) + assert pipe.status != STATUS_HANDLER.SUCCESS + assert pipe.status == STATUS_HANDLER.pipe_states.TOO_FEW_POINTS + assert pipe.data is None + assert pipe.results is None + assert pipe.statistics is not None + + @pytest.mark.forecast def test_process_sales_FailTooFewPoints(sales_data_real_preproc): data = sales_data_real_preproc.copy() @@ -432,6 +454,7 @@ def test_export_on_fail(): @patch("delta_barth.session.CFG_HOT_RELOAD", False) def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp, session): + assert session.cfg.forecast.threshold_month_data_points is not None date = Datetime(2023, 8, 15) company_ids = [5661, 1027, 1024] with ( @@ -442,7 +465,7 @@ def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp, ): get_mock.return_value = exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS sess_mock.cfg.forecast.threshold_month_data_points = 1 - result = fc.pipeline_sales_forecast(None, company_ids, date) # type: ignore + result = fc.pipeline_sales_forecast(session, company_ids, date) # type: ignore assert result.status == STATUS_HANDLER.SUCCESS assert len(result.response.daten) > 0