39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
|
|
# TODO: consider moving this into the main package if it is needed there in the future
|
|
def _cvt_str_float(value: str) -> float:
|
|
import locale
|
|
|
|
locale.setlocale(locale.LC_NUMERIC, "de_DE.UTF-8")
|
|
return locale.atof(value)
|
|
|
|
|
|
def _cvt_str_ts(value: str) -> Any:
|
|
date = value.split("_")[0]
|
|
|
|
return pd.to_datetime(date, format="%Y%m%d", errors="coerce")
|
|
|
|
|
|
@pytest.fixture(scope="session")
def sales_data() -> pd.DataFrame:
    """Load the sales test data set once per test session.

    Reads the tab-separated file ``tests/_test_data/swm_f_umsatz_fakt.csv``
    relative to the current working directory, converts the ``betrag`` column
    with ``_cvt_str_float`` and the ``buchungs_datum`` column with
    ``_cvt_str_ts``, and drops every row that has a missing value in one of
    the required columns. The index is renumbered after dropping.

    Returns:
        The cleaned sales data.

    Raises:
        AssertionError: If the name of the working directory's parent does
            not contain "barth", or if the data file does not exist.
    """
    cwd = Path.cwd()
    assert "barth" in cwd.parent.name.lower(), "not in project root directory"
    csv_file = cwd / "./tests/_test_data/swm_f_umsatz_fakt.csv"
    assert csv_file.exists(), "file to sales data not found"

    frame = pd.read_csv(csv_file, sep="\t")

    # Per-column converters, applied in insertion order.
    converters = {
        "betrag": _cvt_str_float,
        "buchungs_datum": _cvt_str_ts,
    }
    for column, convert in converters.items():
        frame[column] = frame[column].apply(convert)

    required_columns = ["firma_refid", "beleg_typ", "buchungs_datum", "betrag"]
    frame = frame.dropna(how="any", subset=required_columns, ignore_index=True)

    # Re-run the datetime conversion on the remaining rows of the date column.
    frame["buchungs_datum"] = pd.to_datetime(frame["buchungs_datum"])
    return frame
|