diff --git a/test-notebooks/Analyse_5-1_Timeline.ipynb b/notebooks/Analyse_5-1_Timeline.ipynb
similarity index 100%
rename from test-notebooks/Analyse_5-1_Timeline.ipynb
rename to notebooks/Analyse_5-1_Timeline.ipynb
diff --git a/test-notebooks/Preprocess_Pipeline.ipynb b/notebooks/Preprocess_Pipeline.ipynb
similarity index 100%
rename from test-notebooks/Preprocess_Pipeline.ipynb
rename to notebooks/Preprocess_Pipeline.ipynb
diff --git a/test-notebooks/Token_Analysis.ipynb b/notebooks/Token_Analysis.ipynb
similarity index 100%
rename from test-notebooks/Token_Analysis.ipynb
rename to notebooks/Token_Analysis.ipynb
diff --git a/test-notebooks/archive/Analyse.ipynb b/notebooks/archive/Analyse.ipynb
similarity index 100%
rename from test-notebooks/archive/Analyse.ipynb
rename to notebooks/archive/Analyse.ipynb
diff --git a/test-notebooks/archive/Analyse_2-2.ipynb b/notebooks/archive/Analyse_2-2.ipynb
similarity index 100%
rename from test-notebooks/archive/Analyse_2-2.ipynb
rename to notebooks/archive/Analyse_2-2.ipynb
diff --git a/test-notebooks/archive/Analyse_2.ipynb b/notebooks/archive/Analyse_2.ipynb
similarity index 100%
rename from test-notebooks/archive/Analyse_2.ipynb
rename to notebooks/archive/Analyse_2.ipynb
diff --git a/test-notebooks/archive/Analyse_3.ipynb b/notebooks/archive/Analyse_3.ipynb
similarity index 100%
rename from test-notebooks/archive/Analyse_3.ipynb
rename to notebooks/archive/Analyse_3.ipynb
diff --git a/test-notebooks/archive/Analyse_4-1.ipynb b/notebooks/archive/Analyse_4-1.ipynb
similarity index 100%
rename from test-notebooks/archive/Analyse_4-1.ipynb
rename to notebooks/archive/Analyse_4-1.ipynb
diff --git a/test-notebooks/archive/Analyse_4-2.ipynb b/notebooks/archive/Analyse_4-2.ipynb
similarity index 100%
rename from test-notebooks/archive/Analyse_4-2.ipynb
rename to notebooks/archive/Analyse_4-2.ipynb
diff --git a/test-notebooks/archive/test_new_dupl_merge.ipynb b/notebooks/archive/test_new_dupl_merge.ipynb
similarity index 100%
rename from test-notebooks/archive/test_new_dupl_merge.ipynb
rename to notebooks/archive/test_new_dupl_merge.ipynb
diff --git a/test-notebooks/archive/test_sentence_trf.ipynb b/notebooks/archive/test_sentence_trf.ipynb
similarity index 100%
rename from test-notebooks/archive/test_sentence_trf.ipynb
rename to notebooks/archive/test_sentence_trf.ipynb
diff --git a/test-notebooks/display_results.ipynb b/notebooks/display_results.ipynb
similarity index 100%
rename from test-notebooks/display_results.ipynb
rename to notebooks/display_results.ipynb
diff --git a/notebooks/dummy_data_generation.ipynb b/notebooks/dummy_data_generation.ipynb
new file mode 100644
index 0000000..4ebf4c7
--- /dev/null
+++ b/notebooks/dummy_data_generation.ipynb
@@ -0,0 +1,1244 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "91e5d121-4267-4ee7-baaa-3cec3da1f869",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from collections import namedtuple\n",
+ "\n",
+ "from pathlib import Path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f7c989c6-67e1-4c34-bd08-34d6626cd33a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "N_SAMPLES = 1000\n",
+ "SEED = 42\n",
+ "RNG = np.random.default_rng(seed=SEED)\n",
+ "COLS_DUMMY_DATA = ['type', 'problem', 'action']\n",
+ "TOTAL_POSSIBILITY_FAILURES = 0.4\n",
+ "TYPE_MAPPING = {\n",
+ " 'Reguläre Wartung': 'Wartung',\n",
+ " 'Unerwarteter Fehler': 'Störungsmeldung',\n",
+ "}\n",
+ "OBJ_IDS_2_TXT = {\n",
+ " 1: 'Fräsmaschine-FS435X',\n",
+ " 2: 'Schleifmaschine-S4x87',\n",
+ " 3: 'Bohrbearbeitungszentrum-BBZ35',\n",
+ "}\n",
+ "STARTING_DATE = pd.to_datetime('2022-01-01')\n",
+ "ENDING_DATE = pd.to_datetime('2024-08-07')\n",
+ "DATASET_FEATURES = [\n",
+ " 'VorgangsID',\n",
+ " 'ObjektID',\n",
+ " 'HObjektText',\n",
+ " 'VorgangsTypName',\n",
+ " 'VorgangsBeschreibung',\n",
+ " 'ErledigungsBeschreibung',\n",
+ " 'ErstellungsDatum',\n",
+ " 'VorgangsDatum',\n",
+ " 'Arbeitsbeginn',\n",
+ " 'ErledigungsDatum',\n",
+ "]\n",
+ "DF_SKELLETON = {feat: [] for feat in DATASET_FEATURES}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0be70014-4fe0-45dd-8bd5-f731bd12cfe1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "source = '../data/Dummy_Data.xlsx'\n",
+ "dest = f'../data/Dummy_Dataset_N_{N_SAMPLES}.csv'\n",
+ "pth_source = Path(source)\n",
+ "pth_dest = Path(dest)\n",
+ "assert pth_source.exists()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "193304e9-9db1-4697-ae48-836a716ce80e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_dummy_data(pth_data, columns=COLS_DUMMY_DATA):\n",
+ " data = pd.read_excel(pth_data)\n",
+ " data.columns = columns.copy()\n",
+ " \n",
+ " return data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "85ac2d6c-4eee-429a-8511-82f39d4e8716",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " problem | \n",
+ " action | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Reguläre Wartung | \n",
+ " Schmierung der Lager | \n",
+ " Nachfüllen des Schmiermittels | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Unerwarteter Fehler | \n",
+ " Motorüberhitzung | \n",
+ " Austausch des Kühlgebläses | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Reguläre Wartung | \n",
+ " Überprüfung der Hydraulik | \n",
+ " Reinigung und Nachfüllen der Hydraulikflüssigkeit | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Unerwarteter Fehler | \n",
+ " Elektronikfehler | \n",
+ " Austausch der defekten Platine | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Reguläre Wartung | \n",
+ " Kalibrierung der Sensoren | \n",
+ " Justierung und Test der Sensoren | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type problem \\\n",
+ "0 Reguläre Wartung Schmierung der Lager \n",
+ "1 Unerwarteter Fehler Motorüberhitzung \n",
+ "2 Reguläre Wartung Überprüfung der Hydraulik \n",
+ "3 Unerwarteter Fehler Elektronikfehler \n",
+ "4 Reguläre Wartung Kalibrierung der Sensoren \n",
+ "\n",
+ " action \n",
+ "0 Nachfüllen des Schmiermittels \n",
+ "1 Austausch des Kühlgebläses \n",
+ "2 Reinigung und Nachfüllen der Hydraulikflüssigkeit \n",
+ "3 Austausch der defekten Platine \n",
+ "4 Justierung und Test der Sensoren "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = read_dummy_data(pth_source)\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "aa6d3af2-31c7-44ee-a3a8-4201b559038f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_subset(data, target_type, type_mapping=TYPE_MAPPING):\n",
+ " Entry = namedtuple('ProblemActionPairs', ['type', 'problem', 'action'])\n",
+ " entries = []\n",
+ " data_subset = data.loc[data['type']==target_type,:].copy()\n",
+ "\n",
+ " for row in data_subset.itertuples(index=False):\n",
+ " type_mapped = type_mapping[row.type]\n",
+ " entries.append(Entry(type_mapped, row.problem, row.action))\n",
+ "\n",
+ " return entries"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79bb0e96-3e04-458e-bbfb-dbd11a5386b9",
+ "metadata": {},
+ "source": [
+ "## Activity Types\n",
+ "\n",
+ "- relevant activity types:\n",
+ " - 'Reparaturauftrag (Portal)'\n",
+ " - 'Störungsmeldung'\n",
+ " - 'Wartung'\n",
+ "- ``regular`` --> 'Wartung'\n",
+ "- ``failures`` --> 'Störungsmeldung'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ec7a69d-80a6-4ede-928f-3ad933d3e090",
+ "metadata": {},
+ "source": [
+ "### Failures"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "668c0275-c8d8-4390-8857-a2ada566d786",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[ProblemActionPairs(type='Störungsmeldung', problem='Motorüberhitzung', action='Austausch des Kühlgebläses'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Elektronikfehler', action='Austausch der defekten Platine'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Getriebeausfall', action='Reparatur und Austausch der beschädigten Zahnräder'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Leckage in der Hydraulikleitung', action='Abdichtung der Leckstelle und Nachfüllen der Hydraulikflüssigkeit'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Ausfall der Steuerungseinheit', action='Neustart und Software-Update der Steuerungseinheit'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Bruch eines Zahnriemens', action='Austausch des Zahnriemens'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Kurzschluss im Schaltschrank', action='Austausch der Sicherungen und Kabel'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Vibrationsprobleme am Motor', action='Auswuchten des Motors und Austausch der Dämpfer'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Fehlfunktion der Hydraulikpumpe', action='Austausch der Hydraulikpumpe'),\n",
+ " ProblemActionPairs(type='Störungsmeldung', problem='Bruch eines Förderbands', action='Austausch des Förderbands')]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "failures = make_subset(data, target_type='Unerwarteter Fehler')\n",
+ "failures[:10]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4f784dba-5e0a-41aa-9005-3e310fda47cb",
+ "metadata": {},
+ "source": [
+ "### Regular Maintenance"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d4b8ca4a-4230-463e-bb74-f965b0732155",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[ProblemActionPairs(type='Wartung', problem='Schmierung der Lager', action='Nachfüllen des Schmiermittels'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Überprüfung der Hydraulik', action='Reinigung und Nachfüllen der Hydraulikflüssigkeit'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Kalibrierung der Sensoren', action='Justierung und Test der Sensoren'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Reinigung der Luftfilter', action='Austausch der Luftfilter'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Überprüfung der Sicherheitsvorrichtungen', action='Funktionstest und Justierung der Sicherheitsvorrichtungen'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Inspektion der Förderbänder', action='Einstellung und Austausch abgenutzter Teile'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Überprüfung der Druckventile', action='Reinigung und Einstellung der Druckventile'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Test der Not-Aus-Schalter', action='Test und Austausch defekter Not-Aus-Schalter'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Überprüfung der Kühlmittelsysteme', action='Nachfüllen und Entlüftung des Kühlmittelsystems'),\n",
+ " ProblemActionPairs(type='Wartung', problem='Kontrolle der Lichtschranken', action='Reinigung und Neujustierung der Lichtschranken')]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regular = make_subset(data, target_type='Reguläre Wartung')\n",
+ "regular[:10]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aaf5b6bd-a7bf-4c6b-a969-566cd90d2353",
+ "metadata": {},
+ "source": [
+ "## ObjectIDs and HObjektTexts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "bf2d380d-4103-40b0-a99d-7770e73a9ef5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def random_objects(mapping, rng, n_samples):\n",
+ " max_val = max(mapping.keys())\n",
+ " rands = rng.integers(1, max_val+1, size=n_samples)\n",
+ "\n",
+ " obj_ids = rands.tolist()\n",
+ " texts =[mapping[obj_id] for obj_id in obj_ids]\n",
+ "\n",
+ " return obj_ids, texts"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "708521a4-a93c-4d29-9be7-19a29fd8aa7d",
+ "metadata": {},
+ "source": [
+ "## Random Dates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "bff4fc9e-7a61-42df-abcb-1540e2d04b80",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def random_dates(start, end, rng, n_samples):\n",
+ "\n",
+ " start_u = start.value//10**9\n",
+ " end_u = end.value//10**9\n",
+ " days_to_finish = rng.exponential(1.3, n_samples).astype(np.int_)\n",
+ " td = pd.to_timedelta(days_to_finish, unit='day')\n",
+ "\n",
+ " creation_dates = pd.to_datetime(rng.integers(start_u, end_u, n_samples), unit='s').normalize()\n",
+ " done_dates = creation_dates + td\n",
+ "\n",
+ " return creation_dates.to_list(), done_dates.to_list()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2a77202-0eb9-4390-89af-e8e60e5a1e34",
+ "metadata": {},
+ "source": [
+ "## Random descriptions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d360fe54-36f2-4f35-a42c-7ca09a7599c3",
+ "metadata": {},
+ "source": [
+ "proportions:\n",
+ "- regular: 0.6\n",
+ "- failure: 0.4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "626a290b-a3ee-4c37-a754-b29ecca59f70",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def random_descriptions(failures, regular, target_prop_fail, rng, n_samples):\n",
+ " poss_per_entry_fail = target_prop_fail / len(failures)\n",
+ " poss_per_entry_regular = (1 - target_prop_fail) / len(regular)\n",
+ "\n",
+ " failure_possibilities = np.full(len(failures), poss_per_entry_fail)\n",
+ " regular_possibilities = np.full(len(regular), poss_per_entry_regular)\n",
+ " possibilities = np.concatenate((failure_possibilities, regular_possibilities))\n",
+ "\n",
+ " content_descriptions = failures.copy()\n",
+ " content_descriptions.extend(regular.copy())\n",
+ "\n",
+ " return rng.choice(content_descriptions, size=n_samples, p=possibilities)\n",
+ "\n",
+ "def description_parts(descriptions):\n",
+ " types = descriptions[:,0].tolist()\n",
+ " todo = descriptions[:,1].tolist()\n",
+ " dones = descriptions[:,2].tolist()\n",
+ "\n",
+ " return types, todo, dones"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d8999cba-a460-4f67-901f-b7936f04cd74",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39f72300-d73b-431f-89ee-af85e7bcdccc",
+ "metadata": {},
+ "source": [
+ "# Complete Dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "8eb838de-d28e-4499-a63a-a708a58e0c6f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_dataset(df_skelleton, type_failure, type_regular, starting_date, ending_date, rng, n_samples):\n",
+ " df_dict = df_skelleton.copy()\n",
+ " \n",
+ " failures = make_subset(data, target_type=type_failure)\n",
+ " regular = make_subset(data, target_type=type_regular)\n",
+ " \n",
+ " event_ids = list(range(1,n_samples+1))\n",
+ " obj_ids, txts = random_objects(OBJ_IDS_2_TXT, rng, n_samples)\n",
+ " creation_dates, done_dates = random_dates(starting_date, ending_date, rng, n_samples)\n",
+ " process_date = creation_dates.copy()\n",
+ " done_start_date = done_dates.copy()\n",
+ " descriptions = random_descriptions(failures, regular, TOTAL_POSSIBILITY_FAILURES, rng, n_samples)\n",
+ " types, todo, dones = description_parts(descriptions)\n",
+ "\n",
+ " df_dict.update(\n",
+ " VorgangsID=event_ids,\n",
+ " ObjektID=obj_ids,\n",
+ " HObjektText=txts,\n",
+ " VorgangsTypName=types,\n",
+ " VorgangsBeschreibung=todo,\n",
+ " ErledigungsBeschreibung=dones,\n",
+ " ErstellungsDatum=creation_dates,\n",
+ " VorgangsDatum=process_date,\n",
+ " Arbeitsbeginn=done_start_date,\n",
+ " ErledigungsDatum=done_dates,\n",
+ " )\n",
+ " df = pd.DataFrame.from_dict(df_dict)\n",
+ " df = df.sort_values(by='ErstellungsDatum', ascending=True)\n",
+ " df = df.reset_index(drop=True)\n",
+ " df['VorgangsID'] = event_ids\n",
+ "\n",
+ " return df.copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "195775b3-e44a-4d80-92bc-799093bd4ef2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = create_dataset(\n",
+ " df_skelleton=DF_SKELLETON,\n",
+ " type_failure='Unerwarteter Fehler',\n",
+ " type_regular='Reguläre Wartung',\n",
+ " starting_date=STARTING_DATE,\n",
+ " ending_date=ENDING_DATE,\n",
+ " rng=RNG,\n",
+ " n_samples=N_SAMPLES,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "d3182c98-b57a-4619-aa41-8ab4a90bd1f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " VorgangsID | \n",
+ " ObjektID | \n",
+ " HObjektText | \n",
+ " VorgangsTypName | \n",
+ " VorgangsBeschreibung | \n",
+ " ErledigungsBeschreibung | \n",
+ " ErstellungsDatum | \n",
+ " VorgangsDatum | \n",
+ " Arbeitsbeginn | \n",
+ " ErledigungsDatum | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Störungsmeldung | \n",
+ " Ölleckage durch undichten Ölsumpf | \n",
+ " Abdichtung und Austausch des Ölsumpfs | \n",
+ " 2022-01-01 | \n",
+ " 2022-01-01 | \n",
+ " 2022-01-01 | \n",
+ " 2022-01-01 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Wartung | \n",
+ " Überprüfung der Schwingungsdämpfer | \n",
+ " Austausch und Justierung der Schwingungsdämpfer | \n",
+ " 2022-01-03 | \n",
+ " 2022-01-03 | \n",
+ " 2022-01-03 | \n",
+ " 2022-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " Fräsmaschine-FS435X | \n",
+ " Wartung | \n",
+ " Überprüfung der Kühlmittelsysteme | \n",
+ " Nachfüllen und Entlüftung des Kühlmittelsystems | \n",
+ " 2022-01-05 | \n",
+ " 2022-01-05 | \n",
+ " 2022-01-05 | \n",
+ " 2022-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " Bohrbearbeitungszentrum-BBZ35 | \n",
+ " Störungsmeldung | \n",
+ " Blockierung der Förderschnecke | \n",
+ " Beseitigung der Blockierung und Überprüfung de... | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-07 | \n",
+ " 2022-01-07 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " Bohrbearbeitungszentrum-BBZ35 | \n",
+ " Störungsmeldung | \n",
+ " Überhitzung durch mangelnde Kühlmittelzirkulation | \n",
+ " Reinigung der Leitungen und Austausch des Kühl... | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-09 | \n",
+ " 2022-01-09 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " 996 | \n",
+ " 1 | \n",
+ " Fräsmaschine-FS435X | \n",
+ " Wartung | \n",
+ " Test der Not-Aus-Schalter | \n",
+ " Test und Austausch defekter Not-Aus-Schalter | \n",
+ " 2024-08-03 | \n",
+ " 2024-08-03 | \n",
+ " 2024-08-03 | \n",
+ " 2024-08-03 | \n",
+ "
\n",
+ " \n",
+ " | 996 | \n",
+ " 997 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Störungsmeldung | \n",
+ " Fehlfunktion der Hydraulikpumpe | \n",
+ " Austausch der Hydraulikpumpe | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-06 | \n",
+ " 2024-08-06 | \n",
+ "
\n",
+ " \n",
+ " | 997 | \n",
+ " 998 | \n",
+ " 3 | \n",
+ " Bohrbearbeitungszentrum-BBZ35 | \n",
+ " Wartung | \n",
+ " Kalibrierung der Sensoren | \n",
+ " Justierung und Test der Sensoren | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-07 | \n",
+ " 2024-08-07 | \n",
+ "
\n",
+ " \n",
+ " | 998 | \n",
+ " 999 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Wartung | \n",
+ " Überprüfung der Hydraulikzylinder | \n",
+ " Nachjustierung und Schmierung der Hydraulikzyl... | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ "
\n",
+ " \n",
+ " | 999 | \n",
+ " 1000 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Wartung | \n",
+ " Inspektion der Schutzabdeckungen | \n",
+ " Reparatur und Austausch beschädigter Abdeckungen | \n",
+ " 2024-08-06 | \n",
+ " 2024-08-06 | \n",
+ " 2024-08-07 | \n",
+ " 2024-08-07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1000 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " VorgangsID ObjektID HObjektText VorgangsTypName \\\n",
+ "0 1 2 Schleifmaschine-S4x87 Störungsmeldung \n",
+ "1 2 2 Schleifmaschine-S4x87 Wartung \n",
+ "2 3 1 Fräsmaschine-FS435X Wartung \n",
+ "3 4 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n",
+ "4 5 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n",
+ ".. ... ... ... ... \n",
+ "995 996 1 Fräsmaschine-FS435X Wartung \n",
+ "996 997 2 Schleifmaschine-S4x87 Störungsmeldung \n",
+ "997 998 3 Bohrbearbeitungszentrum-BBZ35 Wartung \n",
+ "998 999 2 Schleifmaschine-S4x87 Wartung \n",
+ "999 1000 2 Schleifmaschine-S4x87 Wartung \n",
+ "\n",
+ " VorgangsBeschreibung \\\n",
+ "0 Ölleckage durch undichten Ölsumpf \n",
+ "1 Überprüfung der Schwingungsdämpfer \n",
+ "2 Überprüfung der Kühlmittelsysteme \n",
+ "3 Blockierung der Förderschnecke \n",
+ "4 Überhitzung durch mangelnde Kühlmittelzirkulation \n",
+ ".. ... \n",
+ "995 Test der Not-Aus-Schalter \n",
+ "996 Fehlfunktion der Hydraulikpumpe \n",
+ "997 Kalibrierung der Sensoren \n",
+ "998 Überprüfung der Hydraulikzylinder \n",
+ "999 Inspektion der Schutzabdeckungen \n",
+ "\n",
+ " ErledigungsBeschreibung ErstellungsDatum \\\n",
+ "0 Abdichtung und Austausch des Ölsumpfs 2022-01-01 \n",
+ "1 Austausch und Justierung der Schwingungsdämpfer 2022-01-03 \n",
+ "2 Nachfüllen und Entlüftung des Kühlmittelsystems 2022-01-05 \n",
+ "3 Beseitigung der Blockierung und Überprüfung de... 2022-01-06 \n",
+ "4 Reinigung der Leitungen und Austausch des Kühl... 2022-01-06 \n",
+ ".. ... ... \n",
+ "995 Test und Austausch defekter Not-Aus-Schalter 2024-08-03 \n",
+ "996 Austausch der Hydraulikpumpe 2024-08-05 \n",
+ "997 Justierung und Test der Sensoren 2024-08-05 \n",
+ "998 Nachjustierung und Schmierung der Hydraulikzyl... 2024-08-05 \n",
+ "999 Reparatur und Austausch beschädigter Abdeckungen 2024-08-06 \n",
+ "\n",
+ " VorgangsDatum Arbeitsbeginn ErledigungsDatum \n",
+ "0 2022-01-01 2022-01-01 2022-01-01 \n",
+ "1 2022-01-03 2022-01-03 2022-01-03 \n",
+ "2 2022-01-05 2022-01-05 2022-01-05 \n",
+ "3 2022-01-06 2022-01-07 2022-01-07 \n",
+ "4 2022-01-06 2022-01-09 2022-01-09 \n",
+ ".. ... ... ... \n",
+ "995 2024-08-03 2024-08-03 2024-08-03 \n",
+ "996 2024-08-05 2024-08-06 2024-08-06 \n",
+ "997 2024-08-05 2024-08-07 2024-08-07 \n",
+ "998 2024-08-05 2024-08-05 2024-08-05 \n",
+ "999 2024-08-06 2024-08-07 2024-08-07 \n",
+ "\n",
+ "[1000 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "2bb03fdb-ea45-46a1-81b4-525f7568355c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df.to_excel(pth_dest)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "ff9f6f80-b709-4011-89fe-90c8812d7e7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.to_csv(pth_dest, sep=';', encoding='cp1252', index=False, date_format='%d.%m.%Y')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2871889-f128-419c-8e89-d8eb48ceb2e1",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cdba82ad-d4d1-4266-ad41-8d90bb059956",
+ "metadata": {},
+ "source": [
+ "# Check processed data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "af26cd9b-e5d1-46e1-b269-ac46de10dfe2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pth_to_data = '../scripts/results/dummy_N_1000/'\n",
+ "pth_to_data = Path(pth_to_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "c7338787-716c-43c0-9d11-03567459f594",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[WindowsPath('../scripts/results/dummy_N_1000/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TIMELINE.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TOKEN_ANALYSIS.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TK-GRAPH_POSTPROCESSING.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph.graphml'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TK-GRAPH_ANALYSIS.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph-filtered.graphml'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TK-GRAPH_ANALYSIS_RESCALED.pkl'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph-directed-rescaled.graphml'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph-undirected-rescaled.graphml'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/token_graph.svg'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_1.svg'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_2.svg'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_3.svg'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_4.svg'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_5.svg'),\n",
+ " WindowsPath('../scripts/results/dummy_N_1000/Pipe-Graph_Static-Rendering_Step-6_build_subnetworks.pkl')]"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "files = list(pth_to_data.glob(r'*'))\n",
+ "files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "1dd0da25-9097-46a1-bac8-dce281e17c5b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " _torch_pytree._register_pytree_node(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-08-07 13:37:19 +0000 | lang_main:io:INFO | Loaded TOML config file successfully.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from lang_main import io"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "33ae3e52-f638-40a0-b243-6578cde52a19",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "WindowsPath('../scripts/results/dummy_N_1000/TIMELINE.pkl')"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "files[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "fc598842-f218-4895-8d1e-20b09f9e6d12",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-08-07 13:51:31 +0000 | lang_main:io:INFO | Loaded file successfully.\n"
+ ]
+ }
+ ],
+ "source": [
+ "(data,) = io.load_pickle(files[1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "3cbffa6c-4199-4a9f-b041-3c34fdbc7266",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " VorgangsID | \n",
+ " ObjektID | \n",
+ " HObjektText | \n",
+ " VorgangsTypName | \n",
+ " VorgangsBeschreibung | \n",
+ " ErledigungsBeschreibung | \n",
+ " ErstellungsDatum | \n",
+ " VorgangsDatum | \n",
+ " Arbeitsbeginn | \n",
+ " ErledigungsDatum | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Störungsmeldung | \n",
+ " Ölleckage durch undichten Ölsumpf | \n",
+ " Abdichtung und Austausch des Ölsumpfs | \n",
+ " 2022-01-01 | \n",
+ " 2022-01-01 | \n",
+ " 2022-01-01 | \n",
+ " 2022-01-01 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Wartung | \n",
+ " Überprüfung der Schwingungsdämpfer | \n",
+ " Austausch und Justierung der Schwingungsdämpfer | \n",
+ " 2022-01-03 | \n",
+ " 2022-01-03 | \n",
+ " 2022-01-03 | \n",
+ " 2022-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " Fräsmaschine-FS435X | \n",
+ " Wartung | \n",
+ " Überprüfung der Kühlmittelsysteme | \n",
+ " Nachfüllen und Entlüftung des Kühlmittelsystems | \n",
+ " 2022-01-05 | \n",
+ " 2022-01-05 | \n",
+ " 2022-01-05 | \n",
+ " 2022-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " Bohrbearbeitungszentrum-BBZ35 | \n",
+ " Störungsmeldung | \n",
+ " Blockierung der Förderschnecke | \n",
+ " Beseitigung der Blockierung und Überprüfung de... | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-07 | \n",
+ " 2022-01-07 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " Bohrbearbeitungszentrum-BBZ35 | \n",
+ " Störungsmeldung | \n",
+ " Überhitzung durch mangelnde Kühlmittelzirkulation | \n",
+ " Reinigung der Leitungen und Austausch des Kühl... | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-06 | \n",
+ " 2022-01-09 | \n",
+ " 2022-01-09 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " 996 | \n",
+ " 1 | \n",
+ " Fräsmaschine-FS435X | \n",
+ " Wartung | \n",
+ " Test der Not-Aus-Schalter | \n",
+ " Test und Austausch defekter Not-Aus-Schalter | \n",
+ " 2024-08-03 | \n",
+ " 2024-08-03 | \n",
+ " 2024-08-03 | \n",
+ " 2024-08-03 | \n",
+ "
\n",
+ " \n",
+ " | 996 | \n",
+ " 997 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Störungsmeldung | \n",
+ " Fehlfunktion der Hydraulikpumpe | \n",
+ " Austausch der Hydraulikpumpe | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-06 | \n",
+ " 2024-08-06 | \n",
+ "
\n",
+ " \n",
+ " | 997 | \n",
+ " 998 | \n",
+ " 3 | \n",
+ " Bohrbearbeitungszentrum-BBZ35 | \n",
+ " Wartung | \n",
+ " Kalibrierung der Sensoren | \n",
+ " Justierung und Test der Sensoren | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-07 | \n",
+ " 2024-08-07 | \n",
+ "
\n",
+ " \n",
+ " | 998 | \n",
+ " 999 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Wartung | \n",
+ " Überprüfung der Hydraulikzylinder | \n",
+ " Nachjustierung und Schmierung der Hydraulikzyl... | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ " 2024-08-05 | \n",
+ "
\n",
+ " \n",
+ " | 999 | \n",
+ " 1000 | \n",
+ " 2 | \n",
+ " Schleifmaschine-S4x87 | \n",
+ " Wartung | \n",
+ " Inspektion der Schutzabdeckungen | \n",
+ " Reparatur und Austausch beschädigter Abdeckungen | \n",
+ " 2024-08-06 | \n",
+ " 2024-08-06 | \n",
+ " 2024-08-07 | \n",
+ " 2024-08-07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1000 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " VorgangsID ObjektID HObjektText VorgangsTypName \\\n",
+ "0 1 2 Schleifmaschine-S4x87 Störungsmeldung \n",
+ "1 2 2 Schleifmaschine-S4x87 Wartung \n",
+ "2 3 1 Fräsmaschine-FS435X Wartung \n",
+ "3 4 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n",
+ "4 5 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n",
+ ".. ... ... ... ... \n",
+ "995 996 1 Fräsmaschine-FS435X Wartung \n",
+ "996 997 2 Schleifmaschine-S4x87 Störungsmeldung \n",
+ "997 998 3 Bohrbearbeitungszentrum-BBZ35 Wartung \n",
+ "998 999 2 Schleifmaschine-S4x87 Wartung \n",
+ "999 1000 2 Schleifmaschine-S4x87 Wartung \n",
+ "\n",
+ " VorgangsBeschreibung \\\n",
+ "0 Ölleckage durch undichten Ölsumpf \n",
+ "1 Überprüfung der Schwingungsdämpfer \n",
+ "2 Überprüfung der Kühlmittelsysteme \n",
+ "3 Blockierung der Förderschnecke \n",
+ "4 Überhitzung durch mangelnde Kühlmittelzirkulation \n",
+ ".. ... \n",
+ "995 Test der Not-Aus-Schalter \n",
+ "996 Fehlfunktion der Hydraulikpumpe \n",
+ "997 Kalibrierung der Sensoren \n",
+ "998 Überprüfung der Hydraulikzylinder \n",
+ "999 Inspektion der Schutzabdeckungen \n",
+ "\n",
+ " ErledigungsBeschreibung ErstellungsDatum \\\n",
+ "0 Abdichtung und Austausch des Ölsumpfs 2022-01-01 \n",
+ "1 Austausch und Justierung der Schwingungsdämpfer 2022-01-03 \n",
+ "2 Nachfüllen und Entlüftung des Kühlmittelsystems 2022-01-05 \n",
+ "3 Beseitigung der Blockierung und Überprüfung de... 2022-01-06 \n",
+ "4 Reinigung der Leitungen und Austausch des Kühl... 2022-01-06 \n",
+ ".. ... ... \n",
+ "995 Test und Austausch defekter Not-Aus-Schalter 2024-08-03 \n",
+ "996 Austausch der Hydraulikpumpe 2024-08-05 \n",
+ "997 Justierung und Test der Sensoren 2024-08-05 \n",
+ "998 Nachjustierung und Schmierung der Hydraulikzyl... 2024-08-05 \n",
+ "999 Reparatur und Austausch beschädigter Abdeckungen 2024-08-06 \n",
+ "\n",
+ " VorgangsDatum Arbeitsbeginn ErledigungsDatum \n",
+ "0 2022-01-01 2022-01-01 2022-01-01 \n",
+ "1 2022-01-03 2022-01-03 2022-01-03 \n",
+ "2 2022-01-05 2022-01-05 2022-01-05 \n",
+ "3 2022-01-06 2022-01-07 2022-01-07 \n",
+ "4 2022-01-06 2022-01-09 2022-01-09 \n",
+ ".. ... ... ... \n",
+ "995 2024-08-03 2024-08-03 2024-08-03 \n",
+ "996 2024-08-05 2024-08-06 2024-08-06 \n",
+ "997 2024-08-05 2024-08-07 2024-08-07 \n",
+ "998 2024-08-05 2024-08-05 2024-08-05 \n",
+ "999 2024-08-06 2024-08-07 2024-08-07 \n",
+ "\n",
+ "[1000 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "fd422d51-6118-47aa-80a1-6e80819a3205",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = data.copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "4225af01-b9df-4b27-aae2-b06257b0dd3a",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "Can only use .dt accessor with datetimelike values",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[37], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErledigungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdt\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py:6299\u001b[0m, in \u001b[0;36mNDFrame.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6292\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 6293\u001b[0m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_internal_names_set\n\u001b[0;32m 6294\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_metadata\n\u001b[0;32m 6295\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessors\n\u001b[0;32m 6296\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info_axis\u001b[38;5;241m.\u001b[39m_can_hold_identifiers_and_holds_name(name)\n\u001b[0;32m 6297\u001b[0m ):\n\u001b[0;32m 6298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m[name]\n\u001b[1;32m-> 6299\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\accessor.py:224\u001b[0m, in \u001b[0;36mCachedAccessor.__get__\u001b[1;34m(self, obj, cls)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 222\u001b[0m \u001b[38;5;66;03m# we're accessing the attribute of the class, i.e., Dataset.geo\u001b[39;00m\n\u001b[0;32m 223\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessor\n\u001b[1;32m--> 224\u001b[0m accessor_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_accessor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# Replace the property with the accessor object. Inspired by:\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# https://www.pydanny.com/cached-property.html\u001b[39;00m\n\u001b[0;32m 227\u001b[0m \u001b[38;5;66;03m# We need to use object.__setattr__ because we overwrite __setattr__ on\u001b[39;00m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;66;03m# NDFrame\u001b[39;00m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28mobject\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__setattr__\u001b[39m(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name, accessor_obj)\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\accessors.py:643\u001b[0m, in \u001b[0;36mCombinedDatetimelikeProperties.__new__\u001b[1;34m(cls, data)\u001b[0m\n\u001b[0;32m 640\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data\u001b[38;5;241m.\u001b[39mdtype, PeriodDtype):\n\u001b[0;32m 641\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PeriodProperties(data, orig)\n\u001b[1;32m--> 643\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan only use .dt accessor with datetimelike values\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "\u001b[1;31mAttributeError\u001b[0m: Can only use .dt accessor with datetimelike values"
+ ]
+ }
+ ],
+ "source": [
+ "t['ErledigungsDatum'].dt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "9ad24677-b0be-4f4e-9067-b4746e0ba039",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "Can only use .dt accessor with datetimelike values",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[38], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErstellungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdt\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py:6299\u001b[0m, in \u001b[0;36mNDFrame.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6292\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 6293\u001b[0m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_internal_names_set\n\u001b[0;32m 6294\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_metadata\n\u001b[0;32m 6295\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessors\n\u001b[0;32m 6296\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info_axis\u001b[38;5;241m.\u001b[39m_can_hold_identifiers_and_holds_name(name)\n\u001b[0;32m 6297\u001b[0m ):\n\u001b[0;32m 6298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m[name]\n\u001b[1;32m-> 6299\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\accessor.py:224\u001b[0m, in \u001b[0;36mCachedAccessor.__get__\u001b[1;34m(self, obj, cls)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 222\u001b[0m \u001b[38;5;66;03m# we're accessing the attribute of the class, i.e., Dataset.geo\u001b[39;00m\n\u001b[0;32m 223\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessor\n\u001b[1;32m--> 224\u001b[0m accessor_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_accessor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# Replace the property with the accessor object. Inspired by:\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# https://www.pydanny.com/cached-property.html\u001b[39;00m\n\u001b[0;32m 227\u001b[0m \u001b[38;5;66;03m# We need to use object.__setattr__ because we overwrite __setattr__ on\u001b[39;00m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;66;03m# NDFrame\u001b[39;00m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28mobject\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__setattr__\u001b[39m(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name, accessor_obj)\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\accessors.py:643\u001b[0m, in \u001b[0;36mCombinedDatetimelikeProperties.__new__\u001b[1;34m(cls, data)\u001b[0m\n\u001b[0;32m 640\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data\u001b[38;5;241m.\u001b[39mdtype, PeriodDtype):\n\u001b[0;32m 641\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PeriodProperties(data, orig)\n\u001b[1;32m--> 643\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan only use .dt accessor with datetimelike values\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "\u001b[1;31mAttributeError\u001b[0m: Can only use .dt accessor with datetimelike values"
+ ]
+ }
+ ],
+ "source": [
+ "t['ErstellungsDatum'].dt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "de697da1-2a4d-465f-988e-5d0a68840167",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "TypeError",
+ "evalue": "unsupported operand type(s) for -: 'str' and 'str'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[1;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\computation\\expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[1;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[0;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[1;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\computation\\expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[1;34m(op, op_str, a, b)\u001b[0m\n\u001b[0;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'str' and 'str'",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[36], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m t[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErledigungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErstellungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[0;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[1;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\arraylike.py:194\u001b[0m, in \u001b[0;36mOpsMixin.__sub__\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 192\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__sub__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 193\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__sub__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[1;32m--> 194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msub\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[1;34m(self, other, op)\u001b[0m\n\u001b[0;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[0;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[1;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\base.py:1382\u001b[0m, in \u001b[0;36mIndexOpsMixin._arith_method\u001b[1;34m(self, other, op)\u001b[0m\n\u001b[0;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[0;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[1;34m(left, right, op)\u001b[0m\n\u001b[0;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[0;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[1;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[1;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[0;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[0;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[0;32m 222\u001b[0m ):\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[1;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[1;34m(x, y, op)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[0;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n",
+ "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'str' and 'str'"
+ ]
+ }
+ ],
+ "source": [
+ "t['test'] = t['ErledigungsDatum'] - t['ErstellungsDatum']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c2b1724e-f48d-41a3-98c6-710bef840ba5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/test-notebooks/lang_main.xml b/notebooks/lang_main.xml
similarity index 100%
rename from test-notebooks/lang_main.xml
rename to notebooks/lang_main.xml
diff --git a/test-notebooks/lang_main_config.toml b/notebooks/lang_main_config.toml
similarity index 100%
rename from test-notebooks/lang_main_config.toml
rename to notebooks/lang_main_config.toml
diff --git a/test-notebooks/misc.ipynb b/notebooks/misc.ipynb
similarity index 97%
rename from test-notebooks/misc.ipynb
rename to notebooks/misc.ipynb
index 7d0193d..ae63ff3 100644
--- a/test-notebooks/misc.ipynb
+++ b/notebooks/misc.ipynb
@@ -13,7 +13,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "d46b6ce8-b51b-49e0-b494-fc24fda0f73f",
"metadata": {},
"outputs": [],
@@ -23,7 +23,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"id": "6fde72d3-b95b-4d37-be71-a7d3661dd3f5",
"metadata": {},
"outputs": [
@@ -40,7 +40,7 @@
"'You are connected to Cytoscape!'"
]
},
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -52,46 +52,41 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "43eeb870-6f97-4029-ac0d-210315ccaabf",
+ "id": "187ced81-6304-49bd-afc7-c18e656bc9a3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "af118d77-d87a-4687-be5b-e810a24c403e",
"metadata": {
"scrolled": true
},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2024-07-31 06:33:53 +0000 | io:INFO | Loaded TOML config file successfully.\n"
- ]
- },
{
"name": "stderr",
"output_type": "stream",
"text": [
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
- " _torch_pytree._register_pytree_node(\n",
- "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
- " warnings.warn(\n",
- "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
- " _torch_pytree._register_pytree_node(\n",
- "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
- " _torch_pytree._register_pytree_node(\n",
- "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
- " warnings.warn(\n"
+ " _torch_pytree._register_pytree_node(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-08-07 07:36:54 +0000 | io:INFO | Loaded TOML config file successfully.\n"
]
}
],
"source": [
"from lang_main import io\n",
"from lang_main.analysis.graphs import rescale_edge_weights, get_graph_metadata\n",
+ "from lang_main import model_loader\n",
+ "from lang_main.types import LanguageModels\n",
+ "from lang_main.constants import MODEL_LOADER_MAP\n",
"\n",
"from pathlib import Path\n",
"import pickle\n",
@@ -112,6 +107,88 @@
"#p4c.py4cytoscape_logger.detail_logger.addHandler(NullHandler())"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59cbcf38-6fe1-403b-9c10-f107e28185f0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "a33cb410-f774-4cc9-b972-bf05df36d3d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " _torch_pytree._register_pytree_node(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "spacy imports\n",
+ "end\n",
+ "Nothing\n",
+ "2024-08-07 07:51:22 +0000 | io:INFO | Loaded TOML config file successfully.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from lang_main import __init__"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7f55780a-a91e-49ef-a24f-503eaf2efae8",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "2d91512f-8976-452e-acc9-4bff3dc33dd1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " _torch_pytree._register_pytree_node(\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_loader.instantiate_model(MODEL_LOADER_MAP, LanguageModels.SPACY)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7fe9337f-a11b-4eab-ae46-a8a4ccf3f461",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": 3,
@@ -3753,7 +3830,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 3,
"id": "fcd9247f-c4f9-4f73-9fd3-2ab56700073f",
"metadata": {},
"outputs": [
@@ -3761,23 +3838,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Calling cytoscape_ping()\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀCalling cytoscape_version_info(base_url='http://127.0.0.1:1234/v1')\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀCalling cyrest_get('version', base_url='http://127.0.0.1:1234/v1')\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀHTTP GET(http://127.0.0.1:1234/v1/version)\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀOK[200], content: {\"apiVersion\":\"v1\",\"cytoscapeVersion\":\"3.10.2\"}\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀReturning 'cyrest_get': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2'}\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀReturning 'cytoscape_version_info': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2', 'automationAPIVersion': '1.9.0', 'py4cytoscapeVersion': '1.9.0'}\n",
- "You are connected to Cytoscape!\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Returning 'cytoscape_ping': 'You are connected to Cytoscape!'\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | --------------------\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Calling cytoscape_version_info()\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀCalling cyrest_get('version', base_url='http://127.0.0.1:1234/v1')\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀHTTP GET(http://127.0.0.1:1234/v1/version)\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀOK[200], content: {\"apiVersion\":\"v1\",\"cytoscapeVersion\":\"3.10.2\"}\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀReturning 'cyrest_get': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2'}\n",
- "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Returning 'cytoscape_version_info': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2', 'automationAPIVersion': '1.9.0', 'py4cytoscapeVersion': '1.9.0'}\n",
- "2024-07-10 11:19:16 +0000 | py4cytoscape_logger:DEBUG | --------------------\n"
+ "You are connected to Cytoscape!\n"
]
},
{
@@ -3789,7 +3850,7 @@
" 'py4cytoscapeVersion': '1.9.0'}"
]
},
- "execution_count": 11,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -3803,7 +3864,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"id": "b9290659-e33c-47fc-8d89-7aa3dd6e843a",
"metadata": {},
"outputs": [],
@@ -3815,7 +3876,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"id": "979d6def-83ac-47f6-ac6f-0d20ddf48d48",
"metadata": {},
"outputs": [
@@ -3882,7 +3943,7 @@
"3 node 3 B 5"
]
},
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -3893,7 +3954,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"id": "81702429-5735-48de-96a4-1f32c7c7d68c",
"metadata": {},
"outputs": [
@@ -3965,7 +4026,7 @@
"3 node 2 node 3 interacts 9.9"
]
},
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -3976,7 +4037,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"id": "6b29d561-fffd-4a5b-91c1-8fb6a075ae4f",
"metadata": {},
"outputs": [
@@ -3994,7 +4055,7 @@
"128"
]
},
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -4003,6 +4064,187 @@
"p4c.create_network_from_data_frames(nodes, edges, title=\"my first network\", collection=\"DataFrame Example\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "1e87b4d9-6ef0-4108-81ff-e0124e45b793",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p4c.hide_all_panels()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "23706ea9-b661-428e-a4de-ac4543aafc76",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "''"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p4c.set_network_zoom_bypass(1.5, bypass=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "9c7e7fa0-8b17-43f6-9076-3e117748b06b",
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "In cyrest_delete(): Bypass Visual Property does not exist: NETWORK_SCALE_FACTOR\n"
+ ]
+ },
+ {
+ "ename": "CyError",
+ "evalue": "In cyrest_delete(): Bypass Visual Property does not exist: NETWORK_SCALE_FACTOR",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\commands.py:109\u001b[0m, in \u001b[0;36mcyrest_delete\u001b[1;34m(operation, parameters, base_url, require_json)\u001b[0m\n\u001b[0;32m 108\u001b[0m r \u001b[38;5;241m=\u001b[39m _do_request(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDELETE\u001b[39m\u001b[38;5;124m'\u001b[39m, url, params\u001b[38;5;241m=\u001b[39mparameters, base_url\u001b[38;5;241m=\u001b[39mbase_url)\n\u001b[1;32m--> 109\u001b[0m \u001b[43mr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 110\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\requests\\models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
+ "\u001b[1;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: http://127.0.0.1:1234/v1/networks/3004/views/3129/network/NETWORK_SCALE_FACTOR/bypass",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[1;31mCyError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[25], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mp4c\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclear_network_zoom_bypass\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:133\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 133\u001b[0m \u001b[43mlog_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 134\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 135\u001b[0m log_finally()\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:130\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 128\u001b[0m log_incoming(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 130\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Call function being logged\u001b[39;00m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\style_bypasses.py:2726\u001b[0m, in \u001b[0;36mclear_network_zoom_bypass\u001b[1;34m(network, base_url)\u001b[0m\n\u001b[0;32m 2697\u001b[0m \u001b[38;5;129m@cy_log\u001b[39m\n\u001b[0;32m 2698\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mclear_network_zoom_bypass\u001b[39m(network\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, base_url\u001b[38;5;241m=\u001b[39mDEFAULT_BASE_URL):\n\u001b[0;32m 2699\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Clear the bypass value for the scale factor for the network, effectively restoring prior default values.\u001b[39;00m\n\u001b[0;32m 2700\u001b[0m \n\u001b[0;32m 2701\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2724\u001b[0m \u001b[38;5;124;03m clearing this property will throw an exception.\u001b[39;00m\n\u001b[0;32m 2725\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2726\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mclear_network_property_bypass\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNETWORK_SCALE_FACTOR\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnetwork\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnetwork\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbase_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2727\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:133\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 133\u001b[0m \u001b[43mlog_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 134\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 135\u001b[0m log_finally()\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:130\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 128\u001b[0m log_incoming(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 130\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Call function being logged\u001b[39;00m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\style_bypasses.py:516\u001b[0m, in \u001b[0;36mclear_network_property_bypass\u001b[1;34m(visual_property, network, base_url)\u001b[0m\n\u001b[0;32m 512\u001b[0m view_suid \u001b[38;5;241m=\u001b[39m network_views\u001b[38;5;241m.\u001b[39mget_network_views(net_suid, base_url\u001b[38;5;241m=\u001b[39mbase_url)[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 514\u001b[0m visual_property \u001b[38;5;241m=\u001b[39m normalize_prop_name(visual_property)\n\u001b[1;32m--> 516\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mcommands\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcyrest_delete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnetworks/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnet_suid\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/views/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mview_suid\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/network/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mvisual_property\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/bypass\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[43mbase_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:133\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 133\u001b[0m \u001b[43mlog_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 134\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 135\u001b[0m log_finally()\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:130\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 128\u001b[0m log_incoming(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 130\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Call function being logged\u001b[39;00m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\commands.py:118\u001b[0m, in \u001b[0;36mcyrest_delete\u001b[1;34m(operation, parameters, base_url, require_json)\u001b[0m\n\u001b[0;32m 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\u001b[38;5;241m.\u001b[39mtext\n\u001b[0;32m 117\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 118\u001b[0m \u001b[43m_handle_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\commands.py:683\u001b[0m, in \u001b[0;36m_handle_error\u001b[1;34m(e, force_cy_error)\u001b[0m\n\u001b[0;32m 681\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 682\u001b[0m show_error(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mIn \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcaller\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m--> 683\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n",
+ "\u001b[1;31mCyError\u001b[0m: In cyrest_delete(): Bypass Visual Property does not exist: NETWORK_SCALE_FACTOR"
+ ]
+ }
+ ],
+ "source": [
+ "p4c.clear_network_zoom_bypass()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "234855d1-a961-4dd7-9b2e-a96d7acc1142",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{}"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p4c.fit_content()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "79ccddf6-3b0f-4151-a333-05b6e7b5ed8a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.631300425888794"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "zoom = p4c.get_network_zoom()\n",
+ "zoom"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "dc8f2bcc-9836-4476-bfa4-1d83308375ac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.5660484088532423"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "zoom * 0.96"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "3e3106c6-d9f4-41cd-8a5a-452cd32b25c0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "''"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p4c.set_network_zoom_bypass(zoom * 0.96, bypass=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "05a19acc-8af7-45d0-8902-1e9776824a38",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4647c594-46f1-4e12-9927-e73ca5a0486a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": 10,
diff --git a/test-notebooks/styles_template.xml b/notebooks/styles_template.xml
similarity index 100%
rename from test-notebooks/styles_template.xml
rename to notebooks/styles_template.xml
diff --git a/test-notebooks/timeline_analysis.ipynb b/notebooks/timeline_analysis.ipynb
similarity index 100%
rename from test-notebooks/timeline_analysis.ipynb
rename to notebooks/timeline_analysis.ipynb
diff --git a/test-notebooks/truncate_dataset.ipynb b/notebooks/truncate_dataset.ipynb
similarity index 100%
rename from test-notebooks/truncate_dataset.ipynb
rename to notebooks/truncate_dataset.ipynb
diff --git a/pdm.lock b/pdm.lock
index 20f19b6..c916a9b 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -2,10 +2,13 @@
# It is not intended for manual editing.
[metadata]
-groups = ["default", "notebooks", "trials", "trails", "dev"]
-strategy = ["cross_platform", "inherit_metadata"]
-lock_version = "4.4.2"
-content_hash = "sha256:a9f1cc71f6ee89d2f0572ef7254c9f0be702dbd1a4957b2f0d00d3b83ccc20d4"
+groups = ["default", "dev", "notebooks", "trails", "trials"]
+strategy = ["inherit_metadata"]
+lock_version = "4.5.0"
+content_hash = "sha256:468a23f2e765abd2cf8760a33a219a4e475f1ebc73630f792eddf6563293720a"
+
+[[metadata.targets]]
+requires_python = ">=3.11"
[[package]]
name = "annotated-types"
@@ -13,6 +16,9 @@ version = "0.6.0"
requires_python = ">=3.8"
summary = "Reusable constraint types to use with typing.Annotated"
groups = ["default"]
+dependencies = [
+ "typing-extensions>=4.0.0; python_version < \"3.9\"",
+]
files = [
{file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
{file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
@@ -25,8 +31,10 @@ requires_python = ">=3.8"
summary = "High level compatibility layer for multiple asynchronous event loop implementations"
groups = ["notebooks"]
dependencies = [
+ "exceptiongroup>=1.0.2; python_version < \"3.11\"",
"idna>=2.8",
"sniffio>=1.1",
+ "typing-extensions>=4.1; python_version < \"3.11\"",
]
files = [
{file = "anyio-4.3.0-py3-none-any.whl", hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"},
@@ -53,6 +61,7 @@ summary = "Argon2 for Python"
groups = ["notebooks"]
dependencies = [
"argon2-cffi-bindings",
+ "typing-extensions; python_version < \"3.8\"",
]
files = [
{file = "argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea"},
@@ -114,6 +123,7 @@ summary = "Annotate AST trees with source code positions"
groups = ["notebooks"]
dependencies = [
"six>=1.12.0",
+ "typing; python_version < \"3.5\"",
]
files = [
{file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"},
@@ -126,6 +136,9 @@ version = "2.0.4"
requires_python = ">=3.8"
summary = "Simple LRU cache for asyncio"
groups = ["notebooks"]
+dependencies = [
+ "typing-extensions>=4.0.0; python_version < \"3.11\"",
+]
files = [
{file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"},
{file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"},
@@ -137,6 +150,9 @@ version = "23.2.0"
requires_python = ">=3.7"
summary = "Classes Without Boilerplate"
groups = ["notebooks"]
+dependencies = [
+ "importlib-metadata; python_version < \"3.8\"",
+]
files = [
{file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
{file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
@@ -148,6 +164,9 @@ version = "2.15.0"
requires_python = ">=3.8"
summary = "Internationalization utilities"
groups = ["notebooks"]
+dependencies = [
+ "pytz>=2015.7; python_version < \"3.9\"",
+]
files = [
{file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"},
{file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"},
@@ -210,6 +229,7 @@ version = "0.7.11"
summary = "The Blis BLAS-like linear algebra library, as a self-contained C-extension."
groups = ["default"]
dependencies = [
+ "numpy>=1.15.0; python_version < \"3.9\"",
"numpy>=1.19.0; python_version >= \"3.9\"",
]
files = [
@@ -232,6 +252,10 @@ version = "2.0.10"
requires_python = ">=3.6"
summary = "Super lightweight function registries for your library"
groups = ["default"]
+dependencies = [
+ "typing-extensions>=3.6.4; python_version < \"3.8\"",
+ "zipp>=0.5; python_version < \"3.8\"",
+]
files = [
{file = "catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f"},
{file = "catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15"},
@@ -342,6 +366,7 @@ summary = "Composable command line interface toolkit"
groups = ["default"]
dependencies = [
"colorama; platform_system == \"Windows\"",
+ "importlib-metadata; python_version < \"3.8\"",
]
files = [
{file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
@@ -354,6 +379,10 @@ version = "0.16.0"
requires_python = ">=3.7"
summary = "pathlib-style classes for cloud storage services."
groups = ["default"]
+dependencies = [
+ "importlib-metadata; python_version < \"3.8\"",
+ "typing-extensions>4; python_version < \"3.11\"",
+]
files = [
{file = "cloudpathlib-0.16.0-py3-none-any.whl", hash = "sha256:f46267556bf91f03db52b5df7a152548596a15aabca1c8731ef32b0b25a1a6a3"},
{file = "cloudpathlib-0.16.0.tar.gz", hash = "sha256:cdfcd35d46d529587d744154a0bdf962aca953b725c8784cd2ec478354ea63a3"},
@@ -417,6 +446,7 @@ groups = ["default"]
dependencies = [
"pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4",
"srsly<3.0.0,>=2.4.0",
+ "typing-extensions<4.5.0,>=3.7.4.1; python_version < \"3.8\"",
]
files = [
{file = "confection-0.1.4-py3-none-any.whl", hash = "sha256:a658818d004939069c3e2b3db74a2cb9d956a5e61a1c9ad61788e0ee09a7090f"},
@@ -578,6 +608,17 @@ files = [
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
]
+[[package]]
+name = "et-xmlfile"
+version = "1.1.0"
+requires_python = ">=3.6"
+summary = "An implementation of lxml.xmlfile for the standard library"
+groups = ["dev"]
+files = [
+ {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
+ {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
+]
+
[[package]]
name = "executing"
version = "2.0.1"
@@ -621,6 +662,7 @@ dependencies = [
"Werkzeug>=3.0.0",
"blinker>=1.6.2",
"click>=8.1.3",
+ "importlib-metadata>=3.6.0; python_version < \"3.10\"",
"itsdangerous>=2.1.2",
]
files = [
@@ -634,6 +676,9 @@ version = "1.5.1"
requires_python = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4"
summary = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers"
groups = ["notebooks"]
+dependencies = [
+ "cached-property>=1.3.0; python_version < \"3.8\"",
+]
files = [
{file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"},
{file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"},
@@ -656,6 +701,9 @@ version = "0.14.0"
requires_python = ">=3.7"
summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
groups = ["notebooks"]
+dependencies = [
+ "typing-extensions; python_version < \"3.8\"",
+]
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
@@ -773,6 +821,7 @@ requires_python = ">=3.8"
summary = "Read metadata from Python packages"
groups = ["default"]
dependencies = [
+ "typing-extensions>=3.6.4; python_version < \"3.8\"",
"zipp>=0.5",
]
files = [
@@ -829,6 +878,7 @@ groups = ["notebooks"]
dependencies = [
"colorama; sys_platform == \"win32\"",
"decorator",
+ "exceptiongroup; python_version < \"3.11\"",
"jedi>=0.16",
"matplotlib-inline",
"pexpect>4.3; sys_platform != \"win32\" and sys_platform != \"emscripten\"",
@@ -955,7 +1005,9 @@ summary = "An implementation of JSON Schema validation for Python"
groups = ["notebooks"]
dependencies = [
"attrs>=22.2.0",
+ "importlib-resources>=1.4.0; python_version < \"3.9\"",
"jsonschema-specifications>=2023.03.6",
+ "pkgutil-resolve-name>=1.3.10; python_version < \"3.9\"",
"referencing>=0.28.4",
"rpds-py>=0.7.1",
]
@@ -971,6 +1023,7 @@ requires_python = ">=3.8"
summary = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
groups = ["notebooks"]
dependencies = [
+ "importlib-resources>=1.4.0; python_version < \"3.9\"",
"referencing>=0.31.0",
]
files = [
@@ -1008,6 +1061,7 @@ requires_python = ">=3.8"
summary = "Jupyter protocol implementation and client libraries"
groups = ["notebooks"]
dependencies = [
+ "importlib-metadata>=4.8.3; python_version < \"3.10\"",
"jupyter-core!=5.0.*,>=4.12",
"python-dateutil>=2.8.2",
"pyzmq>=23.0",
@@ -1062,6 +1116,7 @@ requires_python = ">=3.8"
summary = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server"
groups = ["notebooks"]
dependencies = [
+ "importlib-metadata>=4.8.3; python_version < \"3.10\"",
"jupyter-server>=1.1.2",
]
files = [
@@ -1125,6 +1180,8 @@ groups = ["notebooks"]
dependencies = [
"async-lru>=1.0.0",
"httpx>=0.25.0",
+ "importlib-metadata>=4.8.3; python_version < \"3.10\"",
+ "importlib-resources>=1.4; python_version < \"3.9\"",
"ipykernel>=6.5.0",
"jinja2>=3.0.3",
"jupyter-core",
@@ -1134,6 +1191,7 @@ dependencies = [
"notebook-shim>=0.2",
"packaging",
"setuptools>=40.1.0",
+ "tomli>=1.2.2; python_version < \"3.11\"",
"tornado>=6.2.0",
"traitlets",
]
@@ -1161,6 +1219,7 @@ summary = "A set of server components for JupyterLab and JupyterLab like applica
groups = ["notebooks"]
dependencies = [
"babel>=2.10",
+ "importlib-metadata>=4.8.3; python_version < \"3.10\"",
"jinja2>=3.0.3",
"json5>=0.9.0",
"jsonschema>=4.18.0",
@@ -1184,6 +1243,23 @@ files = [
{file = "jupyterlab_widgets-3.0.11.tar.gz", hash = "sha256:dd5ac679593c969af29c9bed054c24f26842baa51352114736756bc035deee27"},
]
+[[package]]
+name = "kaleido"
+version = "0.2.1"
+summary = "Static image export for web-based visualization libraries with zero dependencies"
+groups = ["default"]
+dependencies = [
+ "pathlib; python_version < \"3.4\"",
+]
+files = [
+ {file = "kaleido-0.2.1-py2.py3-none-macosx_10_11_x86_64.whl", hash = "sha256:ca6f73e7ff00aaebf2843f73f1d3bacde1930ef5041093fe76b83a15785049a7"},
+ {file = "kaleido-0.2.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bb9a5d1f710357d5d432ee240ef6658a6d124c3e610935817b4b42da9c787c05"},
+ {file = "kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:aa21cf1bf1c78f8fa50a9f7d45e1003c387bd3d6fe0a767cfbbf344b95bdc3a8"},
+ {file = "kaleido-0.2.1-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:845819844c8082c9469d9c17e42621fbf85c2b237ef8a86ec8a8527f98b6512a"},
+ {file = "kaleido-0.2.1-py2.py3-none-win32.whl", hash = "sha256:ecc72635860be616c6b7161807a65c0dbd9b90c6437ac96965831e2e24066552"},
+ {file = "kaleido-0.2.1-py2.py3-none-win_amd64.whl", hash = "sha256:4670985f28913c2d063c5734d125ecc28e40810141bdb0a46f15b76c1d45f23c"},
+]
+
[[package]]
name = "langcodes"
version = "3.4.0"
@@ -1399,6 +1475,7 @@ dependencies = [
"beautifulsoup4",
"bleach!=5.0.0",
"defusedxml",
+ "importlib-metadata>=3.6; python_version < \"3.10\"",
"jinja2>=3.0",
"jupyter-core>=4.7",
"jupyterlab-pygments",
@@ -1653,12 +1730,29 @@ files = [
{file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"},
]
+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+requires_python = ">=3.8"
+summary = "A Python library to read/write Excel 2010 xlsx/xlsm files"
+groups = ["dev"]
+dependencies = [
+ "et-xmlfile",
+]
+files = [
+ {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"},
+ {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"},
+]
+
[[package]]
name = "overrides"
version = "7.7.0"
requires_python = ">=3.6"
summary = "A decorator to automatically detect mismatch when overriding a method."
groups = ["notebooks"]
+dependencies = [
+ "typing; python_version < \"3.5\"",
+]
files = [
{file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"},
{file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"},
@@ -1682,6 +1776,7 @@ requires_python = ">=3.9"
summary = "Powerful data structures for data analysis, time series, and statistics"
groups = ["default"]
dependencies = [
+ "numpy>=1.22.4; python_version < \"3.11\"",
"numpy>=1.23.2; python_version == \"3.11\"",
"numpy>=1.26.0; python_version >= \"3.12\"",
"python-dateutil>=2.8.2",
@@ -2562,6 +2657,7 @@ dependencies = [
"jinja2",
"langcodes<4.0.0,>=3.2.0",
"murmurhash<1.1.0,>=0.28.0",
+ "numpy>=1.15.0; python_version < \"3.9\"",
"numpy>=1.19.0; python_version >= \"3.9\"",
"packaging>=20.0",
"preshed<3.1.0,>=3.0.2",
@@ -2654,6 +2750,8 @@ requires_python = ">=3.7"
summary = "spaCy pipelines for pre-trained BERT and other transformers"
groups = ["default"]
dependencies = [
+ "dataclasses<1.0,>=0.6; python_version < \"3.7\"",
+ "numpy>=1.15.0; python_version < \"3.9\"",
"numpy>=1.19.0; python_version >= \"3.9\"",
"spacy-alignments<1.0.0,>=0.7.2",
"spacy<4.1.0,>=3.5.0",
@@ -2812,14 +2910,18 @@ dependencies = [
"blis<0.8.0,>=0.7.8",
"catalogue<2.1.0,>=2.0.4",
"confection<1.0.0,>=0.0.1",
+ "contextvars<3,>=2.4; python_version < \"3.7\"",
"cymem<2.1.0,>=2.0.2",
+ "dataclasses<1.0,>=0.6; python_version < \"3.7\"",
"murmurhash<1.1.0,>=1.0.2",
+ "numpy>=1.15.0; python_version < \"3.9\"",
"numpy>=1.19.0; python_version >= \"3.9\"",
"packaging>=20.0",
"preshed<3.1.0,>=3.0.2",
"pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4",
"setuptools",
"srsly<3.0.0,>=2.4.0",
+ "typing-extensions<4.5.0,>=3.7.4.1; python_version < \"3.8\"",
"wasabi<1.2.0,>=0.8.1",
]
files = [
@@ -3133,6 +3235,7 @@ summary = "A lightweight console printing and formatting toolkit"
groups = ["default"]
dependencies = [
"colorama>=0.4.6; sys_platform == \"win32\" and python_version >= \"3.7\"",
+ "typing-extensions<4.5.0,>=3.7.4.1; python_version < \"3.8\"",
]
files = [
{file = "wasabi-1.1.2-py3-none-any.whl", hash = "sha256:0a3f933c4bf0ed3f93071132c1b87549733256d6c8de6473c5f7ed2e171b5cf9"},
@@ -3144,6 +3247,9 @@ name = "wcwidth"
version = "0.2.13"
summary = "Measures the displayed width of unicode strings in a terminal"
groups = ["notebooks"]
+dependencies = [
+ "backports-functools-lru-cache>=1.2.1; python_version < \"3.2\"",
+]
files = [
{file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
{file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
diff --git a/pyproject.toml b/pyproject.toml
index 27cd55f..5a7bb6d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
"dash>=2.17.0",
"dash-cytoscape>=1.0.1",
"py4cytoscape>=1.9.0",
+ "kaleido==0.2.1",
]
requires-python = ">=3.11"
readme = "README.md"
@@ -44,6 +45,7 @@ trials = [
]
dev = [
"cython>=3.0.10",
+ "openpyxl>=3.1.5",
]
[tool.ruff]
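
The `kaleido==0.2.1` pin backs the new chart downloads in `scripts/dash_timeline_static.py`: plotly's `write_image` delegates static SVG rendering to kaleido, and the exact pin presumably guards against incompatible newer builds. A minimal sketch (figure and output path hypothetical):

    import plotly.express as px

    fig = px.scatter(x=[1, 2, 3], y=[3, 1, 2])
    # plotly picks up the installed kaleido package automatically
    fig.write_image('chart.svg')
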
diff --git a/scripts/dash_timeline_static.py b/scripts/dash_timeline_static.py
index 6a04ae6..5dc2372 100644
--- a/scripts/dash_timeline_static.py
+++ b/scripts/dash_timeline_static.py
@@ -3,11 +3,11 @@ import webbrowser
from collections.abc import Collection, Iterable
from threading import Thread
from typing import Any, Final, cast
-
-import pandas as pd
+from pathlib import Path
# import dash_cytoscape as cyto
import plotly.express as px
+import plotly.io
from dash import (
Dash,
Input,
@@ -22,16 +22,17 @@ from pandas import DataFrame
from plotly.graph_objects import Figure
import lang_main.io
+from lang_main import model_loader as m_load
from lang_main.analysis import graphs, tokens
from lang_main.analysis.timeline import (
calc_delta_to_next_failure,
filter_timeline_cands,
)
from lang_main.constants import (
+ MODEL_LOADER_MAP,
NAME_DELTA_FEAT_TO_NEXT_FAILURE,
NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER,
- SPCY_MODEL,
)
from lang_main.errors import EmptyEdgesError, EmptyGraphError
from lang_main.pipelines.predefined import (
@@ -43,10 +44,16 @@ from lang_main.types import (
EntryPoints,
HTMLColumns,
HTMLTable,
+ LanguageModels,
ObjectID,
TimelineCandidates,
)
+# ** model
+SPACY_MODEL = m_load.instantiate_model(
+ model_load_map=MODEL_LOADER_MAP,
+ model=LanguageModels.SPACY,
+)
# ** data
# p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
@@ -62,17 +69,42 @@ rescaling_pipe = build_tk_graph_rescaling_pipe(
exit_point=EntryPoints.TIMELINE_TK_GRAPH_RESCALED,
save_result=False,
)
-BASE_NETWORK_NAME: Final[str] = 'test_timeline'
+BASE_NETWORK_NAME: Final[str] = 'timeline_candidates'
# RENDER_FOLDER: Final[Path] = Path.cwd() / 'assets/'
graph_render_pipe = build_tk_graph_render_pipe(
with_subgraphs=False,
base_network_name=BASE_NETWORK_NAME,
)
# PTH_RENDERED_GRAPH = f'assets/{BASE_NETWORK_NAME}.svg'
+PTH_RENDERED_TIMELINE = lang_main.io.get_entry_point(
+ SAVE_PATH_FOLDER,
+ 'chart_timeline',
+ file_ext='.svg',
+ check_existence=False,
+)
+PTH_TABLE_TIMELINE = lang_main.io.get_entry_point(
+ SAVE_PATH_FOLDER,
+ 'table_timeline',
+ file_ext='.xlsx',
+ check_existence=False,
+)
+PTH_RENDERED_DELTA_REPAIR = lang_main.io.get_entry_point(
+ SAVE_PATH_FOLDER,
+ 'chart_delta_repair',
+ file_ext='.svg',
+ check_existence=False,
+)
+PTH_TABLE_DELTA_REPAIR = lang_main.io.get_entry_point(
+ SAVE_PATH_FOLDER,
+ 'table_delta_repair',
+ file_ext='.xlsx',
+ check_existence=False,
+)
PTH_RENDERED_GRAPH = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
BASE_NETWORK_NAME,
file_ext='.svg',
+ check_existence=False,
)
# NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
@@ -168,7 +200,7 @@ graph_layout = html.Div(
id='static-graph-img',
alt='static rendered graph',
style={
- 'width': 'auto',
+ 'width': '900px',
'height': 'auto',
},
),
@@ -212,7 +244,27 @@ app.layout = html.Div(
children=[
html.H3(id='object-text'),
dcc.Dropdown(id='selector-candidates'),
+ html.Button(
+ 'Download Diagramm',
+ id='bt-dl-timeline',
+ style={
+ 'marginLeft': 'auto',
+ 'width': '300px',
+ 'marginTop': '1em',
+ },
+ ),
+ dcc.Download(id='dl-timeline'),
dcc.Graph(id='figure-occurrences'),
+ html.Button(
+ 'Download Diagramm',
+ id='bt-dl-deltarepair',
+ style={
+ 'marginLeft': 'auto',
+ 'width': '300px',
+ 'marginTop': '1em',
+ },
+ ),
+ dcc.Download(id='dl-deltarepair'),
dcc.Graph(id='figure-delta'),
]
),
@@ -221,6 +273,16 @@ app.layout = html.Div(
html.Div(
[
html.H5('Überblick ähnlicher Vorgänge'),
+ dcc.Download(id='dl-table-timeline'),
+ html.Button(
+ 'Download Table',
+ id='bt-table-timeline',
+ style={
+ 'marginLeft': 'auto',
+ 'width': '300px',
+ 'marginTop': '1em',
+ },
+ ),
dash_table.DataTable(id='table-candidates'),
],
style={'paddingBottom': '1em'},
@@ -233,6 +295,16 @@ app.layout = html.Div(
'bis zum nächsten Ereignis'
)
),
+ dcc.Download(id='dl-table-deltarepair'),
+ html.Button(
+ 'Download Table',
+ id='bt-table-deltarepair',
+ style={
+ 'marginLeft': 'auto',
+ 'width': '300px',
+ 'marginTop': '1em',
+ },
+ ),
dash_table.DataTable(id='table-best-actions'),
]
),
@@ -368,6 +440,7 @@ def transform_to_HTML_table(
date_cols: Iterable[str] | None = None,
sorting_feature: str | None = None,
sorting_ascending: bool = True,
+ save_path: Path | None = None,
) -> tuple[HTMLColumns, HTMLTable]:
target_features = list(target_features)
data = data.copy()
@@ -383,6 +456,9 @@ def transform_to_HTML_table(
columns = [{'name': col, 'id': col} for col in data.columns]
table_data = data.to_dict('records')
+ if save_path is not None:
+ data.to_excel(save_path)
+
return columns, table_data
@@ -410,6 +486,7 @@ def update_tables_candidates(
date_cols=TABLE_FEATS_DATES,
sorting_feature='ErstellungsDatum',
sorting_ascending=True,
+ save_path=PTH_TABLE_TIMELINE,
)
# df = df.filter(items=TABLE_FEATS_OVERVIEW, axis=1).sort_values(
# by='ErstellungsDatum', ascending=True
@@ -430,6 +507,7 @@ def update_tables_candidates(
data=cands_best_actions,
target_features=TABLE_FEATS_BEST_ACTIONS,
date_cols=TABLE_FEATS_DATES,
+ save_path=PTH_TABLE_DELTA_REPAIR,
)
return overview_cols, overview_table, best_actions_cols, best_actions_table
@@ -457,7 +535,7 @@ def display_candidates_as_graph(index, obj_id):
t1 = time.perf_counter()
tk_graph_cands, _ = tokens.build_token_graph(
data=df,
- model=SPCY_MODEL,
+ model=SPACY_MODEL,
target_feature='VorgangsBeschreibung',
build_map=False,
logging_graph=False,
@@ -496,10 +574,58 @@ def display_candidates_as_graph(index, obj_id):
Input('bt-reset', 'n_clicks'),
prevent_initial_call=True,
)
-def func(n_clicks):
+def download_graph(_):
return dcc.send_file(path=PTH_RENDERED_GRAPH)
+@callback(
+ Output('dl-timeline', 'data'),
+ Input('bt-dl-timeline', 'n_clicks'),
+ State('figure-occurrences', 'figure'),
+ prevent_initial_call=True,
+)
+def download_timeline(_, fig: dict):
+    # drop the rangeslider's stale yaxis entry (if present); it breaks rebuilding the figure for static export
+    if 'rangeslider' in fig['layout']['xaxis']:
+        fig['layout']['xaxis']['rangeslider'].pop('yaxis', None)
+ figure = Figure(fig)
+ figure.write_image(PTH_RENDERED_TIMELINE)
+ return dcc.send_file(path=PTH_RENDERED_TIMELINE)
+
+
+@callback(
+ Output('dl-deltarepair', 'data'),
+ Input('bt-dl-deltarepair', 'n_clicks'),
+ State('figure-delta', 'figure'),
+ prevent_initial_call=True,
+)
+def download_delta_repair(_, fig: dict):
+    # drop the rangeslider's stale yaxis entry (if present); it breaks rebuilding the figure for static export
+    if 'rangeslider' in fig['layout']['xaxis']:
+        fig['layout']['xaxis']['rangeslider'].pop('yaxis', None)
+ figure = Figure(fig)
+ figure.write_image(PTH_RENDERED_DELTA_REPAIR)
+ return dcc.send_file(path=PTH_RENDERED_DELTA_REPAIR)
+
+
+@callback(
+ Output('dl-table-timeline', 'data'),
+ Input('bt-table-timeline', 'n_clicks'),
+ prevent_initial_call=True,
+)
+def download_table_timeline(_):
+ return dcc.send_file(path=PTH_TABLE_TIMELINE)
+
+
+@callback(
+ Output('dl-table-deltarepair', 'data'),
+ Input('bt-table-deltarepair', 'n_clicks'),
+ prevent_initial_call=True,
+)
+def download_table_delta_repair(_):
+ return dcc.send_file(path=PTH_TABLE_DELTA_REPAIR)
+
+
def _start_webbrowser():
host = '127.0.0.1'
port = '8050'
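
The two figure-download callbacks rebuild a plotly `Figure` from the raw `dict` Dash hands to the callback; serialized figures that carry a rangeslider can include a `yaxis` entry that breaks that round trip, hence the defensive pop before reconstruction. The pattern in isolation (helper name hypothetical, `fig` a figure dict as delivered by `dcc.Graph`):

    from plotly.graph_objects import Figure

    def figure_dict_to_svg(fig: dict, path: str) -> None:
        # remove the rangeslider's yaxis entry, if present, before rebuilding
        fig['layout']['xaxis'].get('rangeslider', {}).pop('yaxis', None)
        Figure(fig).write_image(path)
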
diff --git a/scripts/lang_main_config.toml b/scripts/lang_main_config.toml
index 14699c6..77bd396 100644
--- a/scripts/lang_main_config.toml
+++ b/scripts/lang_main_config.toml
@@ -2,8 +2,10 @@
[paths]
inputs = './inputs/'
-results = './results/test_20240619/'
-dataset = '../data/02_202307/Export4.csv'
+results = './results/dummy_N_1000/'
+dataset = '../data/Dummy_Dataset_N_1000.csv'
+# results = './results/test_20240807/'
+# dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
@@ -12,12 +14,12 @@ dataset = '../data/02_202307/Export4.csv'
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
-preprocessing_skip = true
-token_analysis_skip = true
-graph_postprocessing_skip = true
-graph_rescaling_skip = true
+preprocessing_skip = false
+token_analysis_skip = false
+graph_postprocessing_skip = false
+graph_rescaling_skip = false
graph_static_rendering_skip = false
-time_analysis_skip = true
+time_analysis_skip = false
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
@@ -34,7 +36,7 @@ threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
-threshold_edge_weight = 150
+threshold_edge_weight = 1
[time_analysis.uniqueness]
threshold_unique_texts = 4
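
Flipping the `[control]` skip flags to `false` forces every pipeline stage to run end-to-end against the generated dummy dataset instead of resuming from cached entry points. Since the file is plain TOML, the active flags can be checked with the stdlib parser (a sketch; `lang_main.io` ships its own loader):

    import tomllib

    with open('scripts/lang_main_config.toml', 'rb') as f:
        config = tomllib.load(f)
    assert config['control']['preprocessing_skip'] is False
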
diff --git a/src/lang_main/__init__.py b/src/lang_main/__init__.py
index 6d1d346..f10c95c 100644
--- a/src/lang_main/__init__.py
+++ b/src/lang_main/__init__.py
@@ -18,7 +18,7 @@ p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
# ** lang-main config
logging.Formatter.converter = gmtime
-LOG_FMT: Final[str] = '%(asctime)s | %(module)s:%(levelname)s | %(message)s'
+LOG_FMT: Final[str] = '%(asctime)s | lang_main:%(module)s:%(levelname)s | %(message)s'
LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
logging.basicConfig(
stream=sys.stdout,
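
Prefixing the module name with `lang_main:` makes the package's own records distinguishable from third-party loggers (py4cytoscape in particular) on the shared stdout stream. A record from `lang_main.io` now renders as

    2024-08-07 07:36:54 +0000 | lang_main:io:INFO | Loaded TOML config file successfully.

instead of the bare `io:INFO` prefix seen in the earlier notebook outputs.
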
diff --git a/src/lang_main/analysis/preprocessing.py b/src/lang_main/analysis/preprocessing.py
index 9130bb1..b5a3eea 100644
--- a/src/lang_main/analysis/preprocessing.py
+++ b/src/lang_main/analysis/preprocessing.py
@@ -70,7 +70,7 @@ def load_raw_data(
filepath_or_buffer=path,
sep=';',
encoding='cp1252',
- parse_dates=date_cols,
+ parse_dates=list(date_cols),
dayfirst=True,
)
logger.info('Loaded dataset successfully.')
@@ -278,7 +278,8 @@ def merge_similarity_dupl(
return (merged_data,)
-#####################################################################
+# ** #################################################################################
+# TODO check removal
def build_embedding_map(
data: Series,
model: GermanSpacyModel | SentenceTransformer,
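
The explicit `list(date_cols)` hands pandas a concrete list for its list-typed `parse_dates` parameter (and satisfies the type checker) regardless of which iterable the caller passes. A condensed sketch with hypothetical column names:

    import pandas as pd

    DATE_COLS = ('ErstellungsDatum', 'ErledigungsDatum')  # hypothetical tuple

    df = pd.read_csv(
        'dataset.csv',
        sep=';',
        encoding='cp1252',
        parse_dates=list(DATE_COLS),  # concrete list, not an arbitrary iterable
        dayfirst=True,
    )
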
diff --git a/src/lang_main/analysis/timeline.py b/src/lang_main/analysis/timeline.py
index 4e339a0..9339a7d 100644
--- a/src/lang_main/analysis/timeline.py
+++ b/src/lang_main/analysis/timeline.py
@@ -8,10 +8,13 @@ from tqdm.auto import tqdm # TODO: check deletion
from lang_main.analysis.shared import (
candidates_by_index,
entry_wise_cleansing,
- pattern_escape_seq_sentences,
similar_index_connection_graph,
similar_index_groups,
)
+from lang_main.constants import (
+ NAME_DELTA_FEAT_TO_NEXT_FAILURE,
+ NAME_DELTA_FEAT_TO_REPAIR,
+)
from lang_main.loggers import logger_timeline as logger
from lang_main.types import (
DataFrameTLFiltered,
@@ -94,7 +97,7 @@ def calc_delta_to_repair(
data: DataFrame,
date_feature_start: str = 'ErstellungsDatum',
date_feature_end: str = 'ErledigungsDatum',
- name_delta_feature: str = 'delta_to_repair',
+ name_delta_feature: str = NAME_DELTA_FEAT_TO_REPAIR,
convert_to_days: bool = True,
) -> tuple[DataFrame]:
logger.info('Calculating time differences between start and end of operations...')
@@ -316,7 +319,7 @@ def filter_timeline_cands(
def calc_delta_to_next_failure(
data: DataFrameTLFiltered,
date_feature: str = 'ErstellungsDatum',
- name_delta_feature: str = 'delta_to_next_failure',
+ name_delta_feature: str = NAME_DELTA_FEAT_TO_NEXT_FAILURE,
convert_to_days: bool = True,
) -> DataFrameTLFiltered:
data = data.copy()
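
Sourcing the delta-feature names from `lang_main.constants` keeps the analysis output and the Dash tables/charts pointing at the same column labels instead of letting two hard-coded strings drift apart. A usage sketch (input columns assumed):

    from pandas import DataFrame, Timestamp
    from lang_main.analysis.timeline import calc_delta_to_repair

    df = DataFrame({
        'ErstellungsDatum': [Timestamp('2024-07-01')],
        'ErledigungsDatum': [Timestamp('2024-07-04')],
    })
    (result,) = calc_delta_to_repair(df)  # single-element tuple, pipeline convention
    # result gains the NAME_DELTA_FEAT_TO_REPAIR column, here 3 (days)
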
diff --git a/src/lang_main/analysis/tokens.py b/src/lang_main/analysis/tokens.py
index aaf0534..7e6dedf 100644
--- a/src/lang_main/analysis/tokens.py
+++ b/src/lang_main/analysis/tokens.py
@@ -5,9 +5,6 @@ from typing import Literal, cast, overload
from dateutil.parser import parse
from pandas import DataFrame
-from spacy.language import Language as GermanSpacyModel
-from spacy.tokens.doc import Doc as SpacyDoc
-from spacy.tokens.token import Token as SpacyToken
from tqdm.auto import tqdm
from lang_main.analysis.graphs import (
@@ -15,7 +12,12 @@ from lang_main.analysis.graphs import (
update_graph,
)
from lang_main.loggers import logger_token_analysis as logger
-from lang_main.types import PandasIndex
+from lang_main.types import (
+ PandasIndex,
+ SpacyDoc,
+ SpacyModel,
+ SpacyToken,
+)
# ** POS
# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'ADJ', 'VERB', 'AUX'])
@@ -147,7 +149,7 @@ def add_doc_info_to_graph(
@overload
def build_token_graph(
data: DataFrame,
- model: GermanSpacyModel,
+ model: SpacyModel,
*,
target_feature: str = ...,
weights_feature: str | None = ...,
@@ -161,7 +163,7 @@ def build_token_graph(
@overload
def build_token_graph(
data: DataFrame,
- model: GermanSpacyModel,
+ model: SpacyModel,
*,
target_feature: str = ...,
weights_feature: str | None = ...,
@@ -174,7 +176,7 @@ def build_token_graph(
def build_token_graph(
data: DataFrame,
- model: GermanSpacyModel,
+ model: SpacyModel,
*,
target_feature: str = 'entry',
weights_feature: str | None = None,
@@ -233,7 +235,7 @@ def build_token_graph(
def build_token_graph_simple(
data: DataFrame,
- model: GermanSpacyModel,
+ model: SpacyModel,
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
graph = TokenGraph()
model_input = cast(tuple[str], tuple(data['entry'].to_list()))
@@ -264,7 +266,7 @@ def build_token_graph_simple(
def build_token_graph_old(
data: DataFrame,
- model: GermanSpacyModel,
+ model: SpacyModel,
) -> tuple[TokenGraph]:
# empty NetworkX directed graph
# graph = nx.DiGraph()
diff --git a/src/lang_main/constants.py b/src/lang_main/constants.py
index 9d0e52b..88a789c 100644
--- a/src/lang_main/constants.py
+++ b/src/lang_main/constants.py
@@ -1,12 +1,19 @@
from pathlib import Path
from typing import Final
-import spacy
-from sentence_transformers import SentenceTransformer
-from spacy.language import Language as GermanSpacyModel
-
+# TODO check removal
+# import spacy
+# from sentence_transformers import SentenceTransformer
+# from spacy.language import Language as GermanSpacyModel
from lang_main import CONFIG, CYTO_PATH_STYLESHEET
-from lang_main.types import CytoLayoutProperties, CytoLayouts, STFRDeviceTypes
+from lang_main import model_loader as m_load
+from lang_main.types import (
+ CytoLayoutProperties,
+ CytoLayouts,
+ LanguageModels,
+ ModelLoaderMap,
+ STFRDeviceTypes,
+)
__all__ = [
'CONFIG',
@@ -38,14 +45,33 @@ SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
# ** models
-# ** sentence_transformers
+# ** loading
+SPACY_MODEL_NAME: Final[str] = 'de_dep_news_trf'
+STFR_MODEL_NAME: Final[str] = 'sentence-transformers/all-mpnet-base-v2'
STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU
-STFR_MODEL: Final[SentenceTransformer] = SentenceTransformer(
- 'sentence-transformers/all-mpnet-base-v2', device=STFR_DEVICE
-)
+MODEL_LOADER_MAP: Final[ModelLoaderMap] = {
+ LanguageModels.SENTENCE_TRANSFORMER: {
+ 'func': m_load.load_sentence_transformer,
+ 'kwargs': {
+ 'model_name': STFR_MODEL_NAME,
+ 'device': STFR_DEVICE,
+ },
+ },
+ LanguageModels.SPACY: {
+ 'func': m_load.load_spacy,
+ 'kwargs': {
+ 'model_name': SPACY_MODEL_NAME,
+ },
+ },
+}
+# ** sentence_transformers
+
+# STFR_MODEL: Final[SentenceTransformer] = SentenceTransformer(
+# 'sentence-transformers/all-mpnet-base-v2', device=STFR_DEVICE
+# )
# ** spacy
-SPCY_MODEL: Final[GermanSpacyModel] = spacy.load('de_dep_news_trf')
+# SPCY_MODEL: Final[GermanSpacyModel] = spacy.load('de_dep_news_trf')
# ** export
# ** preprocessing
@@ -82,6 +108,7 @@ CYTO_STYLESHEET_NAME: Final[str] = 'lang_main'
CYTO_SELECTION_PROPERTY: Final[str] = 'node_selection'
CYTO_NUMBER_SUBGRAPHS: Final[int] = 5
CYTO_ITER_NEIGHBOUR_DEPTH: Final[int] = 2
+CYTO_NETWORK_ZOOM_FACTOR: Final[float] = 0.96
# ** time_analysis.uniqueness
THRESHOLD_UNIQUE_TEXTS: Final[int] = CONFIG['time_analysis']['uniqueness'][
diff --git a/src/lang_main/cytoscape_config/template_test.cys b/src/lang_main/cytoscape_config/template_test.cys
index d245d1e..8cb5259 100644
Binary files a/src/lang_main/cytoscape_config/template_test.cys and b/src/lang_main/cytoscape_config/template_test.cys differ
diff --git a/src/lang_main/io.py b/src/lang_main/io.py
index 21322cd..402323e 100644
--- a/src/lang_main/io.py
+++ b/src/lang_main/io.py
@@ -93,9 +93,10 @@ def get_entry_point(
saving_path: Path,
filename: str,
file_ext: str = '.pkl',
+ check_existence: bool = True,
) -> Path:
entry_point_path = (saving_path / filename).with_suffix(file_ext)
- if not entry_point_path.exists():
+ if check_existence and not entry_point_path.exists():
raise FileNotFoundError(
f'Could not find provided entry data under path: >>{entry_point_path}<<'
)
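
`check_existence=False` lets `get_entry_point` assemble paths for artifacts that are about to be written (the SVG charts and XLSX tables above), while input entry points keep the hard existence check. A sketch with a hypothetical results folder:

    from pathlib import Path
    from lang_main import io

    save_folder = Path('./results/dummy_N_1000/')  # hypothetical

    # input: must already exist, otherwise FileNotFoundError
    timeline = io.get_entry_point(save_folder, 'TIMELINE')

    # output: created later, so skip the check
    chart = io.get_entry_point(
        save_folder, 'chart_timeline', file_ext='.svg', check_existence=False
    )
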
diff --git a/src/lang_main/model_loader.py b/src/lang_main/model_loader.py
new file mode 100644
index 0000000..fcac638
--- /dev/null
+++ b/src/lang_main/model_loader.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from typing import Literal, overload
+
+import spacy
+from sentence_transformers import SentenceTransformer
+
+from lang_main.types import (
+ LanguageModels,
+ Model,
+ ModelLoaderMap,
+ SpacyModel,
+ STFRDeviceTypes,
+)
+
+
+@overload
+def instantiate_model(
+ model_load_map: ModelLoaderMap,
+ model: Literal[LanguageModels.SENTENCE_TRANSFORMER],
+) -> SentenceTransformer: ...
+
+
+@overload
+def instantiate_model(
+ model_load_map: ModelLoaderMap,
+ model: Literal[LanguageModels.SPACY],
+) -> SpacyModel: ...
+
+
+def instantiate_model(
+ model_load_map: ModelLoaderMap,
+ model: LanguageModels,
+) -> Model:
+ if model not in model_load_map:
+        raise KeyError(f'Model >>{model}<< not known. Choose from: {list(model_load_map)}')
+ builder_func = model_load_map[model]['func']
+ func_kwargs = model_load_map[model]['kwargs']
+
+ return builder_func(**func_kwargs)
+
+
+def load_spacy(
+ model_name: str,
+) -> SpacyModel:
+ return spacy.load(model_name)
+
+
+def load_sentence_transformer(
+ model_name: str,
+ device: STFRDeviceTypes,
+) -> SentenceTransformer:
+ return SentenceTransformer(model_name_or_path=model_name, device=device)
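
`model_loader` centralises model construction behind `MODEL_LOADER_MAP`, so the heavyweight spaCy and sentence-transformers models are instantiated only where a pipeline actually needs them rather than at import of `constants`. The `Literal` overloads give call sites the concrete return type:

    from lang_main import model_loader as m_load
    from lang_main.constants import MODEL_LOADER_MAP
    from lang_main.types import LanguageModels

    # statically resolved to SpacyModel via the overload
    nlp = m_load.instantiate_model(MODEL_LOADER_MAP, LanguageModels.SPACY)
    doc = nlp('Fräsmaschine meldet eine Störung.')  # hypothetical sample text
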
diff --git a/src/lang_main/pipelines/predefined.py b/src/lang_main/pipelines/predefined.py
index f4d404f..c074ebe 100644
--- a/src/lang_main/pipelines/predefined.py
+++ b/src/lang_main/pipelines/predefined.py
@@ -1,5 +1,6 @@
from pathlib import Path
+from lang_main import model_loader as m_load
from lang_main.analysis import graphs
from lang_main.analysis.preprocessing import (
analyse_feature,
@@ -29,10 +30,9 @@ from lang_main.constants import (
DATE_COLS,
FEATURE_NAME_OBJ_ID,
MODEL_INPUT_FEATURES,
+ MODEL_LOADER_MAP,
NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER,
- SPCY_MODEL,
- STFR_MODEL,
THRESHOLD_AMOUNT_CHARACTERS,
THRESHOLD_EDGE_WEIGHT,
THRESHOLD_NUM_ACTIVITIES,
@@ -43,7 +43,18 @@ from lang_main.constants import (
)
from lang_main.pipelines.base import Pipeline
from lang_main.render import cytoscape as cyto
-from lang_main.types import EntryPoints
+from lang_main.types import EntryPoints, LanguageModels
+
+# ** Models
+STFR_MODEL = m_load.instantiate_model(
+ model_load_map=MODEL_LOADER_MAP,
+ model=LanguageModels.SENTENCE_TRANSFORMER,
+)
+
+SPACY_MODEL = m_load.instantiate_model(
+ model_load_map=MODEL_LOADER_MAP,
+ model=LanguageModels.SPACY,
+)
# ** pipeline configuration
@@ -61,7 +72,7 @@ def build_base_target_feature_pipe() -> Pipeline:
pipe_target_feat.add(
entry_wise_cleansing,
{
- 'target_feature': ('VorgangsBeschreibung',),
+ 'target_features': ('VorgangsBeschreibung',),
'cleansing_func': clean_string_slim,
},
save_result=True,
@@ -106,7 +117,6 @@ def build_base_target_feature_pipe() -> Pipeline:
# ** Merge duplicates
def build_merge_duplicates_pipe() -> Pipeline:
pipe_merge = Pipeline(name='Merge_Duplicates', working_dir=SAVE_PATH_FOLDER)
- # pipe_merge.add(merge_similarity_dupl, save_result=True)
pipe_merge.add(
numeric_pre_filter_feature,
{
@@ -134,7 +144,7 @@ def build_tk_graph_pipe() -> Pipeline:
pipe_token_analysis.add(
build_token_graph,
{
- 'model': SPCY_MODEL,
+ 'model': SPACY_MODEL,
'target_feature': 'entry',
'weights_feature': 'num_occur',
'batch_idx_feature': 'batched_idxs',
diff --git a/src/lang_main/render/cytoscape.py b/src/lang_main/render/cytoscape.py
index d5201a9..f6a83dd 100644
--- a/src/lang_main/render/cytoscape.py
+++ b/src/lang_main/render/cytoscape.py
@@ -14,6 +14,7 @@ from lang_main.constants import (
CYTO_ITER_NEIGHBOUR_DEPTH,
CYTO_LAYOUT_NAME,
CYTO_LAYOUT_PROPERTIES,
+ CYTO_NETWORK_ZOOM_FACTOR,
CYTO_NUMBER_SUBGRAPHS,
CYTO_PATH_STYLESHEET,
CYTO_SANDBOX_NAME,
@@ -125,6 +126,17 @@ def reset_current_network_to_base() -> None:
p4c.set_current_network(CYTO_BASE_NETWORK_NAME)
+def fit_content(
+ zoom_factor: float = CYTO_NETWORK_ZOOM_FACTOR,
+ network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> None:
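+    """Hide the Cytoscape panels, fit the whole network into the
+    current view and rescale the resulting zoom level by ``zoom_factor``."""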
+ p4c.hide_all_panels()
+ p4c.fit_content(selected_only=False, network=network_name)
+ zoom_current = p4c.get_network_zoom(network=network_name)
+ zoom_new = zoom_current * zoom_factor
+ p4c.set_network_zoom_bypass(zoom_new, bypass=False, network=network_name)
+
+
def export_network_to_image(
filename: str,
target_folder: Path = SAVE_PATH_FOLDER,
@@ -156,9 +168,10 @@ def export_network_to_image(
if filetype == 'SVG':
text_as_font = False
+    # hide unnecessary panels and fit the graph into the frame before image export
+ fit_content(network_name=network_name)
# image is generated in sandbox directory and transferred to target destination
# (preparation for remote instances of Cytoscape)
- # TODO close non-necessary windows before image display
p4c.export_image(
filename=filename,
type=filetype,
@@ -168,7 +181,6 @@ def export_network_to_image(
export_text_as_font=text_as_font,
page_size=pdf_export_page_size,
)
- # TODO change back to Cytoscape 3.10 and above
# TODO remove if Cytoscape >= 3.10.* is running in container
# p4c.export_image(
# filename=filename,
@@ -211,7 +223,7 @@ def layout_network(
logger.debug('Applying layout to network...')
p4c.set_layout_properties(layout_name, layout_properties)
p4c.layout_network(layout_name=layout_name, network=network_name)
- p4c.fit_content(selected_only=False, network=network_name)
+ fit_content(network_name=network_name)
logger.debug('Layout application to network successful.')
@@ -245,7 +257,7 @@ def apply_style_to_network(
"""
logger.debug('Applying style to network...')
styles_avail = cast(list[str], p4c.get_visual_style_names())
- if CYTO_STYLESHEET_NAME not in styles_avail:
+ if style_name not in styles_avail:
if not pth_to_stylesheet.exists():
# existence for standard path verified at import, but not for other
# provided paths
@@ -278,7 +290,7 @@ def apply_style_to_network(
node_size_property,
number_scheme=scheme,
mapping_type='c',
- style_name='lang_main',
+ style_name=style_name,
default_number=min_node_size,
)
p4c.set_node_size_mapping(**node_size_map)
@@ -289,7 +301,7 @@ def apply_style_to_network(
# p4c.set_node_size_bypass(nodes_SUID, new_sizes=min_node_size, network=network_name)
# p4c.set_visual_style(style_name, network=network_name)
# time.sleep(1) # if not waited image export could be without applied style
- p4c.fit_content(selected_only=False, network=network_name)
+ fit_content(network_name=network_name)
logger.debug('Style application to network successful.')
@@ -384,7 +396,7 @@ def make_subnetwork(
network=network_name,
)
p4c.set_current_network(subnetwork_name)
- p4c.fit_content(selected_only=False, network=subnetwork_name)
+
if export_image:
time.sleep(1)
export_network_to_image(
diff --git a/src/lang_main/types.py b/src/lang_main/types.py
index 3e7f21b..ebd5c60 100644
--- a/src/lang_main/types.py
+++ b/src/lang_main/types.py
@@ -1,5 +1,5 @@
import enum
-from collections.abc import Hashable
+from collections.abc import Callable, Hashable
from typing import (
Any,
Literal,
@@ -10,9 +10,20 @@ from typing import (
import numpy as np
from pandas import DataFrame
+from sentence_transformers import SentenceTransformer
+from spacy.language import Language as SpacyModel
from spacy.tokens.doc import Doc as SpacyDoc
+from spacy.tokens.token import Token as SpacyToken
from torch import Tensor
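+# re-export commonly used third-party types so downstream code can import them from here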
+__all__ = [
+ 'SentenceTransformer',
+ 'SpacyModel',
+ 'SpacyDoc',
+ 'SpacyToken',
+ 'Tensor',
+]
+
# ** logging
class LoggingLevels(enum.IntEnum):
@@ -23,6 +34,24 @@ class LoggingLevels(enum.IntEnum):
CRITICAL = 50
+# ** models
+class LanguageModels(enum.StrEnum):
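+    # enum.auto() assigns each member its lowercased name as string value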
+ SENTENCE_TRANSFORMER = enum.auto()
+ SPACY = enum.auto()
+
+
+Model: TypeAlias = SentenceTransformer | SpacyModel
+ModelLoaderFunc: TypeAlias = Callable[..., Model]
+
+
+class ModelLoaderInfo(TypedDict):
+ func: ModelLoaderFunc
+ kwargs: dict[str, Any]
+
+
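+# registry mapping each language model kind to its builder function and kwargs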
+ModelLoaderMap: TypeAlias = dict[LanguageModels, ModelLoaderInfo]
+
+
# ** devices
class STFRDeviceTypes(enum.StrEnum):
CPU = enum.auto()
diff --git a/test-notebooks/image.png b/test-notebooks/image.png
deleted file mode 100644
index c8863fd..0000000
Binary files a/test-notebooks/image.png and /dev/null differ