diff --git a/test-notebooks/Analyse_5-1_Timeline.ipynb b/notebooks/Analyse_5-1_Timeline.ipynb similarity index 100% rename from test-notebooks/Analyse_5-1_Timeline.ipynb rename to notebooks/Analyse_5-1_Timeline.ipynb diff --git a/test-notebooks/Preprocess_Pipeline.ipynb b/notebooks/Preprocess_Pipeline.ipynb similarity index 100% rename from test-notebooks/Preprocess_Pipeline.ipynb rename to notebooks/Preprocess_Pipeline.ipynb diff --git a/test-notebooks/Token_Analysis.ipynb b/notebooks/Token_Analysis.ipynb similarity index 100% rename from test-notebooks/Token_Analysis.ipynb rename to notebooks/Token_Analysis.ipynb diff --git a/test-notebooks/archive/Analyse.ipynb b/notebooks/archive/Analyse.ipynb similarity index 100% rename from test-notebooks/archive/Analyse.ipynb rename to notebooks/archive/Analyse.ipynb diff --git a/test-notebooks/archive/Analyse_2-2.ipynb b/notebooks/archive/Analyse_2-2.ipynb similarity index 100% rename from test-notebooks/archive/Analyse_2-2.ipynb rename to notebooks/archive/Analyse_2-2.ipynb diff --git a/test-notebooks/archive/Analyse_2.ipynb b/notebooks/archive/Analyse_2.ipynb similarity index 100% rename from test-notebooks/archive/Analyse_2.ipynb rename to notebooks/archive/Analyse_2.ipynb diff --git a/test-notebooks/archive/Analyse_3.ipynb b/notebooks/archive/Analyse_3.ipynb similarity index 100% rename from test-notebooks/archive/Analyse_3.ipynb rename to notebooks/archive/Analyse_3.ipynb diff --git a/test-notebooks/archive/Analyse_4-1.ipynb b/notebooks/archive/Analyse_4-1.ipynb similarity index 100% rename from test-notebooks/archive/Analyse_4-1.ipynb rename to notebooks/archive/Analyse_4-1.ipynb diff --git a/test-notebooks/archive/Analyse_4-2.ipynb b/notebooks/archive/Analyse_4-2.ipynb similarity index 100% rename from test-notebooks/archive/Analyse_4-2.ipynb rename to notebooks/archive/Analyse_4-2.ipynb diff --git a/test-notebooks/archive/test_new_dupl_merge.ipynb b/notebooks/archive/test_new_dupl_merge.ipynb similarity index 100% rename from test-notebooks/archive/test_new_dupl_merge.ipynb rename to notebooks/archive/test_new_dupl_merge.ipynb diff --git a/test-notebooks/archive/test_sentence_trf.ipynb b/notebooks/archive/test_sentence_trf.ipynb similarity index 100% rename from test-notebooks/archive/test_sentence_trf.ipynb rename to notebooks/archive/test_sentence_trf.ipynb diff --git a/test-notebooks/display_results.ipynb b/notebooks/display_results.ipynb similarity index 100% rename from test-notebooks/display_results.ipynb rename to notebooks/display_results.ipynb diff --git a/notebooks/dummy_data_generation.ipynb b/notebooks/dummy_data_generation.ipynb new file mode 100644 index 0000000..4ebf4c7 --- /dev/null +++ b/notebooks/dummy_data_generation.ipynb @@ -0,0 +1,1244 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "91e5d121-4267-4ee7-baaa-3cec3da1f869", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from collections import namedtuple\n", + "\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f7c989c6-67e1-4c34-bd08-34d6626cd33a", + "metadata": {}, + "outputs": [], + "source": [ + "N_SAMPLES = 1000\n", + "SEED = 42\n", + "RNG = np.random.default_rng(seed=SEED)\n", + "COLS_DUMMY_DATA = ['type', 'problem', 'action']\n", + "TOTAL_POSSIBILITY_FAILURES = 0.4\n", + "TYPE_MAPPING = {\n", + " 'Reguläre Wartung': 'Wartung',\n", + " 'Unerwarteter Fehler': 'Störungsmeldung',\n", + "}\n", + "OBJ_IDS_2_TXT = {\n", + " 1: 'Fräsmaschine-FS435X',\n", + " 2: 'Schleifmaschine-S4x87',\n", + " 3: 'Bohrbearbeitungszentrum-BBZ35',\n", + "}\n", + "STARTING_DATE = pd.to_datetime('2022-01-01')\n", + "ENDING_DATE = pd.to_datetime('2024-08-07')\n", + "DATASET_FEATURES = [\n", + " 'VorgangsID',\n", + " 'ObjektID',\n", + " 'HObjektText',\n", + " 'VorgangsTypName',\n", + " 'VorgangsBeschreibung',\n", + " 'ErledigungsBeschreibung',\n", + " 'ErstellungsDatum',\n", + " 'VorgangsDatum',\n", + " 'Arbeitsbeginn',\n", + " 'ErledigungsDatum',\n", + "]\n", + "DF_SKELLETON = {feat: [] for feat in DATASET_FEATURES}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0be70014-4fe0-45dd-8bd5-f731bd12cfe1", + "metadata": {}, + "outputs": [], + "source": [ + "source = '../data/Dummy_Data.xlsx'\n", + "dest = f'../data/Dummy_Dataset_N_{N_SAMPLES}.csv'\n", + "pth_source = Path(source)\n", + "pth_dest = Path(dest)\n", + "assert pth_source.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "193304e9-9db1-4697-ae48-836a716ce80e", + "metadata": {}, + "outputs": [], + "source": [ + "def read_dummy_data(pth_data, columns=COLS_DUMMY_DATA):\n", + " data = pd.read_excel(pth_data)\n", + " data.columns = columns.copy()\n", + " \n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "85ac2d6c-4eee-429a-8511-82f39d4e8716", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
typeproblemaction
0Reguläre WartungSchmierung der LagerNachfüllen des Schmiermittels
1Unerwarteter FehlerMotorüberhitzungAustausch des Kühlgebläses
2Reguläre WartungÜberprüfung der HydraulikReinigung und Nachfüllen der Hydraulikflüssigkeit
3Unerwarteter FehlerElektronikfehlerAustausch der defekten Platine
4Reguläre WartungKalibrierung der SensorenJustierung und Test der Sensoren
\n", + "
" + ], + "text/plain": [ + " type problem \\\n", + "0 Reguläre Wartung Schmierung der Lager \n", + "1 Unerwarteter Fehler Motorüberhitzung \n", + "2 Reguläre Wartung Überprüfung der Hydraulik \n", + "3 Unerwarteter Fehler Elektronikfehler \n", + "4 Reguläre Wartung Kalibrierung der Sensoren \n", + "\n", + " action \n", + "0 Nachfüllen des Schmiermittels \n", + "1 Austausch des Kühlgebläses \n", + "2 Reinigung und Nachfüllen der Hydraulikflüssigkeit \n", + "3 Austausch der defekten Platine \n", + "4 Justierung und Test der Sensoren " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = read_dummy_data(pth_source)\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "aa6d3af2-31c7-44ee-a3a8-4201b559038f", + "metadata": {}, + "outputs": [], + "source": [ + "def make_subset(data, target_type, type_mapping=TYPE_MAPPING):\n", + " Entry = namedtuple('ProblemActionPairs', ['type', 'problem', 'action'])\n", + " entries = []\n", + " data_subset = data.loc[data['type']==target_type,:].copy()\n", + "\n", + " for row in data_subset.itertuples(index=False):\n", + " type_mapped = type_mapping[row.type]\n", + " entries.append(Entry(type_mapped, row.problem, row.action))\n", + "\n", + " return entries" + ] + }, + { + "cell_type": "markdown", + "id": "79bb0e96-3e04-458e-bbfb-dbd11a5386b9", + "metadata": {}, + "source": [ + "## Activity Types\n", + "\n", + "- relevant activity types:\n", + " - 'Reparaturauftrag (Portal)'\n", + " - 'Störungsmeldung'\n", + " - 'Wartung'\n", + "- ``regular`` --> 'Wartung'\n", + "- ``failures`` --> 'Störungsmeldung'" + ] + }, + { + "cell_type": "markdown", + "id": "2ec7a69d-80a6-4ede-928f-3ad933d3e090", + "metadata": {}, + "source": [ + "### Failures" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "668c0275-c8d8-4390-8857-a2ada566d786", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ProblemActionPairs(type='Störungsmeldung', problem='Motorüberhitzung', action='Austausch des Kühlgebläses'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Elektronikfehler', action='Austausch der defekten Platine'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Getriebeausfall', action='Reparatur und Austausch der beschädigten Zahnräder'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Leckage in der Hydraulikleitung', action='Abdichtung der Leckstelle und Nachfüllen der Hydraulikflüssigkeit'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Ausfall der Steuerungseinheit', action='Neustart und Software-Update der Steuerungseinheit'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Bruch eines Zahnriemens', action='Austausch des Zahnriemens'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Kurzschluss im Schaltschrank', action='Austausch der Sicherungen und Kabel'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Vibrationsprobleme am Motor', action='Auswuchten des Motors und Austausch der Dämpfer'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Fehlfunktion der Hydraulikpumpe', action='Austausch der Hydraulikpumpe'),\n", + " ProblemActionPairs(type='Störungsmeldung', problem='Bruch eines Förderbands', action='Austausch des Förderbands')]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "failures = make_subset(data, target_type='Unerwarteter Fehler')\n", + "failures[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "4f784dba-5e0a-41aa-9005-3e310fda47cb", + "metadata": {}, + "source": [ + "### Regular Maintenance" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d4b8ca4a-4230-463e-bb74-f965b0732155", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ProblemActionPairs(type='Wartung', problem='Schmierung der Lager', action='Nachfüllen des Schmiermittels'),\n", + " ProblemActionPairs(type='Wartung', problem='Überprüfung der Hydraulik', action='Reinigung und Nachfüllen der Hydraulikflüssigkeit'),\n", + " ProblemActionPairs(type='Wartung', problem='Kalibrierung der Sensoren', action='Justierung und Test der Sensoren'),\n", + " ProblemActionPairs(type='Wartung', problem='Reinigung der Luftfilter', action='Austausch der Luftfilter'),\n", + " ProblemActionPairs(type='Wartung', problem='Überprüfung der Sicherheitsvorrichtungen', action='Funktionstest und Justierung der Sicherheitsvorrichtungen'),\n", + " ProblemActionPairs(type='Wartung', problem='Inspektion der Förderbänder', action='Einstellung und Austausch abgenutzter Teile'),\n", + " ProblemActionPairs(type='Wartung', problem='Überprüfung der Druckventile', action='Reinigung und Einstellung der Druckventile'),\n", + " ProblemActionPairs(type='Wartung', problem='Test der Not-Aus-Schalter', action='Test und Austausch defekter Not-Aus-Schalter'),\n", + " ProblemActionPairs(type='Wartung', problem='Überprüfung der Kühlmittelsysteme', action='Nachfüllen und Entlüftung des Kühlmittelsystems'),\n", + " ProblemActionPairs(type='Wartung', problem='Kontrolle der Lichtschranken', action='Reinigung und Neujustierung der Lichtschranken')]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regular = make_subset(data, target_type='Reguläre Wartung')\n", + "regular[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "aaf5b6bd-a7bf-4c6b-a969-566cd90d2353", + "metadata": {}, + "source": [ + "## ObjectIDs and HObjektTexts" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bf2d380d-4103-40b0-a99d-7770e73a9ef5", + "metadata": {}, + "outputs": [], + "source": [ + "def random_objects(mapping, rng, n_samples):\n", + " max_val = max(mapping.keys())\n", + " rands = rng.integers(1, max_val+1, size=n_samples)\n", + "\n", + " obj_ids = rands.tolist()\n", + " texts =[mapping[obj_id] for obj_id in obj_ids]\n", + "\n", + " return obj_ids, texts" + ] + }, + { + "cell_type": "markdown", + "id": "708521a4-a93c-4d29-9be7-19a29fd8aa7d", + "metadata": {}, + "source": [ + "## Random Dates" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bff4fc9e-7a61-42df-abcb-1540e2d04b80", + "metadata": {}, + "outputs": [], + "source": [ + "def random_dates(start, end, rng, n_samples):\n", + "\n", + " start_u = start.value//10**9\n", + " end_u = end.value//10**9\n", + " days_to_finish = rng.exponential(1.3, n_samples).astype(np.int_)\n", + " td = pd.to_timedelta(days_to_finish, unit='day')\n", + "\n", + " creation_dates = pd.to_datetime(rng.integers(start_u, end_u, n_samples), unit='s').normalize()\n", + " done_dates = creation_dates + td\n", + "\n", + " return creation_dates.to_list(), done_dates.to_list()" + ] + }, + { + "cell_type": "markdown", + "id": "d2a77202-0eb9-4390-89af-e8e60e5a1e34", + "metadata": {}, + "source": [ + "## Random descriptions" + ] + }, + { + "cell_type": "markdown", + "id": "d360fe54-36f2-4f35-a42c-7ca09a7599c3", + "metadata": {}, + "source": [ + "proportions:\n", + "- regular: 0.6\n", + "- failure: 0.4" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "626a290b-a3ee-4c37-a754-b29ecca59f70", + "metadata": {}, + "outputs": [], + "source": [ + "def random_descriptions(failures, regular, target_prop_fail, rng, n_samples):\n", + " poss_per_entry_fail = target_prop_fail / len(failures)\n", + " poss_per_entry_regular = (1 - target_prop_fail) / len(regular)\n", + "\n", + " failure_possibilities = np.full(len(failures), poss_per_entry_fail)\n", + " regular_possibilities = np.full(len(regular), poss_per_entry_regular)\n", + " possibilities = np.concatenate((failure_possibilities, regular_possibilities))\n", + "\n", + " content_descriptions = failures.copy()\n", + " content_descriptions.extend(regular.copy())\n", + "\n", + " return rng.choice(content_descriptions, size=n_samples, p=possibilities)\n", + "\n", + "def description_parts(descriptions):\n", + " types = descriptions[:,0].tolist()\n", + " todo = descriptions[:,1].tolist()\n", + " dones = descriptions[:,2].tolist()\n", + "\n", + " return types, todo, dones" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8999cba-a460-4f67-901f-b7936f04cd74", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "39f72300-d73b-431f-89ee-af85e7bcdccc", + "metadata": {}, + "source": [ + "# Complete Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8eb838de-d28e-4499-a63a-a708a58e0c6f", + "metadata": {}, + "outputs": [], + "source": [ + "def create_dataset(df_skelleton, type_failure, type_regular, starting_date, ending_date, rng, n_samples):\n", + " df_dict = df_skelleton.copy()\n", + " \n", + " failures = make_subset(data, target_type=type_failure)\n", + " regular = make_subset(data, target_type=type_regular)\n", + " \n", + " event_ids = list(range(1,n_samples+1))\n", + " obj_ids, txts = random_objects(OBJ_IDS_2_TXT, rng, n_samples)\n", + " creation_dates, done_dates = random_dates(starting_date, ending_date, rng, n_samples)\n", + " process_date = creation_dates.copy()\n", + " done_start_date = done_dates.copy()\n", + " descriptions = random_descriptions(failures, regular, TOTAL_POSSIBILITY_FAILURES, rng, n_samples)\n", + " types, todo, dones = description_parts(descriptions)\n", + "\n", + " df_dict.update(\n", + " VorgangsID=event_ids,\n", + " ObjektID=obj_ids,\n", + " HObjektText=txts,\n", + " VorgangsTypName=types,\n", + " VorgangsBeschreibung=todo,\n", + " ErledigungsBeschreibung=dones,\n", + " ErstellungsDatum=creation_dates,\n", + " VorgangsDatum=process_date,\n", + " Arbeitsbeginn=done_start_date,\n", + " ErledigungsDatum=done_dates,\n", + " )\n", + " df = pd.DataFrame.from_dict(df_dict)\n", + " df = df.sort_values(by='ErstellungsDatum', ascending=True)\n", + " df = df.reset_index(drop=True)\n", + " df['VorgangsID'] = event_ids\n", + "\n", + " return df.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "195775b3-e44a-4d80-92bc-799093bd4ef2", + "metadata": {}, + "outputs": [], + "source": [ + "df = create_dataset(\n", + " df_skelleton=DF_SKELLETON,\n", + " type_failure='Unerwarteter Fehler',\n", + " type_regular='Reguläre Wartung',\n", + " starting_date=STARTING_DATE,\n", + " ending_date=ENDING_DATE,\n", + " rng=RNG,\n", + " n_samples=N_SAMPLES,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d3182c98-b57a-4619-aa41-8ab4a90bd1f2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VorgangsIDObjektIDHObjektTextVorgangsTypNameVorgangsBeschreibungErledigungsBeschreibungErstellungsDatumVorgangsDatumArbeitsbeginnErledigungsDatum
012Schleifmaschine-S4x87StörungsmeldungÖlleckage durch undichten ÖlsumpfAbdichtung und Austausch des Ölsumpfs2022-01-012022-01-012022-01-012022-01-01
122Schleifmaschine-S4x87WartungÜberprüfung der SchwingungsdämpferAustausch und Justierung der Schwingungsdämpfer2022-01-032022-01-032022-01-032022-01-03
231Fräsmaschine-FS435XWartungÜberprüfung der KühlmittelsystemeNachfüllen und Entlüftung des Kühlmittelsystems2022-01-052022-01-052022-01-052022-01-05
343Bohrbearbeitungszentrum-BBZ35StörungsmeldungBlockierung der FörderschneckeBeseitigung der Blockierung und Überprüfung de...2022-01-062022-01-062022-01-072022-01-07
453Bohrbearbeitungszentrum-BBZ35StörungsmeldungÜberhitzung durch mangelnde KühlmittelzirkulationReinigung der Leitungen und Austausch des Kühl...2022-01-062022-01-062022-01-092022-01-09
.................................
9959961Fräsmaschine-FS435XWartungTest der Not-Aus-SchalterTest und Austausch defekter Not-Aus-Schalter2024-08-032024-08-032024-08-032024-08-03
9969972Schleifmaschine-S4x87StörungsmeldungFehlfunktion der HydraulikpumpeAustausch der Hydraulikpumpe2024-08-052024-08-052024-08-062024-08-06
9979983Bohrbearbeitungszentrum-BBZ35WartungKalibrierung der SensorenJustierung und Test der Sensoren2024-08-052024-08-052024-08-072024-08-07
9989992Schleifmaschine-S4x87WartungÜberprüfung der HydraulikzylinderNachjustierung und Schmierung der Hydraulikzyl...2024-08-052024-08-052024-08-052024-08-05
99910002Schleifmaschine-S4x87WartungInspektion der SchutzabdeckungenReparatur und Austausch beschädigter Abdeckungen2024-08-062024-08-062024-08-072024-08-07
\n", + "

1000 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " VorgangsID ObjektID HObjektText VorgangsTypName \\\n", + "0 1 2 Schleifmaschine-S4x87 Störungsmeldung \n", + "1 2 2 Schleifmaschine-S4x87 Wartung \n", + "2 3 1 Fräsmaschine-FS435X Wartung \n", + "3 4 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n", + "4 5 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n", + ".. ... ... ... ... \n", + "995 996 1 Fräsmaschine-FS435X Wartung \n", + "996 997 2 Schleifmaschine-S4x87 Störungsmeldung \n", + "997 998 3 Bohrbearbeitungszentrum-BBZ35 Wartung \n", + "998 999 2 Schleifmaschine-S4x87 Wartung \n", + "999 1000 2 Schleifmaschine-S4x87 Wartung \n", + "\n", + " VorgangsBeschreibung \\\n", + "0 Ölleckage durch undichten Ölsumpf \n", + "1 Überprüfung der Schwingungsdämpfer \n", + "2 Überprüfung der Kühlmittelsysteme \n", + "3 Blockierung der Förderschnecke \n", + "4 Überhitzung durch mangelnde Kühlmittelzirkulation \n", + ".. ... \n", + "995 Test der Not-Aus-Schalter \n", + "996 Fehlfunktion der Hydraulikpumpe \n", + "997 Kalibrierung der Sensoren \n", + "998 Überprüfung der Hydraulikzylinder \n", + "999 Inspektion der Schutzabdeckungen \n", + "\n", + " ErledigungsBeschreibung ErstellungsDatum \\\n", + "0 Abdichtung und Austausch des Ölsumpfs 2022-01-01 \n", + "1 Austausch und Justierung der Schwingungsdämpfer 2022-01-03 \n", + "2 Nachfüllen und Entlüftung des Kühlmittelsystems 2022-01-05 \n", + "3 Beseitigung der Blockierung und Überprüfung de... 2022-01-06 \n", + "4 Reinigung der Leitungen und Austausch des Kühl... 2022-01-06 \n", + ".. ... ... \n", + "995 Test und Austausch defekter Not-Aus-Schalter 2024-08-03 \n", + "996 Austausch der Hydraulikpumpe 2024-08-05 \n", + "997 Justierung und Test der Sensoren 2024-08-05 \n", + "998 Nachjustierung und Schmierung der Hydraulikzyl... 2024-08-05 \n", + "999 Reparatur und Austausch beschädigter Abdeckungen 2024-08-06 \n", + "\n", + " VorgangsDatum Arbeitsbeginn ErledigungsDatum \n", + "0 2022-01-01 2022-01-01 2022-01-01 \n", + "1 2022-01-03 2022-01-03 2022-01-03 \n", + "2 2022-01-05 2022-01-05 2022-01-05 \n", + "3 2022-01-06 2022-01-07 2022-01-07 \n", + "4 2022-01-06 2022-01-09 2022-01-09 \n", + ".. ... ... ... \n", + "995 2024-08-03 2024-08-03 2024-08-03 \n", + "996 2024-08-05 2024-08-06 2024-08-06 \n", + "997 2024-08-05 2024-08-07 2024-08-07 \n", + "998 2024-08-05 2024-08-05 2024-08-05 \n", + "999 2024-08-06 2024-08-07 2024-08-07 \n", + "\n", + "[1000 rows x 10 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2bb03fdb-ea45-46a1-81b4-525f7568355c", + "metadata": {}, + "outputs": [], + "source": [ + "# df.to_excel(pth_dest)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ff9f6f80-b709-4011-89fe-90c8812d7e7b", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(pth_dest, sep=';', encoding='cp1252', index=False, date_format='%d.%m.%Y')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2871889-f128-419c-8e89-d8eb48ceb2e1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "cdba82ad-d4d1-4266-ad41-8d90bb059956", + "metadata": {}, + "source": [ + "# Check processed data" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "af26cd9b-e5d1-46e1-b269-ac46de10dfe2", + "metadata": {}, + "outputs": [], + "source": [ + "pth_to_data = '../scripts/results/dummy_N_1000/'\n", + "pth_to_data = Path(pth_to_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c7338787-716c-43c0-9d11-03567459f594", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[WindowsPath('../scripts/results/dummy_N_1000/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TIMELINE.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TOKEN_ANALYSIS.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TK-GRAPH_POSTPROCESSING.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph.graphml'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TK-GRAPH_ANALYSIS.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph-filtered.graphml'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TK-GRAPH_ANALYSIS_RESCALED.pkl'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph-directed-rescaled.graphml'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/TokenGraph-undirected-rescaled.graphml'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/token_graph.svg'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_1.svg'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_2.svg'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_3.svg'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_4.svg'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/token_graph_sub_5.svg'),\n", + " WindowsPath('../scripts/results/dummy_N_1000/Pipe-Graph_Static-Rendering_Step-6_build_subnetworks.pkl')]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "files = list(pth_to_data.glob(r'*'))\n", + "files" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1dd0da25-9097-46a1-bac8-dce281e17c5b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", + " _torch_pytree._register_pytree_node(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-08-07 13:37:19 +0000 | lang_main:io:INFO | Loaded TOML config file successfully.\n" + ] + } + ], + "source": [ + "from lang_main import io" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "33ae3e52-f638-40a0-b243-6578cde52a19", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "WindowsPath('../scripts/results/dummy_N_1000/TIMELINE.pkl')" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "files[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "fc598842-f218-4895-8d1e-20b09f9e6d12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-08-07 13:51:31 +0000 | lang_main:io:INFO | Loaded file successfully.\n" + ] + } + ], + "source": [ + "(data,) = io.load_pickle(files[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "3cbffa6c-4199-4a9f-b041-3c34fdbc7266", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VorgangsIDObjektIDHObjektTextVorgangsTypNameVorgangsBeschreibungErledigungsBeschreibungErstellungsDatumVorgangsDatumArbeitsbeginnErledigungsDatum
012Schleifmaschine-S4x87StörungsmeldungÖlleckage durch undichten ÖlsumpfAbdichtung und Austausch des Ölsumpfs2022-01-012022-01-012022-01-012022-01-01
122Schleifmaschine-S4x87WartungÜberprüfung der SchwingungsdämpferAustausch und Justierung der Schwingungsdämpfer2022-01-032022-01-032022-01-032022-01-03
231Fräsmaschine-FS435XWartungÜberprüfung der KühlmittelsystemeNachfüllen und Entlüftung des Kühlmittelsystems2022-01-052022-01-052022-01-052022-01-05
343Bohrbearbeitungszentrum-BBZ35StörungsmeldungBlockierung der FörderschneckeBeseitigung der Blockierung und Überprüfung de...2022-01-062022-01-062022-01-072022-01-07
453Bohrbearbeitungszentrum-BBZ35StörungsmeldungÜberhitzung durch mangelnde KühlmittelzirkulationReinigung der Leitungen und Austausch des Kühl...2022-01-062022-01-062022-01-092022-01-09
.................................
9959961Fräsmaschine-FS435XWartungTest der Not-Aus-SchalterTest und Austausch defekter Not-Aus-Schalter2024-08-032024-08-032024-08-032024-08-03
9969972Schleifmaschine-S4x87StörungsmeldungFehlfunktion der HydraulikpumpeAustausch der Hydraulikpumpe2024-08-052024-08-052024-08-062024-08-06
9979983Bohrbearbeitungszentrum-BBZ35WartungKalibrierung der SensorenJustierung und Test der Sensoren2024-08-052024-08-052024-08-072024-08-07
9989992Schleifmaschine-S4x87WartungÜberprüfung der HydraulikzylinderNachjustierung und Schmierung der Hydraulikzyl...2024-08-052024-08-052024-08-052024-08-05
99910002Schleifmaschine-S4x87WartungInspektion der SchutzabdeckungenReparatur und Austausch beschädigter Abdeckungen2024-08-062024-08-062024-08-072024-08-07
\n", + "

1000 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " VorgangsID ObjektID HObjektText VorgangsTypName \\\n", + "0 1 2 Schleifmaschine-S4x87 Störungsmeldung \n", + "1 2 2 Schleifmaschine-S4x87 Wartung \n", + "2 3 1 Fräsmaschine-FS435X Wartung \n", + "3 4 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n", + "4 5 3 Bohrbearbeitungszentrum-BBZ35 Störungsmeldung \n", + ".. ... ... ... ... \n", + "995 996 1 Fräsmaschine-FS435X Wartung \n", + "996 997 2 Schleifmaschine-S4x87 Störungsmeldung \n", + "997 998 3 Bohrbearbeitungszentrum-BBZ35 Wartung \n", + "998 999 2 Schleifmaschine-S4x87 Wartung \n", + "999 1000 2 Schleifmaschine-S4x87 Wartung \n", + "\n", + " VorgangsBeschreibung \\\n", + "0 Ölleckage durch undichten Ölsumpf \n", + "1 Überprüfung der Schwingungsdämpfer \n", + "2 Überprüfung der Kühlmittelsysteme \n", + "3 Blockierung der Förderschnecke \n", + "4 Überhitzung durch mangelnde Kühlmittelzirkulation \n", + ".. ... \n", + "995 Test der Not-Aus-Schalter \n", + "996 Fehlfunktion der Hydraulikpumpe \n", + "997 Kalibrierung der Sensoren \n", + "998 Überprüfung der Hydraulikzylinder \n", + "999 Inspektion der Schutzabdeckungen \n", + "\n", + " ErledigungsBeschreibung ErstellungsDatum \\\n", + "0 Abdichtung und Austausch des Ölsumpfs 2022-01-01 \n", + "1 Austausch und Justierung der Schwingungsdämpfer 2022-01-03 \n", + "2 Nachfüllen und Entlüftung des Kühlmittelsystems 2022-01-05 \n", + "3 Beseitigung der Blockierung und Überprüfung de... 2022-01-06 \n", + "4 Reinigung der Leitungen und Austausch des Kühl... 2022-01-06 \n", + ".. ... ... \n", + "995 Test und Austausch defekter Not-Aus-Schalter 2024-08-03 \n", + "996 Austausch der Hydraulikpumpe 2024-08-05 \n", + "997 Justierung und Test der Sensoren 2024-08-05 \n", + "998 Nachjustierung und Schmierung der Hydraulikzyl... 2024-08-05 \n", + "999 Reparatur und Austausch beschädigter Abdeckungen 2024-08-06 \n", + "\n", + " VorgangsDatum Arbeitsbeginn ErledigungsDatum \n", + "0 2022-01-01 2022-01-01 2022-01-01 \n", + "1 2022-01-03 2022-01-03 2022-01-03 \n", + "2 2022-01-05 2022-01-05 2022-01-05 \n", + "3 2022-01-06 2022-01-07 2022-01-07 \n", + "4 2022-01-06 2022-01-09 2022-01-09 \n", + ".. ... ... ... \n", + "995 2024-08-03 2024-08-03 2024-08-03 \n", + "996 2024-08-05 2024-08-06 2024-08-06 \n", + "997 2024-08-05 2024-08-07 2024-08-07 \n", + "998 2024-08-05 2024-08-05 2024-08-05 \n", + "999 2024-08-06 2024-08-07 2024-08-07 \n", + "\n", + "[1000 rows x 10 columns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "fd422d51-6118-47aa-80a1-6e80819a3205", + "metadata": {}, + "outputs": [], + "source": [ + "t = data.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "4225af01-b9df-4b27-aae2-b06257b0dd3a", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "Can only use .dt accessor with datetimelike values", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[37], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErledigungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdt\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py:6299\u001b[0m, in \u001b[0;36mNDFrame.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6292\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 6293\u001b[0m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_internal_names_set\n\u001b[0;32m 6294\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_metadata\n\u001b[0;32m 6295\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessors\n\u001b[0;32m 6296\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info_axis\u001b[38;5;241m.\u001b[39m_can_hold_identifiers_and_holds_name(name)\n\u001b[0;32m 6297\u001b[0m ):\n\u001b[0;32m 6298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m[name]\n\u001b[1;32m-> 6299\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\accessor.py:224\u001b[0m, in \u001b[0;36mCachedAccessor.__get__\u001b[1;34m(self, obj, cls)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 222\u001b[0m \u001b[38;5;66;03m# we're accessing the attribute of the class, i.e., Dataset.geo\u001b[39;00m\n\u001b[0;32m 223\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessor\n\u001b[1;32m--> 224\u001b[0m accessor_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_accessor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# Replace the property with the accessor object. Inspired by:\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# https://www.pydanny.com/cached-property.html\u001b[39;00m\n\u001b[0;32m 227\u001b[0m \u001b[38;5;66;03m# We need to use object.__setattr__ because we overwrite __setattr__ on\u001b[39;00m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;66;03m# NDFrame\u001b[39;00m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28mobject\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__setattr__\u001b[39m(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name, accessor_obj)\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\accessors.py:643\u001b[0m, in \u001b[0;36mCombinedDatetimelikeProperties.__new__\u001b[1;34m(cls, data)\u001b[0m\n\u001b[0;32m 640\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data\u001b[38;5;241m.\u001b[39mdtype, PeriodDtype):\n\u001b[0;32m 641\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PeriodProperties(data, orig)\n\u001b[1;32m--> 643\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan only use .dt accessor with datetimelike values\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mAttributeError\u001b[0m: Can only use .dt accessor with datetimelike values" + ] + } + ], + "source": [ + "t['ErledigungsDatum'].dt" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "9ad24677-b0be-4f4e-9067-b4746e0ba039", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "Can only use .dt accessor with datetimelike values", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[38], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErstellungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdt\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py:6299\u001b[0m, in \u001b[0;36mNDFrame.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6292\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 6293\u001b[0m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_internal_names_set\n\u001b[0;32m 6294\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_metadata\n\u001b[0;32m 6295\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessors\n\u001b[0;32m 6296\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info_axis\u001b[38;5;241m.\u001b[39m_can_hold_identifiers_and_holds_name(name)\n\u001b[0;32m 6297\u001b[0m ):\n\u001b[0;32m 6298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m[name]\n\u001b[1;32m-> 6299\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\accessor.py:224\u001b[0m, in \u001b[0;36mCachedAccessor.__get__\u001b[1;34m(self, obj, cls)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 222\u001b[0m \u001b[38;5;66;03m# we're accessing the attribute of the class, i.e., Dataset.geo\u001b[39;00m\n\u001b[0;32m 223\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessor\n\u001b[1;32m--> 224\u001b[0m accessor_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_accessor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# Replace the property with the accessor object. Inspired by:\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# https://www.pydanny.com/cached-property.html\u001b[39;00m\n\u001b[0;32m 227\u001b[0m \u001b[38;5;66;03m# We need to use object.__setattr__ because we overwrite __setattr__ on\u001b[39;00m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;66;03m# NDFrame\u001b[39;00m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28mobject\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__setattr__\u001b[39m(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name, accessor_obj)\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\accessors.py:643\u001b[0m, in \u001b[0;36mCombinedDatetimelikeProperties.__new__\u001b[1;34m(cls, data)\u001b[0m\n\u001b[0;32m 640\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data\u001b[38;5;241m.\u001b[39mdtype, PeriodDtype):\n\u001b[0;32m 641\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PeriodProperties(data, orig)\n\u001b[1;32m--> 643\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan only use .dt accessor with datetimelike values\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mAttributeError\u001b[0m: Can only use .dt accessor with datetimelike values" + ] + } + ], + "source": [ + "t['ErstellungsDatum'].dt" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "de697da1-2a4d-465f-988e-5d0a68840167", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for -: 'str' and 'str'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[1;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\computation\\expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[1;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[0;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[1;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\computation\\expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[1;34m(op, op_str, a, b)\u001b[0m\n\u001b[0;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'str' and 'str'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[36], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m t[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErledigungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mErstellungsDatum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[0;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[1;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\arraylike.py:194\u001b[0m, in \u001b[0;36mOpsMixin.__sub__\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 192\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__sub__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 193\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__sub__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[1;32m--> 194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msub\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[1;34m(self, other, op)\u001b[0m\n\u001b[0;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[0;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[1;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\base.py:1382\u001b[0m, in \u001b[0;36mIndexOpsMixin._arith_method\u001b[1;34m(self, other, op)\u001b[0m\n\u001b[0;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[0;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[1;34m(left, right, op)\u001b[0m\n\u001b[0;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[0;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[1;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[1;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[0;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[0;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[0;32m 222\u001b[0m ):\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[1;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\pandas\\core\\ops\\array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[1;34m(x, y, op)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[0;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n", + "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'str' and 'str'" + ] + } + ], + "source": [ + "t['test'] = t['ErledigungsDatum'] - t['ErstellungsDatum']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2b1724e-f48d-41a3-98c6-710bef840ba5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test-notebooks/lang_main.xml b/notebooks/lang_main.xml similarity index 100% rename from test-notebooks/lang_main.xml rename to notebooks/lang_main.xml diff --git a/test-notebooks/lang_main_config.toml b/notebooks/lang_main_config.toml similarity index 100% rename from test-notebooks/lang_main_config.toml rename to notebooks/lang_main_config.toml diff --git a/test-notebooks/misc.ipynb b/notebooks/misc.ipynb similarity index 97% rename from test-notebooks/misc.ipynb rename to notebooks/misc.ipynb index 7d0193d..ae63ff3 100644 --- a/test-notebooks/misc.ipynb +++ b/notebooks/misc.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "d46b6ce8-b51b-49e0-b494-fc24fda0f73f", "metadata": {}, "outputs": [], @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "6fde72d3-b95b-4d37-be71-a7d3661dd3f5", "metadata": {}, "outputs": [ @@ -40,7 +40,7 @@ "'You are connected to Cytoscape!'" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -52,46 +52,41 @@ { "cell_type": "code", "execution_count": null, - "id": "43eeb870-6f97-4029-ac0d-210315ccaabf", + "id": "187ced81-6304-49bd-afc7-c18e656bc9a3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "af118d77-d87a-4687-be5b-e810a24c403e", "metadata": { "scrolled": true }, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-07-31 06:33:53 +0000 | io:INFO | Loaded TOML config file successfully.\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", - " warnings.warn(\n", - "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", - " warnings.warn(\n" + " _torch_pytree._register_pytree_node(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-08-07 07:36:54 +0000 | io:INFO | Loaded TOML config file successfully.\n" ] } ], "source": [ "from lang_main import io\n", "from lang_main.analysis.graphs import rescale_edge_weights, get_graph_metadata\n", + "from lang_main import model_loader\n", + "from lang_main.types import LanguageModels\n", + "from lang_main.constants import MODEL_LOADER_MAP\n", "\n", "from pathlib import Path\n", "import pickle\n", @@ -112,6 +107,88 @@ "#p4c.py4cytoscape_logger.detail_logger.addHandler(NullHandler())" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "59cbcf38-6fe1-403b-9c10-f107e28185f0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a33cb410-f774-4cc9-b972-bf05df36d3d7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", + " _torch_pytree._register_pytree_node(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "spacy imports\n", + "end\n", + "Nothing\n", + "2024-08-07 07:51:22 +0000 | io:INFO | Loaded TOML config file successfully.\n" + ] + } + ], + "source": [ + "from lang_main import __init__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f55780a-a91e-49ef-a24f-503eaf2efae8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2d91512f-8976-452e-acc9-4bff3dc33dd1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", + " _torch_pytree._register_pytree_node(\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_loader.instantiate_model(MODEL_LOADER_MAP, LanguageModels.SPACY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fe9337f-a11b-4eab-ae46-a8a4ccf3f461", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 3, @@ -3753,7 +3830,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "id": "fcd9247f-c4f9-4f73-9fd3-2ab56700073f", "metadata": {}, "outputs": [ @@ -3761,23 +3838,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Calling cytoscape_ping()\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀCalling cytoscape_version_info(base_url='http://127.0.0.1:1234/v1')\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀCalling cyrest_get('version', base_url='http://127.0.0.1:1234/v1')\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀHTTP GET(http://127.0.0.1:1234/v1/version)\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀOK[200], content: {\"apiVersion\":\"v1\",\"cytoscapeVersion\":\"3.10.2\"}\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀǀReturning 'cyrest_get': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2'}\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀReturning 'cytoscape_version_info': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2', 'automationAPIVersion': '1.9.0', 'py4cytoscapeVersion': '1.9.0'}\n", - "You are connected to Cytoscape!\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Returning 'cytoscape_ping': 'You are connected to Cytoscape!'\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | --------------------\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Calling cytoscape_version_info()\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀCalling cyrest_get('version', base_url='http://127.0.0.1:1234/v1')\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀHTTP GET(http://127.0.0.1:1234/v1/version)\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀOK[200], content: {\"apiVersion\":\"v1\",\"cytoscapeVersion\":\"3.10.2\"}\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | ǀReturning 'cyrest_get': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2'}\n", - "2024-07-10 11:19:15 +0000 | py4cytoscape_logger:DEBUG | Returning 'cytoscape_version_info': {'apiVersion': 'v1', 'cytoscapeVersion': '3.10.2', 'automationAPIVersion': '1.9.0', 'py4cytoscapeVersion': '1.9.0'}\n", - "2024-07-10 11:19:16 +0000 | py4cytoscape_logger:DEBUG | --------------------\n" + "You are connected to Cytoscape!\n" ] }, { @@ -3789,7 +3850,7 @@ " 'py4cytoscapeVersion': '1.9.0'}" ] }, - "execution_count": 11, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -3803,7 +3864,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "b9290659-e33c-47fc-8d89-7aa3dd6e843a", "metadata": {}, "outputs": [], @@ -3815,7 +3876,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "979d6def-83ac-47f6-ac6f-0d20ddf48d48", "metadata": {}, "outputs": [ @@ -3882,7 +3943,7 @@ "3 node 3 B 5" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -3893,7 +3954,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "81702429-5735-48de-96a4-1f32c7c7d68c", "metadata": {}, "outputs": [ @@ -3965,7 +4026,7 @@ "3 node 2 node 3 interacts 9.9" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -3976,7 +4037,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "6b29d561-fffd-4a5b-91c1-8fb6a075ae4f", "metadata": {}, "outputs": [ @@ -3994,7 +4055,7 @@ "128" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -4003,6 +4064,187 @@ "p4c.create_network_from_data_frames(nodes, edges, title=\"my first network\", collection=\"DataFrame Example\")" ] }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1e87b4d9-6ef0-4108-81ff-e0124e45b793", + "metadata": {}, + "outputs": [], + "source": [ + "p4c.hide_all_panels()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "23706ea9-b661-428e-a4de-ac4543aafc76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p4c.set_network_zoom_bypass(1.5, bypass=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "9c7e7fa0-8b17-43f6-9076-3e117748b06b", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "In cyrest_delete(): Bypass Visual Property does not exist: NETWORK_SCALE_FACTOR\n" + ] + }, + { + "ename": "CyError", + "evalue": "In cyrest_delete(): Bypass Visual Property does not exist: NETWORK_SCALE_FACTOR", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\commands.py:109\u001b[0m, in \u001b[0;36mcyrest_delete\u001b[1;34m(operation, parameters, base_url, require_json)\u001b[0m\n\u001b[0;32m 108\u001b[0m r \u001b[38;5;241m=\u001b[39m _do_request(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDELETE\u001b[39m\u001b[38;5;124m'\u001b[39m, url, params\u001b[38;5;241m=\u001b[39mparameters, base_url\u001b[38;5;241m=\u001b[39mbase_url)\n\u001b[1;32m--> 109\u001b[0m \u001b[43mr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 110\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\requests\\models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n", + "\u001b[1;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: http://127.0.0.1:1234/v1/networks/3004/views/3129/network/NETWORK_SCALE_FACTOR/bypass", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mCyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[25], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mp4c\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclear_network_zoom_bypass\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:133\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 133\u001b[0m \u001b[43mlog_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 134\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 135\u001b[0m log_finally()\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:130\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 128\u001b[0m log_incoming(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 130\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Call function being logged\u001b[39;00m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\style_bypasses.py:2726\u001b[0m, in \u001b[0;36mclear_network_zoom_bypass\u001b[1;34m(network, base_url)\u001b[0m\n\u001b[0;32m 2697\u001b[0m \u001b[38;5;129m@cy_log\u001b[39m\n\u001b[0;32m 2698\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mclear_network_zoom_bypass\u001b[39m(network\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, base_url\u001b[38;5;241m=\u001b[39mDEFAULT_BASE_URL):\n\u001b[0;32m 2699\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Clear the bypass value for the scale factor for the network, effectively restoring prior default values.\u001b[39;00m\n\u001b[0;32m 2700\u001b[0m \n\u001b[0;32m 2701\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2724\u001b[0m \u001b[38;5;124;03m clearing this property will throw an exception.\u001b[39;00m\n\u001b[0;32m 2725\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2726\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mclear_network_property_bypass\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNETWORK_SCALE_FACTOR\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnetwork\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnetwork\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbase_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2727\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:133\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 133\u001b[0m \u001b[43mlog_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 134\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 135\u001b[0m log_finally()\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:130\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 128\u001b[0m log_incoming(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 130\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Call function being logged\u001b[39;00m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\style_bypasses.py:516\u001b[0m, in \u001b[0;36mclear_network_property_bypass\u001b[1;34m(visual_property, network, base_url)\u001b[0m\n\u001b[0;32m 512\u001b[0m view_suid \u001b[38;5;241m=\u001b[39m network_views\u001b[38;5;241m.\u001b[39mget_network_views(net_suid, base_url\u001b[38;5;241m=\u001b[39mbase_url)[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 514\u001b[0m visual_property \u001b[38;5;241m=\u001b[39m normalize_prop_name(visual_property)\n\u001b[1;32m--> 516\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mcommands\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcyrest_delete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnetworks/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnet_suid\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/views/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mview_suid\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/network/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mvisual_property\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/bypass\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[43mbase_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:133\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 133\u001b[0m \u001b[43mlog_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 134\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 135\u001b[0m log_finally()\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\py4cytoscape_logger.py:130\u001b[0m, in \u001b[0;36mcy_log..wrapper_log\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 128\u001b[0m log_incoming(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 130\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Call function being logged\u001b[39;00m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m log_return(func, value)\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\commands.py:118\u001b[0m, in \u001b[0;36mcyrest_delete\u001b[1;34m(operation, parameters, base_url, require_json)\u001b[0m\n\u001b[0;32m 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\u001b[38;5;241m.\u001b[39mtext\n\u001b[0;32m 117\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 118\u001b[0m \u001b[43m_handle_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mA:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\py4cytoscape\\commands.py:683\u001b[0m, in \u001b[0;36m_handle_error\u001b[1;34m(e, force_cy_error)\u001b[0m\n\u001b[0;32m 681\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 682\u001b[0m show_error(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mIn \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcaller\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m--> 683\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", + "\u001b[1;31mCyError\u001b[0m: In cyrest_delete(): Bypass Visual Property does not exist: NETWORK_SCALE_FACTOR" + ] + } + ], + "source": [ + "p4c.clear_network_zoom_bypass()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "234855d1-a961-4dd7-9b2e-a96d7acc1142", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p4c.fit_content()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "79ccddf6-3b0f-4151-a333-05b6e7b5ed8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.631300425888794" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zoom = p4c.get_network_zoom()\n", + "zoom" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "dc8f2bcc-9836-4476-bfa4-1d83308375ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.5660484088532423" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zoom * 0.96" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "3e3106c6-d9f4-41cd-8a5a-452cd32b25c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p4c.set_network_zoom_bypass(zoom * 0.96, bypass=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05a19acc-8af7-45d0-8902-1e9776824a38", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4647c594-46f1-4e12-9927-e73ca5a0486a", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 10, diff --git a/test-notebooks/styles_template.xml b/notebooks/styles_template.xml similarity index 100% rename from test-notebooks/styles_template.xml rename to notebooks/styles_template.xml diff --git a/test-notebooks/timeline_analysis.ipynb b/notebooks/timeline_analysis.ipynb similarity index 100% rename from test-notebooks/timeline_analysis.ipynb rename to notebooks/timeline_analysis.ipynb diff --git a/test-notebooks/truncate_dataset.ipynb b/notebooks/truncate_dataset.ipynb similarity index 100% rename from test-notebooks/truncate_dataset.ipynb rename to notebooks/truncate_dataset.ipynb diff --git a/pdm.lock b/pdm.lock index 20f19b6..c916a9b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,13 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "notebooks", "trials", "trails", "dev"] -strategy = ["cross_platform", "inherit_metadata"] -lock_version = "4.4.2" -content_hash = "sha256:a9f1cc71f6ee89d2f0572ef7254c9f0be702dbd1a4957b2f0d00d3b83ccc20d4" +groups = ["default", "dev", "notebooks", "trails", "trials"] +strategy = ["inherit_metadata"] +lock_version = "4.5.0" +content_hash = "sha256:468a23f2e765abd2cf8760a33a219a4e475f1ebc73630f792eddf6563293720a" + +[[metadata.targets]] +requires_python = ">=3.11" [[package]] name = "annotated-types" @@ -13,6 +16,9 @@ version = "0.6.0" requires_python = ">=3.8" summary = "Reusable constraint types to use with typing.Annotated" groups = ["default"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.9\"", +] files = [ {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, @@ -25,8 +31,10 @@ requires_python = ">=3.8" summary = "High level compatibility layer for multiple asynchronous event loop implementations" groups = ["notebooks"] dependencies = [ + "exceptiongroup>=1.0.2; python_version < \"3.11\"", "idna>=2.8", "sniffio>=1.1", + "typing-extensions>=4.1; python_version < \"3.11\"", ] files = [ {file = "anyio-4.3.0-py3-none-any.whl", hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"}, @@ -53,6 +61,7 @@ summary = "Argon2 for Python" groups = ["notebooks"] dependencies = [ "argon2-cffi-bindings", + "typing-extensions; python_version < \"3.8\"", ] files = [ {file = "argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea"}, @@ -114,6 +123,7 @@ summary = "Annotate AST trees with source code positions" groups = ["notebooks"] dependencies = [ "six>=1.12.0", + "typing; python_version < \"3.5\"", ] files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -126,6 +136,9 @@ version = "2.0.4" requires_python = ">=3.8" summary = "Simple LRU cache for asyncio" groups = ["notebooks"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.11\"", +] files = [ {file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"}, {file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"}, @@ -137,6 +150,9 @@ version = "23.2.0" requires_python = ">=3.7" summary = "Classes Without Boilerplate" groups = ["notebooks"] +dependencies = [ + "importlib-metadata; python_version < \"3.8\"", +] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -148,6 +164,9 @@ version = "2.15.0" requires_python = ">=3.8" summary = "Internationalization utilities" groups = ["notebooks"] +dependencies = [ + "pytz>=2015.7; python_version < \"3.9\"", +] files = [ {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, @@ -210,6 +229,7 @@ version = "0.7.11" summary = "The Blis BLAS-like linear algebra library, as a self-contained C-extension." groups = ["default"] dependencies = [ + "numpy>=1.15.0; python_version < \"3.9\"", "numpy>=1.19.0; python_version >= \"3.9\"", ] files = [ @@ -232,6 +252,10 @@ version = "2.0.10" requires_python = ">=3.6" summary = "Super lightweight function registries for your library" groups = ["default"] +dependencies = [ + "typing-extensions>=3.6.4; python_version < \"3.8\"", + "zipp>=0.5; python_version < \"3.8\"", +] files = [ {file = "catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f"}, {file = "catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15"}, @@ -342,6 +366,7 @@ summary = "Composable command line interface toolkit" groups = ["default"] dependencies = [ "colorama; platform_system == \"Windows\"", + "importlib-metadata; python_version < \"3.8\"", ] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, @@ -354,6 +379,10 @@ version = "0.16.0" requires_python = ">=3.7" summary = "pathlib-style classes for cloud storage services." groups = ["default"] +dependencies = [ + "importlib-metadata; python_version < \"3.8\"", + "typing-extensions>4; python_version < \"3.11\"", +] files = [ {file = "cloudpathlib-0.16.0-py3-none-any.whl", hash = "sha256:f46267556bf91f03db52b5df7a152548596a15aabca1c8731ef32b0b25a1a6a3"}, {file = "cloudpathlib-0.16.0.tar.gz", hash = "sha256:cdfcd35d46d529587d744154a0bdf962aca953b725c8784cd2ec478354ea63a3"}, @@ -417,6 +446,7 @@ groups = ["default"] dependencies = [ "pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4", "srsly<3.0.0,>=2.4.0", + "typing-extensions<4.5.0,>=3.7.4.1; python_version < \"3.8\"", ] files = [ {file = "confection-0.1.4-py3-none-any.whl", hash = "sha256:a658818d004939069c3e2b3db74a2cb9d956a5e61a1c9ad61788e0ee09a7090f"}, @@ -578,6 +608,17 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +requires_python = ">=3.6" +summary = "An implementation of lxml.xmlfile for the standard library" +groups = ["dev"] +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "executing" version = "2.0.1" @@ -621,6 +662,7 @@ dependencies = [ "Werkzeug>=3.0.0", "blinker>=1.6.2", "click>=8.1.3", + "importlib-metadata>=3.6.0; python_version < \"3.10\"", "itsdangerous>=2.1.2", ] files = [ @@ -634,6 +676,9 @@ version = "1.5.1" requires_python = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" summary = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" groups = ["notebooks"] +dependencies = [ + "cached-property>=1.3.0; python_version < \"3.8\"", +] files = [ {file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"}, {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, @@ -656,6 +701,9 @@ version = "0.14.0" requires_python = ">=3.7" summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" groups = ["notebooks"] +dependencies = [ + "typing-extensions; python_version < \"3.8\"", +] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -773,6 +821,7 @@ requires_python = ">=3.8" summary = "Read metadata from Python packages" groups = ["default"] dependencies = [ + "typing-extensions>=3.6.4; python_version < \"3.8\"", "zipp>=0.5", ] files = [ @@ -829,6 +878,7 @@ groups = ["notebooks"] dependencies = [ "colorama; sys_platform == \"win32\"", "decorator", + "exceptiongroup; python_version < \"3.11\"", "jedi>=0.16", "matplotlib-inline", "pexpect>4.3; sys_platform != \"win32\" and sys_platform != \"emscripten\"", @@ -955,7 +1005,9 @@ summary = "An implementation of JSON Schema validation for Python" groups = ["notebooks"] dependencies = [ "attrs>=22.2.0", + "importlib-resources>=1.4.0; python_version < \"3.9\"", "jsonschema-specifications>=2023.03.6", + "pkgutil-resolve-name>=1.3.10; python_version < \"3.9\"", "referencing>=0.28.4", "rpds-py>=0.7.1", ] @@ -971,6 +1023,7 @@ requires_python = ">=3.8" summary = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" groups = ["notebooks"] dependencies = [ + "importlib-resources>=1.4.0; python_version < \"3.9\"", "referencing>=0.31.0", ] files = [ @@ -1008,6 +1061,7 @@ requires_python = ">=3.8" summary = "Jupyter protocol implementation and client libraries" groups = ["notebooks"] dependencies = [ + "importlib-metadata>=4.8.3; python_version < \"3.10\"", "jupyter-core!=5.0.*,>=4.12", "python-dateutil>=2.8.2", "pyzmq>=23.0", @@ -1062,6 +1116,7 @@ requires_python = ">=3.8" summary = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server" groups = ["notebooks"] dependencies = [ + "importlib-metadata>=4.8.3; python_version < \"3.10\"", "jupyter-server>=1.1.2", ] files = [ @@ -1125,6 +1180,8 @@ groups = ["notebooks"] dependencies = [ "async-lru>=1.0.0", "httpx>=0.25.0", + "importlib-metadata>=4.8.3; python_version < \"3.10\"", + "importlib-resources>=1.4; python_version < \"3.9\"", "ipykernel>=6.5.0", "jinja2>=3.0.3", "jupyter-core", @@ -1134,6 +1191,7 @@ dependencies = [ "notebook-shim>=0.2", "packaging", "setuptools>=40.1.0", + "tomli>=1.2.2; python_version < \"3.11\"", "tornado>=6.2.0", "traitlets", ] @@ -1161,6 +1219,7 @@ summary = "A set of server components for JupyterLab and JupyterLab like applica groups = ["notebooks"] dependencies = [ "babel>=2.10", + "importlib-metadata>=4.8.3; python_version < \"3.10\"", "jinja2>=3.0.3", "json5>=0.9.0", "jsonschema>=4.18.0", @@ -1184,6 +1243,23 @@ files = [ {file = "jupyterlab_widgets-3.0.11.tar.gz", hash = "sha256:dd5ac679593c969af29c9bed054c24f26842baa51352114736756bc035deee27"}, ] +[[package]] +name = "kaleido" +version = "0.2.1" +summary = "Static image export for web-based visualization libraries with zero dependencies" +groups = ["default"] +dependencies = [ + "pathlib; python_version < \"3.4\"", +] +files = [ + {file = "kaleido-0.2.1-py2.py3-none-macosx_10_11_x86_64.whl", hash = "sha256:ca6f73e7ff00aaebf2843f73f1d3bacde1930ef5041093fe76b83a15785049a7"}, + {file = "kaleido-0.2.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bb9a5d1f710357d5d432ee240ef6658a6d124c3e610935817b4b42da9c787c05"}, + {file = "kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:aa21cf1bf1c78f8fa50a9f7d45e1003c387bd3d6fe0a767cfbbf344b95bdc3a8"}, + {file = "kaleido-0.2.1-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:845819844c8082c9469d9c17e42621fbf85c2b237ef8a86ec8a8527f98b6512a"}, + {file = "kaleido-0.2.1-py2.py3-none-win32.whl", hash = "sha256:ecc72635860be616c6b7161807a65c0dbd9b90c6437ac96965831e2e24066552"}, + {file = "kaleido-0.2.1-py2.py3-none-win_amd64.whl", hash = "sha256:4670985f28913c2d063c5734d125ecc28e40810141bdb0a46f15b76c1d45f23c"}, +] + [[package]] name = "langcodes" version = "3.4.0" @@ -1399,6 +1475,7 @@ dependencies = [ "beautifulsoup4", "bleach!=5.0.0", "defusedxml", + "importlib-metadata>=3.6; python_version < \"3.10\"", "jinja2>=3.0", "jupyter-core>=4.7", "jupyterlab-pygments", @@ -1653,12 +1730,29 @@ files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +requires_python = ">=3.8" +summary = "A Python library to read/write Excel 2010 xlsx/xlsm files" +groups = ["dev"] +dependencies = [ + "et-xmlfile", +] +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + [[package]] name = "overrides" version = "7.7.0" requires_python = ">=3.6" summary = "A decorator to automatically detect mismatch when overriding a method." groups = ["notebooks"] +dependencies = [ + "typing; python_version < \"3.5\"", +] files = [ {file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"}, {file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"}, @@ -1682,6 +1776,7 @@ requires_python = ">=3.9" summary = "Powerful data structures for data analysis, time series, and statistics" groups = ["default"] dependencies = [ + "numpy>=1.22.4; python_version < \"3.11\"", "numpy>=1.23.2; python_version == \"3.11\"", "numpy>=1.26.0; python_version >= \"3.12\"", "python-dateutil>=2.8.2", @@ -2562,6 +2657,7 @@ dependencies = [ "jinja2", "langcodes<4.0.0,>=3.2.0", "murmurhash<1.1.0,>=0.28.0", + "numpy>=1.15.0; python_version < \"3.9\"", "numpy>=1.19.0; python_version >= \"3.9\"", "packaging>=20.0", "preshed<3.1.0,>=3.0.2", @@ -2654,6 +2750,8 @@ requires_python = ">=3.7" summary = "spaCy pipelines for pre-trained BERT and other transformers" groups = ["default"] dependencies = [ + "dataclasses<1.0,>=0.6; python_version < \"3.7\"", + "numpy>=1.15.0; python_version < \"3.9\"", "numpy>=1.19.0; python_version >= \"3.9\"", "spacy-alignments<1.0.0,>=0.7.2", "spacy<4.1.0,>=3.5.0", @@ -2812,14 +2910,18 @@ dependencies = [ "blis<0.8.0,>=0.7.8", "catalogue<2.1.0,>=2.0.4", "confection<1.0.0,>=0.0.1", + "contextvars<3,>=2.4; python_version < \"3.7\"", "cymem<2.1.0,>=2.0.2", + "dataclasses<1.0,>=0.6; python_version < \"3.7\"", "murmurhash<1.1.0,>=1.0.2", + "numpy>=1.15.0; python_version < \"3.9\"", "numpy>=1.19.0; python_version >= \"3.9\"", "packaging>=20.0", "preshed<3.1.0,>=3.0.2", "pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4", "setuptools", "srsly<3.0.0,>=2.4.0", + "typing-extensions<4.5.0,>=3.7.4.1; python_version < \"3.8\"", "wasabi<1.2.0,>=0.8.1", ] files = [ @@ -3133,6 +3235,7 @@ summary = "A lightweight console printing and formatting toolkit" groups = ["default"] dependencies = [ "colorama>=0.4.6; sys_platform == \"win32\" and python_version >= \"3.7\"", + "typing-extensions<4.5.0,>=3.7.4.1; python_version < \"3.8\"", ] files = [ {file = "wasabi-1.1.2-py3-none-any.whl", hash = "sha256:0a3f933c4bf0ed3f93071132c1b87549733256d6c8de6473c5f7ed2e171b5cf9"}, @@ -3144,6 +3247,9 @@ name = "wcwidth" version = "0.2.13" summary = "Measures the displayed width of unicode strings in a terminal" groups = ["notebooks"] +dependencies = [ + "backports-functools-lru-cache>=1.2.1; python_version < \"3.2\"", +] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, diff --git a/pyproject.toml b/pyproject.toml index 27cd55f..5a7bb6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "dash>=2.17.0", "dash-cytoscape>=1.0.1", "py4cytoscape>=1.9.0", + "kaleido==0.2.1", ] requires-python = ">=3.11" readme = "README.md" @@ -44,6 +45,7 @@ trials = [ ] dev = [ "cython>=3.0.10", + "openpyxl>=3.1.5", ] [tool.ruff] diff --git a/scripts/dash_timeline_static.py b/scripts/dash_timeline_static.py index 6a04ae6..5dc2372 100644 --- a/scripts/dash_timeline_static.py +++ b/scripts/dash_timeline_static.py @@ -3,11 +3,11 @@ import webbrowser from collections.abc import Collection, Iterable from threading import Thread from typing import Any, Final, cast - -import pandas as pd +from pathlib import Path # import dash_cytoscape as cyto import plotly.express as px +import plotly.io from dash import ( Dash, Input, @@ -22,16 +22,17 @@ from pandas import DataFrame from plotly.graph_objects import Figure import lang_main.io +from lang_main import model_loader as m_load from lang_main.analysis import graphs, tokens from lang_main.analysis.timeline import ( calc_delta_to_next_failure, filter_timeline_cands, ) from lang_main.constants import ( + MODEL_LOADER_MAP, NAME_DELTA_FEAT_TO_NEXT_FAILURE, NAME_DELTA_FEAT_TO_REPAIR, SAVE_PATH_FOLDER, - SPCY_MODEL, ) from lang_main.errors import EmptyEdgesError, EmptyGraphError from lang_main.pipelines.predefined import ( @@ -43,10 +44,16 @@ from lang_main.types import ( EntryPoints, HTMLColumns, HTMLTable, + LanguageModels, ObjectID, TimelineCandidates, ) +# ** model +SPACY_MODEL = m_load.instantiate_model( + model_load_map=MODEL_LOADER_MAP, + model=LanguageModels.SPACY, +) # ** data # p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve() p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST) @@ -62,17 +69,42 @@ rescaling_pipe = build_tk_graph_rescaling_pipe( exit_point=EntryPoints.TIMELINE_TK_GRAPH_RESCALED, save_result=False, ) -BASE_NETWORK_NAME: Final[str] = 'test_timeline' +BASE_NETWORK_NAME: Final[str] = 'timeline_candidates' # RENDER_FOLDER: Final[Path] = Path.cwd() / 'assets/' graph_render_pipe = build_tk_graph_render_pipe( with_subgraphs=False, base_network_name=BASE_NETWORK_NAME, ) # PTH_RENDERED_GRAPH = f'assets/{BASE_NETWORK_NAME}.svg' +PTH_RENDERED_TIMELINE = lang_main.io.get_entry_point( + SAVE_PATH_FOLDER, + 'chart_timeline', + file_ext='.svg', + check_existence=False, +) +PTH_TABLE_TIMELINE = lang_main.io.get_entry_point( + SAVE_PATH_FOLDER, + 'table_timeline', + file_ext='.xlsx', + check_existence=False, +) +PTH_RENDERED_DELTA_REPAIR = lang_main.io.get_entry_point( + SAVE_PATH_FOLDER, + 'chart_delta_repair', + file_ext='.svg', + check_existence=False, +) +PTH_TABLE_DELTA_REPAIR = lang_main.io.get_entry_point( + SAVE_PATH_FOLDER, + 'table_delta_repair', + file_ext='.xlsx', + check_existence=False, +) PTH_RENDERED_GRAPH = lang_main.io.get_entry_point( SAVE_PATH_FOLDER, BASE_NETWORK_NAME, file_ext='.svg', + check_existence=False, ) # NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]' @@ -168,7 +200,7 @@ graph_layout = html.Div( id='static-graph-img', alt='static rendered graph', style={ - 'width': 'auto', + 'width': '900px', 'height': 'auto', }, ), @@ -212,7 +244,27 @@ app.layout = html.Div( children=[ html.H3(id='object-text'), dcc.Dropdown(id='selector-candidates'), + html.Button( + 'Download Diagramm', + id='bt-dl-timeline', + style={ + 'marginLeft': 'auto', + 'width': '300px', + 'marginTop': '1em', + }, + ), + dcc.Download(id='dl-timeline'), dcc.Graph(id='figure-occurrences'), + html.Button( + 'Download Diagramm', + id='bt-dl-deltarepair', + style={ + 'marginLeft': 'auto', + 'width': '300px', + 'marginTop': '1em', + }, + ), + dcc.Download(id='dl-deltarepair'), dcc.Graph(id='figure-delta'), ] ), @@ -221,6 +273,16 @@ app.layout = html.Div( html.Div( [ html.H5('Überblick ähnlicher Vorgänge'), + dcc.Download(id='dl-table-timeline'), + html.Button( + 'Download Table', + id='bt-table-timeline', + style={ + 'marginLeft': 'auto', + 'width': '300px', + 'marginTop': '1em', + }, + ), dash_table.DataTable(id='table-candidates'), ], style={'paddingBottom': '1em'}, @@ -233,6 +295,16 @@ app.layout = html.Div( 'bis zum nächsten Ereignis' ) ), + dcc.Download(id='dl-table-deltarepair'), + html.Button( + 'Download Table', + id='bt-table-deltarepair', + style={ + 'marginLeft': 'auto', + 'width': '300px', + 'marginTop': '1em', + }, + ), dash_table.DataTable(id='table-best-actions'), ] ), @@ -368,6 +440,7 @@ def transform_to_HTML_table( date_cols: Iterable[str] | None = None, sorting_feature: str | None = None, sorting_ascending: bool = True, + save_path: Path | None = None, ) -> tuple[HTMLColumns, HTMLTable]: target_features = list(target_features) data = data.copy() @@ -383,6 +456,9 @@ def transform_to_HTML_table( columns = [{'name': col, 'id': col} for col in data.columns] table_data = data.to_dict('records') + if save_path is not None: + data.to_excel(save_path) + return columns, table_data @@ -410,6 +486,7 @@ def update_tables_candidates( date_cols=TABLE_FEATS_DATES, sorting_feature='ErstellungsDatum', sorting_ascending=True, + save_path=PTH_TABLE_TIMELINE, ) # df = df.filter(items=TABLE_FEATS_OVERVIEW, axis=1).sort_values( # by='ErstellungsDatum', ascending=True @@ -430,6 +507,7 @@ def update_tables_candidates( data=cands_best_actions, target_features=TABLE_FEATS_BEST_ACTIONS, date_cols=TABLE_FEATS_DATES, + save_path=PTH_TABLE_DELTA_REPAIR, ) return overview_cols, overview_table, best_actions_cols, best_actions_table @@ -457,7 +535,7 @@ def display_candidates_as_graph(index, obj_id): t1 = time.perf_counter() tk_graph_cands, _ = tokens.build_token_graph( data=df, - model=SPCY_MODEL, + model=SPACY_MODEL, target_feature='VorgangsBeschreibung', build_map=False, logging_graph=False, @@ -496,10 +574,58 @@ def display_candidates_as_graph(index, obj_id): Input('bt-reset', 'n_clicks'), prevent_initial_call=True, ) -def func(n_clicks): +def download_graph(_): return dcc.send_file(path=PTH_RENDERED_GRAPH) +@callback( + Output('dl-timeline', 'data'), + Input('bt-dl-timeline', 'n_clicks'), + State('figure-occurrences', 'figure'), + prevent_initial_call=True, +) +def download_timeline(_, fig: dict): + # add these lines before fig = go.Figure(fig_raw) + if 'rangeslider' in fig['layout']['xaxis']: + del fig['layout']['xaxis']['rangeslider']['yaxis'] + figure = Figure(fig) + figure.write_image(PTH_RENDERED_TIMELINE) + return dcc.send_file(path=PTH_RENDERED_TIMELINE) + + +@callback( + Output('dl-deltarepair', 'data'), + Input('bt-dl-deltarepair', 'n_clicks'), + State('figure-delta', 'figure'), + prevent_initial_call=True, +) +def download_delta_repair(_, fig: dict): + # add these lines before fig = go.Figure(fig_raw) + if 'rangeslider' in fig['layout']['xaxis']: + del fig['layout']['xaxis']['rangeslider']['yaxis'] + figure = Figure(fig) + figure.write_image(PTH_RENDERED_DELTA_REPAIR) + return dcc.send_file(path=PTH_RENDERED_DELTA_REPAIR) + + +@callback( + Output('dl-table-timeline', 'data'), + Input('bt-table-timeline', 'n_clicks'), + prevent_initial_call=True, +) +def download_table_timeline(_): + return dcc.send_file(path=PTH_TABLE_TIMELINE) + + +@callback( + Output('dl-table-deltarepair', 'data'), + Input('bt-table-deltarepair', 'n_clicks'), + prevent_initial_call=True, +) +def download_table_delta_repair(_): + return dcc.send_file(path=PTH_TABLE_DELTA_REPAIR) + + def _start_webbrowser(): host = '127.0.0.1' port = '8050' diff --git a/scripts/lang_main_config.toml b/scripts/lang_main_config.toml index 14699c6..77bd396 100644 --- a/scripts/lang_main_config.toml +++ b/scripts/lang_main_config.toml @@ -2,8 +2,10 @@ [paths] inputs = './inputs/' -results = './results/test_20240619/' -dataset = '../data/02_202307/Export4.csv' +results = './results/dummy_N_1000/' +dataset = '../data/Dummy_Dataset_N_1000.csv' +# results = './results/test_20240807/' +# dataset = '../data/02_202307/Export4.csv' #results = './results/Export7/' #dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv' #results = './results/Export7_trunc/' @@ -12,12 +14,12 @@ dataset = '../data/02_202307/Export4.csv' # only debugging features, production-ready pipelines should always # be fully executed [control] -preprocessing_skip = true -token_analysis_skip = true -graph_postprocessing_skip = true -graph_rescaling_skip = true +preprocessing_skip = false +token_analysis_skip = false +graph_postprocessing_skip = false +graph_rescaling_skip = false graph_static_rendering_skip = false -time_analysis_skip = true +time_analysis_skip = false #[export_filenames] #filename_cossim_filter_candidates = 'CosSim-FilterCandidates' @@ -34,7 +36,7 @@ threshold_amount_characters = 5 threshold_similarity = 0.8 [graph_postprocessing] -threshold_edge_weight = 150 +threshold_edge_weight = 1 [time_analysis.uniqueness] threshold_unique_texts = 4 diff --git a/src/lang_main/__init__.py b/src/lang_main/__init__.py index 6d1d346..f10c95c 100644 --- a/src/lang_main/__init__.py +++ b/src/lang_main/__init__.py @@ -18,7 +18,7 @@ p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler()) # ** lang-main config logging.Formatter.converter = gmtime -LOG_FMT: Final[str] = '%(asctime)s | %(module)s:%(levelname)s | %(message)s' +LOG_FMT: Final[str] = '%(asctime)s | lang_main:%(module)s:%(levelname)s | %(message)s' LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000' logging.basicConfig( stream=sys.stdout, diff --git a/src/lang_main/analysis/preprocessing.py b/src/lang_main/analysis/preprocessing.py index 9130bb1..b5a3eea 100644 --- a/src/lang_main/analysis/preprocessing.py +++ b/src/lang_main/analysis/preprocessing.py @@ -70,7 +70,7 @@ def load_raw_data( filepath_or_buffer=path, sep=';', encoding='cp1252', - parse_dates=date_cols, + parse_dates=list(date_cols), dayfirst=True, ) logger.info('Loaded dataset successfully.') @@ -278,7 +278,8 @@ def merge_similarity_dupl( return (merged_data,) -##################################################################### +# ** ################################################################################# +# TODO check removal def build_embedding_map( data: Series, model: GermanSpacyModel | SentenceTransformer, diff --git a/src/lang_main/analysis/timeline.py b/src/lang_main/analysis/timeline.py index 4e339a0..9339a7d 100644 --- a/src/lang_main/analysis/timeline.py +++ b/src/lang_main/analysis/timeline.py @@ -8,10 +8,13 @@ from tqdm.auto import tqdm # TODO: check deletion from lang_main.analysis.shared import ( candidates_by_index, entry_wise_cleansing, - pattern_escape_seq_sentences, similar_index_connection_graph, similar_index_groups, ) +from lang_main.constants import ( + NAME_DELTA_FEAT_TO_NEXT_FAILURE, + NAME_DELTA_FEAT_TO_REPAIR, +) from lang_main.loggers import logger_timeline as logger from lang_main.types import ( DataFrameTLFiltered, @@ -94,7 +97,7 @@ def calc_delta_to_repair( data: DataFrame, date_feature_start: str = 'ErstellungsDatum', date_feature_end: str = 'ErledigungsDatum', - name_delta_feature: str = 'delta_to_repair', + name_delta_feature: str = NAME_DELTA_FEAT_TO_REPAIR, convert_to_days: bool = True, ) -> tuple[DataFrame]: logger.info('Calculating time differences between start and end of operations...') @@ -316,7 +319,7 @@ def filter_timeline_cands( def calc_delta_to_next_failure( data: DataFrameTLFiltered, date_feature: str = 'ErstellungsDatum', - name_delta_feature: str = 'delta_to_next_failure', + name_delta_feature: str = NAME_DELTA_FEAT_TO_NEXT_FAILURE, convert_to_days: bool = True, ) -> DataFrameTLFiltered: data = data.copy() diff --git a/src/lang_main/analysis/tokens.py b/src/lang_main/analysis/tokens.py index aaf0534..7e6dedf 100644 --- a/src/lang_main/analysis/tokens.py +++ b/src/lang_main/analysis/tokens.py @@ -5,9 +5,6 @@ from typing import Literal, cast, overload from dateutil.parser import parse from pandas import DataFrame -from spacy.language import Language as GermanSpacyModel -from spacy.tokens.doc import Doc as SpacyDoc -from spacy.tokens.token import Token as SpacyToken from tqdm.auto import tqdm from lang_main.analysis.graphs import ( @@ -15,7 +12,12 @@ from lang_main.analysis.graphs import ( update_graph, ) from lang_main.loggers import logger_token_analysis as logger -from lang_main.types import PandasIndex +from lang_main.types import ( + PandasIndex, + SpacyDoc, + SpacyModel, + SpacyToken, +) # ** POS # POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'ADJ', 'VERB', 'AUX']) @@ -147,7 +149,7 @@ def add_doc_info_to_graph( @overload def build_token_graph( data: DataFrame, - model: GermanSpacyModel, + model: SpacyModel, *, target_feature: str = ..., weights_feature: str | None = ..., @@ -161,7 +163,7 @@ def build_token_graph( @overload def build_token_graph( data: DataFrame, - model: GermanSpacyModel, + model: SpacyModel, *, target_feature: str = ..., weights_feature: str | None = ..., @@ -174,7 +176,7 @@ def build_token_graph( def build_token_graph( data: DataFrame, - model: GermanSpacyModel, + model: SpacyModel, *, target_feature: str = 'entry', weights_feature: str | None = None, @@ -233,7 +235,7 @@ def build_token_graph( def build_token_graph_simple( data: DataFrame, - model: GermanSpacyModel, + model: SpacyModel, ) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]: graph = TokenGraph() model_input = cast(tuple[str], tuple(data['entry'].to_list())) @@ -264,7 +266,7 @@ def build_token_graph_simple( def build_token_graph_old( data: DataFrame, - model: GermanSpacyModel, + model: SpacyModel, ) -> tuple[TokenGraph]: # empty NetworkX directed graph # graph = nx.DiGraph() diff --git a/src/lang_main/constants.py b/src/lang_main/constants.py index 9d0e52b..88a789c 100644 --- a/src/lang_main/constants.py +++ b/src/lang_main/constants.py @@ -1,12 +1,19 @@ from pathlib import Path from typing import Final -import spacy -from sentence_transformers import SentenceTransformer -from spacy.language import Language as GermanSpacyModel - +# TODO check removal +# import spacy +# from sentence_transformers import SentenceTransformer +# from spacy.language import Language as GermanSpacyModel from lang_main import CONFIG, CYTO_PATH_STYLESHEET -from lang_main.types import CytoLayoutProperties, CytoLayouts, STFRDeviceTypes +from lang_main import model_loader as m_load +from lang_main.types import ( + CytoLayoutProperties, + CytoLayouts, + LanguageModels, + ModelLoaderMap, + STFRDeviceTypes, +) __all__ = [ 'CONFIG', @@ -38,14 +45,33 @@ SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip'] # ** models -# ** sentence_transformers +# ** loading +SPACY_MODEL_NAME: Final[str] = 'de_dep_news_trf' +STFR_MODEL_NAME: Final[str] = 'sentence-transformers/all-mpnet-base-v2' STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU -STFR_MODEL: Final[SentenceTransformer] = SentenceTransformer( - 'sentence-transformers/all-mpnet-base-v2', device=STFR_DEVICE -) +MODEL_LOADER_MAP: Final[ModelLoaderMap] = { + LanguageModels.SENTENCE_TRANSFORMER: { + 'func': m_load.load_sentence_transformer, + 'kwargs': { + 'model_name': STFR_MODEL_NAME, + 'device': STFR_DEVICE, + }, + }, + LanguageModels.SPACY: { + 'func': m_load.load_spacy, + 'kwargs': { + 'model_name': SPACY_MODEL_NAME, + }, + }, +} +# ** sentence_transformers + +# STFR_MODEL: Final[SentenceTransformer] = SentenceTransformer( +# 'sentence-transformers/all-mpnet-base-v2', device=STFR_DEVICE +# ) # ** spacy -SPCY_MODEL: Final[GermanSpacyModel] = spacy.load('de_dep_news_trf') +# SPCY_MODEL: Final[GermanSpacyModel] = spacy.load('de_dep_news_trf') # ** export # ** preprocessing @@ -82,6 +108,7 @@ CYTO_STYLESHEET_NAME: Final[str] = 'lang_main' CYTO_SELECTION_PROPERTY: Final[str] = 'node_selection' CYTO_NUMBER_SUBGRAPHS: Final[int] = 5 CYTO_ITER_NEIGHBOUR_DEPTH: Final[int] = 2 +CYTO_NETWORK_ZOOM_FACTOR: Final[float] = 0.96 # ** time_analysis.uniqueness THRESHOLD_UNIQUE_TEXTS: Final[int] = CONFIG['time_analysis']['uniqueness'][ diff --git a/src/lang_main/cytoscape_config/template_test.cys b/src/lang_main/cytoscape_config/template_test.cys index d245d1e..8cb5259 100644 Binary files a/src/lang_main/cytoscape_config/template_test.cys and b/src/lang_main/cytoscape_config/template_test.cys differ diff --git a/src/lang_main/io.py b/src/lang_main/io.py index 21322cd..402323e 100644 --- a/src/lang_main/io.py +++ b/src/lang_main/io.py @@ -93,9 +93,10 @@ def get_entry_point( saving_path: Path, filename: str, file_ext: str = '.pkl', + check_existence: bool = True, ) -> Path: entry_point_path = (saving_path / filename).with_suffix(file_ext) - if not entry_point_path.exists(): + if check_existence and not entry_point_path.exists(): raise FileNotFoundError( f'Could not find provided entry data under path: >>{entry_point_path}<<' ) diff --git a/src/lang_main/model_loader.py b/src/lang_main/model_loader.py new file mode 100644 index 0000000..fcac638 --- /dev/null +++ b/src/lang_main/model_loader.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from typing import Literal, overload + +import spacy +from sentence_transformers import SentenceTransformer + +from lang_main.types import ( + LanguageModels, + Model, + ModelLoaderMap, + SpacyModel, + STFRDeviceTypes, +) + + +@overload +def instantiate_model( + model_load_map: ModelLoaderMap, + model: Literal[LanguageModels.SENTENCE_TRANSFORMER], +) -> SentenceTransformer: ... + + +@overload +def instantiate_model( + model_load_map: ModelLoaderMap, + model: Literal[LanguageModels.SPACY], +) -> SpacyModel: ... + + +def instantiate_model( + model_load_map: ModelLoaderMap, + model: LanguageModels, +) -> Model: + if model not in model_load_map: + raise KeyError(f'Model >>{model}<< not known. Choose from: {model_load_map.keys()}') + builder_func = model_load_map[model]['func'] + func_kwargs = model_load_map[model]['kwargs'] + + return builder_func(**func_kwargs) + + +def load_spacy( + model_name: str, +) -> SpacyModel: + return spacy.load(model_name) + + +def load_sentence_transformer( + model_name: str, + device: STFRDeviceTypes, +) -> SentenceTransformer: + return SentenceTransformer(model_name_or_path=model_name, device=device) diff --git a/src/lang_main/pipelines/predefined.py b/src/lang_main/pipelines/predefined.py index f4d404f..c074ebe 100644 --- a/src/lang_main/pipelines/predefined.py +++ b/src/lang_main/pipelines/predefined.py @@ -1,5 +1,6 @@ from pathlib import Path +from lang_main import model_loader as m_load from lang_main.analysis import graphs from lang_main.analysis.preprocessing import ( analyse_feature, @@ -29,10 +30,9 @@ from lang_main.constants import ( DATE_COLS, FEATURE_NAME_OBJ_ID, MODEL_INPUT_FEATURES, + MODEL_LOADER_MAP, NAME_DELTA_FEAT_TO_REPAIR, SAVE_PATH_FOLDER, - SPCY_MODEL, - STFR_MODEL, THRESHOLD_AMOUNT_CHARACTERS, THRESHOLD_EDGE_WEIGHT, THRESHOLD_NUM_ACTIVITIES, @@ -43,7 +43,18 @@ from lang_main.constants import ( ) from lang_main.pipelines.base import Pipeline from lang_main.render import cytoscape as cyto -from lang_main.types import EntryPoints +from lang_main.types import EntryPoints, LanguageModels + +# ** Models +STFR_MODEL = m_load.instantiate_model( + model_load_map=MODEL_LOADER_MAP, + model=LanguageModels.SENTENCE_TRANSFORMER, +) + +SPACY_MODEL = m_load.instantiate_model( + model_load_map=MODEL_LOADER_MAP, + model=LanguageModels.SPACY, +) # ** pipeline configuration @@ -61,7 +72,7 @@ def build_base_target_feature_pipe() -> Pipeline: pipe_target_feat.add( entry_wise_cleansing, { - 'target_feature': ('VorgangsBeschreibung',), + 'target_features': ('VorgangsBeschreibung',), 'cleansing_func': clean_string_slim, }, save_result=True, @@ -106,7 +117,6 @@ def build_base_target_feature_pipe() -> Pipeline: # ** Merge duplicates def build_merge_duplicates_pipe() -> Pipeline: pipe_merge = Pipeline(name='Merge_Duplicates', working_dir=SAVE_PATH_FOLDER) - # pipe_merge.add(merge_similarity_dupl, save_result=True) pipe_merge.add( numeric_pre_filter_feature, { @@ -134,7 +144,7 @@ def build_tk_graph_pipe() -> Pipeline: pipe_token_analysis.add( build_token_graph, { - 'model': SPCY_MODEL, + 'model': SPACY_MODEL, 'target_feature': 'entry', 'weights_feature': 'num_occur', 'batch_idx_feature': 'batched_idxs', diff --git a/src/lang_main/render/cytoscape.py b/src/lang_main/render/cytoscape.py index d5201a9..f6a83dd 100644 --- a/src/lang_main/render/cytoscape.py +++ b/src/lang_main/render/cytoscape.py @@ -14,6 +14,7 @@ from lang_main.constants import ( CYTO_ITER_NEIGHBOUR_DEPTH, CYTO_LAYOUT_NAME, CYTO_LAYOUT_PROPERTIES, + CYTO_NETWORK_ZOOM_FACTOR, CYTO_NUMBER_SUBGRAPHS, CYTO_PATH_STYLESHEET, CYTO_SANDBOX_NAME, @@ -125,6 +126,17 @@ def reset_current_network_to_base() -> None: p4c.set_current_network(CYTO_BASE_NETWORK_NAME) +def fit_content( + zoom_factor: float = CYTO_NETWORK_ZOOM_FACTOR, + network_name: str = CYTO_BASE_NETWORK_NAME, +) -> None: + p4c.hide_all_panels() + p4c.fit_content(selected_only=False, network=network_name) + zoom_current = p4c.get_network_zoom(network=network_name) + zoom_new = zoom_current * zoom_factor + p4c.set_network_zoom_bypass(zoom_new, bypass=False, network=network_name) + + def export_network_to_image( filename: str, target_folder: Path = SAVE_PATH_FOLDER, @@ -156,9 +168,10 @@ def export_network_to_image( if filetype == 'SVG': text_as_font = False + # close non-necessary windows and fit graph in frame before image display + fit_content(network_name=network_name) # image is generated in sandbox directory and transferred to target destination # (preparation for remote instances of Cytoscape) - # TODO close non-necessary windows before image display p4c.export_image( filename=filename, type=filetype, @@ -168,7 +181,6 @@ def export_network_to_image( export_text_as_font=text_as_font, page_size=pdf_export_page_size, ) - # TODO change back to Cytoscape 3.10 and above # TODO remove if Cytoscape >= 3.10.* is running in container # p4c.export_image( # filename=filename, @@ -211,7 +223,7 @@ def layout_network( logger.debug('Applying layout to network...') p4c.set_layout_properties(layout_name, layout_properties) p4c.layout_network(layout_name=layout_name, network=network_name) - p4c.fit_content(selected_only=False, network=network_name) + fit_content(network_name=network_name) logger.debug('Layout application to network successful.') @@ -245,7 +257,7 @@ def apply_style_to_network( """ logger.debug('Applying style to network...') styles_avail = cast(list[str], p4c.get_visual_style_names()) - if CYTO_STYLESHEET_NAME not in styles_avail: + if style_name not in styles_avail: if not pth_to_stylesheet.exists(): # existence for standard path verified at import, but not for other # provided paths @@ -278,7 +290,7 @@ def apply_style_to_network( node_size_property, number_scheme=scheme, mapping_type='c', - style_name='lang_main', + style_name=style_name, default_number=min_node_size, ) p4c.set_node_size_mapping(**node_size_map) @@ -289,7 +301,7 @@ def apply_style_to_network( # p4c.set_node_size_bypass(nodes_SUID, new_sizes=min_node_size, network=network_name) # p4c.set_visual_style(style_name, network=network_name) # time.sleep(1) # if not waited image export could be without applied style - p4c.fit_content(selected_only=False, network=network_name) + fit_content(network_name=network_name) logger.debug('Style application to network successful.') @@ -384,7 +396,7 @@ def make_subnetwork( network=network_name, ) p4c.set_current_network(subnetwork_name) - p4c.fit_content(selected_only=False, network=subnetwork_name) + if export_image: time.sleep(1) export_network_to_image( diff --git a/src/lang_main/types.py b/src/lang_main/types.py index 3e7f21b..ebd5c60 100644 --- a/src/lang_main/types.py +++ b/src/lang_main/types.py @@ -1,5 +1,5 @@ import enum -from collections.abc import Hashable +from collections.abc import Callable, Hashable from typing import ( Any, Literal, @@ -10,9 +10,20 @@ from typing import ( import numpy as np from pandas import DataFrame +from sentence_transformers import SentenceTransformer +from spacy.language import Language as SpacyModel from spacy.tokens.doc import Doc as SpacyDoc +from spacy.tokens.token import Token as SpacyToken from torch import Tensor +__all__ = [ + 'SentenceTransformer', + 'SpacyModel', + 'SpacyDoc', + 'SpacyToken', + 'Tensor', +] + # ** logging class LoggingLevels(enum.IntEnum): @@ -23,6 +34,24 @@ class LoggingLevels(enum.IntEnum): CRITICAL = 50 +# ** models +class LanguageModels(enum.StrEnum): + SENTENCE_TRANSFORMER = enum.auto() + SPACY = enum.auto() + + +Model: TypeAlias = SentenceTransformer | SpacyModel +ModelLoaderFunc: TypeAlias = Callable[..., Model] + + +class ModelLoaderInfo(TypedDict): + func: ModelLoaderFunc + kwargs: dict[str, Any] + + +ModelLoaderMap: TypeAlias = dict[LanguageModels, ModelLoaderInfo] + + # ** devices class STFRDeviceTypes(enum.StrEnum): CPU = enum.auto() diff --git a/test-notebooks/image.png b/test-notebooks/image.png deleted file mode 100644 index c8863fd..0000000 Binary files a/test-notebooks/image.png and /dev/null differ