{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "9b6daaf7-0e46-4a4d-bfba-6433e41a767e", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "id": "63a28773-c5fe-4eea-906d-1d34c445ed43", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\foersterflorian\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n" ] }, { "data": { "text/plain": [ "[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph.graphml')]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "from ihm_analyse.lib import token_analysis as toka\n", "from ihm_analyse.lib.helpers import (\n", " save_pickle, \n", " load_pickle, \n", " create_saving_folder,\n", ")\n", "\n", "from sentence_transformers import SentenceTransformer\n", "import spacy\n", "from pathlib import Path\n", "import networkx as nx\n", "import sys\n", "import numpy as np\n", "\n", "import os\n", "from pathlib import Path\n", "\n", "import networkx as nx\n", "import numpy as np\n", "import pandas as pd\n", "\n", "from ihm_analyse import load_pickle\n", "from ihm_analyse.lib.preprocess import merge_similarity_dupl\n", "from ihm_analyse.lib.graphs import update_graph, get_graph_metadata\n", "\n", "\n", "str_path = os.getcwd()\n", "p = Path(str_path)\n", "folder = list((p / 'results' / 'test_new2').glob('*'))\n", "folder" ] }, { "cell_type": "code", "execution_count": 113, "id": "4ec20b56-521c-4c1c-82e0-af6a00232349", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Path >>./results/Export4/token_analysis/<< already exists and remained unchanged. If you want to overwrite this path, use parameter >>overwrite_existing<<.\n" ] } ], "source": [ "# constants and other pre-defined variables\n", "DATA_SET_ID = 'Export4'\n", "\n", "SAVE_PATH_FOLDER = f'./results/{DATA_SET_ID}/token_analysis/'\n", "create_saving_folder(saving_path_folder=SAVE_PATH_FOLDER)\n", "\n", "path = Path(SAVE_PATH_FOLDER)" ] }, { "cell_type": "code", "execution_count": 95, "id": "2ecb0043-0f6b-49ea-a142-44f9359e66ff", "metadata": {}, "outputs": [], "source": [ "nlp = spacy.load('de_dep_news_trf')" ] }, { "cell_type": "code", "execution_count": 96, "id": "e4a4b656-fc1b-4d00-a357-4d66d10e24ff", "metadata": {}, "outputs": [], "source": [ "test_string = \"\"\"\n", "Das ist ein Test mit mehreren Sätzen. Ich bin so gut aufgelegt, dass ich jetzt einfach die Waschmaschine reparieren muss. 
\n", "Denn die Waschmaschine zu reparieren, ist eine Lebensaufgabe.\n", "Und in diesem Leben schreibe ich mehrfache Anwendungen, weil ich noch in der Lagerhalle einkaufen muss. Das wird er sicher noch tun müssen.\n", "Die Wartungsaufgabe wurde an Herrn Müller übertragen.\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 97, "id": "83c9c539-87ca-4217-b859-1240faeabfd4", "metadata": {}, "outputs": [], "source": [ "doc = nlp(test_string)" ] }, { "cell_type": "code", "execution_count": 98, "id": "ed0a59d0-bcfa-4c5b-9d56-1e826f93e565", "metadata": {}, "outputs": [], "source": [ "G = nx.DiGraph()" ] }, { "cell_type": "code", "execution_count": 99, "id": "b86d898e-5736-4307-8564-e2cd3680adf2", "metadata": {}, "outputs": [], "source": [ "toka.add_doc_info_to_graph(\n", " graph=G,\n", " doc=doc,\n", " weight=10,\n", ")" ] }, { "cell_type": "code", "execution_count": 100, "id": "a92a3ca6-383c-455e-982a-e9f5c39b91ac", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AdjacencyView({'Test': {'Satz': {'weight': 20}}, 'Satz': {}, 'auflegen': {'Waschmaschine': {'weight': 10}}, 'Waschmaschine': {'Lebensaufgabe': {'weight': 10}}, 'reparieren': {'Waschmaschine': {'weight': 20}}, 'Lebensaufgabe': {}, 'schreiben': {'Leben': {'weight': 10}, 'Anwendung': {'weight': 10}, 'Lagerhalle': {'weight': 10}}, 'Leben': {}, 'Anwendung': {}, 'Lagerhalle': {}, 'einkaufen': {'Lagerhalle': {'weight': 10}}, 'Wartungsaufgabe': {'Herr': {'weight': 10}, 'Müller': {'weight': 10}}, 'Herr': {'Müller': {'weight': 20}}, 'Müller': {}, 'übertragen': {'Herr': {'weight': 10}, 'Müller': {'weight': 10}}})" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "G.adj" ] }, { "cell_type": "code", "execution_count": 101, "id": "0d2a115b-ad3f-483e-a297-b00e2a41cae1", "metadata": {}, "outputs": [], "source": [ "path_to_graph_export = SAVE_PATH_FOLDER + 'Directed_Graph_test3.graphml'\n", "nx.write_graphml(G, path_to_graph_export)" ] }, { "cell_type": "code", 
"execution_count": 102, "id": "6af41720-3371-44d3-84c8-59e2ad767be0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestSatzauflegenWaschmaschinereparierenLebensaufgabeschreibenLebenAnwendungLagerhalleeinkaufenWartungsaufgabeHerrMüllerübertragen
Test0200000000000000
Satz000000000000000
auflegen0001000000000000
Waschmaschine0000010000000000
reparieren0002000000000000
Lebensaufgabe000000000000000
schreiben000000010101000000
Leben000000000000000
Anwendung000000000000000
Lagerhalle000000000000000
einkaufen0000000001000000
Wartungsaufgabe00000000000010100
Herr0000000000000200
Müller000000000000000
übertragen00000000000010100
\n", "
" ], "text/plain": [ " Test Satz auflegen Waschmaschine reparieren \\\n", "Test 0 20 0 0 0 \n", "Satz 0 0 0 0 0 \n", "auflegen 0 0 0 10 0 \n", "Waschmaschine 0 0 0 0 0 \n", "reparieren 0 0 0 20 0 \n", "Lebensaufgabe 0 0 0 0 0 \n", "schreiben 0 0 0 0 0 \n", "Leben 0 0 0 0 0 \n", "Anwendung 0 0 0 0 0 \n", "Lagerhalle 0 0 0 0 0 \n", "einkaufen 0 0 0 0 0 \n", "Wartungsaufgabe 0 0 0 0 0 \n", "Herr 0 0 0 0 0 \n", "Müller 0 0 0 0 0 \n", "übertragen 0 0 0 0 0 \n", "\n", " Lebensaufgabe schreiben Leben Anwendung Lagerhalle \\\n", "Test 0 0 0 0 0 \n", "Satz 0 0 0 0 0 \n", "auflegen 0 0 0 0 0 \n", "Waschmaschine 10 0 0 0 0 \n", "reparieren 0 0 0 0 0 \n", "Lebensaufgabe 0 0 0 0 0 \n", "schreiben 0 0 10 10 10 \n", "Leben 0 0 0 0 0 \n", "Anwendung 0 0 0 0 0 \n", "Lagerhalle 0 0 0 0 0 \n", "einkaufen 0 0 0 0 10 \n", "Wartungsaufgabe 0 0 0 0 0 \n", "Herr 0 0 0 0 0 \n", "Müller 0 0 0 0 0 \n", "übertragen 0 0 0 0 0 \n", "\n", " einkaufen Wartungsaufgabe Herr Müller übertragen \n", "Test 0 0 0 0 0 \n", "Satz 0 0 0 0 0 \n", "auflegen 0 0 0 0 0 \n", "Waschmaschine 0 0 0 0 0 \n", "reparieren 0 0 0 0 0 \n", "Lebensaufgabe 0 0 0 0 0 \n", "schreiben 0 0 0 0 0 \n", "Leben 0 0 0 0 0 \n", "Anwendung 0 0 0 0 0 \n", "Lagerhalle 0 0 0 0 0 \n", "einkaufen 0 0 0 0 0 \n", "Wartungsaufgabe 0 0 10 10 0 \n", "Herr 0 0 0 20 0 \n", "Müller 0 0 0 0 0 \n", "übertragen 0 0 10 10 0 " ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = nx.to_pandas_adjacency(G, dtype=np.uint64)\n", "df" ] }, { "cell_type": "code", "execution_count": 103, "id": "f1be6ab2-a5b5-43ff-93a8-88f874d47e47", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.token_analysis:Successfully converted graph to undirected edges. 
There are 13 edges in the graph.\n" ] } ], "source": [ "G_undir = toka.convert_graph_to_undirected(graph=G)" ] }, { "cell_type": "code", "execution_count": 89, "id": "93310ec2-72c5-4601-8403-ef023c72c2bb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestSatzauflegenWaschmaschinereparierenLebensaufgabeschreibenLebenAnwendungLagerhalleeinkaufenWartungsaufgabeHerrMüllerübertragen
Test0200000000000000
Satz2000000000000000
auflegen0001000000000000
Waschmaschine001002010000000000
reparieren0002000000000000
Lebensaufgabe0001000000000000
schreiben000000010101000000
Leben0000001000000000
Anwendung0000001000000000
Lagerhalle00000010000100000
einkaufen0000000001000000
Wartungsaufgabe00000000000010100
Herr000000000001002010
Müller000000000001020010
übertragen00000000000010100
\n", "
" ], "text/plain": [ " Test Satz auflegen Waschmaschine reparieren \\\n", "Test 0 20 0 0 0 \n", "Satz 20 0 0 0 0 \n", "auflegen 0 0 0 10 0 \n", "Waschmaschine 0 0 10 0 20 \n", "reparieren 0 0 0 20 0 \n", "Lebensaufgabe 0 0 0 10 0 \n", "schreiben 0 0 0 0 0 \n", "Leben 0 0 0 0 0 \n", "Anwendung 0 0 0 0 0 \n", "Lagerhalle 0 0 0 0 0 \n", "einkaufen 0 0 0 0 0 \n", "Wartungsaufgabe 0 0 0 0 0 \n", "Herr 0 0 0 0 0 \n", "Müller 0 0 0 0 0 \n", "übertragen 0 0 0 0 0 \n", "\n", " Lebensaufgabe schreiben Leben Anwendung Lagerhalle \\\n", "Test 0 0 0 0 0 \n", "Satz 0 0 0 0 0 \n", "auflegen 0 0 0 0 0 \n", "Waschmaschine 10 0 0 0 0 \n", "reparieren 0 0 0 0 0 \n", "Lebensaufgabe 0 0 0 0 0 \n", "schreiben 0 0 10 10 10 \n", "Leben 0 10 0 0 0 \n", "Anwendung 0 10 0 0 0 \n", "Lagerhalle 0 10 0 0 0 \n", "einkaufen 0 0 0 0 10 \n", "Wartungsaufgabe 0 0 0 0 0 \n", "Herr 0 0 0 0 0 \n", "Müller 0 0 0 0 0 \n", "übertragen 0 0 0 0 0 \n", "\n", " einkaufen Wartungsaufgabe Herr Müller übertragen \n", "Test 0 0 0 0 0 \n", "Satz 0 0 0 0 0 \n", "auflegen 0 0 0 0 0 \n", "Waschmaschine 0 0 0 0 0 \n", "reparieren 0 0 0 0 0 \n", "Lebensaufgabe 0 0 0 0 0 \n", "schreiben 0 0 0 0 0 \n", "Leben 0 0 0 0 0 \n", "Anwendung 0 0 0 0 0 \n", "Lagerhalle 10 0 0 0 0 \n", "einkaufen 0 0 0 0 0 \n", "Wartungsaufgabe 0 0 10 10 0 \n", "Herr 0 10 0 20 10 \n", "Müller 0 10 20 0 10 \n", "übertragen 0 0 10 10 0 " ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = nx.to_pandas_adjacency(G_undir, dtype=np.uint64)\n", "df" ] }, { "cell_type": "code", "execution_count": 92, "id": "2eae6efa-22e4-4bbc-9d00-7bd11a934773", "metadata": {}, "outputs": [], "source": [ "path_to_graph_export = SAVE_PATH_FOLDER + 'Undirected_Graph_test3.graphml'\n", "nx.write_graphml(G_undir, path_to_graph_export)" ] }, { "cell_type": "code", "execution_count": null, "id": "58aa35b3-c842-4120-b82c-ce06172ab031", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": 
"6bb0c1d7-0815-465c-bb4a-ed51bb31b436", "metadata": {}, "source": [ "---" ] }, { "cell_type": "code", "execution_count": 114, "id": "14f0ba09-5a47-44ec-b314-0e9a3aef11e3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[WindowsPath('results/Export4/Pipe-Preprocess1_Step-3_remove_NA.pkl'),\n", " WindowsPath('results/Export4/Pipe-Preprocess1_Step-5_analyse_feature.pkl'),\n", " WindowsPath('results/Export4/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n", " WindowsPath('results/Export4/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n", " WindowsPath('results/Export4/dupl_idx_pairs.pkl'),\n", " WindowsPath('results/Export4/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(path.parents[0].glob('*.pkl'))" ] }, { "cell_type": "code", "execution_count": 115, "id": "016d60d7-072e-4526-a12a-5254c1b93b39", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Loaded file successfully.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung392163[301, 304, 305, 313, 314, 323, 329, 331, 332, ...27
131Tägliche Überprüfung der Ölabscheider371619[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der WC-Anlagen371265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2681vom Eisenkernvorrichtung (Teil vom Kettenlauf ...1361[515]1
2680Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...2601[311]1
2679Zahnräder der Laufkatze verschlissen Ersatztei...1701[415]1
2677Schalter für Bühne Schwenken abgerissen, bitte...1261[323]1
2676Docke angefahren!171[176]1
\n", "

5090 rows × 5 columns

\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 2163 \n", "131 Tägliche Überprüfung der Ölabscheider 37 1619 \n", "160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n", "140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n", "... ... ... ... \n", "2681 vom Eisenkernvorrichtung (Teil vom Kettenlauf ... 136 1 \n", "2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n", "2676 Docke angefahren! 17 1 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n", "33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 27 \n", "131 [0, 970, 2134, 2137] 4 \n", "160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n", "140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n", "... ... ... \n", "2681 [515] 1 \n", "2680 [311] 1 \n", "2679 [415] 1 \n", "2677 [323] 1 \n", "2676 [176] 1 \n", "\n", "[5090 rows x 5 columns]" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prep_data = load_pickle(path='./results/Export4/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')\n", "prep_data" ] }, { "cell_type": "code", "execution_count": 116, "id": "ab083fac-cd74-4049-8d8e-29e8edbe1cba", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrynum_occur
162Tägliche Wartungstätigkeiten nach Vorgabe des ...92592
33Wöchentliche Sichtkontrolle / Reinigung2163
131Tägliche Überprüfung der Ölabscheider1619
160Wöchentliche Kontrolle der WC-Anlagen1265
140Halbjährliche Kontrolle des Stabbreithalters687
.........
2681vom Eisenkernvorrichtung (Teil vom Kettenlauf ...1
2680Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...1
2679Zahnräder der Laufkatze verschlissen Ersatztei...1
2677Schalter für Bühne Schwenken abgerissen, bitte...1
2676Docke angefahren!1
\n", "

5090 rows × 2 columns

\n", "
" ], "text/plain": [ " entry num_occur\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 92592\n", "33 Wöchentliche Sichtkontrolle / Reinigung 2163\n", "131 Tägliche Überprüfung der Ölabscheider 1619\n", "160 Wöchentliche Kontrolle der WC-Anlagen 1265\n", "140 Halbjährliche Kontrolle des Stabbreithalters 687\n", "... ... ...\n", "2681 vom Eisenkernvorrichtung (Teil vom Kettenlauf ... 1\n", "2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 1\n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 1\n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 1\n", "2676 Docke angefahren! 1\n", "\n", "[5090 rows x 2 columns]" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "description_entries = prep_data[['entry', 'num_occur']]\n", "description_entries" ] }, { "cell_type": "code", "execution_count": 117, "id": "7b3a2590-9629-4b2d-9753-5573b6ad0bdb", "metadata": {}, "outputs": [], "source": [ "data_token_analysis = description_entries.iloc[:1000,:].copy()\n", "data_token_analysis = description_entries.copy()" ] }, { "cell_type": "code", "execution_count": 118, "id": "4f97af36-e386-4009-b4ee-c4c76bfb2a79", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████████████████████████████████████████████████████████| 5090/5090 [03:58<00:00, 21.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.token_analysis:Graph properties: 6383 Nodes, 19489 Edges\n", "INFO:ihm_analyse.token_analysis:Node memory: 393.35 KB\n", "INFO:ihm_analyse.token_analysis:Edge memory: 1065.80 KB\n", "INFO:ihm_analyse.token_analysis:Total memory: 1459.16 KB\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "graph = toka.build_token_graph(data=data_token_analysis, model=nlp)" ] }, { "cell_type": "code", "execution_count": 123, "id": "8e3f1e74-4c74-4439-81b3-bcf9e4a522eb", "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.token_analysis:Successfully converted graph to one with undirected edges.\n", "INFO:ihm_analyse.token_analysis:Undirected Graph properties: 6383 Nodes, 18977 Edges\n" ] } ], "source": [ "path_to_graph_dir_export = SAVE_PATH_FOLDER + 'Directed_Graph_full.graphml'\n", "nx.write_graphml(graph, path_to_graph_dir_export)\n", "path_to_graph_undir_export = SAVE_PATH_FOLDER + 'Undirected_Graph_full.graphml'\n", "graph_undir = toka.convert_graph_to_undirected(graph=graph)\n", "nx.write_graphml(graph_undir, path_to_graph_undir_export)" ] }, { "cell_type": "code", "execution_count": 110, "id": "88bb99ca-d3f3-42b5-a993-423bda6f5d8e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6383" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(graph.nodes)" ] }, { "cell_type": "code", "execution_count": 111, "id": "5a850568-e4ef-45ad-9d95-4b70627a866e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "19489" ] }, "execution_count": 111, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(graph.edges)" ] }, { "cell_type": "code", "execution_count": null, "id": "5d9ec266-1961-4b5b-8d48-7cce48808d09", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "f665fa3a-9622-4cd0-bb7a-122d2c2e2971", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "id": "54b60d01-67ca-4ed7-a0ea-390be8676649", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n", "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu\n" ] } ], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "e9372ae3-8a9f-4f27-bd22-6d7e7ff8de9a", "metadata": {}, "outputs": [], "source": [] }, { 
"cell_type": "code", "execution_count": 16, "id": "760474bb-4041-440c-a6c7-2f05a53ba990", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph.graphml'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Token_Analysis-TokenGraph.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph-filtered.graphml'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Token_Analysis-TokenGraph-filtered.pickle')]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "from pathlib import Path\n", "\n", "from ihm_analyse import SAVE_PATH_FOLDER, THRESHOLD_EDGE_WEIGHT, TokenGraph, load_pickle\n", "from ihm_analyse.predefined_pipes import pipe_token_analysis\n", "from ihm_analyse.lib.helpers import load_toml_config\n", "\n", "str_path = os.getcwd()\n", "p = Path(str_path)\n", "folder 
= list((p / 'results' / 'test_new2').glob('*'))\n", "folder" ] }, { "cell_type": "code", "execution_count": 2, "id": "e340f859-173e-4225-86ed-4b59a2e9ee41", "metadata": {}, "outputs": [], "source": [ "def run_token_analysis(\n", "    preprocessed_data,\n", "):\n", "    \"\"\"Run the token-analysis pipeline and persist the resulting graphs.\n", "\n", "    Feeds ``preprocessed_data`` as the single starting value into\n", "    ``pipe_token_analysis``, saves the returned graph (export with\n", "    ``directed=False`` plus pickle) under ``SAVE_PATH_FOLDER``, then derives a\n", "    variant filtered by ``THRESHOLD_EDGE_WEIGHT`` and by node degree 1 and\n", "    saves it under the ``TokenGraph-filtered`` names.\n", "\n", "    Returns the tuple ``(token_graph, tk_graph_filtered)``.\n", "    \"\"\"\n", "    (token_graph,) = pipe_token_analysis.run(starting_values=(preprocessed_data,))\n", "    # persist the unfiltered graph exactly once: graph export + pickle\n", "    token_graph.save_graph(SAVE_PATH_FOLDER, directed=False)\n", "    token_graph.to_pickle(SAVE_PATH_FOLDER,\n", "                          filename=f'{pipe_token_analysis.name}-TokenGraph')\n", "    # filter graph by edge weight and remove single nodes (no connection)\n", "    tk_graph_filtered = token_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT)\n", "    tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1)\n", "    tk_graph_filtered.save_graph(SAVE_PATH_FOLDER,\n", "                                 filename='TokenGraph-filtered',\n", "                                 directed=False)\n", "    tk_graph_filtered.to_pickle(SAVE_PATH_FOLDER,\n", "                                filename=f'{pipe_token_analysis.name}-TokenGraph-filtered')\n", "\n", "    return token_graph, tk_graph_filtered" ] }, { "cell_type": "code", "execution_count": null, "id": "d4b0937b-620c-4be1-9dd2-cff2fd1c2ef3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 19, "id": "16f2ce70-56b4-4fac-8508-07abd739df1d", "metadata": {}, "outputs": [], "source": [ "str_path = os.getcwd()\n", "p = Path(str_path)\n", "config_p = p / 'ihm_analyse' / 'config.toml'" ] }, { "cell_type": "code", "execution_count": 20, "id": "dc523d1d-c41d-4e87-8364-f8c8da788c3c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/ihm_analyse/config.toml')" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config_p" ] }, { "cell_type": "code", "execution_count": 22, "id": "604d03c4-046f-4310-b03c-e16fce277cc3", 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n" ] }, { "data": { "text/plain": [ "{'common': {'paths': {'results': './results/test_new2/',\n", " 'dataset': './01_2_Rohdaten_neu/Export4.csv'}},\n", " 'export_filenames': {'filename_cossim_filter_candidates': 'CosSim-FilterCandidates'},\n", " 'preprocess': {'date_cols': ['VorgangsDatum',\n", " 'ErledigungsDatum',\n", " 'Arbeitsbeginn',\n", " 'ErstellungsDatum'],\n", " 'threshold_amount_characters': 5,\n", " 'threshold_similarity': 0.8},\n", " 'token_analysis': {'threshold_edge_weight': 150}}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conf = load_toml_config(config_p)\n", "conf" ] }, { "cell_type": "code", "execution_count": 24, "id": "d6cce369-16d4-4fb4-a288-2f05c28404ea", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "WindowsPath('A:/Arbeitsaufgaben/Instandhaltung')" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p" ] }, { "cell_type": "code", "execution_count": 28, "id": "4ec47f8a-47ca-43f8-90c2-3f6d03d511cb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "WindowsPath('results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle')" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filename = r'*Pipe-Merge_Duplicates_Step-1*'\n", "folder = list(SAVE_PATH_FOLDER.glob(filename))[0]\n", "folder" ] }, { "cell_type": "code", "execution_count": null, "id": "8b3a098b-62af-4278-a1af-29602195872a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "5bddaca9-9e7d-4af5-9dbe-ffb9e985a846", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "id": "16832937-f237-4938-b698-423e8844331f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "INFO:ihm_analyse.helpers:Loaded file successfully.\n" ] } ], "source": [ "# dataset: `folder` is the single matched pickle path here, not a list\n", "res = load_pickle(folder)\n", "data = res[0]" ] }, { "cell_type": "code", "execution_count": 7, "id": "75410ca5-fb20-4d16-8d9c-ed82b7f918c1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung393111[301, 304, 305, 313, 314, 323, 329, 331, 332, ...74
131Tägliche Überprüfung der Ölabscheider371619[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der WC-Anlagen371265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2680Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...2601[311]1
2679Zahnräder der Laufkatze verschlissen Ersatztei...1701[415]1
2677Schalter für Bühne Schwenken abgerissen, bitte...1261[323]1
2676Docke angefahren!171[176]1
6799Befestigung Deckel für Batteriefach defekt ...1072[306, 326]2
\n", "

4582 rows × 5 columns

\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n", "131 Tägliche Überprüfung der Ölabscheider 37 1619 \n", "160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n", "140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n", "... ... ... ... \n", "2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n", "2676 Docke angefahren! 17 1 \n", "6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n", "33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n", "131 [0, 970, 2134, 2137] 4 \n", "160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n", "140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n", "... ... ... 
\n", "2680 [311] 1 \n", "2679 [415] 1 \n", "2677 [323] 1 \n", "2676 [176] 1 \n", "6799 [306, 326] 2 \n", "\n", "[4582 rows x 5 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 8, "id": "77e86a89-8d5e-4ac7-8d97-4625dc01c0ad", "metadata": {}, "outputs": [], "source": [ "# Number of rows passed on to the token analysis; None keeps the full data set\n", "# (use a small number such as 20 for a quick trial run)\n", "n_rows = None\n", "preprocessed_data_trunc = data[['entry', 'num_occur']].iloc[:n_rows, :].copy()" ] }, { "cell_type": "code", "execution_count": 9, "id": "4db4a705-c277-4c10-b9e4-981ed838c3da", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.pipelines:Starting processing pipeline >>Token_Analysis<<...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████████████████████████████████████████████████████████| 4582/4582 [03:36<00:00, 21.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.graphs:Graph properties: 6200 Nodes, 18770 Edges\n", "INFO:ihm_analyse.graphs:Node memory: 381.88 KB\n", "INFO:ihm_analyse.graphs:Edge memory: 1026.48 KB\n", "INFO:ihm_analyse.graphs:Total memory: 1408.36 KB\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.graphs:Successfully converted graph to one with undirected edges.\n", "INFO:ihm_analyse.graphs:Graph properties: 6200 Nodes, 18297 Edges\n", "INFO:ihm_analyse.graphs:Node memory: 381.88 KB\n", "INFO:ihm_analyse.graphs:Edge memory: 1000.62 KB\n", "INFO:ihm_analyse.graphs:Total memory: 1382.50 KB\n", "INFO:ihm_analyse.graphs:Graph properties: 6200 Nodes, 18297 Edges\n", "INFO:ihm_analyse.graphs:Node memory: 381.88 KB\n", "INFO:ihm_analyse.graphs:Edge memory: 1000.62 KB\n", "INFO:ihm_analyse.graphs:Total memory: 1382.50 KB\n", "INFO:ihm_analyse.helpers:Saved file successfully under
results\\test_new2\\Pipe-Token_Analysis_Step-1_build_token_graph.pickle\n", "INFO:ihm_analyse.pipelines:Processing pipeline >>Token_Analysis<< successfully ended.\n", "INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under results\\test_new2\\TokenGraph.graphml.\n", "INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Token_Analysis-TokenGraph.pickle\n", "INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under results\\test_new2\\TokenGraph.graphml.\n", "INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Token_Analysis-TokenGraph.pickle\n", "INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under results\\test_new2\\TokenGraph-filtered.graphml.\n", "INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Token_Analysis-TokenGraph-filtered.pickle\n" ] } ], "source": [ "tk_graph, tk_graph_filtered = run_token_analysis(preprocessed_data_trunc)" ] }, { "cell_type": "code", "execution_count": 10, "id": "bce025fc-f7c5-4dc9-aecf-111a9970a658", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TokenGraph(name: TokenGraph, number of nodes: 6200, number of edges: 18770)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tk_graph" ] }, { "cell_type": "code", "execution_count": 29, "id": "0f05ead2-1223-4877-961b-f76fe835fbea", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num_nodes': 6200,\n", " 'num_edges': 18770,\n", " 'min_edge_weight': 1,\n", " 'max_edge_weight': 92690,\n", " 'node_memory': 391043,\n", " 'edge_memory': 1051120,\n", " 'total_memory': 1442163}" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tk_graph.metadata_directed" ] }, { "cell_type": "code", "execution_count": 30, "id": "c607384d-9636-4bd1-9271-5bc1f63b0f7d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num_nodes': 6200,\n", " 'num_edges': 18297,\n", " 
'min_edge_weight': 1,\n", " 'max_edge_weight': 92690,\n", " 'node_memory': 391043,\n", " 'edge_memory': 1024632,\n", " 'total_memory': 1415675}" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tk_graph.metadata_undirected" ] }, { "cell_type": "code", "execution_count": 15, "id": "21281f47-6f01-42e6-ad23-10f408e017f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TokenGraph(name: TokenGraph, number of nodes: 147, number of edges: 179)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tk_graph_filtered" ] }, { "cell_type": "code", "execution_count": 13, "id": "b58c5a82-e1d0-4c96-abe6-8e44e23aa50b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num_nodes': 147,\n", " 'num_edges': 177,\n", " 'min_edge_weight': 153,\n", " 'max_edge_weight': 92690,\n", " 'node_memory': 9487,\n", " 'edge_memory': 9912,\n", " 'total_memory': 19399}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tk_graph_filtered.metadata_undirected" ] }, { "cell_type": "code", "execution_count": null, "id": "73a52733-3249-4c58-a6c3-412c5659911a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "580aca34-5d07-41bf-838d-8d299a107543", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 11, "id": "4ce0491c-2c2b-472f-8974-5af9b8660a37", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n", " 
WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph.graphml')]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "folder = list((p / 'results' / 'test_new2').glob('*'))\n", "folder" ] }, { "cell_type": "code", "execution_count": 3, "id": "ddb120e5-f34c-4ed5-8cdd-d2d30c3436f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select the pickled token graph by file name rather than by list position (the listing\n", "# changes whenever new files are written to the folder). `p` stays the base directory,\n", "# so the listing cell above can be re-run.\n", "token_graph_file = 'Pipe-Token_Analysis_Step-1_build_token_graph.pickle'\n", "token_graph_path = next((f for f in folder if f.name == token_graph_file), None)\n", "assert token_graph_path is not None, f'{token_graph_file} not found in results folder'\n", "token_graph_path" ] }, { "cell_type": "code", "execution_count": 4, "id": "3af49383-394f-4a6b-bf11-4da7f4474a6c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Loaded file successfully.\n" ] }, { "data": { "text/plain": [ "(,)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret = load_pickle(token_graph_path)\n", "ret" ] }, { "cell_type": "code", "execution_count": 5, "id": "390e5dd4-b3ec-4f7e-be0f-639afaa6fba6", "metadata": {}, "outputs": [], "source": [ "tk_graph = ret[0]" ] }, { "cell_type": "code", "execution_count": 9, "id": "967b49c3-11cc-429d-937a-8b746ee40799", "metadata": { "scrolled": true }, "outputs": [], "source": [
"# Degree lookup of the token graph, reused by the cells below\n", "deg_view = tk_graph.degree" ] }, { "cell_type": "code", "execution_count": 14, "id": "6bca0ef4-040c-47f6-8855-7e51ce67d4a5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8\n", "20\n", "2\n", "23\n", "47\n", "123\n" ] } ], "source": [ "# Print the degree of the first six nodes as a quick sanity check\n", "for node in list(tk_graph.nodes)[:6]:\n", "    print(deg_view[node])" ] }, { "cell_type": "code", "execution_count": 12, "id": "753217df-a07c-43b5-8feb-eeb2976b9d34", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "deg_view['Wartungstätigkeit']" ] }, { "cell_type": "code", "execution_count": null, "id": "d34e34cf-3dc3-4df7-9758-b84347896536", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }