2276 lines
73 KiB
Plaintext
2276 lines
73 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "9b6daaf7-0e46-4a4d-bfba-6433e41a767e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%load_ext autoreload\n",
|
||
"%autoreload 2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "63a28773-c5fe-4eea-906d-1d34c445ed43",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\foersterflorian\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph.graphml')]"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# --- standard library ---\n",
|
||
"import os\n",
|
||
"import sys\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"# --- third-party ---\n",
|
||
"import networkx as nx\n",
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"import spacy\n",
|
||
"from sentence_transformers import SentenceTransformer\n",
|
||
"\n",
|
||
"# --- project-local ---\n",
|
||
"from ihm_analyse.lib import token_analysis as toka\n",
|
||
"from ihm_analyse.lib.helpers import (\n",
|
||
"    save_pickle,\n",
|
||
"    load_pickle,\n",
|
||
"    create_saving_folder,\n",
|
||
")\n",
|
||
"# NOTE: keep this below the helpers import - being the later import, it is the `load_pickle` binding left in scope\n",
|
||
"from ihm_analyse import load_pickle\n",
|
||
"from ihm_analyse.lib.preprocess import merge_similarity_dupl\n",
|
||
"from ihm_analyse.lib.graphs import update_graph, get_graph_metadata\n",
|
||
"\n",
|
||
"\n",
|
||
"# list every entry of the 'results/test_new2' folder below the current working directory\n",
|
||
"str_path = os.getcwd()\n",
|
||
"p = Path(str_path)\n",
|
||
"folder = list((p / 'results' / 'test_new2').glob('*'))\n",
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 113,
|
||
"id": "4ec20b56-521c-4c1c-82e0-af6a00232349",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Path >>./results/Export4/token_analysis/<< already exists and remained unchanged. If you want to overwrite this path, use parameter >>overwrite_existing<<.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# constants and other pre-defined variables\n",
|
||
"DATA_SET_ID = 'Export4'\n",
|
||
"\n",
|
||
"SAVE_PATH_FOLDER = f'./results/{DATA_SET_ID}/token_analysis/'\n",
|
||
"create_saving_folder(saving_path_folder=SAVE_PATH_FOLDER)\n",
|
||
"\n",
|
||
"path = Path(SAVE_PATH_FOLDER)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"id": "2ecb0043-0f6b-49ea-a142-44f9359e66ff",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"nlp = spacy.load('de_dep_news_trf')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"id": "e4a4b656-fc1b-4d00-a357-4d66d10e24ff",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_string = \"\"\"\n",
|
||
"Das ist ein Test mit mehreren Sätzen. Ich bin so gut aufgelegt, dass ich jetzt einfach die Waschmaschine reparieren muss. \n",
|
||
"Denn die Waschmaschine zu reparieren, ist eine Lebensaufgabe.\n",
|
||
"Und in diesem Leben schreibe ich mehrfache Anwendungen, weil ich noch in der Lagerhalle einkaufen muss. Das wird er sicher noch tun müssen.\n",
|
||
"Die Wartungsaufgabe wurde an Herrn Müller übertragen.\n",
|
||
"\"\"\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"id": "83c9c539-87ca-4217-b859-1240faeabfd4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"doc = nlp(test_string)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "ed0a59d0-bcfa-4c5b-9d56-1e826f93e565",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"G = nx.DiGraph()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"id": "b86d898e-5736-4307-8564-e2cd3680adf2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"toka.add_doc_info_to_graph(graph=G, doc=doc, weight=10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"id": "a92a3ca6-383c-455e-982a-e9f5c39b91ac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"AdjacencyView({'Test': {'Satz': {'weight': 20}}, 'Satz': {}, 'auflegen': {'Waschmaschine': {'weight': 10}}, 'Waschmaschine': {'Lebensaufgabe': {'weight': 10}}, 'reparieren': {'Waschmaschine': {'weight': 20}}, 'Lebensaufgabe': {}, 'schreiben': {'Leben': {'weight': 10}, 'Anwendung': {'weight': 10}, 'Lagerhalle': {'weight': 10}}, 'Leben': {}, 'Anwendung': {}, 'Lagerhalle': {}, 'einkaufen': {'Lagerhalle': {'weight': 10}}, 'Wartungsaufgabe': {'Herr': {'weight': 10}, 'Müller': {'weight': 10}}, 'Herr': {'Müller': {'weight': 20}}, 'Müller': {}, 'übertragen': {'Herr': {'weight': 10}, 'Müller': {'weight': 10}}})"
|
||
]
|
||
},
|
||
"execution_count": 100,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"G.adj"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"id": "0d2a115b-ad3f-483e-a297-b00e2a41cae1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# build the target from the `path` object (Path(SAVE_PATH_FOLDER)) so the result\n",
|
||
"# no longer depends on SAVE_PATH_FOLDER ending with a '/'\n",
|
||
"path_to_graph_export = path / 'Directed_Graph_test3.graphml'\n",
|
||
"nx.write_graphml(G, path_to_graph_export)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"id": "6af41720-3371-44d3-84c8-59e2ad767be0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Test</th>\n",
|
||
" <th>Satz</th>\n",
|
||
" <th>auflegen</th>\n",
|
||
" <th>Waschmaschine</th>\n",
|
||
" <th>reparieren</th>\n",
|
||
" <th>Lebensaufgabe</th>\n",
|
||
" <th>schreiben</th>\n",
|
||
" <th>Leben</th>\n",
|
||
" <th>Anwendung</th>\n",
|
||
" <th>Lagerhalle</th>\n",
|
||
" <th>einkaufen</th>\n",
|
||
" <th>Wartungsaufgabe</th>\n",
|
||
" <th>Herr</th>\n",
|
||
" <th>Müller</th>\n",
|
||
" <th>übertragen</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Test</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Satz</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>auflegen</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Waschmaschine</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>reparieren</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Lebensaufgabe</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>schreiben</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Leben</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Anwendung</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Lagerhalle</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>einkaufen</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Wartungsaufgabe</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Herr</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Müller</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>übertragen</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Test Satz auflegen Waschmaschine reparieren \\\n",
|
||
"Test 0 20 0 0 0 \n",
|
||
"Satz 0 0 0 0 0 \n",
|
||
"auflegen 0 0 0 10 0 \n",
|
||
"Waschmaschine 0 0 0 0 0 \n",
|
||
"reparieren 0 0 0 20 0 \n",
|
||
"Lebensaufgabe 0 0 0 0 0 \n",
|
||
"schreiben 0 0 0 0 0 \n",
|
||
"Leben 0 0 0 0 0 \n",
|
||
"Anwendung 0 0 0 0 0 \n",
|
||
"Lagerhalle 0 0 0 0 0 \n",
|
||
"einkaufen 0 0 0 0 0 \n",
|
||
"Wartungsaufgabe 0 0 0 0 0 \n",
|
||
"Herr 0 0 0 0 0 \n",
|
||
"Müller 0 0 0 0 0 \n",
|
||
"übertragen 0 0 0 0 0 \n",
|
||
"\n",
|
||
" Lebensaufgabe schreiben Leben Anwendung Lagerhalle \\\n",
|
||
"Test 0 0 0 0 0 \n",
|
||
"Satz 0 0 0 0 0 \n",
|
||
"auflegen 0 0 0 0 0 \n",
|
||
"Waschmaschine 10 0 0 0 0 \n",
|
||
"reparieren 0 0 0 0 0 \n",
|
||
"Lebensaufgabe 0 0 0 0 0 \n",
|
||
"schreiben 0 0 10 10 10 \n",
|
||
"Leben 0 0 0 0 0 \n",
|
||
"Anwendung 0 0 0 0 0 \n",
|
||
"Lagerhalle 0 0 0 0 0 \n",
|
||
"einkaufen 0 0 0 0 10 \n",
|
||
"Wartungsaufgabe 0 0 0 0 0 \n",
|
||
"Herr 0 0 0 0 0 \n",
|
||
"Müller 0 0 0 0 0 \n",
|
||
"übertragen 0 0 0 0 0 \n",
|
||
"\n",
|
||
" einkaufen Wartungsaufgabe Herr Müller übertragen \n",
|
||
"Test 0 0 0 0 0 \n",
|
||
"Satz 0 0 0 0 0 \n",
|
||
"auflegen 0 0 0 0 0 \n",
|
||
"Waschmaschine 0 0 0 0 0 \n",
|
||
"reparieren 0 0 0 0 0 \n",
|
||
"Lebensaufgabe 0 0 0 0 0 \n",
|
||
"schreiben 0 0 0 0 0 \n",
|
||
"Leben 0 0 0 0 0 \n",
|
||
"Anwendung 0 0 0 0 0 \n",
|
||
"Lagerhalle 0 0 0 0 0 \n",
|
||
"einkaufen 0 0 0 0 0 \n",
|
||
"Wartungsaufgabe 0 0 10 10 0 \n",
|
||
"Herr 0 0 0 20 0 \n",
|
||
"Müller 0 0 0 0 0 \n",
|
||
"übertragen 0 0 10 10 0 "
|
||
]
|
||
},
|
||
"execution_count": 102,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = nx.to_pandas_adjacency(G, dtype=np.uint64)\n",
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 103,
|
||
"id": "f1be6ab2-a5b5-43ff-93a8-88f874d47e47",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.token_analysis:Successfully converted graph to undirected edges. There are 13 edges in the graph.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"G_undir = toka.convert_graph_to_undirected(graph=G)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"id": "93310ec2-72c5-4601-8403-ef023c72c2bb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Test</th>\n",
|
||
" <th>Satz</th>\n",
|
||
" <th>auflegen</th>\n",
|
||
" <th>Waschmaschine</th>\n",
|
||
" <th>reparieren</th>\n",
|
||
" <th>Lebensaufgabe</th>\n",
|
||
" <th>schreiben</th>\n",
|
||
" <th>Leben</th>\n",
|
||
" <th>Anwendung</th>\n",
|
||
" <th>Lagerhalle</th>\n",
|
||
" <th>einkaufen</th>\n",
|
||
" <th>Wartungsaufgabe</th>\n",
|
||
" <th>Herr</th>\n",
|
||
" <th>Müller</th>\n",
|
||
" <th>übertragen</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Test</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Satz</th>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>auflegen</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Waschmaschine</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>reparieren</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Lebensaufgabe</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>schreiben</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Leben</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Anwendung</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Lagerhalle</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>einkaufen</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Wartungsaufgabe</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Herr</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Müller</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>übertragen</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Test Satz auflegen Waschmaschine reparieren \\\n",
|
||
"Test 0 20 0 0 0 \n",
|
||
"Satz 20 0 0 0 0 \n",
|
||
"auflegen 0 0 0 10 0 \n",
|
||
"Waschmaschine 0 0 10 0 20 \n",
|
||
"reparieren 0 0 0 20 0 \n",
|
||
"Lebensaufgabe 0 0 0 10 0 \n",
|
||
"schreiben 0 0 0 0 0 \n",
|
||
"Leben 0 0 0 0 0 \n",
|
||
"Anwendung 0 0 0 0 0 \n",
|
||
"Lagerhalle 0 0 0 0 0 \n",
|
||
"einkaufen 0 0 0 0 0 \n",
|
||
"Wartungsaufgabe 0 0 0 0 0 \n",
|
||
"Herr 0 0 0 0 0 \n",
|
||
"Müller 0 0 0 0 0 \n",
|
||
"übertragen 0 0 0 0 0 \n",
|
||
"\n",
|
||
" Lebensaufgabe schreiben Leben Anwendung Lagerhalle \\\n",
|
||
"Test 0 0 0 0 0 \n",
|
||
"Satz 0 0 0 0 0 \n",
|
||
"auflegen 0 0 0 0 0 \n",
|
||
"Waschmaschine 10 0 0 0 0 \n",
|
||
"reparieren 0 0 0 0 0 \n",
|
||
"Lebensaufgabe 0 0 0 0 0 \n",
|
||
"schreiben 0 0 10 10 10 \n",
|
||
"Leben 0 10 0 0 0 \n",
|
||
"Anwendung 0 10 0 0 0 \n",
|
||
"Lagerhalle 0 10 0 0 0 \n",
|
||
"einkaufen 0 0 0 0 10 \n",
|
||
"Wartungsaufgabe 0 0 0 0 0 \n",
|
||
"Herr 0 0 0 0 0 \n",
|
||
"Müller 0 0 0 0 0 \n",
|
||
"übertragen 0 0 0 0 0 \n",
|
||
"\n",
|
||
" einkaufen Wartungsaufgabe Herr Müller übertragen \n",
|
||
"Test 0 0 0 0 0 \n",
|
||
"Satz 0 0 0 0 0 \n",
|
||
"auflegen 0 0 0 0 0 \n",
|
||
"Waschmaschine 0 0 0 0 0 \n",
|
||
"reparieren 0 0 0 0 0 \n",
|
||
"Lebensaufgabe 0 0 0 0 0 \n",
|
||
"schreiben 0 0 0 0 0 \n",
|
||
"Leben 0 0 0 0 0 \n",
|
||
"Anwendung 0 0 0 0 0 \n",
|
||
"Lagerhalle 10 0 0 0 0 \n",
|
||
"einkaufen 0 0 0 0 0 \n",
|
||
"Wartungsaufgabe 0 0 10 10 0 \n",
|
||
"Herr 0 10 0 20 10 \n",
|
||
"Müller 0 10 20 0 10 \n",
|
||
"übertragen 0 0 10 10 0 "
|
||
]
|
||
},
|
||
"execution_count": 89,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = nx.to_pandas_adjacency(G_undir, dtype=np.uint64)\n",
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"id": "2eae6efa-22e4-4bbc-9d00-7bd11a934773",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"path_to_graph_export = SAVE_PATH_FOLDER + 'Undirected_Graph_test3.graphml'\n",
|
||
"nx.write_graphml(G_undir, path_to_graph_export)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "58aa35b3-c842-4120-b82c-ce06172ab031",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "6bb0c1d7-0815-465c-bb4a-ed51bb31b436",
|
||
"metadata": {},
|
||
"source": [
|
||
"---"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 114,
|
||
"id": "14f0ba09-5a47-44ec-b314-0e9a3aef11e3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('results/Export4/Pipe-Preprocess1_Step-3_remove_NA.pkl'),\n",
|
||
" WindowsPath('results/Export4/Pipe-Preprocess1_Step-5_analyse_feature.pkl'),\n",
|
||
" WindowsPath('results/Export4/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n",
|
||
" WindowsPath('results/Export4/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n",
|
||
" WindowsPath('results/Export4/dupl_idx_pairs.pkl'),\n",
|
||
" WindowsPath('results/Export4/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]"
|
||
]
|
||
},
|
||
"execution_count": 114,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"list(path.parent.glob('*.pkl'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 115,
|
||
"id": "016d60d7-072e-4526-a12a-5254c1b93b39",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>2163</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>27</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1619</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2681</th>\n",
|
||
" <td>vom Eisenkernvorrichtung (Teil vom Kettenlauf ...</td>\n",
|
||
" <td>136</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[515]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2680</th>\n",
|
||
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
|
||
" <td>260</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[311]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[415]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[323]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[176]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5090 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 2163 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2681 vom Eisenkernvorrichtung (Teil vom Kettenlauf ... 136 1 \n",
|
||
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
|
||
"2676 Docke angefahren! 17 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 27 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2681 [515] 1 \n",
|
||
"2680 [311] 1 \n",
|
||
"2679 [415] 1 \n",
|
||
"2677 [323] 1 \n",
|
||
"2676 [176] 1 \n",
|
||
"\n",
|
||
"[5090 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 115,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# derive the location from DATA_SET_ID so the loaded pickle always belongs to the configured data set\n",
|
||
"prep_data = load_pickle(path=f'./results/{DATA_SET_ID}/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')\n",
|
||
"prep_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 116,
|
||
"id": "ab083fac-cd74-4049-8d8e-29e8edbe1cba",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>92592</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>2163</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>1619</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>1265</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>687</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2681</th>\n",
|
||
" <td>vom Eisenkernvorrichtung (Teil vom Kettenlauf ...</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2680</th>\n",
|
||
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5090 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry num_occur\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 92592\n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 2163\n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 1619\n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 1265\n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 687\n",
|
||
"... ... ...\n",
|
||
"2681 vom Eisenkernvorrichtung (Teil vom Kettenlauf ... 1\n",
|
||
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 1\n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 1\n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 1\n",
|
||
"2676 Docke angefahren! 1\n",
|
||
"\n",
|
||
"[5090 rows x 2 columns]"
|
||
]
|
||
},
|
||
"execution_count": 116,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"description_entries = prep_data[['entry', 'num_occur']]\n",
|
||
"description_entries"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 117,
|
||
"id": "7b3a2590-9629-4b2d-9753-5573b6ad0bdb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Full dataset is analysed; for a quick trial run use description_entries.iloc[:1000, :].copy() instead\n",
|
||
"data_token_analysis = description_entries.copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 118,
|
||
"id": "4f97af36-e386-4009-b4ee-c4c76bfb2a79",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"100%|██████████████████████████████████████████████████████████████████████████████| 5090/5090 [03:58<00:00, 21.30it/s]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.token_analysis:Graph properties: 6383 Nodes, 19489 Edges\n",
|
||
"INFO:ihm_analyse.token_analysis:Node memory: 393.35 KB\n",
|
||
"INFO:ihm_analyse.token_analysis:Edge memory: 1065.80 KB\n",
|
||
"INFO:ihm_analyse.token_analysis:Total memory: 1459.16 KB\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"graph = toka.build_token_graph(data=data_token_analysis, model=nlp)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 123,
|
||
"id": "8e3f1e74-4c74-4439-81b3-bcf9e4a522eb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.token_analysis:Successfully converted graph to one with undirected edges.\n",
|
||
"INFO:ihm_analyse.token_analysis:Undirected Graph properties: 6383 Nodes, 18977 Edges\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"# Build the export paths with pathlib: `+` only works while SAVE_PATH_FOLDER is a str\n",
|
||
"# ending in a separator and raises TypeError once it is a Path object.\n",
|
||
"export_dir = Path(SAVE_PATH_FOLDER)\n",
|
||
"path_to_graph_dir_export = str(export_dir / 'Directed_Graph_full.graphml')\n",
|
||
"nx.write_graphml(graph, path_to_graph_dir_export)\n",
|
||
"path_to_graph_undir_export = str(export_dir / 'Undirected_Graph_full.graphml')\n",
|
||
"graph_undir = toka.convert_graph_to_undirected(graph=graph)\n",
|
||
"nx.write_graphml(graph_undir, path_to_graph_undir_export)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 110,
|
||
"id": "88bb99ca-d3f3-42b5-a993-423bda6f5d8e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"6383"
|
||
]
|
||
},
|
||
"execution_count": 110,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(graph.nodes)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 111,
|
||
"id": "5a850568-e4ef-45ad-9d95-4b70627a866e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"19489"
|
||
]
|
||
},
|
||
"execution_count": 111,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(graph.edges)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5d9ec266-1961-4b5b-8d48-7cce48808d09",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f665fa3a-9622-4cd0-bb7a-122d2c2e2971",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "54b60d01-67ca-4ed7-a0ea-390be8676649",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
|
||
"INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu\n"
|
||
]
|
||
}
|
||
],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e9372ae3-8a9f-4f27-bd22-6d7e7ff8de9a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "760474bb-4041-440c-a6c7-2f05a53ba990",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph.graphml'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Token_Analysis-TokenGraph.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph-filtered.graphml'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Token_Analysis-TokenGraph-filtered.pickle')]"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import os\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"from ihm_analyse import SAVE_PATH_FOLDER, THRESHOLD_EDGE_WEIGHT, TokenGraph, load_pickle\n",
|
||
"from ihm_analyse.predefined_pipes import pipe_token_analysis\n",
|
||
"from ihm_analyse.lib.helpers import load_toml_config\n",
|
||
"\n",
|
||
"str_path = os.getcwd()\n",
|
||
"p = Path(str_path)\n",
|
||
"folder = list((p / 'results' / 'test_new2').glob('*'))\n",
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "e340f859-173e-4225-86ed-4b59a2e9ee41",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def run_token_analysis(\n",
|
||
"    preprocessed_data,\n",
|
||
"):\n",
|
||
"    \"\"\"Run the token-analysis pipeline and persist the full and the filtered graph.\n",
|
||
"\n",
|
||
"    The graph returned by ``pipe_token_analysis`` is written to SAVE_PATH_FOLDER\n",
|
||
"    via ``save_graph(directed=False)`` and ``to_pickle``. A second graph, filtered\n",
|
||
"    with ``filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT)`` and then\n",
|
||
"    ``filter_by_node_degree(1)``, is written the same way under '-filtered' names.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        preprocessed_data: starting value handed to ``pipe_token_analysis.run``.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        Tuple ``(token_graph, tk_graph_filtered)``.\n",
|
||
"    \"\"\"\n",
|
||
"    (token_graph,) = pipe_token_analysis.run(starting_values=(preprocessed_data,))\n",
|
||
"    # persist the unfiltered graph (one GraphML export, one pickle)\n",
|
||
"    token_graph.save_graph(SAVE_PATH_FOLDER, directed=False)\n",
|
||
"    token_graph.to_pickle(SAVE_PATH_FOLDER,\n",
|
||
"                          filename=f'{pipe_token_analysis.name}-TokenGraph')\n",
|
||
"    # filter graph by edge weight and remove single nodes (no connection)\n",
|
||
"    tk_graph_filtered = token_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT)\n",
|
||
"    tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1)\n",
|
||
"    tk_graph_filtered.save_graph(SAVE_PATH_FOLDER,\n",
|
||
"                                 filename='TokenGraph-filtered',\n",
|
||
"                                 directed=False)\n",
|
||
"    tk_graph_filtered.to_pickle(SAVE_PATH_FOLDER,\n",
|
||
"                                filename=f'{pipe_token_analysis.name}-TokenGraph-filtered')\n",
|
||
"\n",
|
||
"    return token_graph, tk_graph_filtered"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d4b0937b-620c-4be1-9dd2-cff2fd1c2ef3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "16f2ce70-56b4-4fac-8508-07abd739df1d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"str_path = os.getcwd()\n",
|
||
"p = Path(str_path)\n",
|
||
"config_p = p / 'ihm_analyse' / 'config.toml'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "dc523d1d-c41d-4e87-8364-f8c8da788c3c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/ihm_analyse/config.toml')"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"config_p"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "604d03c4-046f-4310-b03c-e16fce277cc3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'common': {'paths': {'results': './results/test_new2/',\n",
|
||
" 'dataset': './01_2_Rohdaten_neu/Export4.csv'}},\n",
|
||
" 'export_filenames': {'filename_cossim_filter_candidates': 'CosSim-FilterCandidates'},\n",
|
||
" 'preprocess': {'date_cols': ['VorgangsDatum',\n",
|
||
" 'ErledigungsDatum',\n",
|
||
" 'Arbeitsbeginn',\n",
|
||
" 'ErstellungsDatum'],\n",
|
||
" 'threshold_amount_characters': 5,\n",
|
||
" 'threshold_similarity': 0.8},\n",
|
||
" 'token_analysis': {'threshold_edge_weight': 150}}"
|
||
]
|
||
},
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"conf = load_toml_config(config_p)\n",
|
||
"conf"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "d6cce369-16d4-4fb4-a288-2f05c28404ea",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung')"
|
||
]
|
||
},
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"p"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "4ec47f8a-47ca-43f8-90c2-3f6d03d511cb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle')"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"filename = r'*Pipe-Merge_Duplicates_Step-1*'\n",
|
||
"folder = list(SAVE_PATH_FOLDER.glob(filename))[0]\n",
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8b3a098b-62af-4278-a1af-29602195872a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5bddaca9-9e7d-4af5-9dbe-ffb9e985a846",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "16832937-f237-4938-b698-423e8844331f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# dataset: pick the merged-duplicates result by name instead of by list position,\n",
|
||
"# so the cell does not depend on what an earlier cell left in `folder`\n",
|
||
"merge_dupl_file = next(SAVE_PATH_FOLDER.glob('*Pipe-Merge_Duplicates_Step-1*'))\n",
|
||
"res = load_pickle(merge_dupl_file)\n",
|
||
"data = res[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "75410ca5-fb20-4d16-8d9c-ed82b7f918c1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>3111</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1619</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2680</th>\n",
|
||
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
|
||
" <td>260</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[311]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[415]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[323]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[176]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6799</th>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||
" <td>107</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>[306, 326]</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>4582 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
|
||
"2676 Docke angefahren! 17 1 \n",
|
||
"6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2680 [311] 1 \n",
|
||
"2679 [415] 1 \n",
|
||
"2677 [323] 1 \n",
|
||
"2676 [176] 1 \n",
|
||
"6799 [306, 326] 2 \n",
|
||
"\n",
|
||
"[4582 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "77e86a89-8d5e-4ac7-8d97-4625dc01c0ad",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Full dataset is used; for a quick trial run insert .iloc[:20, :] before .copy()\n",
|
||
"preprocessed_data_trunc = data[['entry', 'num_occur']].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "4db4a705-c277-4c10-b9e4-981ed838c3da",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.pipelines:Starting processing pipeline >>Token_Analysis<<...\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"100%|██████████████████████████████████████████████████████████████████████████████| 4582/4582 [03:36<00:00, 21.16it/s]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.graphs:Graph properties: 6200 Nodes, 18770 Edges\n",
|
||
"INFO:ihm_analyse.graphs:Node memory: 381.88 KB\n",
|
||
"INFO:ihm_analyse.graphs:Edge memory: 1026.48 KB\n",
|
||
"INFO:ihm_analyse.graphs:Total memory: 1408.36 KB\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.graphs:Successfully converted graph to one with undirected edges.\n",
|
||
"INFO:ihm_analyse.graphs:Graph properties: 6200 Nodes, 18297 Edges\n",
|
||
"INFO:ihm_analyse.graphs:Node memory: 381.88 KB\n",
|
||
"INFO:ihm_analyse.graphs:Edge memory: 1000.62 KB\n",
|
||
"INFO:ihm_analyse.graphs:Total memory: 1382.50 KB\n",
|
||
"INFO:ihm_analyse.graphs:Graph properties: 6200 Nodes, 18297 Edges\n",
|
||
"INFO:ihm_analyse.graphs:Node memory: 381.88 KB\n",
|
||
"INFO:ihm_analyse.graphs:Edge memory: 1000.62 KB\n",
|
||
"INFO:ihm_analyse.graphs:Total memory: 1382.50 KB\n",
|
||
"INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Pipe-Token_Analysis_Step-1_build_token_graph.pickle\n",
|
||
"INFO:ihm_analyse.pipelines:Processing pipeline >>Token_Analysis<< successfully ended.\n",
|
||
"INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under results\\test_new2\\TokenGraph.graphml.\n",
|
||
"INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Token_Analysis-TokenGraph.pickle\n",
|
||
"INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under results\\test_new2\\TokenGraph.graphml.\n",
|
||
"INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Token_Analysis-TokenGraph.pickle\n",
|
||
"INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under results\\test_new2\\TokenGraph-filtered.graphml.\n",
|
||
"INFO:ihm_analyse.helpers:Saved file successfully under results\\test_new2\\Token_Analysis-TokenGraph-filtered.pickle\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph, tk_graph_filtered = run_token_analysis(preprocessed_data_trunc)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "bce025fc-f7c5-4dc9-aecf-111a9970a658",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"TokenGraph(name: TokenGraph, number of nodes: 6200, number of edges: 18770)"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "0f05ead2-1223-4877-961b-f76fe835fbea",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'num_nodes': 6200,\n",
|
||
" 'num_edges': 18770,\n",
|
||
" 'min_edge_weight': 1,\n",
|
||
" 'max_edge_weight': 92690,\n",
|
||
" 'node_memory': 391043,\n",
|
||
" 'edge_memory': 1051120,\n",
|
||
" 'total_memory': 1442163}"
|
||
]
|
||
},
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph.metadata_directed"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "c607384d-9636-4bd1-9271-5bc1f63b0f7d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'num_nodes': 6200,\n",
|
||
" 'num_edges': 18297,\n",
|
||
" 'min_edge_weight': 1,\n",
|
||
" 'max_edge_weight': 92690,\n",
|
||
" 'node_memory': 391043,\n",
|
||
" 'edge_memory': 1024632,\n",
|
||
" 'total_memory': 1415675}"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph.metadata_undirected"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "21281f47-6f01-42e6-ad23-10f408e017f8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"TokenGraph(name: TokenGraph, number of nodes: 147, number of edges: 179)"
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph_filtered"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "b58c5a82-e1d0-4c96-abe6-8e44e23aa50b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'num_nodes': 147,\n",
|
||
" 'num_edges': 177,\n",
|
||
" 'min_edge_weight': 153,\n",
|
||
" 'max_edge_weight': 92690,\n",
|
||
" 'node_memory': 9487,\n",
|
||
" 'edge_memory': 9912,\n",
|
||
" 'total_memory': 19399}"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph_filtered.metadata_undirected"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "73a52733-3249-4c58-a6c3-412c5659911a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "580aca34-5d07-41bf-838d-8d299a107543",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "4ce0491c-2c2b-472f-8974-5af9b8660a37",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/TokenGraph.graphml')]"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"folder = list((p / 'results' / 'test_new2').glob('*'))\n",
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "ddb120e5-f34c-4ed5-8cdd-d2d30c3436f8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Token_Analysis_Step-1_build_token_graph.pickle')"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# select the pipeline pickle by name; its position in `folder` shifts as more result files are written\n",
|
||
"p = next(f for f in folder if f.name.startswith('Pipe-Token_Analysis_Step-1'))\n",
|
||
"p"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "3af49383-394f-4a6b-bf11-4da7f4474a6c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(<ihm_analyse.lib.graphs.TokenGraph at 0x2451a5a07d0>,)"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ret = load_pickle(p)\n",
|
||
"ret"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "390e5dd4-b3ec-4f7e-be0f-639afaa6fba6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tk_graph = ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "967b49c3-11cc-429d-937a-8b746ee40799",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"deg_view = tk_graph.degree"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "6bca0ef4-040c-47f6-8855-7e51ce67d4a5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"8\n",
|
||
"20\n",
|
||
"2\n",
|
||
"23\n",
|
||
"47\n",
|
||
"123\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"for i, view in enumerate(tk_graph.nodes):\n",
|
||
" print(tk_graph.degree[view])\n",
|
||
" if i == 5:\n",
|
||
" break"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "753217df-a07c-43b5-8feb-eeb2976b9d34",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"8"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph.degree['Wartungstätigkeit']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d34e34cf-3dc3-4df7-9758-b84347896536",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.9"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|