1470 lines
49 KiB
Plaintext
1470 lines
49 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "147e39b9-0066-4cca-9561-8ed0c994850c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"from pathlib import Path"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "f095f9ff-f7c0-4446-97cb-c208a1ae62c6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'A:\\\\Arbeitsaufgaben\\\\Instandhaltung'"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"str_path = os.getcwd()\n",
|
||
"str_path"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "e9610b62-667c-4322-b936-bee6d45c17cf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"p = Path(str_path)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"id": "b1258614-d8e9-4205-992d-b16a5406f049",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"folder = list((p / 'results' / 'test_new2').glob('*'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"id": "a4cec1df-cc16-481b-9e3c-f12747283bd8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]"
|
||
]
|
||
},
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "510b2262-edab-4874-878d-f736a6076e79",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%load_ext autoreload\n",
|
||
"%autoreload 2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "f0841940-2285-4bc6-bc08-8a04844d7fd3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import os\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"import networkx as nx\n",
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"from ihm_analyse import load_pickle\n",
|
||
"from ihm_analyse.lib.preprocess import merge_similarity_dupl\n",
|
||
"from ihm_analyse.lib.graphs import update_graph, get_graph_metadata\n",
|
||
"\n",
|
||
"\n",
|
||
"str_path = os.getcwd()\n",
|
||
"p = Path(str_path)\n",
|
||
"folder = list((p / 'results' / 'test_new2').glob('*'))\n",
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "8e51a545-228a-4f51-8440-53db05551d69",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n",
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# dataset\n",
|
||
"res = load_pickle(folder[1])\n",
|
||
"data = res[0]\n",
|
||
"# dupl IDs\n",
|
||
"res = load_pickle(folder[-2])\n",
|
||
"dupl_ids = res[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "b95631d0-018a-4a0d-9d94-dec5db33dff4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"9331"
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(dupl_ids)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "e1e2a149-a1d4-47c7-a9fa-a96db09f7144",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sub_ids = dupl_ids.copy()\n",
|
||
"sub_ids = dupl_ids[:20]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "35097dd4-e19a-4478-abe2-74135fec9fdc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# build index graph to obtain graph of connected (similar) indices\n",
|
||
"# use this graph to obtain connected components (indices which belong together)\n",
|
||
"# retain semantic connection on whole dataset\n",
|
||
"dupl_id_graph = nx.Graph()\n",
|
||
"\n",
|
||
"for (idx1, idx2) in sub_ids:\n",
|
||
" # inplace operation, parent/child do not really exist in undirected graph\n",
|
||
" update_graph(graph=dupl_id_graph, parent=idx1, child=idx2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"id": "0a6c6e61-91da-4f67-a3a5-b20072a8c1f6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.graphs:Graph properties: 24 Nodes, 20 Edges\n",
|
||
"INFO:ihm_analyse.graphs:Node memory: 0.66 KB\n",
|
||
"INFO:ihm_analyse.graphs:Edge memory: 1.09 KB\n",
|
||
"INFO:ihm_analyse.graphs:Total memory: 1.75 KB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"graph_meta = get_graph_metadata(graph=dupl_id_graph)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "92e9ba06-428b-412d-90e8-1f161c93b681",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"conn_ids = nx.connected_components(dupl_id_graph)\n",
|
||
"conn_ids_tpl = tuple(conn_ids)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "379f983e-776e-4520-9753-61adbeac968c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{33, 487, 5703, 176, 247, 345, 157}\n",
|
||
"{882, 131}\n",
|
||
"{561, 332, 558}\n",
|
||
"{104, 4003}\n",
|
||
"{5298, 132}\n",
|
||
"{34, 3121, 3122, 3123, 63}\n",
|
||
"{168, 6378, 1068}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"for id_set in conn_ids:\n",
|
||
" print(id_set)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "5edec07e-9618-4650-b806-e49de3301262",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[{33, 157, 176, 247, 345, 487, 5703},\n",
|
||
" {131, 882},\n",
|
||
" {332, 558, 561},\n",
|
||
" {104, 4003},\n",
|
||
" {132, 5298},\n",
|
||
" {34, 63, 3121, 3122, 3123},\n",
|
||
" {168, 1068, 6378}]"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"conn_ids_lst"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "f37dfc52-2734-45ff-8939-03eb47465d41",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>1654</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
|
||
" <td>18</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1616</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[415]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2678</th>\n",
|
||
" <td>Bitte 8 Scheiben nach Muster anfertigen. Danke.</td>\n",
|
||
" <td>48</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[140]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[323]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[176]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6799</th>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||
" <td>107</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[326]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6800 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1616 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
|
||
"2678 Bitte 8 Scheiben nach Muster anfertigen. Danke. 48 1 \n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
|
||
"2676 Docke angefahren! 17 1 \n",
|
||
"6799 Befestigung Deckel für Batteriefach defekt ... 107 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2679 [415] 1 \n",
|
||
"2678 [140] 1 \n",
|
||
"2677 [323] 1 \n",
|
||
"2676 [176] 1 \n",
|
||
"6799 [326] 1 \n",
|
||
"\n",
|
||
"[6800 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "5da4d722-fae2-4835-821c-70ed348bb71a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_ids = list(conn_ids_tpl[0])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 114,
|
||
"id": "9eef53d4-2b0f-40cb-b4c3-6a2992d7ec09",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sub_data = data.loc[test_ids,:].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 115,
|
||
"id": "42aa3a0a-85ff-40a1-8473-6b927ade9fe6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# obtain bunch\n",
|
||
"# filter for bunch\n",
|
||
"# merge bunch\n",
|
||
"# remove all but merged entry from whole dataset"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 116,
|
||
"id": "684eeaf7-f86b-4159-bca9-a345601a2d2b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>1654</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
|
||
" <td>18</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>157</th>\n",
|
||
" <td>Monatliche Sichtkontrolle</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>634</td>\n",
|
||
" <td>[1038, 1040, 1041, 1042, 1043, 1044, 1045, 121...</td>\n",
|
||
" <td>24</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>176</th>\n",
|
||
" <td>Wöchentliche Sichtprüfung / Reinigung</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>361</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 421, 1003,...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>247</th>\n",
|
||
" <td>Monatliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>113</td>\n",
|
||
" <td>[899, 906, 1052, 1169, 1170, 1725]</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>487</th>\n",
|
||
" <td>Wöchentliche Sichtprüfung</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>35</td>\n",
|
||
" <td>[1666]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>345</th>\n",
|
||
" <td>Monatliche Sichtprüfung / Reinigung</td>\n",
|
||
" <td>35</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>[885, 899, 906, 945, 946, 970, 1052, 1169, 1170]</td>\n",
|
||
" <td>9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5703</th>\n",
|
||
" <td>monatliche Sichtkontrolle</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1725]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
|
||
"157 Monatliche Sichtkontrolle 25 634 \n",
|
||
"176 Wöchentliche Sichtprüfung / Reinigung 37 361 \n",
|
||
"247 Monatliche Sichtkontrolle / Reinigung 37 113 \n",
|
||
"487 Wöchentliche Sichtprüfung 25 35 \n",
|
||
"345 Monatliche Sichtprüfung / Reinigung 35 33 \n",
|
||
"5703 monatliche Sichtkontrolle 25 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
|
||
"157 [1038, 1040, 1041, 1042, 1043, 1044, 1045, 121... 24 \n",
|
||
"176 [301, 304, 305, 313, 314, 323, 329, 421, 1003,... 11 \n",
|
||
"247 [899, 906, 1052, 1169, 1170, 1725] 6 \n",
|
||
"487 [1666] 1 \n",
|
||
"345 [885, 899, 906, 945, 946, 970, 1052, 1169, 1170] 9 \n",
|
||
"5703 [1725] 1 "
|
||
]
|
||
},
|
||
"execution_count": 116,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sub_data = sub_data.sort_values(by=['num_occur', 'num_assoc_obj_ids', 'len'], ascending=[False, False, False])\n",
|
||
"sub_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 117,
|
||
"id": "5e048cc8-ca58-48cc-8b6f-57a0fbe88485",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# keep first entry with max number of occurrences, then number of associated objects,\n",
|
||
"# then length of entry\n",
|
||
"data_idx = sub_data.index[0]\n",
|
||
"#entry = sub_data.iat[0,0]\n",
|
||
"#sub_data.at[data_idx, 'len'] = len(entry)\n",
|
||
"sub_data.at[data_idx, 'num_occur'] = sub_data['num_occur'].sum()\n",
|
||
"# assoc IDs\n",
|
||
"assoc_obj_ids = sub_data['assoc_obj_ids'].to_numpy()\n",
|
||
"assoc_obj_ids = np.concatenate(assoc_obj_ids)\n",
|
||
"assoc_obj_ids = np.unique(assoc_obj_ids)\n",
|
||
"sub_data.at[data_idx, 'assoc_obj_ids'] = assoc_obj_ids\n",
|
||
"sub_data.at[data_idx, 'num_assoc_obj_ids'] = len(assoc_ids_uni)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 118,
|
||
"id": "0fac40c5-102b-40d8-a45b-1c0bd89bb672",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>2831</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>54</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>157</th>\n",
|
||
" <td>Monatliche Sichtkontrolle</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>634</td>\n",
|
||
" <td>[1038, 1040, 1041, 1042, 1043, 1044, 1045, 121...</td>\n",
|
||
" <td>24</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>176</th>\n",
|
||
" <td>Wöchentliche Sichtprüfung / Reinigung</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>361</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 421, 1003,...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>247</th>\n",
|
||
" <td>Monatliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>113</td>\n",
|
||
" <td>[899, 906, 1052, 1169, 1170, 1725]</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>487</th>\n",
|
||
" <td>Wöchentliche Sichtprüfung</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>35</td>\n",
|
||
" <td>[1666]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>345</th>\n",
|
||
" <td>Monatliche Sichtprüfung / Reinigung</td>\n",
|
||
" <td>35</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>[885, 899, 906, 945, 946, 970, 1052, 1169, 1170]</td>\n",
|
||
" <td>9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5703</th>\n",
|
||
" <td>monatliche Sichtkontrolle</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1725]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 2831 \n",
|
||
"157 Monatliche Sichtkontrolle 25 634 \n",
|
||
"176 Wöchentliche Sichtprüfung / Reinigung 37 361 \n",
|
||
"247 Monatliche Sichtkontrolle / Reinigung 37 113 \n",
|
||
"487 Wöchentliche Sichtprüfung 25 35 \n",
|
||
"345 Monatliche Sichtprüfung / Reinigung 35 33 \n",
|
||
"5703 monatliche Sichtkontrolle 25 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 54 \n",
|
||
"157 [1038, 1040, 1041, 1042, 1043, 1044, 1045, 121... 24 \n",
|
||
"176 [301, 304, 305, 313, 314, 323, 329, 421, 1003,... 11 \n",
|
||
"247 [899, 906, 1052, 1169, 1170, 1725] 6 \n",
|
||
"487 [1666] 1 \n",
|
||
"345 [885, 899, 906, 945, 946, 970, 1052, 1169, 1170] 9 \n",
|
||
"5703 [1725] 1 "
|
||
]
|
||
},
|
||
"execution_count": 118,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sub_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 121,
|
||
"id": "de617dbf-0c96-4702-aa5f-c72af2b004e8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_ids.remove(data_idx)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 122,
|
||
"id": "1279561e-c46d-48a6-a679-dcbcb7c72761",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[487, 5703, 176, 247, 345, 157]"
|
||
]
|
||
},
|
||
"execution_count": 122,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test_ids"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 123,
|
||
"id": "acfffecf-2576-4979-8b19-6bd31d0e0d64",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>2831</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>54</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 2831 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 54 "
|
||
]
|
||
},
|
||
"execution_count": 123,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sub_data2 = sub_data.drop(index=test_ids)\n",
|
||
"sub_data2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "eb240df2-044c-44b9-8d4e-c5fd0d157c07",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "ab927a8c-fed3-42f2-a15d-9403184b1f8c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[(33, 176), (33, 247), (33, 487), (131, 882), (332, 558)]"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test_ids = dupl_ids[:5]\n",
|
||
"test_ids"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "adc463b7-9ea2-48e1-84e4-972ef45b5f9b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.graphs:Graph properties: 2695 Nodes, 9331 Edges\n",
|
||
"INFO:ihm_analyse.graphs:Node memory: 73.69 KB\n",
|
||
"INFO:ihm_analyse.graphs:Edge memory: 510.29 KB\n",
|
||
"INFO:ihm_analyse.graphs:Total memory: 583.98 KB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"ret = merge_similarity_dupl_test(data=data, dupl_idx_pairs=dupl_ids)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "f2d91678-ea68-49e7-91c1-1cbc8a4fe0cc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>3111</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1619</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2680</th>\n",
|
||
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
|
||
" <td>260</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[311]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[415]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[323]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[176]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6799</th>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||
" <td>107</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>[306, 326]</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>4582 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
|
||
"2676 Docke angefahren! 17 1 \n",
|
||
"6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2680 [311] 1 \n",
|
||
"2679 [415] 1 \n",
|
||
"2677 [323] 1 \n",
|
||
"2676 [176] 1 \n",
|
||
"6799 [306, 326] 2 \n",
|
||
"\n",
|
||
"[4582 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "cb13e547-5107-4f7b-a92d-ea52e7ce2fd4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>1654</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
|
||
" <td>18</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1616</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[415]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2678</th>\n",
|
||
" <td>Bitte 8 Scheiben nach Muster anfertigen. Danke.</td>\n",
|
||
" <td>48</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[140]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[323]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[176]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6799</th>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||
" <td>107</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[326]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6800 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1616 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
|
||
"2678 Bitte 8 Scheiben nach Muster anfertigen. Danke. 48 1 \n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
|
||
"2676 Docke angefahren! 17 1 \n",
|
||
"6799 Befestigung Deckel für Batteriefach defekt ... 107 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2679 [415] 1 \n",
|
||
"2678 [140] 1 \n",
|
||
"2677 [323] 1 \n",
|
||
"2676 [176] 1 \n",
|
||
"6799 [326] 1 \n",
|
||
"\n",
|
||
"[6800 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e5f16846-f92e-4a85-8cba-830e34705837",
|
||
"metadata": {},
|
||
"source": [
|
||
"## New Merge Duplicates in Pipeline"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "ed62a563-886f-4269-ab27-237ff39ea0da",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# dataset: load the last entry of `folder` and keep element 0 of the loaded result\n",
|
||
"res = load_pickle(folder[-1])\n",
|
||
"data = res[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "1e82810d-8cda-439d-ae26-4e65bad351d9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>3111</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1619</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2680</th>\n",
|
||
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
|
||
" <td>260</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[311]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2679</th>\n",
|
||
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[415]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2677</th>\n",
|
||
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[323]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2676</th>\n",
|
||
" <td>Docke angefahren!</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[176]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6799</th>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||
" <td>107</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>[306, 326]</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>4582 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n",
|
||
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
|
||
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
|
||
"2676 Docke angefahren! 17 1 \n",
|
||
"6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2680 [311] 1 \n",
|
||
"2679 [415] 1 \n",
|
||
"2677 [323] 1 \n",
|
||
"2676 [176] 1 \n",
|
||
"6799 [306, 326] 2 \n",
|
||
"\n",
|
||
"[4582 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "154111fe-24cc-47a1-9de2-56e1dcf36f67",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|