{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "147e39b9-0066-4cca-9561-8ed0c994850c", "metadata": {}, "outputs": [], "source": [ "import os\n", "from pathlib import Path" ] }, { "cell_type": "code", "execution_count": 2, "id": "f095f9ff-f7c0-4446-97cb-c208a1ae62c6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'A:\\\\Arbeitsaufgaben\\\\Instandhaltung'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "str_path = os.getcwd()\n", "str_path" ] }, { "cell_type": "code", "execution_count": 5, "id": "e9610b62-667c-4322-b936-bee6d45c17cf", "metadata": {}, "outputs": [], "source": [ "p = Path(str_path)" ] }, { "cell_type": "code", "execution_count": 78, "id": "b1258614-d8e9-4205-992d-b16a5406f049", "metadata": {}, "outputs": [], "source": [ "folder = list((p / 'results' / 'test_new2').glob('*'))" ] }, { "cell_type": "code", "execution_count": 79, "id": "a4cec1df-cc16-481b-9e3c-f12747283bd8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "folder" ] }, { 
"cell_type": "code", "execution_count": 1, "id": "510b2262-edab-4874-878d-f736a6076e79", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 3, "id": "f0841940-2285-4bc6-bc08-8a04844d7fd3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "from pathlib import Path\n", "\n", "import networkx as nx\n", "import numpy as np\n", "import pandas as pd\n", "\n", "from ihm_analyse import load_pickle\n", "from ihm_analyse.lib.preprocess import merge_similarity_dupl\n", "from ihm_analyse.lib.graphs import update_graph, get_graph_metadata\n", "\n", "\n", "str_path = os.getcwd()\n", "p = Path(str_path)\n", "folder = list((p / 'results' / 'test_new2').glob('*'))\n", "folder" ] }, { "cell_type": "code", "execution_count": 3, "id": "8e51a545-228a-4f51-8440-53db05551d69", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Loaded file successfully.\n", "INFO:ihm_analyse.helpers:Loaded 
file successfully.\n" ] } ], "source": [ "# pick pipeline results by file name; the order returned by glob() is not guaranteed\n", "files_by_name = {f.name: f for f in folder}\n", "# dataset\n", "res = load_pickle(files_by_name['Pipe-TargetFeature_Step-5_analyse_feature.pkl'])\n", "data = res[0]\n", "# dupl IDs\n", "res = load_pickle(files_by_name['Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'])\n", "dupl_ids = res[0]" ] }, { "cell_type": "code", "execution_count": 15, "id": "b95631d0-018a-4a0d-9d94-dec5db33dff4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9331" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(dupl_ids)" ] }, { "cell_type": "code", "execution_count": 27, "id": "e1e2a149-a1d4-47c7-a9fa-a96db09f7144", "metadata": {}, "outputs": [], "source": [ "# work on the first 20 duplicate pairs only while prototyping\n", "sub_ids = dupl_ids[:20]" ] }, { "cell_type": "code", "execution_count": 28, "id": "35097dd4-e19a-4478-abe2-74135fec9fdc", "metadata": {}, "outputs": [], "source": [ "# build index graph to obtain graph of connected (similar) indices\n", "# use this graph to obtain connected components (indices which belong together)\n", "# retain semantic connection on whole dataset\n", "dupl_id_graph = nx.Graph()\n", "\n", "for (idx1, idx2) in sub_ids:\n", "    # inplace operation, parent/child do not really exist in undirected graph\n", "    update_graph(graph=dupl_id_graph, parent=idx1, child=idx2)" ] }, { "cell_type": "code", "execution_count": 34, "id": "0a6c6e61-91da-4f67-a3a5-b20072a8c1f6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.graphs:Graph properties: 24 Nodes, 20 Edges\n", "INFO:ihm_analyse.graphs:Node memory: 0.66 KB\n", "INFO:ihm_analyse.graphs:Edge memory: 1.09 KB\n", "INFO:ihm_analyse.graphs:Total memory: 1.75 KB\n" ] } ], "source": [ "graph_meta = get_graph_metadata(graph=dupl_id_graph)" ] }, { "cell_type": "code", "execution_count": 37, "id": "92e9ba06-428b-412d-90e8-1f161c93b681", "metadata": {}, "outputs": [], "source": [ "# nx.connected_components returns a one-shot generator: materialise it once\n", "# so that the following cells can iterate over the components repeatedly\n", "conn_ids_tpl = tuple(nx.connected_components(dupl_id_graph))" ] }, { "cell_type": "code", "execution_count": 31, "id": 
"379f983e-776e-4520-9753-61adbeac968c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{33, 487, 5703, 176, 247, 345, 157}\n", "{882, 131}\n", "{561, 332, 558}\n", "{104, 4003}\n", "{5298, 132}\n", "{34, 3121, 3122, 3123, 63}\n", "{168, 6378, 1068}\n" ] } ], "source": [ "for id_set in conn_ids_tpl:\n", "    print(id_set)" ] }, { "cell_type": "code", "execution_count": 36, "id": "5edec07e-9618-4650-b806-e49de3301262", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{33, 157, 176, 247, 345, 487, 5703},\n", " {131, 882},\n", " {332, 558, 561},\n", " {104, 4003},\n", " {132, 5298},\n", " {34, 63, 3121, 3122, 3123},\n", " {168, 1068, 6378}]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conn_ids_lst = list(conn_ids_tpl)\n", "conn_ids_lst" ] }, { "cell_type": "code", "execution_count": 33, "id": "f37dfc52-2734-45ff-8939-03eb47465d41", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung391654[301, 304, 305, 313, 314, 331, 332, 510, 511, ...18
131Tägliche Überprüfung der Ölabscheider371616[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der WC-Anlagen371265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2679Zahnräder der Laufkatze verschlissen Ersatztei...1701[415]1
2678Bitte 8 Scheiben nach Muster anfertigen. Danke.481[140]1
2677Schalter für Bühne Schwenken abgerissen, bitte...1261[323]1
2676Docke angefahren!171[176]1
6799Befestigung Deckel für Batteriefach defekt ...1071[326]1
\n", "

6800 rows × 5 columns

\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n", "131 Tägliche Überprüfung der Ölabscheider 37 1616 \n", "160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n", "140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n", "... ... ... ... \n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n", "2678 Bitte 8 Scheiben nach Muster anfertigen. Danke. 48 1 \n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n", "2676 Docke angefahren! 17 1 \n", "6799 Befestigung Deckel für Batteriefach defekt ... 107 1 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n", "33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n", "131 [0, 970, 2134, 2137] 4 \n", "160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n", "140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n", "... ... ... 
\n", "2679 [415] 1 \n", "2678 [140] 1 \n", "2677 [323] 1 \n", "2676 [176] 1 \n", "6799 [326] 1 \n", "\n", "[6800 rows x 5 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 41, "id": "5da4d722-fae2-4835-821c-70ed348bb71a", "metadata": {}, "outputs": [], "source": [ "test_ids = list(conn_ids_tpl[0])" ] }, { "cell_type": "code", "execution_count": 114, "id": "9eef53d4-2b0f-40cb-b4c3-6a2992d7ec09", "metadata": {}, "outputs": [], "source": [ "sub_data = data.loc[test_ids,:].copy()" ] }, { "cell_type": "code", "execution_count": 115, "id": "42aa3a0a-85ff-40a1-8473-6b927ade9fe6", "metadata": {}, "outputs": [], "source": [ "# obtain bunch\n", "# filter for bunch\n", "# merge bunch\n", "# remove all but merged entry from whole dataset" ] }, { "cell_type": "code", "execution_count": 116, "id": "684eeaf7-f86b-4159-bca9-a345601a2d2b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
33Wöchentliche Sichtkontrolle / Reinigung391654[301, 304, 305, 313, 314, 331, 332, 510, 511, ...18
157Monatliche Sichtkontrolle25634[1038, 1040, 1041, 1042, 1043, 1044, 1045, 121...24
176Wöchentliche Sichtprüfung / Reinigung37361[301, 304, 305, 313, 314, 323, 329, 421, 1003,...11
247Monatliche Sichtkontrolle / Reinigung37113[899, 906, 1052, 1169, 1170, 1725]6
487Wöchentliche Sichtprüfung2535[1666]1
345Monatliche Sichtprüfung / Reinigung3533[885, 899, 906, 945, 946, 970, 1052, 1169, 1170]9
5703monatliche Sichtkontrolle251[1725]1
\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n", "157 Monatliche Sichtkontrolle 25 634 \n", "176 Wöchentliche Sichtprüfung / Reinigung 37 361 \n", "247 Monatliche Sichtkontrolle / Reinigung 37 113 \n", "487 Wöchentliche Sichtprüfung 25 35 \n", "345 Monatliche Sichtprüfung / Reinigung 35 33 \n", "5703 monatliche Sichtkontrolle 25 1 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n", "157 [1038, 1040, 1041, 1042, 1043, 1044, 1045, 121... 24 \n", "176 [301, 304, 305, 313, 314, 323, 329, 421, 1003,... 11 \n", "247 [899, 906, 1052, 1169, 1170, 1725] 6 \n", "487 [1666] 1 \n", "345 [885, 899, 906, 945, 946, 970, 1052, 1169, 1170] 9 \n", "5703 [1725] 1 " ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_data = sub_data.sort_values(by=['num_occur', 'num_assoc_obj_ids', 'len'], ascending=[False, False, False])\n", "sub_data" ] }, { "cell_type": "code", "execution_count": 117, "id": "5e048cc8-ca58-48cc-8b6f-57a0fbe88485", "metadata": {}, "outputs": [], "source": [ "# keep first entry with max number of occurrences, then number of associated objects,\n", "# then length of entry; 'entry' and 'len' of the kept row stay unchanged\n", "data_idx = sub_data.index[0]\n", "# the kept row takes over the occurrences of all merged rows\n", "sub_data.at[data_idx, 'num_occur'] = sub_data['num_occur'].sum()\n", "# assoc IDs: union of the object IDs of all merged rows\n", "assoc_ids_uni = np.unique(np.concatenate(sub_data['assoc_obj_ids'].to_numpy()))\n", "sub_data.at[data_idx, 'assoc_obj_ids'] = assoc_ids_uni\n", "sub_data.at[data_idx, 'num_assoc_obj_ids'] = len(assoc_ids_uni)" ] }, { "cell_type": "code", "execution_count": 118, "id": "0fac40c5-102b-40d8-a45b-1c0bd89bb672", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
33Wöchentliche Sichtkontrolle / Reinigung392831[301, 304, 305, 313, 314, 323, 329, 331, 332, ...54
157Monatliche Sichtkontrolle25634[1038, 1040, 1041, 1042, 1043, 1044, 1045, 121...24
176Wöchentliche Sichtprüfung / Reinigung37361[301, 304, 305, 313, 314, 323, 329, 421, 1003,...11
247Monatliche Sichtkontrolle / Reinigung37113[899, 906, 1052, 1169, 1170, 1725]6
487Wöchentliche Sichtprüfung2535[1666]1
345Monatliche Sichtprüfung / Reinigung3533[885, 899, 906, 945, 946, 970, 1052, 1169, 1170]9
5703monatliche Sichtkontrolle251[1725]1
\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 2831 \n", "157 Monatliche Sichtkontrolle 25 634 \n", "176 Wöchentliche Sichtprüfung / Reinigung 37 361 \n", "247 Monatliche Sichtkontrolle / Reinigung 37 113 \n", "487 Wöchentliche Sichtprüfung 25 35 \n", "345 Monatliche Sichtprüfung / Reinigung 35 33 \n", "5703 monatliche Sichtkontrolle 25 1 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 54 \n", "157 [1038, 1040, 1041, 1042, 1043, 1044, 1045, 121... 24 \n", "176 [301, 304, 305, 313, 314, 323, 329, 421, 1003,... 11 \n", "247 [899, 906, 1052, 1169, 1170, 1725] 6 \n", "487 [1666] 1 \n", "345 [885, 899, 906, 945, 946, 970, 1052, 1169, 1170] 9 \n", "5703 [1725] 1 " ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_data" ] }, { "cell_type": "code", "execution_count": 121, "id": "de617dbf-0c96-4702-aa5f-c72af2b004e8", "metadata": {}, "outputs": [], "source": [ "test_ids.remove(data_idx)" ] }, { "cell_type": "code", "execution_count": 122, "id": "1279561e-c46d-48a6-a679-dcbcb7c72761", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[487, 5703, 176, 247, 345, 157]" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_ids" ] }, { "cell_type": "code", "execution_count": 123, "id": "acfffecf-2576-4979-8b19-6bd31d0e0d64", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
33Wöchentliche Sichtkontrolle / Reinigung392831[301, 304, 305, 313, 314, 323, 329, 331, 332, ...54
\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 2831 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 54 " ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_data2 = sub_data.drop(index=test_ids)\n", "sub_data2" ] }, { "cell_type": "code", "execution_count": null, "id": "eb240df2-044c-44b9-8d4e-c5fd0d157c07", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "id": "ab927a8c-fed3-42f2-a15d-9403184b1f8c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(33, 176), (33, 247), (33, 487), (131, 882), (332, 558)]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# preview only: do not rebind test_ids, the cells above use it as a list of row labels\n", "dupl_ids[:5]" ] }, { "cell_type": "code", "execution_count": 5, "id": "adc463b7-9ea2-48e1-84e4-972ef45b5f9b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.graphs:Graph properties: 2695 Nodes, 9331 Edges\n", "INFO:ihm_analyse.graphs:Node memory: 73.69 KB\n", "INFO:ihm_analyse.graphs:Edge memory: 510.29 KB\n", "INFO:ihm_analyse.graphs:Total memory: 583.98 KB\n" ] } ], "source": [ "# use the function imported from ihm_analyse.lib.preprocess; the former\n", "# '*_test' name is not defined anywhere in this notebook\n", "# NOTE(review): assumes it accepts the same 'data' / 'dupl_idx_pairs' keywords - confirm\n", "ret = merge_similarity_dupl(data=data, dupl_idx_pairs=dupl_ids)" ] }, { "cell_type": "code", "execution_count": 6, "id": "f2d91678-ea68-49e7-91c1-1cbc8a4fe0cc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung393111[301, 304, 305, 313, 314, 323, 329, 331, 332, ...74
131Tägliche Überprüfung der Ölabscheider371619[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der WC-Anlagen371265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2680Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...2601[311]1
2679Zahnräder der Laufkatze verschlissen Ersatztei...1701[415]1
2677Schalter für Bühne Schwenken abgerissen, bitte...1261[323]1
2676Docke angefahren!171[176]1
6799Befestigung Deckel für Batteriefach defekt ...1072[306, 326]2
\n", "

4582 rows × 5 columns

\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n", "131 Tägliche Überprüfung der Ölabscheider 37 1619 \n", "160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n", "140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n", "... ... ... ... \n", "2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n", "2676 Docke angefahren! 17 1 \n", "6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n", "33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n", "131 [0, 970, 2134, 2137] 4 \n", "160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n", "140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n", "... ... ... \n", "2680 [311] 1 \n", "2679 [415] 1 \n", "2677 [323] 1 \n", "2676 [176] 1 \n", "6799 [306, 326] 2 \n", "\n", "[4582 rows x 5 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret[0]" ] }, { "cell_type": "code", "execution_count": 7, "id": "cb13e547-5107-4f7b-a92d-ea52e7ce2fd4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung391654[301, 304, 305, 313, 314, 331, 332, 510, 511, ...18
131Tägliche Überprüfung der Ölabscheider371616[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der WC-Anlagen371265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2679Zahnräder der Laufkatze verschlissen Ersatztei...1701[415]1
2678Bitte 8 Scheiben nach Muster anfertigen. Danke.481[140]1
2677Schalter für Bühne Schwenken abgerissen, bitte...1261[323]1
2676Docke angefahren!171[176]1
6799Befestigung Deckel für Batteriefach defekt ...1071[326]1
\n", "

6800 rows × 5 columns

\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n", "131 Tägliche Überprüfung der Ölabscheider 37 1616 \n", "160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n", "140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n", "... ... ... ... \n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n", "2678 Bitte 8 Scheiben nach Muster anfertigen. Danke. 48 1 \n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n", "2676 Docke angefahren! 17 1 \n", "6799 Befestigung Deckel für Batteriefach defekt ... 107 1 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n", "33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n", "131 [0, 970, 2134, 2137] 4 \n", "160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n", "140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n", "... ... ... \n", "2679 [415] 1 \n", "2678 [140] 1 \n", "2677 [323] 1 \n", "2676 [176] 1 \n", "6799 [326] 1 \n", "\n", "[6800 rows x 5 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "markdown", "id": "e5f16846-f92e-4a85-8cba-830e34705837", "metadata": {}, "source": [ "## New Merge Duplicates in Pipeline" ] }, { "cell_type": "code", "execution_count": 4, "id": "ed62a563-886f-4269-ab27-237ff39ea0da", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:ihm_analyse.helpers:Loaded file successfully.\n" ] } ], "source": [ "# dataset: result of the pipeline's merge step, selected by file name because\n", "# the order of the glob() listing in 'folder' is not guaranteed\n", "merged_file = next(f for f in folder if f.name == 'Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')\n", "res = load_pickle(merged_file)\n", "data = res[0]" ] }, { "cell_type": "code", "execution_count": 5, "id": "1e82810d-8cda-439d-ae26-4e65bad351d9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung393111[301, 304, 305, 313, 314, 323, 329, 331, 332, ...74
131Tägliche Überprüfung der Ölabscheider371619[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der WC-Anlagen371265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2680Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...2601[311]1
2679Zahnräder der Laufkatze verschlissen Ersatztei...1701[415]1
2677Schalter für Bühne Schwenken abgerissen, bitte...1261[323]1
2676Docke angefahren!171[176]1
6799Befestigung Deckel für Batteriefach defekt ...1072[306, 326]2
\n", "

4582 rows × 5 columns

\n", "
" ], "text/plain": [ " entry len num_occur \\\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n", "33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n", "131 Tägliche Überprüfung der Ölabscheider 37 1619 \n", "160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n", "140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n", "... ... ... ... \n", "2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n", "2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n", "2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n", "2676 Docke angefahren! 17 1 \n", "6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n", "\n", " assoc_obj_ids num_assoc_obj_ids \n", "162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n", "33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n", "131 [0, 970, 2134, 2137] 4 \n", "160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n", "140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n", "... ... ... \n", "2680 [311] 1 \n", "2679 [415] 1 \n", "2677 [323] 1 \n", "2676 [176] 1 \n", "6799 [306, 326] 2 \n", "\n", "[4582 rows x 5 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "id": "154111fe-24cc-47a1-9de2-56e1dcf36f67", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }