lang-main/test-notebooks/archive/test_new_dupl_merge.ipynb
Florian Förster 9edcd5be4e initial commit
2024-05-08 14:46:43 +02:00

1470 lines
49 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "147e39b9-0066-4cca-9561-8ed0c994850c",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f095f9ff-f7c0-4446-97cb-c208a1ae62c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'A:\\\\Arbeitsaufgaben\\\\Instandhaltung'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"str_path = os.getcwd()\n",
"str_path"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e9610b62-667c-4322-b936-bee6d45c17cf",
"metadata": {},
"outputs": [],
"source": [
"p = Path(str_path)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "b1258614-d8e9-4205-992d-b16a5406f049",
"metadata": {},
"outputs": [],
"source": [
"folder = list((p / 'results' / 'test_new2').glob('*'))"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "a4cec1df-cc16-481b-9e3c-f12747283bd8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"folder"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "510b2262-edab-4874-878d-f736a6076e79",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f0841940-2285-4bc6-bc08-8a04844d7fd3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-1_build_cosSim_matrix.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step_3_CosSim-FilterCandidates.xlsx'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/test_new2/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"import networkx as nx\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from ihm_analyse import load_pickle\n",
"from ihm_analyse.lib.preprocess import merge_similarity_dupl\n",
"from ihm_analyse.lib.graphs import update_graph, get_graph_metadata\n",
"\n",
"\n",
"str_path = os.getcwd()\n",
"p = Path(str_path)\n",
"folder = list((p / 'results' / 'test_new2').glob('*'))\n",
"folder"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8e51a545-228a-4f51-8440-53db05551d69",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:ihm_analyse.helpers:Loaded file successfully.\n",
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
]
}
],
"source": [
"# Select the pickles by file name: glob() does not guarantee a stable order,\n",
"# so positional access such as folder[1] / folder[-2] can load the wrong file.\n",
"results_dir = p / 'results' / 'test_new2'\n",
"\n",
"# dataset\n",
"res = load_pickle(results_dir / 'Pipe-TargetFeature_Step-5_analyse_feature.pkl')\n",
"data = res[0]\n",
"# dupl IDs (pairs of dataset indices flagged as duplicate candidates)\n",
"res = load_pickle(results_dir / 'Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pkl')\n",
"dupl_ids = res[0]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b95631d0-018a-4a0d-9d94-dec5db33dff4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9331"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dupl_ids)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "e1e2a149-a1d4-47c7-a9fa-a96db09f7144",
"metadata": {},
"outputs": [],
"source": [
"# restrict the experiment to the first 20 duplicate index pairs\n",
"sub_ids = dupl_ids[:20]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "35097dd4-e19a-4478-abe2-74135fec9fdc",
"metadata": {},
"outputs": [],
"source": [
"# Graph over dataset indices: every pair of similar indices is fed into the graph.\n",
"# Its connected components are then the groups of indices that belong together,\n",
"# which keeps the semantic connection across the whole dataset.\n",
"dupl_id_graph = nx.Graph()\n",
"\n",
"for left_idx, right_idx in sub_ids:\n",
"    # the graph is modified in place; parent/child roles carry no meaning\n",
"    # in an undirected graph\n",
"    update_graph(graph=dupl_id_graph, parent=left_idx, child=right_idx)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "0a6c6e61-91da-4f67-a3a5-b20072a8c1f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:ihm_analyse.graphs:Graph properties: 24 Nodes, 20 Edges\n",
"INFO:ihm_analyse.graphs:Node memory: 0.66 KB\n",
"INFO:ihm_analyse.graphs:Edge memory: 1.09 KB\n",
"INFO:ihm_analyse.graphs:Total memory: 1.75 KB\n"
]
}
],
"source": [
"graph_meta = get_graph_metadata(graph=dupl_id_graph)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "92e9ba06-428b-412d-90e8-1f161c93b681",
"metadata": {},
"outputs": [],
"source": [
"conn_ids = nx.connected_components(dupl_id_graph)\n",
"conn_ids_tpl = tuple(conn_ids)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "379f983e-776e-4520-9753-61adbeac968c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{33, 487, 5703, 176, 247, 345, 157}\n",
"{882, 131}\n",
"{561, 332, 558}\n",
"{104, 4003}\n",
"{5298, 132}\n",
"{34, 3121, 3122, 3123, 63}\n",
"{168, 6378, 1068}\n"
]
}
],
"source": [
"# iterate the stored tuple: the `conn_ids` generator is already exhausted by tuple(conn_ids)\n",
"for id_set in conn_ids_tpl:\n",
"    print(id_set)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "5edec07e-9618-4650-b806-e49de3301262",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{33, 157, 176, 247, 345, 487, 5703},\n",
" {131, 882},\n",
" {332, 558, 561},\n",
" {104, 4003},\n",
" {132, 5298},\n",
" {34, 63, 3121, 3122, 3123},\n",
" {168, 1068, 6378}]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `conn_ids_lst` is never assigned in this notebook; build the list from the stored tuple\n",
"list(conn_ids_tpl)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "f37dfc52-2734-45ff-8939-03eb47465d41",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>1654</td>\n",
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1616</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
" <td>37</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2679</th>\n",
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
" <td>170</td>\n",
" <td>1</td>\n",
" <td>[415]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2678</th>\n",
" <td>Bitte 8 Scheiben nach Muster anfertigen. Danke.</td>\n",
" <td>48</td>\n",
" <td>1</td>\n",
" <td>[140]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2677</th>\n",
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
" <td>126</td>\n",
" <td>1</td>\n",
" <td>[323]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2676</th>\n",
" <td>Docke angefahren!</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>[176]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6799</th>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>107</td>\n",
" <td>1</td>\n",
" <td>[326]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6800 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
"131 Tägliche Überprüfung der Ölabscheider 37 1616 \n",
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
"... ... ... ... \n",
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
"2678 Bitte 8 Scheiben nach Muster anfertigen. Danke. 48 1 \n",
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
"2676 Docke angefahren! 17 1 \n",
"6799 Befestigung Deckel für Batteriefach defekt ... 107 1 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
"131 [0, 970, 2134, 2137] 4 \n",
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
"... ... ... \n",
"2679 [415] 1 \n",
"2678 [140] 1 \n",
"2677 [323] 1 \n",
"2676 [176] 1 \n",
"6799 [326] 1 \n",
"\n",
"[6800 rows x 5 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "5da4d722-fae2-4835-821c-70ed348bb71a",
"metadata": {},
"outputs": [],
"source": [
"test_ids = list(conn_ids_tpl[0])"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "9eef53d4-2b0f-40cb-b4c3-6a2992d7ec09",
"metadata": {},
"outputs": [],
"source": [
"sub_data = data.loc[test_ids,:].copy()"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "42aa3a0a-85ff-40a1-8473-6b927ade9fe6",
"metadata": {},
"outputs": [],
"source": [
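"# plan for each group (bunch) of connected duplicate indices:\n",
"# 1. obtain the group\n",
"# 2. filter the dataset for the rows of the group\n",
"# 3. merge the group into a single entry\n",
"# 4. remove all but the merged entry from the whole dataset"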
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "684eeaf7-f86b-4159-bca9-a345601a2d2b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>1654</td>\n",
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>157</th>\n",
" <td>Monatliche Sichtkontrolle</td>\n",
" <td>25</td>\n",
" <td>634</td>\n",
" <td>[1038, 1040, 1041, 1042, 1043, 1044, 1045, 121...</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>176</th>\n",
" <td>Wöchentliche Sichtprüfung / Reinigung</td>\n",
" <td>37</td>\n",
" <td>361</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 421, 1003,...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>247</th>\n",
" <td>Monatliche Sichtkontrolle / Reinigung</td>\n",
" <td>37</td>\n",
" <td>113</td>\n",
" <td>[899, 906, 1052, 1169, 1170, 1725]</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>487</th>\n",
" <td>Wöchentliche Sichtprüfung</td>\n",
" <td>25</td>\n",
" <td>35</td>\n",
" <td>[1666]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>345</th>\n",
" <td>Monatliche Sichtprüfung / Reinigung</td>\n",
" <td>35</td>\n",
" <td>33</td>\n",
" <td>[885, 899, 906, 945, 946, 970, 1052, 1169, 1170]</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5703</th>\n",
" <td>monatliche Sichtkontrolle</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>[1725]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
"157 Monatliche Sichtkontrolle 25 634 \n",
"176 Wöchentliche Sichtprüfung / Reinigung 37 361 \n",
"247 Monatliche Sichtkontrolle / Reinigung 37 113 \n",
"487 Wöchentliche Sichtprüfung 25 35 \n",
"345 Monatliche Sichtprüfung / Reinigung 35 33 \n",
"5703 monatliche Sichtkontrolle 25 1 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
"157 [1038, 1040, 1041, 1042, 1043, 1044, 1045, 121... 24 \n",
"176 [301, 304, 305, 313, 314, 323, 329, 421, 1003,... 11 \n",
"247 [899, 906, 1052, 1169, 1170, 1725] 6 \n",
"487 [1666] 1 \n",
"345 [885, 899, 906, 945, 946, 970, 1052, 1169, 1170] 9 \n",
"5703 [1725] 1 "
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# order the group descending by occurrences, then by number of associated\n",
"# objects, then by the `len` column\n",
"sort_keys = ['num_occur', 'num_assoc_obj_ids', 'len']\n",
"sub_data = sub_data.sort_values(by=sort_keys, ascending=False)\n",
"sub_data"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "5e048cc8-ca58-48cc-8b6f-57a0fbe88485",
"metadata": {},
"outputs": [],
"source": [
"# Merge the whole group into its first row: keep the first entry with the max\n",
"# number of occurrences, then number of associated objects, then length of entry.\n",
"data_idx = sub_data.index[0]\n",
"# NOTE: not idempotent - re-running this cell adds the already merged count again\n",
"sub_data.at[data_idx, 'num_occur'] = sub_data['num_occur'].sum()\n",
"# union of the associated object IDs of all rows in the group, without duplicates\n",
"assoc_obj_ids = np.unique(np.concatenate(sub_data['assoc_obj_ids'].to_numpy()))\n",
"sub_data.at[data_idx, 'assoc_obj_ids'] = assoc_obj_ids\n",
"# count the merged IDs (the former `assoc_ids_uni` was never defined in this notebook)\n",
"sub_data.at[data_idx, 'num_assoc_obj_ids'] = len(assoc_obj_ids)"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "0fac40c5-102b-40d8-a45b-1c0bd89bb672",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>2831</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>157</th>\n",
" <td>Monatliche Sichtkontrolle</td>\n",
" <td>25</td>\n",
" <td>634</td>\n",
" <td>[1038, 1040, 1041, 1042, 1043, 1044, 1045, 121...</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>176</th>\n",
" <td>Wöchentliche Sichtprüfung / Reinigung</td>\n",
" <td>37</td>\n",
" <td>361</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 421, 1003,...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>247</th>\n",
" <td>Monatliche Sichtkontrolle / Reinigung</td>\n",
" <td>37</td>\n",
" <td>113</td>\n",
" <td>[899, 906, 1052, 1169, 1170, 1725]</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>487</th>\n",
" <td>Wöchentliche Sichtprüfung</td>\n",
" <td>25</td>\n",
" <td>35</td>\n",
" <td>[1666]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>345</th>\n",
" <td>Monatliche Sichtprüfung / Reinigung</td>\n",
" <td>35</td>\n",
" <td>33</td>\n",
" <td>[885, 899, 906, 945, 946, 970, 1052, 1169, 1170]</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5703</th>\n",
" <td>monatliche Sichtkontrolle</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>[1725]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 2831 \n",
"157 Monatliche Sichtkontrolle 25 634 \n",
"176 Wöchentliche Sichtprüfung / Reinigung 37 361 \n",
"247 Monatliche Sichtkontrolle / Reinigung 37 113 \n",
"487 Wöchentliche Sichtprüfung 25 35 \n",
"345 Monatliche Sichtprüfung / Reinigung 35 33 \n",
"5703 monatliche Sichtkontrolle 25 1 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 54 \n",
"157 [1038, 1040, 1041, 1042, 1043, 1044, 1045, 121... 24 \n",
"176 [301, 304, 305, 313, 314, 323, 329, 421, 1003,... 11 \n",
"247 [899, 906, 1052, 1169, 1170, 1725] 6 \n",
"487 [1666] 1 \n",
"345 [885, 899, 906, 945, 946, 970, 1052, 1169, 1170] 9 \n",
"5703 [1725] 1 "
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sub_data"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "de617dbf-0c96-4702-aa5f-c72af2b004e8",
"metadata": {},
"outputs": [],
"source": [
"test_ids.remove(data_idx)"
]
},
{
"cell_type": "code",
"execution_count": 122,
"id": "1279561e-c46d-48a6-a679-dcbcb7c72761",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[487, 5703, 176, 247, 345, 157]"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_ids"
]
},
{
"cell_type": "code",
"execution_count": 123,
"id": "acfffecf-2576-4979-8b19-6bd31d0e0d64",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>2831</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>54</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 2831 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 54 "
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sub_data2 = sub_data.drop(index=test_ids)\n",
"sub_data2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb240df2-044c-44b9-8d4e-c5fd0d157c07",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ab927a8c-fed3-42f2-a15d-9403184b1f8c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(33, 176), (33, 247), (33, 487), (131, 882), (332, 558)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_ids = dupl_ids[:5]\n",
"test_ids"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "adc463b7-9ea2-48e1-84e4-972ef45b5f9b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:ihm_analyse.graphs:Graph properties: 2695 Nodes, 9331 Edges\n",
"INFO:ihm_analyse.graphs:Node memory: 73.69 KB\n",
"INFO:ihm_analyse.graphs:Edge memory: 510.29 KB\n",
"INFO:ihm_analyse.graphs:Total memory: 583.98 KB\n"
]
}
],
"source": [
"# `merge_similarity_dupl_test` is neither defined nor imported in this notebook;\n",
"# call the imported library function instead (TODO confirm it takes the same keyword arguments)\n",
"ret = merge_similarity_dupl(data=data, dupl_idx_pairs=dupl_ids)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f2d91678-ea68-49e7-91c1-1cbc8a4fe0cc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3111</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
" <td>37</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680</th>\n",
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
" <td>260</td>\n",
" <td>1</td>\n",
" <td>[311]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2679</th>\n",
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
" <td>170</td>\n",
" <td>1</td>\n",
" <td>[415]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2677</th>\n",
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
" <td>126</td>\n",
" <td>1</td>\n",
" <td>[323]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2676</th>\n",
" <td>Docke angefahren!</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>[176]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6799</th>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>107</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4582 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n",
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
"... ... ... ... \n",
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n",
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
"2676 Docke angefahren! 17 1 \n",
"6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
"131 [0, 970, 2134, 2137] 4 \n",
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
"... ... ... \n",
"2680 [311] 1 \n",
"2679 [415] 1 \n",
"2677 [323] 1 \n",
"2676 [176] 1 \n",
"6799 [306, 326] 2 \n",
"\n",
"[4582 rows x 5 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ret[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cb13e547-5107-4f7b-a92d-ea52e7ce2fd4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>1654</td>\n",
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1616</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
" <td>37</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2679</th>\n",
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
" <td>170</td>\n",
" <td>1</td>\n",
" <td>[415]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2678</th>\n",
" <td>Bitte 8 Scheiben nach Muster anfertigen. Danke.</td>\n",
" <td>48</td>\n",
" <td>1</td>\n",
" <td>[140]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2677</th>\n",
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
" <td>126</td>\n",
" <td>1</td>\n",
" <td>[323]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2676</th>\n",
" <td>Docke angefahren!</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>[176]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6799</th>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>107</td>\n",
" <td>1</td>\n",
" <td>[326]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6800 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
"131 Tägliche Überprüfung der Ölabscheider 37 1616 \n",
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
"... ... ... ... \n",
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
"2678 Bitte 8 Scheiben nach Muster anfertigen. Danke. 48 1 \n",
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
"2676 Docke angefahren! 17 1 \n",
"6799 Befestigung Deckel für Batteriefach defekt ... 107 1 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
"131 [0, 970, 2134, 2137] 4 \n",
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
"... ... ... \n",
"2679 [415] 1 \n",
"2678 [140] 1 \n",
"2677 [323] 1 \n",
"2676 [176] 1 \n",
"6799 [326] 1 \n",
"\n",
"[6800 rows x 5 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "markdown",
"id": "e5f16846-f92e-4a85-8cba-830e34705837",
"metadata": {},
"source": [
"## New Merge Duplicates in Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ed62a563-886f-4269-ab27-237ff39ea0da",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
]
}
],
"source": [
"# merged dataset stored by the pipeline; selected by file name because glob()\n",
"# gives no ordering guarantee, so folder[-1] may point to a different file\n",
"res = load_pickle(p / 'results' / 'test_new2' / 'Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl')\n",
"data = res[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1e82810d-8cda-439d-ae26-4e65bad351d9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3111</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
" <td>37</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680</th>\n",
" <td>Stand 15.07.2020 (Stöppel): Herr Langner (Toyo...</td>\n",
" <td>260</td>\n",
" <td>1</td>\n",
" <td>[311]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2679</th>\n",
" <td>Zahnräder der Laufkatze verschlissen Ersatztei...</td>\n",
" <td>170</td>\n",
" <td>1</td>\n",
" <td>[415]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2677</th>\n",
" <td>Schalter für Bühne Schwenken abgerissen, bitte...</td>\n",
" <td>126</td>\n",
" <td>1</td>\n",
" <td>[323]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2676</th>\n",
" <td>Docke angefahren!</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>[176]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6799</th>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>107</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4582 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" entry len num_occur \\\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3111 \n",
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
"... ... ... ... \n",
"2680 Stand 15.07.2020 (Stöppel): Herr Langner (Toyo... 260 1 \n",
"2679 Zahnräder der Laufkatze verschlissen Ersatztei... 170 1 \n",
"2677 Schalter für Bühne Schwenken abgerissen, bitte... 126 1 \n",
"2676 Docke angefahren! 17 1 \n",
"6799 Befestigung Deckel für Batteriefach defekt ... 107 2 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
"131 [0, 970, 2134, 2137] 4 \n",
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
"... ... ... \n",
"2680 [311] 1 \n",
"2679 [415] 1 \n",
"2677 [323] 1 \n",
"2676 [176] 1 \n",
"6799 [306, 326] 2 \n",
"\n",
"[4582 rows x 5 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "154111fe-24cc-47a1-9de2-56e1dcf36f67",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}