lang-main/test-notebooks/display_results.ipynb
2024-06-05 16:37:23 +02:00

1661 lines
54 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9d7dae43-e799-469c-afe2-50dba45eeaa7",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3760b040-985c-46ec-ba77-13f0f7a52c83",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-05 11:24:20 +0000 | io:INFO | Loaded TOML config file successfully.\n"
]
}
],
"source": [
"from pathlib import Path\n",
"from lang_main.analysis.graphs import convert_graph_to_cytoscape\n",
"from lang_main.io import load_pickle"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "97487448-82c8-4b3d-8a1a-ccccaaac8d86",
"metadata": {},
"outputs": [],
"source": [
"def get_files(path: str) -> tuple[Path, ...]:\n",
"    \"\"\"Return every entry found directly inside the directory ``path``.\n",
"\n",
"    Args:\n",
"        path: Directory to scan. The scan is not recursive.\n",
"\n",
"    Returns:\n",
"        Tuple of ``Path`` objects in the order produced by ``Path.glob('*')``;\n",
"        the order is not sorted here.\n",
"\n",
"    Raises:\n",
"        AssertionError: If ``path`` does not exist.\n",
"    \"\"\"\n",
"    # Honour the caller-supplied directory instead of a fixed location.\n",
"    p = Path(path)\n",
"    assert p.exists(), \"path does not exist\"\n",
"    return tuple(p.glob(r'*'))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "598f4d99-9d35-49c9-8c5d-113d4c80cecf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-4_entry_wise_cleansing.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/TokenGraph.graphml'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Token_Analysis-TokenGraph.pickle'))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "55ad4af3-87cd-4189-9309-171aba4e04a6",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
" _torch_pytree._register_pytree_node(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-05 11:24:36 +0000 | io:INFO | Loaded file successfully.\n"
]
}
],
"source": [
"# Pick the token-graph pickle by file name: a positional index silently\n",
"# selects a different file whenever the directory listing changes.\n",
"# Raises StopIteration if the file is missing from `files`.\n",
"file = next(f for f in files if f.name == 'Pipe-Token_Analysis_Step-1_build_token_graph.pkl')\n",
"ret = load_pickle(file)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1757d83e-2be9-4fae-9ee2-90bc1ce33de4",
"metadata": {},
"outputs": [],
"source": [
"tk_graph = ret[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "dd5d9785-9aab-4552-b791-f201e69b04e8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TokenGraph(name: TokenGraph, number of nodes: 6028, number of edges: 17950)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tk_graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b262bdc-1bb8-473d-aab4-0073c94428cc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a8b2b53c-d7e4-428b-bb12-0d746c68af4f",
"metadata": {},
"outputs": [],
"source": [
"cyto_data = convert_graph_to_cytoscape(tk_graph, batch_size=10)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b8b10f74-00ac-4f22-b325-46013db96cb2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'data': {'id': 'Wartungstätigkeit', 'label': 'Wartungstätigkeit'}},\n",
" {'data': {'id': 'Vorgabe', 'label': 'Vorgabe'}},\n",
" {'data': {'id': 'Maschinenhersteller', 'label': 'Maschinenhersteller'}},\n",
" {'data': {'id': 'Sichtkontrolle', 'label': 'Sichtkontrolle'}},\n",
" {'data': {'id': 'Reinigung', 'label': 'Reinigung'}},\n",
" {'data': {'id': 'Überprüfung', 'label': 'Überprüfung'}},\n",
" {'data': {'id': 'Ölabscheider', 'label': 'Ölabscheider'}},\n",
" {'data': {'id': 'Kontrolle', 'label': 'Kontrolle'}},\n",
" {'data': {'id': 'C-Anlage', 'label': 'C-Anlage'}},\n",
" {'data': {'id': 'Stabbreithalter', 'label': 'Stabbreithalter'}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'Vorgabe',\n",
" 'weight': 92690}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'Maschinenhersteller',\n",
" 'weight': 92690}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'Maschinenbediener',\n",
" 'weight': 242}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'Laserabteilung',\n",
" 'weight': 242}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'Arbeitsplan',\n",
" 'weight': 244}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'abarbeiten',\n",
" 'weight': 242}},\n",
" {'data': {'source': 'Wartungstätigkeit',\n",
" 'target': 'Webmaschinenkontrollliste',\n",
" 'weight': 2}},\n",
" {'data': {'source': 'Wartungstätigkeit', 'target': 'sehen', 'weight': 2}},\n",
" {'data': {'source': 'Vorgabe',\n",
" 'target': 'Maschinenhersteller',\n",
" 'weight': 92690}},\n",
" {'data': {'source': 'Vorgabe', 'target': 'Wartungsplan', 'weight': 2032}}]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cyto_data"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "9b7c87ba-2976-4431-b00d-cef08b580914",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wartungstätigkeit\n",
"Vorgabe\n",
"Maschinenhersteller\n",
"Sichtkontrolle\n",
"Reinigung\n",
"Überprüfung\n",
"Ölabscheider\n",
"Kontrolle\n",
"C-Anlage\n",
"Stabbreithalter\n"
]
}
],
"source": [
"for node in list(tk_graph.nodes)[:10]:\n",
" print(node)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "eb62fe03-3e57-4fa0-a23e-7229d92588ab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wartungstätigkeit\n",
"Vorgabe\n",
"Maschinenhersteller\n",
"Sichtkontrolle\n",
"Reinigung\n",
"Überprüfung\n",
"Ölabscheider\n",
"Kontrolle\n",
"C-Anlage\n",
"Stabbreithalter\n",
"Scharniere\n"
]
}
],
"source": [
"# target: data fields\n",
"# Collect the first 10 graph nodes as {'data': {'id': ..., 'label': ...}}\n",
"# records, mirroring the node entries returned by convert_graph_to_cytoscape.\n",
"elements = []\n",
"\n",
"for i, node in enumerate(tk_graph):\n",
"    # Check the limit before printing so the printed names are exactly the\n",
"    # nodes that end up in `elements` (no extra 11th name).\n",
"    if i == 10:\n",
"        break\n",
"    print(node)\n",
"    node_info = {\n",
"        'data': {\n",
"            'id': node,\n",
"            'label': node,\n",
"        }\n",
"    }\n",
"    elements.append(node_info)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "562879d0-446d-4fdb-a27e-51c8ce91c4d7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'data': {'id': 'Wartungstätigkeit', 'label': 'Wartungstätigkeit'}},\n",
" {'data': {'id': 'Vorgabe', 'label': 'Vorgabe'}},\n",
" {'data': {'id': 'Maschinenhersteller', 'label': 'Maschinenhersteller'}},\n",
" {'data': {'id': 'Sichtkontrolle', 'label': 'Sichtkontrolle'}},\n",
" {'data': {'id': 'Reinigung', 'label': 'Reinigung'}},\n",
" {'data': {'id': 'Überprüfung', 'label': 'Überprüfung'}},\n",
" {'data': {'id': 'Ölabscheider', 'label': 'Ölabscheider'}},\n",
" {'data': {'id': 'Kontrolle', 'label': 'Kontrolle'}},\n",
" {'data': {'id': 'C-Anlage', 'label': 'C-Anlage'}},\n",
" {'data': {'id': 'Stabbreithalter', 'label': 'Stabbreithalter'}}]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"elements"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "07d3be40-063a-47d4-b0fc-2e1b2da058a0",
"metadata": {},
"outputs": [],
"source": [
"# Collect the first 10 edges as {'data': {'source', 'target', 'weight'}}\n",
"# records; edges without a 'weight' attribute fall back to weight 1.\n",
"edges = []\n",
"\n",
"for i, (source, target, weight) in enumerate(tk_graph.edges.data('weight', default=1)):\n",
"    if i >= 10:\n",
"        break\n",
"    # `edge_info` is kept as a notebook-level name because a later cell displays it.\n",
"    edge_info = {'data': dict(source=source, target=target, weight=weight)}\n",
"    edges.append(edge_info)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "c48bd728-7776-43f5-be42-b5374c678c11",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'data': {'source': 'Vorgabe', 'target': 'Wartungsplan', 'weight': 2032}}"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"edge_info"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f46c2f9a-8c8e-4d0f-8ae5-baded18dcfdd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "81b710bb-410a-46c1-9541-0a394b400265",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 53,
"id": "5c0e04a1-8351-4b1a-9027-ca00901a05bc",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"{'weight': 92690}"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tk_graph['Wartungstätigkeit']['Vorgabe']"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "5085d797-0783-4329-bcb0-9f966d2768dc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-05 08:16:14 +0000 | graphs:INFO | Successfully saved graph as GraphML file under A:\\Arbeitsaufgaben\\lang-main\\test-notebooks\\TokenGraph.graphml.\n"
]
}
],
"source": [
"tk_graph.save_graph(Path.cwd())"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "540f4720-a2bf-4171-8db5-8e6993d38c13",
"metadata": {},
"outputs": [],
"source": [
"batched = ret[0]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "2b145475-a9e4-4c56-b40c-0ca725dd886f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>batched_idxs</th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>[232, 241, 242, 244, 247, 249, 268, 269, 289, ...</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>[37, 50, 57, 61, 129, 245, 246, 266, 353, 378,...</td>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3108</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>[179, 196, 216, 350, 355, 408, 426, 427, 428, ...</td>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>[224, 276, 277, 278, 279, 280, 281, 282, 283, ...</td>\n",
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
" <td>36</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>[191, 192, 194, 243, 248, 254, 296, 300, 302, ...</td>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2557</th>\n",
" <td>[21406]</td>\n",
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>[211]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2556</th>\n",
" <td>[21405]</td>\n",
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" <td>[93]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2555</th>\n",
" <td>[21404]</td>\n",
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>[1707]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2554</th>\n",
" <td>[21385]</td>\n",
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
" <td>173</td>\n",
" <td>1</td>\n",
" <td>[1]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6771</th>\n",
" <td>[123456]</td>\n",
" <td>Befestigung Deckel für Batteriefach defekt Hal...</td>\n",
" <td>99</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4545 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" batched_idxs \\\n",
"162 [232, 241, 242, 244, 247, 249, 268, 269, 289, ... \n",
"33 [37, 50, 57, 61, 129, 245, 246, 266, 353, 378,... \n",
"131 [179, 196, 216, 350, 355, 408, 426, 427, 428, ... \n",
"160 [224, 276, 277, 278, 279, 280, 281, 282, 283, ... \n",
"140 [191, 192, 194, 243, 248, 254, 296, 300, 302, ... \n",
"... ... \n",
"2557 [21406] \n",
"2556 [21405] \n",
"2555 [21404] \n",
"2554 [21385] \n",
"6771 [123456] \n",
"\n",
" entry len num_occur \\\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3108 \n",
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
"160 Wöchentliche Kontrolle der C-Anlagen 36 1265 \n",
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
"... ... ... ... \n",
"2557 Fehler 9723 Leistungsversorgung Antrieb defekt 46 1 \n",
"2556 T-Warp-Let-Off1 schleppfehler 29 1 \n",
"2555 Fahrräder wurden gewartet und gereinigt. 40 1 \n",
"2554 Bohrlöcher an Gebots- und Verbotszeichen anbri... 173 1 \n",
"6771 Befestigung Deckel für Batteriefach defekt Hal... 99 2 \n",
"\n",
" assoc_obj_ids num_assoc_obj_ids \n",
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
"131 [0, 970, 2134, 2137] 4 \n",
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
"... ... ... \n",
"2557 [211] 1 \n",
"2556 [93] 1 \n",
"2555 [1707] 1 \n",
"2554 [1] 1 \n",
"6771 [306, 326] 2 \n",
"\n",
"[4545 rows x 6 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"batched"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "8d49b064-4a61-4b34-95ee-2e9ba30208be",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ...\n",
"33 Wöchentliche Sichtkontrolle / Reinigung\n",
"131 Tägliche Überprüfung der Ölabscheider\n",
"160 Wöchentliche Kontrolle der C-Anlagen\n",
"140 Halbjährliche Kontrolle des Stabbreithalters\n",
"1778 Brandschutztechnische Prüfung\n",
"332 Prüfung von: - Scharniere - Dichtung - Schließ...\n",
"104 Täglicher Technikrundgang\n",
"132 Tägliche Kontrolle der Kompressorstationen\n",
"98 Tägliche Kesselhauskontrolle\n",
"Name: entry, dtype: object"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"batched.iloc[:10,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c71e23e3-7a56-4d7a-9c05-49d41acce6ee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa4ebe87-5d40-408d-bb9d-2e386ea28f3f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 27,
"id": "fc2e05b6-95b7-44c0-a014-772a786ce03d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
" _torch_pytree._register_pytree_node(\n",
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
" _torch_pytree._register_pytree_node(\n"
]
}
],
"source": [
"import spacy\n",
"nlp = spacy.load('de_dep_news_trf')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "1eaf6d3a-c307-4572-a2c4-3e3722a41b6a",
"metadata": {},
"outputs": [],
"source": [
"txt = batched.iloc[:10,1].to_list()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "c42457d7-8cbb-4591-8f87-86a34ba26a11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers\n",
"Wöchentliche Sichtkontrolle / Reinigung\n",
"Tägliche Überprüfung der Ölabscheider\n",
"Wöchentliche Kontrolle der C-Anlagen\n",
"Halbjährliche Kontrolle des Stabbreithalters\n",
"Brandschutztechnische Prüfung\n",
"Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
"Täglicher Technikrundgang\n",
"Tägliche Kontrolle der Kompressorstationen\n",
"Tägliche Kesselhauskontrolle\n"
]
}
],
"source": [
"for doc in nlp.pipe(txt, batch_size=10):\n",
" print(doc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d361b861-6277-42b4-921b-3510b1c7cb6a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a93ecd92-eece-41c9-bba0-e1bf3611ff5c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a729ea4-5b88-4c05-aa2f-c09bc79bc901",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 19,
"id": "ee0fea45-c26b-4253-b7f6-95ad70d0205a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-05 07:20:18 +0000 | io:INFO | Loaded file successfully.\n"
]
}
],
"source": [
"file = files[1]\n",
"raw = load_pickle(file)[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "09d4777e-2d87-4798-a159-a57423ae7501",
"metadata": {},
"outputs": [],
"source": [
"idxs = batched.iloc[0,0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "483dc38b-220b-4154-b784-9362e8169083",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 25,
"id": "55ad9530-9cde-4049-8949-c8b31ddeb384",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>VorgangsID</th>\n",
" <th>ObjektID</th>\n",
" <th>HObjektText</th>\n",
" <th>ObjektArtID</th>\n",
" <th>ObjektArtText</th>\n",
" <th>VorgangsTypID</th>\n",
" <th>VorgangsTypName</th>\n",
" <th>VorgangsDatum</th>\n",
" <th>VorgangsStatusId</th>\n",
" <th>VorgangsPrioritaet</th>\n",
" <th>VorgangsBeschreibung</th>\n",
" <th>VorgangsOrt</th>\n",
" <th>VorgangsArtText</th>\n",
" <th>ErledigungsDatum</th>\n",
" <th>ErledigungsArtText</th>\n",
" <th>ErledigungsBeschreibung</th>\n",
" <th>MPMelderArbeitsplatz</th>\n",
" <th>MPAbteilungBezeichnung</th>\n",
" <th>Arbeitsbeginn</th>\n",
" <th>ErstellungsDatum</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>232</th>\n",
" <td>151991</td>\n",
" <td>121</td>\n",
" <td>217 C , Webmaschine, DL 280 EMS Breite 280</td>\n",
" <td>3</td>\n",
" <td>Luft-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2022-03-02</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2022-03-02</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-03-02</td>\n",
" <td>2022-02-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>241</th>\n",
" <td>155717</td>\n",
" <td>187</td>\n",
" <td>246, Webmaschine Jacquard,</td>\n",
" <td>6</td>\n",
" <td>Jacquard-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2022-04-01</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2022-04-01</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-04-01</td>\n",
" <td>2022-02-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>242</th>\n",
" <td>152507</td>\n",
" <td>177</td>\n",
" <td>204 S SI , Webmaschine, DL 280 EMS Breite 220</td>\n",
" <td>3</td>\n",
" <td>Luft-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2022-04-09</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2022-04-09</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-04-09</td>\n",
" <td>2022-02-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>244</th>\n",
" <td>247625</td>\n",
" <td>251</td>\n",
" <td>101, Webmaschine, OM 220 EOS</td>\n",
" <td>3</td>\n",
" <td>Luft-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2022-04-19</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2022-04-19</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-04-19</td>\n",
" <td>2022-04-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>247</th>\n",
" <td>254408</td>\n",
" <td>251</td>\n",
" <td>101, Webmaschine, OM 220 EOS</td>\n",
" <td>3</td>\n",
" <td>Luft-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2022-05-07</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2022-05-07</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-05-07</td>\n",
" <td>2022-04-28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123434</th>\n",
" <td>571453</td>\n",
" <td>3212</td>\n",
" <td>A072, Webmaschine Jacquard,</td>\n",
" <td>6</td>\n",
" <td>Jacquard-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2023-06-28</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2023-06-28</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2023-06-28</td>\n",
" <td>2023-05-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123435</th>\n",
" <td>571453</td>\n",
" <td>187</td>\n",
" <td>246, Webmaschine Jacquard,</td>\n",
" <td>6</td>\n",
" <td>Jacquard-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2023-06-28</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2023-06-28</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2023-06-28</td>\n",
" <td>2023-05-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123436</th>\n",
" <td>571453</td>\n",
" <td>1792</td>\n",
" <td>A057, Webmaschine Jacquard,</td>\n",
" <td>6</td>\n",
" <td>Jacquard-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2023-06-28</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2023-06-28</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2023-06-28</td>\n",
" <td>2023-05-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123437</th>\n",
" <td>571453</td>\n",
" <td>186</td>\n",
" <td>245 J, Webmaschine Jacquard,</td>\n",
" <td>6</td>\n",
" <td>Jacquard-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2023-06-28</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2023-06-28</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2023-06-28</td>\n",
" <td>2023-05-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123438</th>\n",
" <td>571453</td>\n",
" <td>2473</td>\n",
" <td>A056, Webmaschine Jacquard,</td>\n",
" <td>6</td>\n",
" <td>Jacquard-Webmaschine</td>\n",
" <td>1</td>\n",
" <td>Wartung</td>\n",
" <td>2023-06-28</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>NaN</td>\n",
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
" <td>2023-06-28</td>\n",
" <td>Intern UTT - Sichtkontrolle</td>\n",
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2023-06-28</td>\n",
" <td>2023-05-26</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>92592 rows × 20 columns</p>\n",
"</div>"
],
"text/plain": [
" VorgangsID ObjektID HObjektText \\\n",
"232 151991 121 217 C , Webmaschine, DL 280 EMS Breite 280 \n",
"241 155717 187 246, Webmaschine Jacquard, \n",
"242 152507 177 204 S SI , Webmaschine, DL 280 EMS Breite 220 \n",
"244 247625 251 101, Webmaschine, OM 220 EOS \n",
"247 254408 251 101, Webmaschine, OM 220 EOS \n",
"... ... ... ... \n",
"123434 571453 3212 A072, Webmaschine Jacquard, \n",
"123435 571453 187 246, Webmaschine Jacquard, \n",
"123436 571453 1792 A057, Webmaschine Jacquard, \n",
"123437 571453 186 245 J, Webmaschine Jacquard, \n",
"123438 571453 2473 A056, Webmaschine Jacquard, \n",
"\n",
" ObjektArtID ObjektArtText VorgangsTypID VorgangsTypName \\\n",
"232 3 Luft-Webmaschine 1 Wartung \n",
"241 6 Jacquard-Webmaschine 1 Wartung \n",
"242 3 Luft-Webmaschine 1 Wartung \n",
"244 3 Luft-Webmaschine 1 Wartung \n",
"247 3 Luft-Webmaschine 1 Wartung \n",
"... ... ... ... ... \n",
"123434 6 Jacquard-Webmaschine 1 Wartung \n",
"123435 6 Jacquard-Webmaschine 1 Wartung \n",
"123436 6 Jacquard-Webmaschine 1 Wartung \n",
"123437 6 Jacquard-Webmaschine 1 Wartung \n",
"123438 6 Jacquard-Webmaschine 1 Wartung \n",
"\n",
" VorgangsDatum VorgangsStatusId VorgangsPrioritaet \\\n",
"232 2022-03-02 5 0 \n",
"241 2022-04-01 5 0 \n",
"242 2022-04-09 5 0 \n",
"244 2022-04-19 5 0 \n",
"247 2022-05-07 5 0 \n",
"... ... ... ... \n",
"123434 2023-06-28 5 0 \n",
"123435 2023-06-28 5 0 \n",
"123436 2023-06-28 5 0 \n",
"123437 2023-06-28 5 0 \n",
"123438 2023-06-28 5 0 \n",
"\n",
" VorgangsBeschreibung VorgangsOrt \\\n",
"232 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"241 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"242 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"244 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"247 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"... ... ... \n",
"123434 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"123435 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"123436 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"123437 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"123438 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
"\n",
" VorgangsArtText ErledigungsDatum \\\n",
"232 Tägliche Interne Wartungstätigkeiten Weberei 2022-03-02 \n",
"241 Tägliche Interne Wartungstätigkeiten Weberei 2022-04-01 \n",
"242 Tägliche Interne Wartungstätigkeiten Weberei 2022-04-09 \n",
"244 Tägliche Interne Wartungstätigkeiten Weberei 2022-04-19 \n",
"247 Tägliche Interne Wartungstätigkeiten Weberei 2022-05-07 \n",
"... ... ... \n",
"123434 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
"123435 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
"123436 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
"123437 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
"123438 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
"\n",
" ErledigungsArtText \\\n",
"232 Intern UTT - Sichtkontrolle \n",
"241 Intern UTT - Sichtkontrolle \n",
"242 Intern UTT - Sichtkontrolle \n",
"244 Intern UTT - Sichtkontrolle \n",
"247 Intern UTT - Sichtkontrolle \n",
"... ... \n",
"123434 Intern UTT - Sichtkontrolle \n",
"123435 Intern UTT - Sichtkontrolle \n",
"123436 Intern UTT - Sichtkontrolle \n",
"123437 Intern UTT - Sichtkontrolle \n",
"123438 Intern UTT - Sichtkontrolle \n",
"\n",
" ErledigungsBeschreibung \\\n",
"232 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"241 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"242 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"244 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"247 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"... ... \n",
"123434 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"123435 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"123436 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"123437 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"123438 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
"\n",
" MPMelderArbeitsplatz MPAbteilungBezeichnung Arbeitsbeginn \\\n",
"232 NaN NaN 2022-03-02 \n",
"241 NaN NaN 2022-04-01 \n",
"242 NaN NaN 2022-04-09 \n",
"244 NaN NaN 2022-04-19 \n",
"247 NaN NaN 2022-05-07 \n",
"... ... ... ... \n",
"123434 NaN NaN 2023-06-28 \n",
"123435 NaN NaN 2023-06-28 \n",
"123436 NaN NaN 2023-06-28 \n",
"123437 NaN NaN 2023-06-28 \n",
"123438 NaN NaN 2023-06-28 \n",
"\n",
" ErstellungsDatum \n",
"232 2022-02-17 \n",
"241 2022-02-17 \n",
"242 2022-02-17 \n",
"244 2022-04-14 \n",
"247 2022-04-28 \n",
"... ... \n",
"123434 2023-05-26 \n",
"123435 2023-05-26 \n",
"123436 2023-05-26 \n",
"123437 2023-05-26 \n",
"123438 2023-05-26 \n",
"\n",
"[92592 rows x 20 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw.loc[idxs]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05775885-cf62-482c-9628-fbc976df9656",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "29d00943-5c0e-4f5d-81b6-d566dbbcf89d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "82a059ea-0eb8-4db1-b859-3fc07e42faff",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 69,
"id": "d1c1190f-0c80-40e3-8965-78d68400a33d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "e26c52eb-7a6b-49da-97a9-6e24a2a4d91e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shared:INFO | 2024-05-29 11:56:46 +0000 | Loaded file successfully.\n"
]
}
],
"source": [
"file = files[-1]\n",
"ret = load_pickle(file)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "beacf5ca-6946-413a-817c-e7e87da9ace3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>162</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>33</td>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3108</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>131</td>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>160</td>\n",
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
" <td>36</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>140</td>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6756</th>\n",
" <td>2559</td>\n",
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>[211]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6757</th>\n",
" <td>2558</td>\n",
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>[93]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6758</th>\n",
" <td>2557</td>\n",
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>[1707]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6759</th>\n",
" <td>2556</td>\n",
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
" <td>173</td>\n",
" <td>1</td>\n",
" <td>[1]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6760</th>\n",
" <td>6782</td>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4545 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" index ... num_assoc_obj_ids\n",
"0 162 ... 206\n",
"1 33 ... 74\n",
"2 131 ... 4\n",
"3 160 ... 11\n",
"4 140 ... 166\n",
"... ... ... ...\n",
"6756 2559 ... 1\n",
"6757 2558 ... 1\n",
"6758 2557 ... 1\n",
"6759 2556 ... 1\n",
"6760 6782 ... 2\n",
"\n",
"[4545 rows x 6 columns]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ret[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2e873f4-363e-4dbf-93f1-927b4ee3c598",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 72,
"id": "cbf0b450-ec00-471f-9627-717e52c5471d",
"metadata": {},
"outputs": [],
"source": [
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "74e289ed-8d3e-4a50-afdf-d1d97e8a7807",
"metadata": {},
"outputs": [],
"source": [
"tup = tuple(i for i in range(100000000))"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "3e747e82-e6f8-47bb-918b-27bb7c37a10f",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6ade9c6f4e61410fb93f35e43222705b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/100000000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"num = 0\n",
"for i in tqdm(tup):\n",
" num += i"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "64cd6cc7-2803-41f1-b05c-83d65bdc7d42",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4999999950000000"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36366147-3632-4518-936e-878563305e49",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4dbc00b8-1437-4986-85e4-645a8bcf4a6d",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "17156aa0-8fd6-407b-b014-698df0e534a9",
"metadata": {},
"outputs": [],
"source": [
"arr = np.random.rand(1000,1000)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "4292a60b-9cb2-42d9-bedf-3b1120f1b515",
"metadata": {},
"outputs": [],
"source": [
"idx = np.argwhere(arr >= 0.97)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "4426f1d5-dcd2-4d64-bdca-7dece6793f8f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30220"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(idx)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "5b78436e-a828-42bd-a5ed-ae6045349391",
"metadata": {},
"outputs": [],
"source": [
"batch = idx[:200]"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "75edc50e-b64c-4319-8f74-27653ed3452c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"88.5 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"tuple(map(tuple, batch))"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "d9c827a4-ccdf-4cc1-90af-b018ae4858a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"94.9 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"tuple(tuple(x) for x in batch)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "acb2a0c9-b7d2-463d-8e63-c52fc7754ae8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}