1661 lines
54 KiB
Plaintext
1661 lines
54 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "9d7dae43-e799-469c-afe2-50dba45eeaa7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%load_ext autoreload\n",
|
||
"%autoreload 2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "3760b040-985c-46ec-ba77-13f0f7a52c83",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-06-05 11:24:20 +0000 | io:INFO | Loaded TOML config file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from pathlib import Path\n",
|
||
"from lang_main.analysis.graphs import convert_graph_to_cytoscape\n",
|
||
"from lang_main.io import load_pickle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "97487448-82c8-4b3d-8a1a-ccccaaac8d86",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def get_files(path: str) -> tuple[Path, ...]:\n",
|
||
" p = Path(path)\n",
|
||
" assert p.exists(), \"path does not exist\"\n",
|
||
" return tuple(p.glob(r'*'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "598f4d99-9d35-49c9-8c5d-113d4c80cecf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-4_entry_wise_cleansing.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/TokenGraph.graphml'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Token_Analysis-TokenGraph.pickle'))"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
|
||
"files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "55ad4af3-87cd-4189-9309-171aba4e04a6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
|
||
" _torch_pytree._register_pytree_node(\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-06-05 11:24:36 +0000 | io:INFO | Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"file = files[-3]\n",
|
||
"ret = load_pickle(file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "1757d83e-2be9-4fae-9ee2-90bc1ce33de4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tk_graph = ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "dd5d9785-9aab-4552-b791-f201e69b04e8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"TokenGraph(name: TokenGraph, number of nodes: 6028, number of edges: 17950)"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5b262bdc-1bb8-473d-aab4-0073c94428cc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "a8b2b53c-d7e4-428b-bb12-0d746c68af4f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cyto_data = convert_graph_to_cytoscape(tk_graph, batch_size=10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "b8b10f74-00ac-4f22-b325-46013db96cb2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[{'data': {'id': 'Wartungstätigkeit', 'label': 'Wartungstätigkeit'}},\n",
|
||
" {'data': {'id': 'Vorgabe', 'label': 'Vorgabe'}},\n",
|
||
" {'data': {'id': 'Maschinenhersteller', 'label': 'Maschinenhersteller'}},\n",
|
||
" {'data': {'id': 'Sichtkontrolle', 'label': 'Sichtkontrolle'}},\n",
|
||
" {'data': {'id': 'Reinigung', 'label': 'Reinigung'}},\n",
|
||
" {'data': {'id': 'Überprüfung', 'label': 'Überprüfung'}},\n",
|
||
" {'data': {'id': 'Ölabscheider', 'label': 'Ölabscheider'}},\n",
|
||
" {'data': {'id': 'Kontrolle', 'label': 'Kontrolle'}},\n",
|
||
" {'data': {'id': 'C-Anlage', 'label': 'C-Anlage'}},\n",
|
||
" {'data': {'id': 'Stabbreithalter', 'label': 'Stabbreithalter'}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'Vorgabe',\n",
|
||
" 'weight': 92690}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'Maschinenhersteller',\n",
|
||
" 'weight': 92690}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'Maschinenbediener',\n",
|
||
" 'weight': 242}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'Laserabteilung',\n",
|
||
" 'weight': 242}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'Arbeitsplan',\n",
|
||
" 'weight': 244}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'abarbeiten',\n",
|
||
" 'weight': 242}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit',\n",
|
||
" 'target': 'Webmaschinenkontrollliste',\n",
|
||
" 'weight': 2}},\n",
|
||
" {'data': {'source': 'Wartungstätigkeit', 'target': 'sehen', 'weight': 2}},\n",
|
||
" {'data': {'source': 'Vorgabe',\n",
|
||
" 'target': 'Maschinenhersteller',\n",
|
||
" 'weight': 92690}},\n",
|
||
" {'data': {'source': 'Vorgabe', 'target': 'Wartungsplan', 'weight': 2032}}]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"cyto_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"id": "9b7c87ba-2976-4431-b00d-cef08b580914",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Wartungstätigkeit\n",
|
||
"Vorgabe\n",
|
||
"Maschinenhersteller\n",
|
||
"Sichtkontrolle\n",
|
||
"Reinigung\n",
|
||
"Überprüfung\n",
|
||
"Ölabscheider\n",
|
||
"Kontrolle\n",
|
||
"C-Anlage\n",
|
||
"Stabbreithalter\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"for node in list(tk_graph.nodes)[:10]:\n",
|
||
" print(node)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"id": "eb62fe03-3e57-4fa0-a23e-7229d92588ab",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Wartungstätigkeit\n",
|
||
"Vorgabe\n",
|
||
"Maschinenhersteller\n",
|
||
"Sichtkontrolle\n",
|
||
"Reinigung\n",
|
||
"Überprüfung\n",
|
||
"Ölabscheider\n",
|
||
"Kontrolle\n",
|
||
"C-Anlage\n",
|
||
"Stabbreithalter\n",
|
||
"Scharniere\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# target: data fields\n",
|
||
"elements = []\n",
|
||
"\n",
|
||
"for i, node in enumerate(tk_graph):\n",
|
||
" print(node)\n",
|
||
" if i == 10:\n",
|
||
" break\n",
|
||
" node_info = {\n",
|
||
" 'data': {\n",
|
||
" 'id': node,\n",
|
||
" 'label': node,\n",
|
||
" }\n",
|
||
" }\n",
|
||
" elements.append(node_info)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"id": "562879d0-446d-4fdb-a27e-51c8ce91c4d7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[{'data': {'id': 'Wartungstätigkeit', 'label': 'Wartungstätigkeit'}},\n",
|
||
" {'data': {'id': 'Vorgabe', 'label': 'Vorgabe'}},\n",
|
||
" {'data': {'id': 'Maschinenhersteller', 'label': 'Maschinenhersteller'}},\n",
|
||
" {'data': {'id': 'Sichtkontrolle', 'label': 'Sichtkontrolle'}},\n",
|
||
" {'data': {'id': 'Reinigung', 'label': 'Reinigung'}},\n",
|
||
" {'data': {'id': 'Überprüfung', 'label': 'Überprüfung'}},\n",
|
||
" {'data': {'id': 'Ölabscheider', 'label': 'Ölabscheider'}},\n",
|
||
" {'data': {'id': 'Kontrolle', 'label': 'Kontrolle'}},\n",
|
||
" {'data': {'id': 'C-Anlage', 'label': 'C-Anlage'}},\n",
|
||
" {'data': {'id': 'Stabbreithalter', 'label': 'Stabbreithalter'}}]"
|
||
]
|
||
},
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"elements"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"id": "07d3be40-063a-47d4-b0fc-2e1b2da058a0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"edges = []\n",
|
||
"\n",
|
||
"for i, (source, target, weight) in enumerate(tk_graph.edges.data('weight', default=1)):\n",
|
||
" if i == 10:\n",
|
||
" break\n",
|
||
" edge_info = {\n",
|
||
" 'data': {\n",
|
||
" 'source': source,\n",
|
||
" 'target': target,\n",
|
||
" 'weight': weight,\n",
|
||
" }\n",
|
||
" }\n",
|
||
" edges.append(edge_info)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"id": "c48bd728-7776-43f5-be42-b5374c678c11",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'data': {'source': 'Vorgabe', 'target': 'Wartungsplan', 'weight': 2032}}"
|
||
]
|
||
},
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"edge_info"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f46c2f9a-8c8e-4d0f-8ae5-baded18dcfdd",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "81b710bb-410a-46c1-9541-0a394b400265",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "5c0e04a1-8351-4b1a-9027-ca00901a05bc",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'weight': 92690}"
|
||
]
|
||
},
|
||
"execution_count": 53,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph['Wartungstätigkeit']['Vorgabe']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"id": "5085d797-0783-4329-bcb0-9f966d2768dc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-06-05 08:16:14 +0000 | graphs:INFO | Successfully saved graph as GraphML file under A:\\Arbeitsaufgaben\\lang-main\\test-notebooks\\TokenGraph.graphml.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph.save_graph(Path.cwd())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "540f4720-a2bf-4171-8db5-8e6993d38c13",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"batched = ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "2b145475-a9e4-4c56-b40c-0ca725dd886f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>batched_idxs</th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>[232, 241, 242, 244, 247, 249, 268, 269, 289, ...</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>[37, 50, 57, 61, 129, 245, 246, 266, 353, 378,...</td>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>3108</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>[179, 196, 216, 350, 355, 408, 426, 427, 428, ...</td>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1619</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>[224, 276, 277, 278, 279, 280, 281, 282, 283, ...</td>\n",
|
||
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>[191, 192, 194, 243, 248, 254, 296, 300, 302, ...</td>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2557</th>\n",
|
||
" <td>[21406]</td>\n",
|
||
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[211]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2556</th>\n",
|
||
" <td>[21405]</td>\n",
|
||
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[93]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2555</th>\n",
|
||
" <td>[21404]</td>\n",
|
||
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1707]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2554</th>\n",
|
||
" <td>[21385]</td>\n",
|
||
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
|
||
" <td>173</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6771</th>\n",
|
||
" <td>[123456]</td>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt Hal...</td>\n",
|
||
" <td>99</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>[306, 326]</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>4545 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" batched_idxs \\\n",
|
||
"162 [232, 241, 242, 244, 247, 249, 268, 269, 289, ... \n",
|
||
"33 [37, 50, 57, 61, 129, 245, 246, 266, 353, 378,... \n",
|
||
"131 [179, 196, 216, 350, 355, 408, 426, 427, 428, ... \n",
|
||
"160 [224, 276, 277, 278, 279, 280, 281, 282, 283, ... \n",
|
||
"140 [191, 192, 194, 243, 248, 254, 296, 300, 302, ... \n",
|
||
"... ... \n",
|
||
"2557 [21406] \n",
|
||
"2556 [21405] \n",
|
||
"2555 [21404] \n",
|
||
"2554 [21385] \n",
|
||
"6771 [123456] \n",
|
||
"\n",
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 3108 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1619 \n",
|
||
"160 Wöchentliche Kontrolle der C-Anlagen 36 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... ... ... \n",
|
||
"2557 Fehler 9723 Leistungsversorgung Antrieb defekt 46 1 \n",
|
||
"2556 T-Warp-Let-Off1 schleppfehler 29 1 \n",
|
||
"2555 Fahrräder wurden gewartet und gereinigt. 40 1 \n",
|
||
"2554 Bohrlöcher an Gebots- und Verbotszeichen anbri... 173 1 \n",
|
||
"6771 Befestigung Deckel für Batteriefach defekt Hal... 99 2 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 323, 329, 331, 332, ... 74 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"2557 [211] 1 \n",
|
||
"2556 [93] 1 \n",
|
||
"2555 [1707] 1 \n",
|
||
"2554 [1] 1 \n",
|
||
"6771 [306, 326] 2 \n",
|
||
"\n",
|
||
"[4545 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"batched"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "8d49b064-4a61-4b34-95ee-2e9ba30208be",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ...\n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung\n",
|
||
"131 Tägliche Überprüfung der Ölabscheider\n",
|
||
"160 Wöchentliche Kontrolle der C-Anlagen\n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters\n",
|
||
"1778 Brandschutztechnische Prüfung\n",
|
||
"332 Prüfung von: - Scharniere - Dichtung - Schließ...\n",
|
||
"104 Täglicher Technikrundgang\n",
|
||
"132 Tägliche Kontrolle der Kompressorstationen\n",
|
||
"98 Tägliche Kesselhauskontrolle\n",
|
||
"Name: entry, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"batched.iloc[:10,1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c71e23e3-7a56-4d7a-9c05-49d41acce6ee",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "aa4ebe87-5d40-408d-bb9d-2e386ea28f3f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "fc2e05b6-95b7-44c0-a014-772a786ce03d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
|
||
" _torch_pytree._register_pytree_node(\n",
|
||
"A:\\Arbeitsaufgaben\\lang-main\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
|
||
" _torch_pytree._register_pytree_node(\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import spacy\n",
|
||
"nlp = spacy.load('de_dep_news_trf')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "1eaf6d3a-c307-4572-a2c4-3e3722a41b6a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"txt = batched.iloc[:10,1].to_list()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "c42457d7-8cbb-4591-8f87-86a34ba26a11",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers\n",
|
||
"Wöchentliche Sichtkontrolle / Reinigung\n",
|
||
"Tägliche Überprüfung der Ölabscheider\n",
|
||
"Wöchentliche Kontrolle der C-Anlagen\n",
|
||
"Halbjährliche Kontrolle des Stabbreithalters\n",
|
||
"Brandschutztechnische Prüfung\n",
|
||
"Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"Täglicher Technikrundgang\n",
|
||
"Tägliche Kontrolle der Kompressorstationen\n",
|
||
"Tägliche Kesselhauskontrolle\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"for doc in nlp.pipe(txt, batch_size=10):\n",
|
||
" print(doc)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d361b861-6277-42b4-921b-3510b1c7cb6a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a93ecd92-eece-41c9-bba0-e1bf3611ff5c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1a729ea4-5b88-4c05-aa2f-c09bc79bc901",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "ee0fea45-c26b-4253-b7f6-95ad70d0205a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-06-05 07:20:18 +0000 | io:INFO | Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"file = files[1]\n",
|
||
"raw = load_pickle(file)[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "09d4777e-2d87-4798-a159-a57423ae7501",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"idxs = batched.iloc[0,0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "483dc38b-220b-4154-b784-9362e8169083",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"id": "55ad9530-9cde-4049-8949-c8b31ddeb384",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>VorgangsID</th>\n",
|
||
" <th>ObjektID</th>\n",
|
||
" <th>HObjektText</th>\n",
|
||
" <th>ObjektArtID</th>\n",
|
||
" <th>ObjektArtText</th>\n",
|
||
" <th>VorgangsTypID</th>\n",
|
||
" <th>VorgangsTypName</th>\n",
|
||
" <th>VorgangsDatum</th>\n",
|
||
" <th>VorgangsStatusId</th>\n",
|
||
" <th>VorgangsPrioritaet</th>\n",
|
||
" <th>VorgangsBeschreibung</th>\n",
|
||
" <th>VorgangsOrt</th>\n",
|
||
" <th>VorgangsArtText</th>\n",
|
||
" <th>ErledigungsDatum</th>\n",
|
||
" <th>ErledigungsArtText</th>\n",
|
||
" <th>ErledigungsBeschreibung</th>\n",
|
||
" <th>MPMelderArbeitsplatz</th>\n",
|
||
" <th>MPAbteilungBezeichnung</th>\n",
|
||
" <th>Arbeitsbeginn</th>\n",
|
||
" <th>ErstellungsDatum</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>232</th>\n",
|
||
" <td>151991</td>\n",
|
||
" <td>121</td>\n",
|
||
" <td>217 C , Webmaschine, DL 280 EMS Breite 280</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Luft-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2022-03-02</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2022-03-02</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-03-02</td>\n",
|
||
" <td>2022-02-17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>241</th>\n",
|
||
" <td>155717</td>\n",
|
||
" <td>187</td>\n",
|
||
" <td>246, Webmaschine Jacquard,</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Jacquard-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2022-04-01</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2022-04-01</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-04-01</td>\n",
|
||
" <td>2022-02-17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>242</th>\n",
|
||
" <td>152507</td>\n",
|
||
" <td>177</td>\n",
|
||
" <td>204 S SI , Webmaschine, DL 280 EMS Breite 220</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Luft-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2022-04-09</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2022-04-09</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-04-09</td>\n",
|
||
" <td>2022-02-17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>244</th>\n",
|
||
" <td>247625</td>\n",
|
||
" <td>251</td>\n",
|
||
" <td>101, Webmaschine, OM 220 EOS</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Luft-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2022-04-19</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2022-04-19</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-04-19</td>\n",
|
||
" <td>2022-04-14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>247</th>\n",
|
||
" <td>254408</td>\n",
|
||
" <td>251</td>\n",
|
||
" <td>101, Webmaschine, OM 220 EOS</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Luft-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2022-05-07</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2022-05-07</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-05-07</td>\n",
|
||
" <td>2022-04-28</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>123434</th>\n",
|
||
" <td>571453</td>\n",
|
||
" <td>3212</td>\n",
|
||
" <td>A072, Webmaschine Jacquard,</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Jacquard-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>2023-05-26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>123435</th>\n",
|
||
" <td>571453</td>\n",
|
||
" <td>187</td>\n",
|
||
" <td>246, Webmaschine Jacquard,</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Jacquard-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>2023-05-26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>123436</th>\n",
|
||
" <td>571453</td>\n",
|
||
" <td>1792</td>\n",
|
||
" <td>A057, Webmaschine Jacquard,</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Jacquard-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>2023-05-26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>123437</th>\n",
|
||
" <td>571453</td>\n",
|
||
" <td>186</td>\n",
|
||
" <td>245 J, Webmaschine Jacquard,</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Jacquard-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>2023-05-26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>123438</th>\n",
|
||
" <td>571453</td>\n",
|
||
" <td>2473</td>\n",
|
||
" <td>A056, Webmaschine Jacquard,</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Jacquard-Webmaschine</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Wartung</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tägliche Interne Wartungstätigkeiten Weberei</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>Intern UTT - Sichtkontrolle</td>\n",
|
||
" <td>Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-06-28</td>\n",
|
||
" <td>2023-05-26</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>92592 rows × 20 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" VorgangsID ObjektID HObjektText \\\n",
|
||
"232 151991 121 217 C , Webmaschine, DL 280 EMS Breite 280 \n",
|
||
"241 155717 187 246, Webmaschine Jacquard, \n",
|
||
"242 152507 177 204 S SI , Webmaschine, DL 280 EMS Breite 220 \n",
|
||
"244 247625 251 101, Webmaschine, OM 220 EOS \n",
|
||
"247 254408 251 101, Webmaschine, OM 220 EOS \n",
|
||
"... ... ... ... \n",
|
||
"123434 571453 3212 A072, Webmaschine Jacquard, \n",
|
||
"123435 571453 187 246, Webmaschine Jacquard, \n",
|
||
"123436 571453 1792 A057, Webmaschine Jacquard, \n",
|
||
"123437 571453 186 245 J, Webmaschine Jacquard, \n",
|
||
"123438 571453 2473 A056, Webmaschine Jacquard, \n",
|
||
"\n",
|
||
" ObjektArtID ObjektArtText VorgangsTypID VorgangsTypName \\\n",
|
||
"232 3 Luft-Webmaschine 1 Wartung \n",
|
||
"241 6 Jacquard-Webmaschine 1 Wartung \n",
|
||
"242 3 Luft-Webmaschine 1 Wartung \n",
|
||
"244 3 Luft-Webmaschine 1 Wartung \n",
|
||
"247 3 Luft-Webmaschine 1 Wartung \n",
|
||
"... ... ... ... ... \n",
|
||
"123434 6 Jacquard-Webmaschine 1 Wartung \n",
|
||
"123435 6 Jacquard-Webmaschine 1 Wartung \n",
|
||
"123436 6 Jacquard-Webmaschine 1 Wartung \n",
|
||
"123437 6 Jacquard-Webmaschine 1 Wartung \n",
|
||
"123438 6 Jacquard-Webmaschine 1 Wartung \n",
|
||
"\n",
|
||
" VorgangsDatum VorgangsStatusId VorgangsPrioritaet \\\n",
|
||
"232 2022-03-02 5 0 \n",
|
||
"241 2022-04-01 5 0 \n",
|
||
"242 2022-04-09 5 0 \n",
|
||
"244 2022-04-19 5 0 \n",
|
||
"247 2022-05-07 5 0 \n",
|
||
"... ... ... ... \n",
|
||
"123434 2023-06-28 5 0 \n",
|
||
"123435 2023-06-28 5 0 \n",
|
||
"123436 2023-06-28 5 0 \n",
|
||
"123437 2023-06-28 5 0 \n",
|
||
"123438 2023-06-28 5 0 \n",
|
||
"\n",
|
||
" VorgangsBeschreibung VorgangsOrt \\\n",
|
||
"232 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"241 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"242 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"244 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"247 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"... ... ... \n",
|
||
"123434 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"123435 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"123436 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"123437 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"123438 Tägliche Wartungstätigkeiten nach Vorgabe des ... NaN \n",
|
||
"\n",
|
||
" VorgangsArtText ErledigungsDatum \\\n",
|
||
"232 Tägliche Interne Wartungstätigkeiten Weberei 2022-03-02 \n",
|
||
"241 Tägliche Interne Wartungstätigkeiten Weberei 2022-04-01 \n",
|
||
"242 Tägliche Interne Wartungstätigkeiten Weberei 2022-04-09 \n",
|
||
"244 Tägliche Interne Wartungstätigkeiten Weberei 2022-04-19 \n",
|
||
"247 Tägliche Interne Wartungstätigkeiten Weberei 2022-05-07 \n",
|
||
"... ... ... \n",
|
||
"123434 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
|
||
"123435 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
|
||
"123436 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
|
||
"123437 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
|
||
"123438 Tägliche Interne Wartungstätigkeiten Weberei 2023-06-28 \n",
|
||
"\n",
|
||
" ErledigungsArtText \\\n",
|
||
"232 Intern UTT - Sichtkontrolle \n",
|
||
"241 Intern UTT - Sichtkontrolle \n",
|
||
"242 Intern UTT - Sichtkontrolle \n",
|
||
"244 Intern UTT - Sichtkontrolle \n",
|
||
"247 Intern UTT - Sichtkontrolle \n",
|
||
"... ... \n",
|
||
"123434 Intern UTT - Sichtkontrolle \n",
|
||
"123435 Intern UTT - Sichtkontrolle \n",
|
||
"123436 Intern UTT - Sichtkontrolle \n",
|
||
"123437 Intern UTT - Sichtkontrolle \n",
|
||
"123438 Intern UTT - Sichtkontrolle \n",
|
||
"\n",
|
||
" ErledigungsBeschreibung \\\n",
|
||
"232 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"241 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"242 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"244 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"247 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"... ... \n",
|
||
"123434 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"123435 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"123436 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"123437 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"123438 Sichtkontrolle durchgeführt\\n\\nAuffälligkeiten... \n",
|
||
"\n",
|
||
" MPMelderArbeitsplatz MPAbteilungBezeichnung Arbeitsbeginn \\\n",
|
||
"232 NaN NaN 2022-03-02 \n",
|
||
"241 NaN NaN 2022-04-01 \n",
|
||
"242 NaN NaN 2022-04-09 \n",
|
||
"244 NaN NaN 2022-04-19 \n",
|
||
"247 NaN NaN 2022-05-07 \n",
|
||
"... ... ... ... \n",
|
||
"123434 NaN NaN 2023-06-28 \n",
|
||
"123435 NaN NaN 2023-06-28 \n",
|
||
"123436 NaN NaN 2023-06-28 \n",
|
||
"123437 NaN NaN 2023-06-28 \n",
|
||
"123438 NaN NaN 2023-06-28 \n",
|
||
"\n",
|
||
" ErstellungsDatum \n",
|
||
"232 2022-02-17 \n",
|
||
"241 2022-02-17 \n",
|
||
"242 2022-02-17 \n",
|
||
"244 2022-04-14 \n",
|
||
"247 2022-04-28 \n",
|
||
"... ... \n",
|
||
"123434 2023-05-26 \n",
|
||
"123435 2023-05-26 \n",
|
||
"123436 2023-05-26 \n",
|
||
"123437 2023-05-26 \n",
|
||
"123438 2023-05-26 \n",
|
||
"\n",
|
||
"[92592 rows x 20 columns]"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"raw.loc[idxs]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "05775885-cf62-482c-9628-fbc976df9656",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "29d00943-5c0e-4f5d-81b6-d566dbbcf89d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "82a059ea-0eb8-4db1-b859-3fc07e42faff",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"id": "d1c1190f-0c80-40e3-8965-78d68400a33d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
|
||
]
|
||
},
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
|
||
"files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"id": "e26c52eb-7a6b-49da-97a9-6e24a2a4d91e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"shared:INFO | 2024-05-29 11:56:46 +0000 | Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"file = files[-1]\n",
|
||
"ret = load_pickle(file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"id": "beacf5ca-6946-413a-817c-e7e87da9ace3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>index</th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>162</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>33</td>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>3108</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||
" <td>74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>131</td>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1619</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>160</td>\n",
|
||
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>140</td>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6756</th>\n",
|
||
" <td>2559</td>\n",
|
||
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[211]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6757</th>\n",
|
||
" <td>2558</td>\n",
|
||
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[93]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6758</th>\n",
|
||
" <td>2557</td>\n",
|
||
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1707]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6759</th>\n",
|
||
" <td>2556</td>\n",
|
||
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
|
||
" <td>173</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6760</th>\n",
|
||
" <td>6782</td>\n",
|
||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||
" <td>106</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>[306, 326]</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>4545 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" index ... num_assoc_obj_ids\n",
|
||
"0 162 ... 206\n",
|
||
"1 33 ... 74\n",
|
||
"2 131 ... 4\n",
|
||
"3 160 ... 11\n",
|
||
"4 140 ... 166\n",
|
||
"... ... ... ...\n",
|
||
"6756 2559 ... 1\n",
|
||
"6757 2558 ... 1\n",
|
||
"6758 2557 ... 1\n",
|
||
"6759 2556 ... 1\n",
|
||
"6760 6782 ... 2\n",
|
||
"\n",
|
||
"[4545 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 71,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d2e873f4-363e-4dbf-93f1-927b4ee3c598",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"id": "cbf0b450-ec00-471f-9627-717e52c5471d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from tqdm.auto import tqdm"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"id": "74e289ed-8d3e-4a50-afdf-d1d97e8a7807",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tup = tuple(i for i in range(100000000))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"id": "3e747e82-e6f8-47bb-918b-27bb7c37a10f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "6ade9c6f4e61410fb93f35e43222705b",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
" 0%| | 0/100000000 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"num = 0\n",
|
||
"for i in tqdm(tup):\n",
|
||
" num += i"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"id": "64cd6cc7-2803-41f1-b05c-83d65bdc7d42",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"4999999950000000"
|
||
]
|
||
},
|
||
"execution_count": 86,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"num"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "36366147-3632-4518-936e-878563305e49",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "4dbc00b8-1437-4986-85e4-645a8bcf4a6d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "17156aa0-8fd6-407b-b014-698df0e534a9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"arr = np.random.rand(1000,1000)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "4292a60b-9cb2-42d9-bedf-3b1120f1b515",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"idx = np.argwhere(arr >= 0.97)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "4426f1d5-dcd2-4d64-bdca-7dece6793f8f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"30220"
|
||
]
|
||
},
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(idx)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"id": "5b78436e-a828-42bd-a5ed-ae6045349391",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"batch = idx[:200]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"id": "75edc50e-b64c-4319-8f74-27653ed3452c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"88.5 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%%timeit\n",
|
||
"tuple(map(tuple, batch))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"id": "d9c827a4-ccdf-4cc1-90af-b018ae4858a7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"94.9 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%%timeit\n",
|
||
"tuple(tuple(x) for x in batch)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "acb2a0c9-b7d2-463d-8e63-c52fc7754ae8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.9"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|