825 lines
26 KiB
Plaintext
825 lines
26 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "79034f9b-adae-4066-a35f-b0e7fd38055f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\foersterflorian\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n",
|
||
"INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
|
||
"INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import os\n",
|
||
"import sys\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"from ihm_analyse.lib.preprocess import load_raw_data\n",
|
||
"from ihm_analyse import load_pickle\n",
|
||
"from ihm_analyse.predefined_pipes import pipe_merge"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "af94968f-ae6c-402b-b866-cb6c15b81cef",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403')"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Working directory as a Path; the raw-data folder is resolved relative to it.\n",
|
||
"pwd = Path.cwd()\n",
|
||
"p = pwd / '01_03_Rohdaten_202403/'\n",
|
||
"p"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "753daf9e-0209-4a13-b458-1048c8b2bfbf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export6 - 43306 Zeilen.csv'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export8 - 708 Zeilen.csv'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export9 - 8176 Zeilen.csv'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_trunc.csv')]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"folder = list(p.glob(r'*.csv'))\n",
|
||
"folder"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "50d9ef9c-c56b-4d5b-9dfd-c02b68a29288",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "52186a59-69f2-4ed2-8d19-dac76e50526a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "5b76284b-bcc3-4b31-9ece-bde35b22b717",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_59499_Zeilen.csv')"
|
||
]
|
||
},
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Select the export by file name rather than by position: glob() gives no\n",
|
||
"# ordering guarantee, so folder[1] can silently point at a different file\n",
|
||
"# once the directory contents change.\n",
|
||
"path_to_dataset = p / 'Export7_59499_Zeilen.csv'\n",
|
||
"path_to_dataset"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "2701b8d9-657c-4d7a-b103-b8c8b1865224",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.preprocess:Loaded dataset successfully.\n",
|
||
"INFO:ihm_analyse.preprocess:Dataset properties: number of entries: 59499, number of features 20\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"(data,) = load_raw_data(path_to_dataset)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "74d7b72e-3cab-46e2-bca2-67c70b9221c7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"17849"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"percentage_trunc = 0.3\n",
|
||
"num_entries_trunc = int(len(data) * percentage_trunc)\n",
|
||
"num_entries_trunc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "62d2fa0e-baa6-4d7c-bd37-5fdbe21005d3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 17849 entries, 0 to 17848\n",
|
||
"Data columns (total 20 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 VorgangsID 17849 non-null int64 \n",
|
||
" 1 ObjektID 17849 non-null int64 \n",
|
||
" 2 HObjektText 17848 non-null object \n",
|
||
" 3 ObjektArtID 17849 non-null int64 \n",
|
||
" 4 ObjektArtText 17849 non-null object \n",
|
||
" 5 VorgangsTypID 17849 non-null int64 \n",
|
||
" 6 VorgangsTypName 17849 non-null object \n",
|
||
" 7 VorgangsDatum 17849 non-null datetime64[ns]\n",
|
||
" 8 VorgangsStatusId 17849 non-null int64 \n",
|
||
" 9 VorgangsPrioritaet 17849 non-null int64 \n",
|
||
" 10 VorgangsBeschreibung 15988 non-null object \n",
|
||
" 11 VorgangsOrt 0 non-null float64 \n",
|
||
" 12 VorgangsArtText 17849 non-null object \n",
|
||
" 13 ErledigungsDatum 17849 non-null datetime64[ns]\n",
|
||
" 14 ErledigungsArtText 11879 non-null object \n",
|
||
" 15 ErledigungsBeschreibung 9916 non-null object \n",
|
||
" 16 MPMelderArbeitsplatz 3 non-null object \n",
|
||
" 17 MPAbteilungBezeichnung 3 non-null object \n",
|
||
" 18 Arbeitsbeginn 1920 non-null datetime64[ns]\n",
|
||
" 19 ErstellungsDatum 17849 non-null datetime64[ns]\n",
|
||
"dtypes: datetime64[ns](4), float64(1), int64(6), object(9)\n",
|
||
"memory usage: 2.7+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"data_trunc = data.iloc[:num_entries_trunc].copy()\n",
|
||
"data_trunc.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "6a15fb8c-e3b7-4c92-b73d-788b337d6251",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_trunc.csv')"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"saving_path = p / 'Export7_trunc.csv'\n",
|
||
"saving_path"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "fb912634-cefa-4b8d-a370-37f6c8178f5a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Write the truncated sample next to the raw exports.\n",
|
||
"# index=False keeps the RangeIndex out of the file; without it the CSV gains\n",
|
||
"# an unnamed leading column that the source export does not have.\n",
|
||
"data_trunc.to_csv(\n",
|
||
"    path_or_buf=saving_path,\n",
|
||
"    sep=';',\n",
|
||
"    encoding='cp1252',\n",
|
||
"    index=False,\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "30085691-aa23-478c-8d65-d3e6800c7c77",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "85733e19-6c52-479c-a8f0-3872bdbd5bfd",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ae65a019-26a8-45c9-bfb9-2662b84ff2f2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7ba722ae-51b8-4e8d-9a1a-917098a3f70e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "356f7d32-446e-4dc1-aa83-a0b816742087",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Token_Analysis-TokenGraph.pickle')]"
|
||
]
|
||
},
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"res_path = pwd / 'results/Export7_trunc/'\n",
|
||
"contents = list(res_path.glob(r'*.pickle'))\n",
|
||
"contents"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"id": "e4415e9c-6ebb-46d9-b06a-eb67df56689e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Select the pickles by file name instead of by position in the glob result.\n",
|
||
"# contents[-1] is the TokenGraph pickle, so indexing the loaded object with\n",
|
||
"# [0] ended up in networkx Graph.__getitem__ and raised KeyError: 0.\n",
|
||
"# NOTE(review): the Embedding1 step-3 candidates list is presumably the\n",
|
||
"# intended index-pair input for the merge pipeline - confirm.\n",
|
||
"preproc_data = res_path / 'Pipe-TargetFeature_Step-5_analyse_feature.pickle'\n",
|
||
"last_step = res_path / 'Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"id": "2b29672a-c573-4d09-8601-e468a23bad0c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n",
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"ret_preproc_data = load_pickle(preproc_data)\n",
|
||
"ret_idx_paris = load_pickle(last_step)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "20c807da-e64f-4a48-8306-28a0a3dcfae9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "KeyError",
|
||
"evalue": "0",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[1;32mIn[53], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m idx_pairs \u001b[38;5;241m=\u001b[39m \u001b[43mret_idx_paris\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[0;32m 2\u001b[0m preproc_data \u001b[38;5;241m=\u001b[39m ret_preproc_data[\u001b[38;5;241m0\u001b[39m]\n",
|
||
"File \u001b[1;32m~\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\networkx\\classes\\graph.py:513\u001b[0m, in \u001b[0;36mGraph.__getitem__\u001b[1;34m(self, n)\u001b[0m\n\u001b[0;32m 489\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, n):\n\u001b[0;32m 490\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a dict of neighbors of node n. Use: 'G[n]'.\u001b[39;00m\n\u001b[0;32m 491\u001b[0m \n\u001b[0;32m 492\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 511\u001b[0m \u001b[38;5;124;03m AtlasView({1: {}})\u001b[39;00m\n\u001b[0;32m 512\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 513\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mn\u001b[49m\u001b[43m]\u001b[49m\n",
|
||
"File \u001b[1;32m~\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\networkx\\classes\\coreviews.py:81\u001b[0m, in \u001b[0;36mAdjacencyView.__getitem__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name):\n\u001b[1;32m---> 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m AtlasView(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_atlas\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m)\n",
|
||
"\u001b[1;31mKeyError\u001b[0m: 0"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"idx_pairs = ret_idx_paris[0]\n",
|
||
"preproc_data = ret_preproc_data[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "1fca8442-c0e9-420f-9ad4-22a3b672dda3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.pipelines:Starting processing pipeline >>Merge_Duplicates<<...\n",
|
||
"INFO:ihm_analyse.preprocess:Start merging of similarity candidates...\n",
|
||
"INFO:ihm_analyse.graphs:Graph properties: 5465 Nodes, 71087 Edges\n",
|
||
"INFO:ihm_analyse.graphs:Node memory: 149.43 KB\n",
|
||
"INFO:ihm_analyse.graphs:Edge memory: 3887.57 KB\n",
|
||
"INFO:ihm_analyse.graphs:Total memory: 4037.00 KB\n",
|
||
"INFO:ihm_analyse.preprocess:Similarity candidates merged successfully.\n",
|
||
"INFO:ihm_analyse.helpers:Saved file successfully under results\\Export7_trunc\\Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle\n",
|
||
"INFO:ihm_analyse.pipelines:Processing pipeline >>Merge_Duplicates<< successfully ended.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"ret = pipe_merge.run(starting_values=(preproc_data, idx_pairs))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "87bd8bba-b0c3-45a1-a9a8-b6bd279cf51f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>445</th>\n",
|
||
" <td>Wartung nach Arbeitsplan, siehe Extradaten / A...</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>3435</td>\n",
|
||
" <td>[563, 604, 616, 617, 15089, 15226, 15276, 1533...</td>\n",
|
||
" <td>36</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>I/W nach Liste</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>238</td>\n",
|
||
" <td>[2363, 2364, 2367, 2368, 2369, 2370, 2371, 237...</td>\n",
|
||
" <td>85</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2377</th>\n",
|
||
" <td>1 Wöchentliche Wartung aller 3 Etikettendrucke...</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>535</td>\n",
|
||
" <td>[111, 121, 127, 209, 219, 220, 221, 222, 236, ...</td>\n",
|
||
" <td>73</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2380</th>\n",
|
||
" <td>Infratech Meet Di + DO JourFix PT/InT</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>183</td>\n",
|
||
" <td>28526</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4337</th>\n",
|
||
" <td>24.05.2022 10:28:01 (Halm, Karl-Josef) Aktione...</td>\n",
|
||
" <td>579</td>\n",
|
||
" <td>3817</td>\n",
|
||
" <td>[5, 7, 9, 13, 14, 15, 17, 18, 24, 25, 30, 32, ...</td>\n",
|
||
" <td>754</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3061</th>\n",
|
||
" <td>stopper schaltet nicht.</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[15280]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3059</th>\n",
|
||
" <td>12.09.2022 13:48:24 (Struzyna, Christian) Temp...</td>\n",
|
||
" <td>127</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[12671]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3054</th>\n",
|
||
" <td>08.09.2022 12:56:33 (Unruh, Jakob) Neue Serie ...</td>\n",
|
||
" <td>262</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[273]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3053</th>\n",
|
||
" <td>Preset-Punkt überprüfen und ggf. nachjustieren...</td>\n",
|
||
" <td>148</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[273]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3050</th>\n",
|
||
" <td>13.09.2022 08:05:40 (Betke, Gennadi) Griefer ...</td>\n",
|
||
" <td>79</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[15785]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>3627 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" entry len num_occur \\\n",
|
||
"445 Wartung nach Arbeitsplan, siehe Extradaten / A... 52 3435 \n",
|
||
"26 I/W nach Liste 14 238 \n",
|
||
"2377 1 Wöchentliche Wartung aller 3 Etikettendrucke... 91 535 \n",
|
||
"2380 Infratech Meet Di + DO JourFix PT/InT 38 183 \n",
|
||
"4337 24.05.2022 10:28:01 (Halm, Karl-Josef) Aktione... 579 3817 \n",
|
||
"... ... ... ... \n",
|
||
"3061 stopper schaltet nicht. 23 1 \n",
|
||
"3059 12.09.2022 13:48:24 (Struzyna, Christian) Temp... 127 1 \n",
|
||
"3054 08.09.2022 12:56:33 (Unruh, Jakob) Neue Serie ... 262 1 \n",
|
||
"3053 Preset-Punkt überprüfen und ggf. nachjustieren... 148 1 \n",
|
||
"3050 13.09.2022 08:05:40 (Betke, Gennadi) Griefer ... 79 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"445 [563, 604, 616, 617, 15089, 15226, 15276, 1533... 36 \n",
|
||
"26 [2363, 2364, 2367, 2368, 2369, 2370, 2371, 237... 85 \n",
|
||
"2377 [111, 121, 127, 209, 219, 220, 221, 222, 236, ... 73 \n",
|
||
"2380 28526 1 \n",
|
||
"4337 [5, 7, 9, 13, 14, 15, 17, 18, 24, 25, 30, 32, ... 754 \n",
|
||
"... ... ... \n",
|
||
"3061 [15280] 1 \n",
|
||
"3059 [12671] 1 \n",
|
||
"3054 [273] 1 \n",
|
||
"3053 [273] 1 \n",
|
||
"3050 [15785] 1 \n",
|
||
"\n",
|
||
"[3627 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "43e6d41c-7a49-4756-9629-0ec0ee6c5b7c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "1a9969fa-6b0d-466a-bd4f-1ba5f4868873",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" int('23456')\n",
|
||
"except ValueError:\n",
|
||
" print('went wrong')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "76af32de-5f0a-4d7e-9751-5f2a38a7a69e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7bd5cffa-0b09-45c7-bc15-0cd3082353d7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "e8423609-c95d-42c8-99f3-95274fa52ae8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Token_Analysis-TokenGraph.pickle')]"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"res_path = pwd / 'results/Export7_trunc/'\n",
|
||
"contents = list(res_path.glob(r'*.pickle'))\n",
|
||
"contents"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "71fa1c2e-22cf-483a-964c-a5cca2bd3790",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Token_Analysis-TokenGraph.pickle')"
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"path_to_graph = contents[-1]\n",
|
||
"path_to_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "9101beaf-6a7c-4987-9c44-141386966291",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph = load_pickle(path_to_graph)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "e08954c3-9a5f-43c8-a98e-f9b8a74c3ff5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"TokenGraph(name: TokenGraph, number of nodes: 10536, number of edges: 48562)"
|
||
]
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "2a8d0abb-d68b-4c6e-80e9-b3b27998c8d2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'num_nodes': 10536,\n",
|
||
" 'num_edges': 46393,\n",
|
||
" 'min_edge_weight': 1,\n",
|
||
" 'max_edge_weight': 15374,\n",
|
||
" 'node_memory': 652596,\n",
|
||
" 'edge_memory': 2598008,\n",
|
||
" 'total_memory': 3250604}"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph.metadata_undirected"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "cc34c667-5a33-4061-a83a-50fc8c537b19",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Two-stage filter on the token graph: edge weight first, then node degree.\n",
|
||
"tk_graph_filtered = (\n",
|
||
"    tk_graph\n",
|
||
"    .filter_by_edge_weight(100)\n",
|
||
"    .filter_by_node_degree(1)\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"id": "67df971c-fe7a-4f88-89ae-ba1366da1166",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'num_nodes': 289,\n",
|
||
" 'num_edges': 457,\n",
|
||
" 'min_edge_weight': 100,\n",
|
||
" 'max_edge_weight': 15369,\n",
|
||
" 'node_memory': 17674,\n",
|
||
" 'edge_memory': 25592,\n",
|
||
" 'total_memory': 43266}"
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph_filtered.metadata_undirected"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "8c524312-aff4-47f4-801e-ad8112aa2a70",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc')"
|
||
]
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"save_path_graph = res_path\n",
|
||
"save_path_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "b62c888f-620d-4b29-924b-45ea17d99bc1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under A:\\Arbeitsaufgaben\\Instandhaltung\\results\\Export7_trunc\\TokenGraph-filtered.graphml.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"tk_graph_filtered.save_graph(save_path_graph, filename='TokenGraph-filtered')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ca21e2d3-dc5a-4117-8be9-d132ba2c8d28",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|