lang-main/test-notebooks/display_results.ipynb

664 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"id": "3760b040-985c-46ec-ba77-13f0f7a52c83",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from lang_main import load_pickle"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "97487448-82c8-4b3d-8a1a-ccccaaac8d86",
"metadata": {},
"outputs": [],
"source": [
"def get_files(path: str | Path) -> tuple[Path, ...]:\n",
"    \"\"\"Return every entry found directly inside the directory ``path``.\n",
"\n",
"    Args:\n",
"        path: Directory to list, given as a string or a ``Path``.\n",
"\n",
"    Returns:\n",
"        One ``Path`` per directory entry, in the order ``Path.glob`` yields\n",
"        them. That order is filesystem-dependent and is not sorted here.\n",
"\n",
"    Raises:\n",
"        AssertionError: If ``path`` does not exist.\n",
"    \"\"\"\n",
"    # Use the caller-supplied directory instead of a fixed, machine-specific\n",
"    # location, so the function works for any results folder.\n",
"    p = Path(path)\n",
"    assert p.exists(), \"path does not exist\"\n",
"    return tuple(p.glob(r'*'))"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "598f4d99-9d35-49c9-8c5d-113d4c80cecf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List the entries of the results directory; ending the cell on the bare name\n",
"# displays the resulting tuple as the cell output.\n",
"# NOTE(review): absolute, machine-specific Windows path - this cell only runs\n",
"# where that directory exists.\n",
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "55ad4af3-87cd-4189-9309-171aba4e04a6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shared:INFO | 2024-05-29 12:49:47 +0000 | Loaded file successfully.\n"
]
}
],
"source": [
"# Take the last listed file and load it.\n",
"# NOTE(review): the listing comes from a directory glob and is not sorted, so\n",
"# which file ends up last may differ between machines - confirm that the\n",
"# intended file is selected.\n",
"# NOTE(review): load_pickle presumably unpickles the file; only use it on\n",
"# trusted files, since unpickling can execute arbitrary code.\n",
"file = files[-1]\n",
"ret = load_pickle(file)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "540f4720-a2bf-4171-8db5-8e6993d38c13",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3108</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
" <td>36</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2559</th>\n",
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>[211]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2558</th>\n",
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>[93]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2557</th>\n",
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>[1707]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2556</th>\n",
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
" <td>173</td>\n",
" <td>1</td>\n",
" <td>[1]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6782</th>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4545 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" entry ... num_assoc_obj_ids\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... ... 206\n",
"33 Wöchentliche Sichtkontrolle / Reinigung ... 74\n",
"131 Tägliche Überprüfung der Ölabscheider ... 4\n",
"160 Wöchentliche Kontrolle der C-Anlagen ... 11\n",
"140 Halbjährliche Kontrolle des Stabbreithalters ... 166\n",
"... ... ... ...\n",
"2559 Fehler 9723 Leistungsversorgung Antrieb defekt ... 1\n",
"2558 T-Warp-Let-Off1 schleppfehler ... 1\n",
"2557 Fahrräder wurden gewartet und gereinigt. ... 1\n",
"2556 Bohrlöcher an Gebots- und Verbotszeichen anbri... ... 1\n",
"6782 Befestigung Deckel für Batteriefach defekt ... ... 2\n",
"\n",
"[4545 rows x 5 columns]"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the first element of the loaded result (a table in the saved output).\n",
"ret[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee0fea45-c26b-4253-b7f6-95ad70d0205a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "82a059ea-0eb8-4db1-b859-3fc07e42faff",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 69,
"id": "d1c1190f-0c80-40e3-8965-78d68400a33d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE(review): this repeats the directory-listing cell further up and rebinds\n",
"# `files`; consider keeping only one copy.\n",
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "e26c52eb-7a6b-49da-97a9-6e24a2a4d91e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shared:INFO | 2024-05-29 11:56:46 +0000 | Loaded file successfully.\n"
]
}
],
"source": [
"# NOTE(review): repeats the earlier load cell - takes the last listed file and\n",
"# loads it, rebinding `file` and `ret`.\n",
"# The listing is not sorted, so verify that the last entry is the intended file.\n",
"file = files[-1]\n",
"ret = load_pickle(file)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "beacf5ca-6946-413a-817c-e7e87da9ace3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>162</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>33</td>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3108</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>131</td>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>160</td>\n",
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
" <td>36</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>140</td>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6756</th>\n",
" <td>2559</td>\n",
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>[211]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6757</th>\n",
" <td>2558</td>\n",
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>[93]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6758</th>\n",
" <td>2557</td>\n",
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>[1707]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6759</th>\n",
" <td>2556</td>\n",
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
" <td>173</td>\n",
" <td>1</td>\n",
" <td>[1]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6760</th>\n",
" <td>6782</td>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4545 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" index ... num_assoc_obj_ids\n",
"0 162 ... 206\n",
"1 33 ... 74\n",
"2 131 ... 4\n",
"3 160 ... 11\n",
"4 140 ... 166\n",
"... ... ... ...\n",
"6756 2559 ... 1\n",
"6757 2558 ... 1\n",
"6758 2557 ... 1\n",
"6759 2556 ... 1\n",
"6760 6782 ... 2\n",
"\n",
"[4545 rows x 6 columns]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First element of the loaded result. The saved output of this cell has an\n",
"# extra `index` column compared with the same expression further up.\n",
"ret[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2e873f4-363e-4dbf-93f1-927b4ee3c598",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 72,
"id": "cbf0b450-ec00-471f-9627-717e52c5471d",
"metadata": {},
"outputs": [],
"source": [
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "74e289ed-8d3e-4a50-afdf-d1d97e8a7807",
"metadata": {},
"outputs": [],
"source": [
"# Large tuple holding the integers 0 .. 99_999_999.\n",
"# tuple() consumes the range directly; wrapping it in a generator expression\n",
"# only adds per-item overhead without changing the result.\n",
"tup = tuple(range(100_000_000))"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "3e747e82-e6f8-47bb-918b-27bb7c37a10f",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6ade9c6f4e61410fb93f35e43222705b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/100000000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sum every element of tup while tqdm renders a progress bar over the iteration.\n",
"# The built-in sum() replaces the manual accumulator loop and no longer leaves\n",
"# a stray loop variable behind in the kernel namespace.\n",
"num = sum(tqdm(tup))"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "64cd6cc7-2803-41f1-b05c-83d65bdc7d42",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4999999950000000"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Expected value: n * (n - 1) / 2 with n = 100_000_000, i.e. 4_999_999_950_000_000.\n",
"num"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36366147-3632-4518-936e-878563305e49",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4dbc00b8-1437-4986-85e4-645a8bcf4a6d",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "17156aa0-8fd6-407b-b014-698df0e534a9",
"metadata": {},
"outputs": [],
"source": [
"# 1000 x 1000 matrix of uniform samples from [0, 1).\n",
"# A seeded generator keeps the matrix - and every count derived from it - the\n",
"# same on each run.\n",
"rng = np.random.default_rng(42)\n",
"arr = rng.random((1000, 1000))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "4292a60b-9cb2-42d9-bedf-3b1120f1b515",
"metadata": {},
"outputs": [],
"source": [
"# One (row, column) index pair per entry of arr that is >= 0.97.\n",
"idx = np.argwhere(arr >= 0.97)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "4426f1d5-dcd2-4d64-bdca-7dece6793f8f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30220"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of entries at or above the threshold; for uniform samples in [0, 1)\n",
"# this should be roughly 3% of the 1_000_000 entries.\n",
"len(idx)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "5b78436e-a828-42bd-a5ed-ae6045349391",
"metadata": {},
"outputs": [],
"source": [
"# First 200 index pairs, used as the input of the two %%timeit cells below.\n",
"batch = idx[:200]"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "75edc50e-b64c-4319-8f74-27653ed3452c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"88.5 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Variant 1: map() converts each row of batch to a tuple.\n",
"tuple(map(tuple, batch))"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "d9c827a4-ccdf-4cc1-90af-b018ae4858a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"94.9 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Variant 2: the same conversion written as a generator expression.\n",
"tuple(tuple(x) for x in batch)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "acb2a0c9-b7d2-463d-8e63-c52fc7754ae8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}