{ "cells": [ { "cell_type": "code", "execution_count": 20, "id": "3760b040-985c-46ec-ba77-13f0f7a52c83", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "from lang_main import load_pickle" ] }, { "cell_type": "code", "execution_count": 28, "id": "97487448-82c8-4b3d-8a1a-ccccaaac8d86", "metadata": {}, "outputs": [], "source": [ "def get_files(path: str) -> tuple[Path, ...]:\n", "    \"\"\"Return every entry found directly inside the directory `path`.\n", "\n", "    `path` is wrapped in `Path`, checked for existence, and globbed with\n", "    the pattern `*`. Entries are returned as a tuple in the order\n", "    `Path.glob` yields them; no sorting is applied.\n", "\n", "    An `AssertionError` with the message 'path does not exist' is raised\n", "    when `path` is missing.\n", "    \"\"\"\n", "    # Use the caller-supplied directory rather than a fixed location.\n", "    p = Path(path)\n", "    assert p.exists(), \"path does not exist\"\n", "    return tuple(p.glob(r'*'))" ] }, { "cell_type": "code", "execution_count": 87, "id": "598f4d99-9d35-49c9-8c5d-113d4c80cecf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n", "files" ] }, { "cell_type": "code", "execution_count": 88, "id": "55ad4af3-87cd-4189-9309-171aba4e04a6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shared:INFO | 2024-05-29 12:49:47 +0000 | Loaded file successfully.\n" ] } ], "source": [ "file = files[-1]\n", "ret = load_pickle(file)" ] }, { "cell_type": "code", "execution_count": 89, "id": "540f4720-a2bf-4171-8db5-8e6993d38c13", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrylennum_occurassoc_obj_idsnum_assoc_obj_ids
162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
33Wöchentliche Sichtkontrolle / Reinigung393108[301, 304, 305, 313, 314, 323, 329, 331, 332, ...74
131Tägliche Überprüfung der Ölabscheider371619[0, 970, 2134, 2137]4
160Wöchentliche Kontrolle der C-Anlagen361265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
..................
2559Fehler 9723 Leistungsversorgung Antrieb defekt461[211]1
2558T-Warp-Let-Off1 schleppfehler301[93]1
2557Fahrräder wurden gewartet und gereinigt.401[1707]1
2556Bohrlöcher an Gebots- und Verbotszeichen anbri...1731[1]1
6782Befestigung Deckel für Batteriefach defekt ...1062[306, 326]2
\n", "

4545 rows × 5 columns

\n", "
" ], "text/plain": [ " entry ... num_assoc_obj_ids\n", "162 Tägliche Wartungstätigkeiten nach Vorgabe des ... ... 206\n", "33 Wöchentliche Sichtkontrolle / Reinigung ... 74\n", "131 Tägliche Überprüfung der Ölabscheider ... 4\n", "160 Wöchentliche Kontrolle der C-Anlagen ... 11\n", "140 Halbjährliche Kontrolle des Stabbreithalters ... 166\n", "... ... ... ...\n", "2559 Fehler 9723 Leistungsversorgung Antrieb defekt ... 1\n", "2558 T-Warp-Let-Off1 schleppfehler ... 1\n", "2557 Fahrräder wurden gewartet und gereinigt. ... 1\n", "2556 Bohrlöcher an Gebots- und Verbotszeichen anbri... ... 1\n", "6782 Befestigung Deckel für Batteriefach defekt ... ... 2\n", "\n", "[4545 rows x 5 columns]" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "ee0fea45-c26b-4253-b7f6-95ad70d0205a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "82a059ea-0eb8-4db1-b859-3fc07e42faff", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 69, "id": "d1c1190f-0c80-40e3-8965-78d68400a33d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n", " WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n", "files" ] }, { "cell_type": "code", "execution_count": 70, "id": "e26c52eb-7a6b-49da-97a9-6e24a2a4d91e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shared:INFO | 
2024-05-29 11:56:46 +0000 | Loaded file successfully.\n" ] } ], "source": [ "file = files[-1]\n", "ret = load_pickle(file)" ] }, { "cell_type": "code", "execution_count": 71, "id": "beacf5ca-6946-413a-817c-e7e87da9ace3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexentrylennum_occurassoc_obj_idsnum_assoc_obj_ids
0162Tägliche Wartungstätigkeiten nach Vorgabe des ...6692592[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...206
133Wöchentliche Sichtkontrolle / Reinigung393108[301, 304, 305, 313, 314, 323, 329, 331, 332, ...74
2131Tägliche Überprüfung der Ölabscheider371619[0, 970, 2134, 2137]4
3160Wöchentliche Kontrolle der C-Anlagen361265[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...11
4140Halbjährliche Kontrolle des Stabbreithalters44687[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...166
.....................
67562559Fehler 9723 Leistungsversorgung Antrieb defekt461[211]1
67572558T-Warp-Let-Off1 schleppfehler301[93]1
67582557Fahrräder wurden gewartet und gereinigt.401[1707]1
67592556Bohrlöcher an Gebots- und Verbotszeichen anbri...1731[1]1
67606782Befestigung Deckel für Batteriefach defekt ...1062[306, 326]2
\n", "

4545 rows × 6 columns

\n", "
" ], "text/plain": [ " index ... num_assoc_obj_ids\n", "0 162 ... 206\n", "1 33 ... 74\n", "2 131 ... 4\n", "3 160 ... 11\n", "4 140 ... 166\n", "... ... ... ...\n", "6756 2559 ... 1\n", "6757 2558 ... 1\n", "6758 2557 ... 1\n", "6759 2556 ... 1\n", "6760 6782 ... 2\n", "\n", "[4545 rows x 6 columns]" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "d2e873f4-363e-4dbf-93f1-927b4ee3c598", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 72, "id": "cbf0b450-ec00-471f-9627-717e52c5471d", "metadata": {}, "outputs": [], "source": [ "from tqdm.auto import tqdm" ] }, { "cell_type": "code", "execution_count": 84, "id": "74e289ed-8d3e-4a50-afdf-d1d97e8a7807", "metadata": {}, "outputs": [], "source": [ "tup = tuple(i for i in range(100000000))" ] }, { "cell_type": "code", "execution_count": 85, "id": "3e747e82-e6f8-47bb-918b-27bb7c37a10f", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6ade9c6f4e61410fb93f35e43222705b", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/100000000 [00:00= 0.97)" ] }, { "cell_type": "code", "execution_count": 50, "id": "4426f1d5-dcd2-4d64-bdca-7dece6793f8f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "30220" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(idx)" ] }, { "cell_type": "code", "execution_count": 66, "id": "5b78436e-a828-42bd-a5ed-ae6045349391", "metadata": {}, "outputs": [], "source": [ "batch = idx[:200]" ] }, { "cell_type": "code", "execution_count": 67, "id": "75edc50e-b64c-4319-8f74-27653ed3452c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "88.5 µs ± 1.22 µs per loop (mean ± std. dev. 
of 7 runs, 10,000 loops each)\n" ] } ], "source": [ "%%timeit\n", "tuple(map(tuple, batch))" ] }, { "cell_type": "code", "execution_count": 68, "id": "d9c827a4-ccdf-4cc1-90af-b018ae4858a7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "94.9 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" ] } ], "source": [ "%%timeit\n", "tuple(tuple(x) for x in batch)" ] }, { "cell_type": "code", "execution_count": null, "id": "acb2a0c9-b7d2-463d-8e63-c52fc7754ae8", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }