STRF for similarity duplicates, time analysis pipeline, enhanced config

This commit is contained in:
Florian Förster
2024-05-29 16:34:31 +02:00
parent 5d2c97165a
commit bb987e2108
30 changed files with 1875 additions and 693 deletions

View File

@@ -13,29 +13,25 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "bca16fc4-1ffb-48ef-bd0d-bdc782428a45",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\foersterflorian\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
"ename": "ModuleNotFoundError",
"evalue": "No module named 'ihm_analyse'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mihm_analyse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CONFIG\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mihm_analyse\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpreprocess\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 3\u001b[0m load_raw_data,\n\u001b[0;32m 4\u001b[0m remove_duplicates,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m merge_similarity_dupl,\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mihm_analyse\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpipelines\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BasePipeline, EmbeddingPipeline\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'ihm_analyse'"
]
}
],
"source": [
"from ihm_analyse import CONFIG\n",
"from ihm_analyse.lib.preprocess import (\n",
"from lang_main import CONFIG\n",
"from lang_main.lib.preprocess import (\n",
" load_raw_data,\n",
" remove_duplicates,\n",
" remove_NA,\n",
@@ -47,8 +43,8 @@
" list_cosSim_dupl_candidates,\n",
" merge_similarity_dupl,\n",
")\n",
"from ihm_analyse.lib.pipelines import BasePipeline, EmbeddingPipeline\n",
"from ihm_analyse.lib.helpers import (\n",
"from lang_main.pipelines import BasePipeline, EmbeddingPipeline\n",
"from lang_main.lib.helpers import (\n",
" save_pickle, \n",
" load_pickle, \n",
" create_saving_folder,\n",

View File

@@ -1,28 +1,42 @@
from typing import cast
from pathlib import Path
import pandas as pd
import plotly.express as px
from dash import (
Dash,
html,
dcc,
callback,
Output,
Input,
Output,
State,
callback,
dash_table,
dcc,
html,
)
import plotly.express as px
import pandas as pd
from lang_main import load_pickle
from lang_main.types import ObjectID, TimelineCandidates
from pandas import DataFrame
from lang_main import load_pickle
from lang_main.types import TimelineCandidates, ObjectID
#df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
# ** data
data = cast(DataFrame, load_pickle('./data.pkl'))
cands = cast(TimelineCandidates, load_pickle('./map_candidates.pkl'))
texts = cast(dict[ObjectID, str], load_pickle('./map_texts.pkl'))
# Pickled pipeline results consumed by the dashboard. The location is assembled
# from separate path components so it resolves identically on Windows and POSIX;
# a single literal containing backslashes is read as one file name on POSIX.
p_dashboard = Path('test-notebooks', 'dashboard')
p_df = p_dashboard / 'Pipe-TargetFeature_Step-3_remove_NA.pkl'
p_tl = p_dashboard / 'Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl'

# The loaded object is indexed below and its first item is used as the
# DataFrame, so the cast names a tuple type rather than a bare DataFrame.
# NOTE(review): the tuple length is not visible here — confirm against the pipeline.
ret = cast(tuple[DataFrame, ...], load_pickle(p_df))
data = ret[0]
# Second pickle: first item is the candidates mapping, second the ObjectID -> text map.
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
cands = ret[0]
texts = ret[1]
# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
# data = cast(DataFrame, load_pickle(p_df))
# cands = cast(TimelineCandidates, load_pickle(p_cands))
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
table_feats = [
'ErstellungsDatum',
'ErledigungsDatum',
@@ -52,25 +66,28 @@ hover_data = {
app = Dash(prevent_initial_callbacks=True)
app.layout = [
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign':'center'}),
html.Div(children=[
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
dcc.Dropdown(
list(cands.keys()),
id='dropdown-selection',
placeholder="ObjektID auswählen...",
)
]),
html.Div(children=[
html.H3(id='object_text'),
dcc.Dropdown(id='choice-candidates'),
dcc.Graph(id='graph-output'),
]),
html.Div(children=[
dash_table.DataTable(id='table-candidates')
]),
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
html.Div(
children=[
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
dcc.Dropdown(
list(cands.keys()),
id='dropdown-selection',
placeholder='ObjektID auswählen...',
),
]
),
html.Div(
children=[
html.H3(id='object_text'),
dcc.Dropdown(id='choice-candidates'),
dcc.Graph(id='graph-output'),
]
),
html.Div(children=[dash_table.DataTable(id='table-candidates')]),
]
@callback(
Output('object_text', 'children'),
Input('dropdown-selection', 'value'),
@@ -82,6 +99,7 @@ def update_obj_text(obj_id):
headline = f'HObjektText: {obj_text}'
return headline
@callback(
Output('choice-candidates', 'options'),
Input('dropdown-selection', 'value'),
@@ -90,9 +108,10 @@ def update_obj_text(obj_id):
def update_choice_candidates(obj_id):
obj_id = int(obj_id)
cands_obj_id = cands[obj_id]
choices = list(range(1, len(cands_obj_id)+1))
choices = list(range(1, len(cands_obj_id) + 1))
return choices
@callback(
Output('graph-output', 'figure'),
Input('choice-candidates', 'value'),
@@ -106,7 +125,7 @@ def update_timeline(index, obj_id):
title = f'HObjektText: {obj_text}'
# cands
cands_obj_id = cands[obj_id]
cands_choice = cands_obj_id[int(index)-1]
cands_choice = cands_obj_id[int(index) - 1]
# data
df = data.loc[list(cands_choice)].sort_index()
# figure
@@ -117,22 +136,18 @@ def update_timeline(index, obj_id):
title=title,
hover_data=hover_data,
)
fig.update_traces(
mode='markers+lines',
marker=markers,
marker_symbol='diamond'
)
fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
fig.update_xaxes(
tickformat="%B\n%Y",
tickformat='%B\n%Y',
rangeslider_visible=True,
)
fig.update_yaxes(type='category')
fig.update_layout(hovermode="x unified")
fig.update_layout(hovermode='x unified')
return fig
@callback(
[Output('table-candidates', 'data'),
Output('table-candidates', 'columns')],
[Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
Input('choice-candidates', 'value'),
State('dropdown-selection', 'value'),
prevent_initial_call=True,
@@ -141,19 +156,20 @@ def update_table_candidates(index, obj_id):
obj_id = int(obj_id)
# cands
cands_obj_id = cands[obj_id]
cands_choice = cands_obj_id[int(index)-1]
cands_choice = cands_obj_id[int(index) - 1]
# data
df = data.loc[list(cands_choice)].sort_index()
df = (df
.filter(items=table_feats, axis=1)
.sort_values(by='ErstellungsDatum', ascending=True))
cols = [{"name": i, "id": i} for i in df.columns]
df = df.filter(items=table_feats, axis=1).sort_values(
by='ErstellungsDatum', ascending=True
)
cols = [{'name': i, 'id': i} for i in df.columns]
# convert dates to strings
for col in table_feats_dates:
df[col] = df[col].dt.strftime(r'%Y-%m-%d')
table_data = df.to_dict('records')
return table_data, cols
if __name__ == '__main__':
app.run(debug=True)
app.run(debug=True)

View File

@@ -0,0 +1,56 @@
# lang_main: Config file

# Input/output locations. All active values are relative paths; what they are
# resolved against is decided by the code that loads this file (not shown here).
[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
# Alternative results/dataset pairs, currently disabled.
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'

# One pair of boolean switches per stage: `<stage>` and `<stage>_skip`.
# NOTE(review): how `<stage> = false` differs from `<stage>_skip = true` is not
# visible in this file — confirm against the code that reads [control].
[control]
preprocessing = true
preprocessing_skip = false
token_analysis = false
token_analysis_skip = false
graph_postprocessing = false
graph_postprocessing_skip = false
time_analysis = false
time_analysis_skip = false

# Disabled table; the same key is currently set under [preprocess] below.
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'

[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
# NOTE(review): presumably the dataset columns to be parsed as dates — confirm.
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8

[graph_postprocessing]
threshold_edge_weight = 150

[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'

[time_analysis.model_input]
input_features = [
'VorgangsTypName',
'VorgangsArtText',
'VorgangsBeschreibung',
]
# The value is also the first entry of `input_features` above.
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
# NOTE(review): "acitivities" looks like a misspelling of "activities". The key is
# left unchanged because it must match whatever name the reading code looks up;
# rename it here and in that code together.
threshold_num_acitivities = 1
# Same key name as in [preprocess], but a separate setting scoped to this table.
threshold_similarity = 0.8

View File

@@ -0,0 +1,663 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"id": "3760b040-985c-46ec-ba77-13f0f7a52c83",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from lang_main import load_pickle"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "97487448-82c8-4b3d-8a1a-ccccaaac8d86",
"metadata": {},
"outputs": [],
"source": [
"def get_files(path: str) -> tuple[Path, ...]:\n",
"    \"\"\"Return every entry located directly inside the directory ``path``.\n",
"\n",
"    The entries are returned in the order ``Path.glob`` yields them; they are\n",
"    deliberately not sorted because later cells index into the result.\n",
"\n",
"    Raises ``AssertionError`` if ``path`` does not exist.\n",
"    \"\"\"\n",
"    # Use the caller-supplied location; previously the argument was ignored\n",
"    # in favour of a hard-coded absolute directory.\n",
"    p = Path(path)\n",
"    assert p.exists(), \"path does not exist\"\n",
"    return tuple(p.glob('*'))"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "598f4d99-9d35-49c9-8c5d-113d4c80cecf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "55ad4af3-87cd-4189-9309-171aba4e04a6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shared:INFO | 2024-05-29 12:49:47 +0000 | Loaded file successfully.\n"
]
}
],
"source": [
"file = files[-1]\n",
"ret = load_pickle(file)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "540f4720-a2bf-4171-8db5-8e6993d38c13",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3108</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
" <td>36</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2559</th>\n",
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>[211]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2558</th>\n",
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>[93]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2557</th>\n",
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>[1707]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2556</th>\n",
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
" <td>173</td>\n",
" <td>1</td>\n",
" <td>[1]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6782</th>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4545 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" entry ... num_assoc_obj_ids\n",
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... ... 206\n",
"33 Wöchentliche Sichtkontrolle / Reinigung ... 74\n",
"131 Tägliche Überprüfung der Ölabscheider ... 4\n",
"160 Wöchentliche Kontrolle der C-Anlagen ... 11\n",
"140 Halbjährliche Kontrolle des Stabbreithalters ... 166\n",
"... ... ... ...\n",
"2559 Fehler 9723 Leistungsversorgung Antrieb defekt ... 1\n",
"2558 T-Warp-Let-Off1 schleppfehler ... 1\n",
"2557 Fahrräder wurden gewartet und gereinigt. ... 1\n",
"2556 Bohrlöcher an Gebots- und Verbotszeichen anbri... ... 1\n",
"6782 Befestigung Deckel für Batteriefach defekt ... ... 2\n",
"\n",
"[4545 rows x 5 columns]"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ret[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee0fea45-c26b-4253-b7f6-95ad70d0205a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "82a059ea-0eb8-4db1-b859-3fc07e42faff",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 69,
"id": "d1c1190f-0c80-40e3-8965-78d68400a33d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
"files"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "e26c52eb-7a6b-49da-97a9-6e24a2a4d91e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shared:INFO | 2024-05-29 11:56:46 +0000 | Loaded file successfully.\n"
]
}
],
"source": [
"file = files[-1]\n",
"ret = load_pickle(file)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "beacf5ca-6946-413a-817c-e7e87da9ace3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>entry</th>\n",
" <th>len</th>\n",
" <th>num_occur</th>\n",
" <th>assoc_obj_ids</th>\n",
" <th>num_assoc_obj_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>162</td>\n",
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
" <td>66</td>\n",
" <td>92592</td>\n",
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
" <td>206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>33</td>\n",
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
" <td>39</td>\n",
" <td>3108</td>\n",
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>131</td>\n",
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
" <td>37</td>\n",
" <td>1619</td>\n",
" <td>[0, 970, 2134, 2137]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>160</td>\n",
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
" <td>36</td>\n",
" <td>1265</td>\n",
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>140</td>\n",
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
" <td>44</td>\n",
" <td>687</td>\n",
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6756</th>\n",
" <td>2559</td>\n",
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>[211]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6757</th>\n",
" <td>2558</td>\n",
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>[93]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6758</th>\n",
" <td>2557</td>\n",
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>[1707]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6759</th>\n",
" <td>2556</td>\n",
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
" <td>173</td>\n",
" <td>1</td>\n",
" <td>[1]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6760</th>\n",
" <td>6782</td>\n",
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>[306, 326]</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4545 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" index ... num_assoc_obj_ids\n",
"0 162 ... 206\n",
"1 33 ... 74\n",
"2 131 ... 4\n",
"3 160 ... 11\n",
"4 140 ... 166\n",
"... ... ... ...\n",
"6756 2559 ... 1\n",
"6757 2558 ... 1\n",
"6758 2557 ... 1\n",
"6759 2556 ... 1\n",
"6760 6782 ... 2\n",
"\n",
"[4545 rows x 6 columns]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ret[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2e873f4-363e-4dbf-93f1-927b4ee3c598",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 72,
"id": "cbf0b450-ec00-471f-9627-717e52c5471d",
"metadata": {},
"outputs": [],
"source": [
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "74e289ed-8d3e-4a50-afdf-d1d97e8a7807",
"metadata": {},
"outputs": [],
"source": [
"tup = tuple(i for i in range(100000000))"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "3e747e82-e6f8-47bb-918b-27bb7c37a10f",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6ade9c6f4e61410fb93f35e43222705b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/100000000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"num = 0\n",
"for i in tqdm(tup):\n",
" num += i"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "64cd6cc7-2803-41f1-b05c-83d65bdc7d42",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4999999950000000"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36366147-3632-4518-936e-878563305e49",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4dbc00b8-1437-4986-85e4-645a8bcf4a6d",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "17156aa0-8fd6-407b-b014-698df0e534a9",
"metadata": {},
"outputs": [],
"source": [
"arr = np.random.rand(1000,1000)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "4292a60b-9cb2-42d9-bedf-3b1120f1b515",
"metadata": {},
"outputs": [],
"source": [
"idx = np.argwhere(arr >= 0.97)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "4426f1d5-dcd2-4d64-bdca-7dece6793f8f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30220"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(idx)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "5b78436e-a828-42bd-a5ed-ae6045349391",
"metadata": {},
"outputs": [],
"source": [
"batch = idx[:200]"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "75edc50e-b64c-4319-8f74-27653ed3452c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"88.5 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"tuple(map(tuple, batch))"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "d9c827a4-ccdf-4cc1-90af-b018ae4858a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"94.9 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"tuple(tuple(x) for x in batch)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "acb2a0c9-b7d2-463d-8e63-c52fc7754ae8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}