STRF for similarity duplicates, time analysis pipeline, enhanced config
This commit is contained in:
@@ -13,29 +13,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 2,
|
||||
"id": "bca16fc4-1ffb-48ef-bd0d-bdc782428a45",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\foersterflorian\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'ihm_analyse'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mihm_analyse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CONFIG\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mihm_analyse\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpreprocess\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 3\u001b[0m load_raw_data,\n\u001b[0;32m 4\u001b[0m remove_duplicates,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m merge_similarity_dupl,\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mihm_analyse\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpipelines\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BasePipeline, EmbeddingPipeline\n",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'ihm_analyse'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from ihm_analyse import CONFIG\n",
|
||||
"from ihm_analyse.lib.preprocess import (\n",
|
||||
"from lang_main import CONFIG\n",
|
||||
"from lang_main.lib.preprocess import (\n",
|
||||
" load_raw_data,\n",
|
||||
" remove_duplicates,\n",
|
||||
" remove_NA,\n",
|
||||
@@ -47,8 +43,8 @@
|
||||
" list_cosSim_dupl_candidates,\n",
|
||||
" merge_similarity_dupl,\n",
|
||||
")\n",
|
||||
"from ihm_analyse.lib.pipelines import BasePipeline, EmbeddingPipeline\n",
|
||||
"from ihm_analyse.lib.helpers import (\n",
|
||||
"from lang_main.pipelines import BasePipeline, EmbeddingPipeline\n",
|
||||
"from lang_main.lib.helpers import (\n",
|
||||
" save_pickle, \n",
|
||||
" load_pickle, \n",
|
||||
" create_saving_folder,\n",
|
||||
|
||||
BIN
test-notebooks/dashboard/Pipe-TargetFeature_Step-3_remove_NA.pkl
Normal file
BIN
test-notebooks/dashboard/Pipe-TargetFeature_Step-3_remove_NA.pkl
Normal file
Binary file not shown.
Binary file not shown.
@@ -1,28 +1,42 @@
|
||||
from typing import cast
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
from dash import (
|
||||
Dash,
|
||||
html,
|
||||
dcc,
|
||||
callback,
|
||||
Output,
|
||||
Input,
|
||||
Output,
|
||||
State,
|
||||
callback,
|
||||
dash_table,
|
||||
dcc,
|
||||
html,
|
||||
)
|
||||
import plotly.express as px
|
||||
import pandas as pd
|
||||
from lang_main import load_pickle
|
||||
from lang_main.types import ObjectID, TimelineCandidates
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main import load_pickle
|
||||
from lang_main.types import TimelineCandidates, ObjectID
|
||||
|
||||
#df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
|
||||
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
|
||||
|
||||
# ** data
|
||||
data = cast(DataFrame, load_pickle('./data.pkl'))
|
||||
cands = cast(TimelineCandidates, load_pickle('./map_candidates.pkl'))
|
||||
texts = cast(dict[ObjectID, str], load_pickle('./map_texts.pkl'))
|
||||
p_df = Path(r'.\test-notebooks\dashboard\Pipe-TargetFeature_Step-3_remove_NA.pkl')
|
||||
p_tl = Path(
|
||||
r'.\test-notebooks\dashboard\Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl'
|
||||
)
|
||||
ret = cast(DataFrame, load_pickle(p_df))
|
||||
data = ret[0]
|
||||
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
|
||||
cands = ret[0]
|
||||
texts = ret[1]
|
||||
|
||||
# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
|
||||
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
|
||||
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
|
||||
# data = cast(DataFrame, load_pickle(p_df))
|
||||
# cands = cast(TimelineCandidates, load_pickle(p_cands))
|
||||
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
|
||||
|
||||
table_feats = [
|
||||
'ErstellungsDatum',
|
||||
'ErledigungsDatum',
|
||||
@@ -52,25 +66,28 @@ hover_data = {
|
||||
app = Dash(prevent_initial_callbacks=True)
|
||||
|
||||
app.layout = [
|
||||
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign':'center'}),
|
||||
html.Div(children=[
|
||||
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
|
||||
dcc.Dropdown(
|
||||
list(cands.keys()),
|
||||
id='dropdown-selection',
|
||||
placeholder="ObjektID auswählen...",
|
||||
)
|
||||
]),
|
||||
html.Div(children=[
|
||||
html.H3(id='object_text'),
|
||||
dcc.Dropdown(id='choice-candidates'),
|
||||
dcc.Graph(id='graph-output'),
|
||||
]),
|
||||
html.Div(children=[
|
||||
dash_table.DataTable(id='table-candidates')
|
||||
]),
|
||||
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
|
||||
dcc.Dropdown(
|
||||
list(cands.keys()),
|
||||
id='dropdown-selection',
|
||||
placeholder='ObjektID auswählen...',
|
||||
),
|
||||
]
|
||||
),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H3(id='object_text'),
|
||||
dcc.Dropdown(id='choice-candidates'),
|
||||
dcc.Graph(id='graph-output'),
|
||||
]
|
||||
),
|
||||
html.Div(children=[dash_table.DataTable(id='table-candidates')]),
|
||||
]
|
||||
|
||||
|
||||
@callback(
|
||||
Output('object_text', 'children'),
|
||||
Input('dropdown-selection', 'value'),
|
||||
@@ -82,6 +99,7 @@ def update_obj_text(obj_id):
|
||||
headline = f'HObjektText: {obj_text}'
|
||||
return headline
|
||||
|
||||
|
||||
@callback(
|
||||
Output('choice-candidates', 'options'),
|
||||
Input('dropdown-selection', 'value'),
|
||||
@@ -90,9 +108,10 @@ def update_obj_text(obj_id):
|
||||
def update_choice_candidates(obj_id):
|
||||
obj_id = int(obj_id)
|
||||
cands_obj_id = cands[obj_id]
|
||||
choices = list(range(1, len(cands_obj_id)+1))
|
||||
choices = list(range(1, len(cands_obj_id) + 1))
|
||||
return choices
|
||||
|
||||
|
||||
@callback(
|
||||
Output('graph-output', 'figure'),
|
||||
Input('choice-candidates', 'value'),
|
||||
@@ -106,7 +125,7 @@ def update_timeline(index, obj_id):
|
||||
title = f'HObjektText: {obj_text}'
|
||||
# cands
|
||||
cands_obj_id = cands[obj_id]
|
||||
cands_choice = cands_obj_id[int(index)-1]
|
||||
cands_choice = cands_obj_id[int(index) - 1]
|
||||
# data
|
||||
df = data.loc[list(cands_choice)].sort_index()
|
||||
# figure
|
||||
@@ -117,22 +136,18 @@ def update_timeline(index, obj_id):
|
||||
title=title,
|
||||
hover_data=hover_data,
|
||||
)
|
||||
fig.update_traces(
|
||||
mode='markers+lines',
|
||||
marker=markers,
|
||||
marker_symbol='diamond'
|
||||
)
|
||||
fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
|
||||
fig.update_xaxes(
|
||||
tickformat="%B\n%Y",
|
||||
tickformat='%B\n%Y',
|
||||
rangeslider_visible=True,
|
||||
)
|
||||
fig.update_yaxes(type='category')
|
||||
fig.update_layout(hovermode="x unified")
|
||||
fig.update_layout(hovermode='x unified')
|
||||
return fig
|
||||
|
||||
|
||||
@callback(
|
||||
[Output('table-candidates', 'data'),
|
||||
Output('table-candidates', 'columns')],
|
||||
[Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
|
||||
Input('choice-candidates', 'value'),
|
||||
State('dropdown-selection', 'value'),
|
||||
prevent_initial_call=True,
|
||||
@@ -141,19 +156,20 @@ def update_table_candidates(index, obj_id):
|
||||
obj_id = int(obj_id)
|
||||
# cands
|
||||
cands_obj_id = cands[obj_id]
|
||||
cands_choice = cands_obj_id[int(index)-1]
|
||||
cands_choice = cands_obj_id[int(index) - 1]
|
||||
# data
|
||||
df = data.loc[list(cands_choice)].sort_index()
|
||||
df = (df
|
||||
.filter(items=table_feats, axis=1)
|
||||
.sort_values(by='ErstellungsDatum', ascending=True))
|
||||
cols = [{"name": i, "id": i} for i in df.columns]
|
||||
df = df.filter(items=table_feats, axis=1).sort_values(
|
||||
by='ErstellungsDatum', ascending=True
|
||||
)
|
||||
cols = [{'name': i, 'id': i} for i in df.columns]
|
||||
# convert dates to strings
|
||||
for col in table_feats_dates:
|
||||
df[col] = df[col].dt.strftime(r'%Y-%m-%d')
|
||||
|
||||
|
||||
table_data = df.to_dict('records')
|
||||
return table_data, cols
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(debug=True)
|
||||
app.run(debug=True)
|
||||
|
||||
56
test-notebooks/dashboard/lang_main_config.toml
Normal file
56
test-notebooks/dashboard/lang_main_config.toml
Normal file
@@ -0,0 +1,56 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_new2/'
|
||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
[control]
|
||||
preprocessing = true
|
||||
preprocessing_skip = false
|
||||
token_analysis = false
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing = false
|
||||
graph_postprocessing_skip = false
|
||||
time_analysis = false
|
||||
time_analysis_skip = false
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
[preprocess]
|
||||
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.model_input]
|
||||
input_features = [
|
||||
'VorgangsTypName',
|
||||
'VorgangsArtText',
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
Binary file not shown.
Binary file not shown.
663
test-notebooks/display_results.ipynb
Normal file
663
test-notebooks/display_results.ipynb
Normal file
@@ -0,0 +1,663 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "3760b040-985c-46ec-ba77-13f0f7a52c83",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"from lang_main import load_pickle"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "97487448-82c8-4b3d-8a1a-ccccaaac8d86",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_files(path: str) -> tuple[Path, ...]:\n",
|
||||
" p = Path(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
|
||||
" assert p.exists(), \"path does not exist\"\n",
|
||||
" return tuple(p.glob(r'*'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"id": "598f4d99-9d35-49c9-8c5d-113d4c80cecf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
|
||||
]
|
||||
},
|
||||
"execution_count": 87,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
|
||||
"files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"id": "55ad4af3-87cd-4189-9309-171aba4e04a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"shared:INFO | 2024-05-29 12:49:47 +0000 | Loaded file successfully.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"file = files[-1]\n",
|
||||
"ret = load_pickle(file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 89,
|
||||
"id": "540f4720-a2bf-4171-8db5-8e6993d38c13",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>entry</th>\n",
|
||||
" <th>len</th>\n",
|
||||
" <th>num_occur</th>\n",
|
||||
" <th>assoc_obj_ids</th>\n",
|
||||
" <th>num_assoc_obj_ids</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>162</th>\n",
|
||||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" <td>92592</td>\n",
|
||||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||||
" <td>206</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>33</th>\n",
|
||||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||||
" <td>39</td>\n",
|
||||
" <td>3108</td>\n",
|
||||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||||
" <td>74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>131</th>\n",
|
||||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||||
" <td>37</td>\n",
|
||||
" <td>1619</td>\n",
|
||||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>160</th>\n",
|
||||
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
|
||||
" <td>36</td>\n",
|
||||
" <td>1265</td>\n",
|
||||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>140</th>\n",
|
||||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||||
" <td>44</td>\n",
|
||||
" <td>687</td>\n",
|
||||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2559</th>\n",
|
||||
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
|
||||
" <td>46</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[211]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2558</th>\n",
|
||||
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[93]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2557</th>\n",
|
||||
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[1707]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2556</th>\n",
|
||||
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
|
||||
" <td>173</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[1]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6782</th>\n",
|
||||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||||
" <td>106</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>[306, 326]</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>4545 rows × 5 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" entry ... num_assoc_obj_ids\n",
|
||||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... ... 206\n",
|
||||
"33 Wöchentliche Sichtkontrolle / Reinigung ... 74\n",
|
||||
"131 Tägliche Überprüfung der Ölabscheider ... 4\n",
|
||||
"160 Wöchentliche Kontrolle der C-Anlagen ... 11\n",
|
||||
"140 Halbjährliche Kontrolle des Stabbreithalters ... 166\n",
|
||||
"... ... ... ...\n",
|
||||
"2559 Fehler 9723 Leistungsversorgung Antrieb defekt ... 1\n",
|
||||
"2558 T-Warp-Let-Off1 schleppfehler ... 1\n",
|
||||
"2557 Fahrräder wurden gewartet und gereinigt. ... 1\n",
|
||||
"2556 Bohrlöcher an Gebots- und Verbotszeichen anbri... ... 1\n",
|
||||
"6782 Befestigung Deckel für Batteriefach defekt ... ... 2\n",
|
||||
"\n",
|
||||
"[4545 rows x 5 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 89,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ret[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ee0fea45-c26b-4253-b7f6-95ad70d0205a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82a059ea-0eb8-4db1-b859-3fc07e42faff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"id": "d1c1190f-0c80-40e3-8965-78d68400a33d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-3_remove_NA.pkl'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-TargetFeature_Step-5_analyse_feature.pkl'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pkl'))"
|
||||
]
|
||||
},
|
||||
"execution_count": 69,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"files = get_files(r'A:\\Arbeitsaufgaben\\lang-main\\scripts\\results\\test_20240529')\n",
|
||||
"files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"id": "e26c52eb-7a6b-49da-97a9-6e24a2a4d91e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"shared:INFO | 2024-05-29 11:56:46 +0000 | Loaded file successfully.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"file = files[-1]\n",
|
||||
"ret = load_pickle(file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"id": "beacf5ca-6946-413a-817c-e7e87da9ace3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>index</th>\n",
|
||||
" <th>entry</th>\n",
|
||||
" <th>len</th>\n",
|
||||
" <th>num_occur</th>\n",
|
||||
" <th>assoc_obj_ids</th>\n",
|
||||
" <th>num_assoc_obj_ids</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>162</td>\n",
|
||||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" <td>92592</td>\n",
|
||||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||||
" <td>206</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>33</td>\n",
|
||||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||||
" <td>39</td>\n",
|
||||
" <td>3108</td>\n",
|
||||
" <td>[301, 304, 305, 313, 314, 323, 329, 331, 332, ...</td>\n",
|
||||
" <td>74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>131</td>\n",
|
||||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||||
" <td>37</td>\n",
|
||||
" <td>1619</td>\n",
|
||||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>160</td>\n",
|
||||
" <td>Wöchentliche Kontrolle der C-Anlagen</td>\n",
|
||||
" <td>36</td>\n",
|
||||
" <td>1265</td>\n",
|
||||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>140</td>\n",
|
||||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||||
" <td>44</td>\n",
|
||||
" <td>687</td>\n",
|
||||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6756</th>\n",
|
||||
" <td>2559</td>\n",
|
||||
" <td>Fehler 9723 Leistungsversorgung Antrieb defekt</td>\n",
|
||||
" <td>46</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[211]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6757</th>\n",
|
||||
" <td>2558</td>\n",
|
||||
" <td>T-Warp-Let-Off1 schleppfehler</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[93]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6758</th>\n",
|
||||
" <td>2557</td>\n",
|
||||
" <td>Fahrräder wurden gewartet und gereinigt.</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[1707]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6759</th>\n",
|
||||
" <td>2556</td>\n",
|
||||
" <td>Bohrlöcher an Gebots- und Verbotszeichen anbri...</td>\n",
|
||||
" <td>173</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[1]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6760</th>\n",
|
||||
" <td>6782</td>\n",
|
||||
" <td>Befestigung Deckel für Batteriefach defekt ...</td>\n",
|
||||
" <td>106</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>[306, 326]</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>4545 rows × 6 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" index ... num_assoc_obj_ids\n",
|
||||
"0 162 ... 206\n",
|
||||
"1 33 ... 74\n",
|
||||
"2 131 ... 4\n",
|
||||
"3 160 ... 11\n",
|
||||
"4 140 ... 166\n",
|
||||
"... ... ... ...\n",
|
||||
"6756 2559 ... 1\n",
|
||||
"6757 2558 ... 1\n",
|
||||
"6758 2557 ... 1\n",
|
||||
"6759 2556 ... 1\n",
|
||||
"6760 6782 ... 2\n",
|
||||
"\n",
|
||||
"[4545 rows x 6 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 71,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ret[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d2e873f4-363e-4dbf-93f1-927b4ee3c598",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"id": "cbf0b450-ec00-471f-9627-717e52c5471d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from tqdm.auto import tqdm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"id": "74e289ed-8d3e-4a50-afdf-d1d97e8a7807",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tup = tuple(i for i in range(100000000))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"id": "3e747e82-e6f8-47bb-918b-27bb7c37a10f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6ade9c6f4e61410fb93f35e43222705b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/100000000 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"num = 0\n",
|
||||
"for i in tqdm(tup):\n",
|
||||
" num += i"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"id": "64cd6cc7-2803-41f1-b05c-83d65bdc7d42",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"4999999950000000"
|
||||
]
|
||||
},
|
||||
"execution_count": 86,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"num"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "36366147-3632-4518-936e-878563305e49",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "4dbc00b8-1437-4986-85e4-645a8bcf4a6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "17156aa0-8fd6-407b-b014-698df0e534a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arr = np.random.rand(1000,1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "4292a60b-9cb2-42d9-bedf-3b1120f1b515",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"idx = np.argwhere(arr >= 0.97)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "4426f1d5-dcd2-4d64-bdca-7dece6793f8f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"30220"
|
||||
]
|
||||
},
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(idx)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"id": "5b78436e-a828-42bd-a5ed-ae6045349391",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"batch = idx[:200]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"id": "75edc50e-b64c-4319-8f74-27653ed3452c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"88.5 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%timeit\n",
|
||||
"tuple(map(tuple, batch))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"id": "d9c827a4-ccdf-4cc1-90af-b018ae4858a7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"94.9 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%timeit\n",
|
||||
"tuple(tuple(x) for x in batch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "acb2a0c9-b7d2-463d-8e63-c52fc7754ae8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user