sandboxing

This commit is contained in:
Florian Förster
2024-08-05 08:43:45 +02:00
parent 9197146d2c
commit 3f58a14852
10 changed files with 2362 additions and 283 deletions

View File

@@ -1,9 +1,11 @@
import time
import webbrowser
from pathlib import Path
from collections.abc import Collection, Iterable
from threading import Thread
from typing import Any, Final, cast
import pandas as pd
# import dash_cytoscape as cyto
import plotly.express as px
from dash import (
@@ -21,20 +23,37 @@ from plotly.graph_objects import Figure
import lang_main.io
from lang_main.analysis import graphs, tokens
from lang_main.constants import SAVE_PATH_FOLDER, SPCY_MODEL
from lang_main.analysis.timeline import (
calc_delta_to_next_failure,
filter_timeline_cands,
)
from lang_main.constants import (
NAME_DELTA_FEAT_TO_NEXT_FAILURE,
NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER,
SPCY_MODEL,
)
from lang_main.errors import EmptyEdgesError, EmptyGraphError
from lang_main.pipelines.predefined import (
build_tk_graph_render_pipe,
build_tk_graph_rescaling_pipe,
)
from lang_main.types import EntryPoints, ObjectID, TimelineCandidates
from lang_main.types import (
DataFrameTLFiltered,
EntryPoints,
HTMLColumns,
HTMLTable,
ObjectID,
TimelineCandidates,
)
# ** data
# Module-level data loading: both pickles are resolved through
# lang_main.io.get_entry_point(SAVE_PATH_FOLDER, <EntryPoints member>) and read
# once at import time.
# NOTE(review): load_pickle presumably wraps `pickle` -- only point
# SAVE_PATH_FOLDER at trusted files; confirm against lang_main.io.
# p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
# NOTE(review): p_df is assigned twice in a row; only the second value
# (EntryPoints.TIMELINE_POST) reaches load_pickle below.
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
# The pickle is unpacked as a 1-tuple holding the timeline DataFrame.
(data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))
# data = cleanup_descriptions(data, properties=['ErledigungsBeschreibung'])
# p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
# NOTE(review): p_tl is likewise assigned twice; only the second value
# (EntryPoints.TIMELINE_CANDS) is loaded.
p_tl = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
p_tl = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_CANDS)
# `cands` is indexed by object ID in the filter helper and handed to
# filter_timeline_cands; `texts` maps an object ID to its text and is used
# for the figure titles.
cands, texts = cast(
tuple[TimelineCandidates, dict[ObjectID, str]], lang_main.io.load_pickle(p_tl)
)
@@ -56,17 +75,27 @@ PTH_RENDERED_GRAPH = lang_main.io.get_entry_point(
file_ext='.svg',
)
TABLE_FEATS: Final[list[str]] = [
# NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
TABLE_FEATS_OVERVIEW: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
NAME_DELTA_FEAT_TO_REPAIR,
'VorgangsTypName',
'VorgangsBeschreibung',
'ErledigungsBeschreibung',
]
TABLE_FEATS_DATES: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
]
TABLE_FEATS_BEST_ACTIONS: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
'VorgangsTypName',
'VorgangsBeschreibung',
'ErledigungsBeschreibung',
NAME_DELTA_FEAT_TO_NEXT_FAILURE,
]
# ** figure config
MARKERS_OCCURRENCES: Final[dict[str, Any]] = {
@@ -86,13 +115,15 @@ HOVER_DATA: Final[dict[str, Any]] = {
'ErstellungsDatum': '|%d.%m.%Y',
'ErledigungsDatum': '|%d.%m.%Y',
'VorgangsBeschreibung': True,
'ErledigungsBeschreibung': True,
}
HOVER_DATA_DELTA: Final[dict[str, Any]] = {
'ErstellungsDatum': '|%d.%m.%Y',
'ErledigungsDatum': '|%d.%m.%Y',
'VorgangsDatum': '|%d.%m.%Y',
'delta': True,
NAME_DELTA_FEAT_TO_REPAIR: True,
'VorgangsBeschreibung': True,
'ErledigungsBeschreibung': True,
}
# ** graph
@@ -136,10 +167,10 @@ graph_layout = html.Div(
html.Img(
id='static-graph-img',
alt='static rendered graph',
# style={
# 'width': 'auto',
# 'height': 'auto',
# },
style={
'width': 'auto',
'height': 'auto',
},
),
html.P(id='info-graph-errors', children=[]),
],
@@ -186,7 +217,27 @@ app.layout = html.Div(
]
),
html.Div(
[dash_table.DataTable(id='table-candidates')], style={'marginBottom': '2em'}
children=[
html.Div(
[
html.H5('Überblick ähnlicher Vorgänge'),
dash_table.DataTable(id='table-candidates'),
],
style={'paddingBottom': '1em'},
),
html.Div(
[
html.H5(
(
'Maßnahmen sortiert nach längstem Zeitraum '
'bis zum nächsten Ereignis'
)
),
dash_table.DataTable(id='table-best-actions'),
]
),
],
style={'marginBottom': '2em', 'padding': '2em'},
),
graph_layout,
],
@@ -222,20 +273,21 @@ def update_choice_candidates(obj_id):
# ** helpers to filter DataFrame
# Select the rows of `data` that belong to one timeline candidate of an object.
#
# Parameters:
#   data   -- DataFrame to filter.
#   idx    -- candidate number as chosen in the dashboard (1-based).
#   obj_id -- object ID whose candidates are looked up.
# Reads the module-level `cands` mapping; it is not passed in as a parameter.
# Returns the filtered frame.
def pre_filter_data(
def filter_candidates(
data: DataFrame,
idx: int,
obj_id: ObjectID,
) -> DataFrame:
) -> DataFrameTLFiltered:
# assert correct data type because of Dash
# (the values come from Dash component properties, so they are coerced to int
# before being used as a lookup key / position)
idx = int(idx)
obj_id = int(obj_id)
# data = data.copy()
# inline selection: pick the candidate set by 0-based position (idx - 1)
cands_for_obj_id = cands[obj_id]
cands_choice = cands_for_obj_id[int(idx) - 1]
# data
data = data.loc[list(cands_choice)].sort_index() # type: ignore
# 'delta' = days between 'ErstellungsDatum' and 'ErledigungsDatum'
data['delta'] = data['ErledigungsDatum'] - data['ErstellungsDatum']
data['delta'] = data['delta'].dt.days
# selection delegated to lang_main.analysis.timeline.filter_timeline_cands
# NOTE(review): presumably this helper also adds the time-to-repair column
# (NAME_DELTA_FEAT_TO_REPAIR) that the figures and tables read -- confirm.
data = filter_timeline_cands(
data=data,
cands=cands,
obj_id=obj_id,
entry_idx=(idx - 1), # idx in Dashboard starts with 1
)
return data
@@ -258,10 +310,10 @@ def update_timeline(index, obj_id):
obj_text = texts[obj_id]
title_occurrences = f'HObjektText: {obj_text}'
title_delta = f'HObjektText: {obj_text}, Differenz Erstellung und Erledigung'
df = pre_filter_data(data, idx=index, obj_id=obj_id)
df = filter_candidates(data, idx=index, obj_id=obj_id)
# figure
fig_occurrences = fig_timeline_occurrences(df, title_occurrences)
fig_delta = fig_timeline_delta(df, title_delta)
fig_delta = fig_timeline_delta(df, title_delta, delta_feature=NAME_DELTA_FEAT_TO_REPAIR)
return fig_occurrences, fig_delta
@@ -293,11 +345,12 @@ def fig_timeline_occurrences(
def fig_timeline_delta(
df: DataFrame,
title: str,
delta_feature: str,
) -> Figure:
fig = px.scatter(
data_frame=df,
x='ErstellungsDatum',
y='delta',
y=delta_feature,
title=title,
hover_data=HOVER_DATA_DELTA,
)
@@ -309,25 +362,77 @@ def fig_timeline_delta(
return fig
def transform_to_HTML_table(
    data: DataFrame,
    target_features: Collection[str],
    date_cols: Iterable[str] | None = None,
    sorting_feature: str | None = None,
    sorting_ascending: bool = True,
) -> tuple[HTMLColumns, HTMLTable]:
    """Convert a DataFrame into the columns/records pair of a Dash DataTable.

    Parameters
    ----------
    data
        Source frame; it is never modified.
    target_features
        Columns to show. Names that are missing from ``data`` are dropped
        silently (``DataFrame.filter`` semantics).
    date_cols
        Datetime columns to render as ``YYYY-MM-DD`` strings. Columns that are
        not part of the resulting table are skipped.
    sorting_feature
        Column to sort the rows by; ``None`` keeps the incoming row order.
        The column must exist in ``data`` but need not be displayed.
    sorting_ascending
        Sort direction, only used when ``sorting_feature`` is given.

    Returns
    -------
    tuple
        ``(columns, table_data)``: a list of ``{'name': ..., 'id': ...}``
        column specs and the rows as a list of record dicts.

    Raises
    ------
    KeyError
        If ``sorting_feature`` is not a column of ``data``.
    """
    # Sort by the requested feature (previously the key was hard-coded to
    # 'ErstellungsDatum'). Sorting happens before the projection so the sort
    # key does not have to be one of the displayed columns.
    if sorting_feature is not None:
        data = data.sort_values(by=sorting_feature, ascending=sorting_ascending)

    # Explicit copy: the date formatting below must not touch the caller's frame.
    table = data.filter(items=list(target_features), axis=1).copy()

    if date_cols is not None:
        for col in date_cols:
            if col in table.columns:
                table[col] = table[col].dt.strftime(r'%Y-%m-%d')

    columns = [{'name': col, 'id': col} for col in table.columns]
    table_data = table.to_dict('records')

    return columns, table_data
# 'table-best-actions'
# ** HTML table
# Dash callback: fills both candidate tables.
# Triggered by a change of 'selector-candidates'; the selected object ID is read
# as State (it does not trigger the callback itself). Not run on initial load.
@callback(
[Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
# The order of the returned tuple must match this Output list:
# (overview columns, overview rows, best-actions columns, best-actions rows).
[
Output('table-candidates', 'columns'),
Output('table-candidates', 'data'),
Output('table-best-actions', 'columns'),
Output('table-best-actions', 'data'),
],
Input('selector-candidates', 'value'),
State('selector-obj_id', 'value'),
prevent_initial_call=True,
)
def update_table_candidates(index, obj_id):
df = pre_filter_data(data, idx=index, obj_id=obj_id)
df = df.filter(items=TABLE_FEATS, axis=1).sort_values(
by='ErstellungsDatum', ascending=True
def update_tables_candidates(
index,
obj_id,
) -> tuple[HTMLColumns, HTMLTable, HTMLColumns, HTMLTable]:
# NOTE(review): the local name `cands` shadows the module-level `cands`
# mapping that filter_candidates reads; harmless here because it is a local,
# but a distinct name would be clearer.
cands = filter_candidates(data, idx=index, obj_id=obj_id)
# Overview table: selected features, date columns rendered as strings,
# sorted ascending by creation date.
overview_cols, overview_table = transform_to_HTML_table(
data=cands,
target_features=TABLE_FEATS_OVERVIEW,
date_cols=TABLE_FEATS_DATES,
sorting_feature='ErstellungsDatum',
sorting_ascending=True,
)
cols = [{'name': i, 'id': i} for i in df.columns]
# convert dates to strings
for col in TABLE_FEATS_DATES:
df[col] = df[col].dt.strftime(r'%Y-%m-%d')
# df = df.filter(items=TABLE_FEATS_OVERVIEW, axis=1).sort_values(
# by='ErstellungsDatum', ascending=True
# )
# cols = [{'name': i, 'id': i} for i in df.columns]
# # convert dates to strings
# for col in TABLE_FEATS_DATES:
# df[col] = df[col].dt.strftime(r'%Y-%m-%d')
table_data = df.to_dict('records')
return table_data, cols
# table_data = df.to_dict('records')
# Best-actions table: adds the NAME_DELTA_FEAT_TO_NEXT_FAILURE column, computed
# by calc_delta_to_next_failure from 'ErstellungsDatum'.
cands_best_actions = calc_delta_to_next_failure(
data=cands,
date_feature='ErstellungsDatum',
name_delta_feature=NAME_DELTA_FEAT_TO_NEXT_FAILURE,
)
# NOTE(review): no sorting_feature is passed here although the table heading
# promises actions sorted by the longest time until the next event; unless
# calc_delta_to_next_failure already returns sorted rows, consider passing
# sorting_feature=NAME_DELTA_FEAT_TO_NEXT_FAILURE, sorting_ascending=False.
best_actions_cols, best_actions_table = transform_to_HTML_table(
data=cands_best_actions,
target_features=TABLE_FEATS_BEST_ACTIONS,
date_cols=TABLE_FEATS_DATES,
)
return overview_cols, overview_table, best_actions_cols, best_actions_table
# ** graph callbacks
@@ -345,7 +450,7 @@ def update_table_candidates(index, obj_id):
def display_candidates_as_graph(index, obj_id):
error_msg = ''
t1 = time.perf_counter()
df = pre_filter_data(data, idx=index, obj_id=obj_id)
df = filter_candidates(data, idx=index, obj_id=obj_id)
t2 = time.perf_counter()
print(f'Time for filtering: {t2 - t1} s')