improved imports, dummy dataset generation

This commit is contained in:
Florian Förster
2024-08-07 20:06:06 +02:00
parent 3f58a14852
commit 9328c0218a
35 changed files with 1966 additions and 106 deletions

View File

@@ -3,11 +3,11 @@ import webbrowser
from collections.abc import Collection, Iterable
from threading import Thread
from typing import Any, Final, cast
import pandas as pd
from pathlib import Path
# import dash_cytoscape as cyto
import plotly.express as px
import plotly.io
from dash import (
Dash,
Input,
@@ -22,16 +22,17 @@ from pandas import DataFrame
from plotly.graph_objects import Figure
import lang_main.io
from lang_main import model_loader as m_load
from lang_main.analysis import graphs, tokens
from lang_main.analysis.timeline import (
calc_delta_to_next_failure,
filter_timeline_cands,
)
from lang_main.constants import (
MODEL_LOADER_MAP,
NAME_DELTA_FEAT_TO_NEXT_FAILURE,
NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER,
SPCY_MODEL,
)
from lang_main.errors import EmptyEdgesError, EmptyGraphError
from lang_main.pipelines.predefined import (
@@ -43,10 +44,16 @@ from lang_main.types import (
EntryPoints,
HTMLColumns,
HTMLTable,
LanguageModels,
ObjectID,
TimelineCandidates,
)
# ** model
# Language model handle passed as `model=` to tokens.build_token_graph in
# this module. It is obtained through the project's model loader, selecting
# the LanguageModels.SPACY entry of MODEL_LOADER_MAP.
# NOTE(review): presumably this loads the spaCy pipeline eagerly when the
# statement runs -- confirm against lang_main.model_loader.
SPACY_MODEL = m_load.instantiate_model(
model_load_map=MODEL_LOADER_MAP,
model=LanguageModels.SPACY,
)
# ** data
# p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
@@ -62,17 +69,42 @@ rescaling_pipe = build_tk_graph_rescaling_pipe(
exit_point=EntryPoints.TIMELINE_TK_GRAPH_RESCALED,
save_result=False,
)
BASE_NETWORK_NAME: Final[str] = 'test_timeline'
BASE_NETWORK_NAME: Final[str] = 'timeline_candidates'
# RENDER_FOLDER: Final[Path] = Path.cwd() / 'assets/'
graph_render_pipe = build_tk_graph_render_pipe(
with_subgraphs=False,
base_network_name=BASE_NETWORK_NAME,
)
# PTH_RENDERED_GRAPH = f'assets/{BASE_NETWORK_NAME}.svg'
# Paths of the artifacts offered for download by the dashboard. Each one is
# built with lang_main.io.get_entry_point from SAVE_PATH_FOLDER, a base file
# name and a file extension.
# NOTE(review): check_existence=False is presumably required because these
# files are only written later by the callbacks -- confirm against
# lang_main.io.get_entry_point.

# SVG export of the timeline chart (written by the timeline download callback).
PTH_RENDERED_TIMELINE = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
'chart_timeline',
file_ext='.svg',
check_existence=False,
)
# Excel export of the timeline overview table.
PTH_TABLE_TIMELINE = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
'table_timeline',
file_ext='.xlsx',
check_existence=False,
)
# SVG export of the delta-to-repair chart.
PTH_RENDERED_DELTA_REPAIR = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
'chart_delta_repair',
file_ext='.svg',
check_existence=False,
)
# Excel export of the delta-to-repair table.
PTH_TABLE_DELTA_REPAIR = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
'table_delta_repair',
file_ext='.xlsx',
check_existence=False,
)
# SVG file of the statically rendered graph; shares its base name with the
# network name handed to the graph render pipeline (BASE_NETWORK_NAME).
PTH_RENDERED_GRAPH = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
BASE_NETWORK_NAME,
file_ext='.svg',
check_existence=False,
)
# NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
@@ -168,7 +200,7 @@ graph_layout = html.Div(
id='static-graph-img',
alt='static rendered graph',
style={
'width': 'auto',
'width': '900px',
'height': 'auto',
},
),
@@ -212,7 +244,27 @@ app.layout = html.Div(
children=[
html.H3(id='object-text'),
dcc.Dropdown(id='selector-candidates'),
html.Button(
'Download Diagramm',
id='bt-dl-timeline',
style={
'marginLeft': 'auto',
'width': '300px',
'marginTop': '1em',
},
),
dcc.Download(id='dl-timeline'),
dcc.Graph(id='figure-occurrences'),
html.Button(
'Download Diagramm',
id='bt-dl-deltarepair',
style={
'marginLeft': 'auto',
'width': '300px',
'marginTop': '1em',
},
),
dcc.Download(id='dl-deltarepair'),
dcc.Graph(id='figure-delta'),
]
),
@@ -221,6 +273,16 @@ app.layout = html.Div(
html.Div(
[
html.H5('Überblick ähnlicher Vorgänge'),
dcc.Download(id='dl-table-timeline'),
html.Button(
'Download Table',
id='bt-table-timeline',
style={
'marginLeft': 'auto',
'width': '300px',
'marginTop': '1em',
},
),
dash_table.DataTable(id='table-candidates'),
],
style={'paddingBottom': '1em'},
@@ -233,6 +295,16 @@ app.layout = html.Div(
'bis zum nächsten Ereignis'
)
),
dcc.Download(id='dl-table-deltarepair'),
html.Button(
'Download Table',
id='bt-table-deltarepair',
style={
'marginLeft': 'auto',
'width': '300px',
'marginTop': '1em',
},
),
dash_table.DataTable(id='table-best-actions'),
]
),
@@ -368,6 +440,7 @@ def transform_to_HTML_table(
date_cols: Iterable[str] | None = None,
sorting_feature: str | None = None,
sorting_ascending: bool = True,
save_path: Path | None = None,
) -> tuple[HTMLColumns, HTMLTable]:
target_features = list(target_features)
data = data.copy()
@@ -383,6 +456,9 @@ def transform_to_HTML_table(
columns = [{'name': col, 'id': col} for col in data.columns]
table_data = data.to_dict('records')
if save_path is not None:
data.to_excel(save_path)
return columns, table_data
@@ -410,6 +486,7 @@ def update_tables_candidates(
date_cols=TABLE_FEATS_DATES,
sorting_feature='ErstellungsDatum',
sorting_ascending=True,
save_path=PTH_TABLE_TIMELINE,
)
# df = df.filter(items=TABLE_FEATS_OVERVIEW, axis=1).sort_values(
# by='ErstellungsDatum', ascending=True
@@ -430,6 +507,7 @@ def update_tables_candidates(
data=cands_best_actions,
target_features=TABLE_FEATS_BEST_ACTIONS,
date_cols=TABLE_FEATS_DATES,
save_path=PTH_TABLE_DELTA_REPAIR,
)
return overview_cols, overview_table, best_actions_cols, best_actions_table
@@ -457,7 +535,7 @@ def display_candidates_as_graph(index, obj_id):
t1 = time.perf_counter()
tk_graph_cands, _ = tokens.build_token_graph(
data=df,
model=SPCY_MODEL,
model=SPACY_MODEL,
target_feature='VorgangsBeschreibung',
build_map=False,
logging_graph=False,
@@ -496,10 +574,58 @@ def display_candidates_as_graph(index, obj_id):
Input('bt-reset', 'n_clicks'),
prevent_initial_call=True,
)
def func(n_clicks):
def download_graph(_):
return dcc.send_file(path=PTH_RENDERED_GRAPH)
@callback(
    Output('dl-timeline', 'data'),
    Input('bt-dl-timeline', 'n_clicks'),
    State('figure-occurrences', 'figure'),
    prevent_initial_call=True,
)
def download_timeline(_, fig: dict | None):
    """Export the timeline chart to a file and send it to the browser.

    Args:
        _: click count of the download button; only used as trigger.
        fig: current state of the 'figure-occurrences' graph as a plain
            dict, or None if no figure has been rendered yet.

    Returns:
        The download payload produced by ``dcc.send_file`` for
        ``PTH_RENDERED_TIMELINE``.

    Raises:
        PreventUpdate: if there is no figure to export.
    """
    # dash is already a dependency of this module; imported locally so the
    # block does not rely on the module-level import list.
    from dash.exceptions import PreventUpdate

    if not fig:
        # nothing rendered yet -> no download instead of a TypeError
        raise PreventUpdate

    # The figure state coming back from the browser may carry a 'yaxis'
    # entry below the x-axis range slider, which has to be removed before
    # the dict is turned into a Figure.
    # NOTE(review): presumably Figure() validation rejects that entry --
    # confirm. Missing keys are tolerated instead of raising KeyError.
    rangeslider = fig.get('layout', {}).get('xaxis', {}).get('rangeslider')
    if isinstance(rangeslider, dict):
        rangeslider.pop('yaxis', None)

    figure = Figure(fig)
    figure.write_image(PTH_RENDERED_TIMELINE)

    return dcc.send_file(path=PTH_RENDERED_TIMELINE)
@callback(
    Output('dl-deltarepair', 'data'),
    Input('bt-dl-deltarepair', 'n_clicks'),
    State('figure-delta', 'figure'),
    prevent_initial_call=True,
)
def download_delta_repair(_, fig: dict | None):
    """Export the delta-to-repair chart to a file and send it to the browser.

    Args:
        _: click count of the download button; only used as trigger.
        fig: current state of the 'figure-delta' graph as a plain dict, or
            None if no figure has been rendered yet.

    Returns:
        The download payload produced by ``dcc.send_file`` for
        ``PTH_RENDERED_DELTA_REPAIR``.

    Raises:
        PreventUpdate: if there is no figure to export.
    """
    # dash is already a dependency of this module; imported locally so the
    # block does not rely on the module-level import list.
    from dash.exceptions import PreventUpdate

    if not fig:
        # nothing rendered yet -> no download instead of a TypeError
        raise PreventUpdate

    # The figure state coming back from the browser may carry a 'yaxis'
    # entry below the x-axis range slider, which has to be removed before
    # the dict is turned into a Figure.
    # NOTE(review): presumably Figure() validation rejects that entry --
    # confirm. Missing keys are tolerated instead of raising KeyError.
    rangeslider = fig.get('layout', {}).get('xaxis', {}).get('rangeslider')
    if isinstance(rangeslider, dict):
        rangeslider.pop('yaxis', None)

    figure = Figure(fig)
    figure.write_image(PTH_RENDERED_DELTA_REPAIR)

    return dcc.send_file(path=PTH_RENDERED_DELTA_REPAIR)
@callback(
    Output('dl-table-timeline', 'data'),
    Input('bt-table-timeline', 'n_clicks'),
    prevent_initial_call=True,
)
def download_table_timeline(_):
    """Send the exported timeline table file to the browser.

    The button's click count only triggers the callback and is otherwise
    ignored. The file at ``PTH_TABLE_TIMELINE`` is handed to
    ``dcc.send_file`` unchanged.
    """
    table_path = PTH_TABLE_TIMELINE
    payload = dcc.send_file(path=table_path)
    return payload
@callback(
    Output('dl-table-deltarepair', 'data'),
    Input('bt-table-deltarepair', 'n_clicks'),
    prevent_initial_call=True,
)
def download_table_delta_repair(_):
    """Send the exported delta-to-repair table file to the browser.

    The button's click count only triggers the callback and is otherwise
    ignored. The file at ``PTH_TABLE_DELTA_REPAIR`` is handed to
    ``dcc.send_file`` unchanged.
    """
    table_path = PTH_TABLE_DELTA_REPAIR
    payload = dcc.send_file(path=table_path)
    return payload
def _start_webbrowser():
host = '127.0.0.1'
port = '8050'

View File

@@ -2,8 +2,10 @@
[paths]
inputs = './inputs/'
results = './results/test_20240619/'
dataset = '../data/02_202307/Export4.csv'
results = './results/dummy_N_1000/'
dataset = '../data/Dummy_Dataset_N_1000.csv'
# results = './results/test_20240807/'
# dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
@@ -12,12 +14,12 @@ dataset = '../data/02_202307/Export4.csv'
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing_skip = true
token_analysis_skip = true
graph_postprocessing_skip = true
graph_rescaling_skip = true
preprocessing_skip = false
token_analysis_skip = false
graph_postprocessing_skip = false
graph_rescaling_skip = false
graph_static_rendering_skip = false
time_analysis_skip = true
time_analysis_skip = false
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
@@ -34,7 +36,7 @@ threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 150
threshold_edge_weight = 1
[time_analysis.uniqueness]
threshold_unique_texts = 4