improved imports, dummy dataset generation
This commit is contained in:
@@ -3,11 +3,11 @@ import webbrowser
|
||||
from collections.abc import Collection, Iterable
|
||||
from threading import Thread
|
||||
from typing import Any, Final, cast
|
||||
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
|
||||
# import dash_cytoscape as cyto
|
||||
import plotly.express as px
|
||||
import plotly.io
|
||||
from dash import (
|
||||
Dash,
|
||||
Input,
|
||||
@@ -22,16 +22,17 @@ from pandas import DataFrame
|
||||
from plotly.graph_objects import Figure
|
||||
|
||||
import lang_main.io
|
||||
from lang_main import model_loader as m_load
|
||||
from lang_main.analysis import graphs, tokens
|
||||
from lang_main.analysis.timeline import (
|
||||
calc_delta_to_next_failure,
|
||||
filter_timeline_cands,
|
||||
)
|
||||
from lang_main.constants import (
|
||||
MODEL_LOADER_MAP,
|
||||
NAME_DELTA_FEAT_TO_NEXT_FAILURE,
|
||||
NAME_DELTA_FEAT_TO_REPAIR,
|
||||
SAVE_PATH_FOLDER,
|
||||
SPCY_MODEL,
|
||||
)
|
||||
from lang_main.errors import EmptyEdgesError, EmptyGraphError
|
||||
from lang_main.pipelines.predefined import (
|
||||
@@ -43,10 +44,16 @@ from lang_main.types import (
|
||||
EntryPoints,
|
||||
HTMLColumns,
|
||||
HTMLTable,
|
||||
LanguageModels,
|
||||
ObjectID,
|
||||
TimelineCandidates,
|
||||
)
|
||||
|
||||
# ** model
|
||||
SPACY_MODEL = m_load.instantiate_model(
|
||||
model_load_map=MODEL_LOADER_MAP,
|
||||
model=LanguageModels.SPACY,
|
||||
)
|
||||
# ** data
|
||||
# p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
|
||||
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
|
||||
@@ -62,17 +69,42 @@ rescaling_pipe = build_tk_graph_rescaling_pipe(
|
||||
exit_point=EntryPoints.TIMELINE_TK_GRAPH_RESCALED,
|
||||
save_result=False,
|
||||
)
|
||||
BASE_NETWORK_NAME: Final[str] = 'test_timeline'
|
||||
BASE_NETWORK_NAME: Final[str] = 'timeline_candidates'
|
||||
# RENDER_FOLDER: Final[Path] = Path.cwd() / 'assets/'
|
||||
graph_render_pipe = build_tk_graph_render_pipe(
|
||||
with_subgraphs=False,
|
||||
base_network_name=BASE_NETWORK_NAME,
|
||||
)
|
||||
# PTH_RENDERED_GRAPH = f'assets/{BASE_NETWORK_NAME}.svg'
|
||||
PTH_RENDERED_TIMELINE = lang_main.io.get_entry_point(
|
||||
SAVE_PATH_FOLDER,
|
||||
'chart_timeline',
|
||||
file_ext='.svg',
|
||||
check_existence=False,
|
||||
)
|
||||
PTH_TABLE_TIMELINE = lang_main.io.get_entry_point(
|
||||
SAVE_PATH_FOLDER,
|
||||
'table_timeline',
|
||||
file_ext='.xlsx',
|
||||
check_existence=False,
|
||||
)
|
||||
PTH_RENDERED_DELTA_REPAIR = lang_main.io.get_entry_point(
|
||||
SAVE_PATH_FOLDER,
|
||||
'chart_delta_repair',
|
||||
file_ext='.svg',
|
||||
check_existence=False,
|
||||
)
|
||||
PTH_TABLE_DELTA_REPAIR = lang_main.io.get_entry_point(
|
||||
SAVE_PATH_FOLDER,
|
||||
'table_delta_repair',
|
||||
file_ext='.xlsx',
|
||||
check_existence=False,
|
||||
)
|
||||
PTH_RENDERED_GRAPH = lang_main.io.get_entry_point(
|
||||
SAVE_PATH_FOLDER,
|
||||
BASE_NETWORK_NAME,
|
||||
file_ext='.svg',
|
||||
check_existence=False,
|
||||
)
|
||||
|
||||
# NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
|
||||
@@ -168,7 +200,7 @@ graph_layout = html.Div(
|
||||
id='static-graph-img',
|
||||
alt='static rendered graph',
|
||||
style={
|
||||
'width': 'auto',
|
||||
'width': '900px',
|
||||
'height': 'auto',
|
||||
},
|
||||
),
|
||||
@@ -212,7 +244,27 @@ app.layout = html.Div(
|
||||
children=[
|
||||
html.H3(id='object-text'),
|
||||
dcc.Dropdown(id='selector-candidates'),
|
||||
html.Button(
|
||||
'Download Diagramm',
|
||||
id='bt-dl-timeline',
|
||||
style={
|
||||
'marginLeft': 'auto',
|
||||
'width': '300px',
|
||||
'marginTop': '1em',
|
||||
},
|
||||
),
|
||||
dcc.Download(id='dl-timeline'),
|
||||
dcc.Graph(id='figure-occurrences'),
|
||||
html.Button(
|
||||
'Download Diagramm',
|
||||
id='bt-dl-deltarepair',
|
||||
style={
|
||||
'marginLeft': 'auto',
|
||||
'width': '300px',
|
||||
'marginTop': '1em',
|
||||
},
|
||||
),
|
||||
dcc.Download(id='dl-deltarepair'),
|
||||
dcc.Graph(id='figure-delta'),
|
||||
]
|
||||
),
|
||||
@@ -221,6 +273,16 @@ app.layout = html.Div(
|
||||
html.Div(
|
||||
[
|
||||
html.H5('Überblick ähnlicher Vorgänge'),
|
||||
dcc.Download(id='dl-table-timeline'),
|
||||
html.Button(
|
||||
'Download Table',
|
||||
id='bt-table-timeline',
|
||||
style={
|
||||
'marginLeft': 'auto',
|
||||
'width': '300px',
|
||||
'marginTop': '1em',
|
||||
},
|
||||
),
|
||||
dash_table.DataTable(id='table-candidates'),
|
||||
],
|
||||
style={'paddingBottom': '1em'},
|
||||
@@ -233,6 +295,16 @@ app.layout = html.Div(
|
||||
'bis zum nächsten Ereignis'
|
||||
)
|
||||
),
|
||||
dcc.Download(id='dl-table-deltarepair'),
|
||||
html.Button(
|
||||
'Download Table',
|
||||
id='bt-table-deltarepair',
|
||||
style={
|
||||
'marginLeft': 'auto',
|
||||
'width': '300px',
|
||||
'marginTop': '1em',
|
||||
},
|
||||
),
|
||||
dash_table.DataTable(id='table-best-actions'),
|
||||
]
|
||||
),
|
||||
@@ -368,6 +440,7 @@ def transform_to_HTML_table(
|
||||
date_cols: Iterable[str] | None = None,
|
||||
sorting_feature: str | None = None,
|
||||
sorting_ascending: bool = True,
|
||||
save_path: Path | None = None,
|
||||
) -> tuple[HTMLColumns, HTMLTable]:
|
||||
target_features = list(target_features)
|
||||
data = data.copy()
|
||||
@@ -383,6 +456,9 @@ def transform_to_HTML_table(
|
||||
columns = [{'name': col, 'id': col} for col in data.columns]
|
||||
table_data = data.to_dict('records')
|
||||
|
||||
if save_path is not None:
|
||||
data.to_excel(save_path)
|
||||
|
||||
return columns, table_data
|
||||
|
||||
|
||||
@@ -410,6 +486,7 @@ def update_tables_candidates(
|
||||
date_cols=TABLE_FEATS_DATES,
|
||||
sorting_feature='ErstellungsDatum',
|
||||
sorting_ascending=True,
|
||||
save_path=PTH_TABLE_TIMELINE,
|
||||
)
|
||||
# df = df.filter(items=TABLE_FEATS_OVERVIEW, axis=1).sort_values(
|
||||
# by='ErstellungsDatum', ascending=True
|
||||
@@ -430,6 +507,7 @@ def update_tables_candidates(
|
||||
data=cands_best_actions,
|
||||
target_features=TABLE_FEATS_BEST_ACTIONS,
|
||||
date_cols=TABLE_FEATS_DATES,
|
||||
save_path=PTH_TABLE_DELTA_REPAIR,
|
||||
)
|
||||
|
||||
return overview_cols, overview_table, best_actions_cols, best_actions_table
|
||||
@@ -457,7 +535,7 @@ def display_candidates_as_graph(index, obj_id):
|
||||
t1 = time.perf_counter()
|
||||
tk_graph_cands, _ = tokens.build_token_graph(
|
||||
data=df,
|
||||
model=SPCY_MODEL,
|
||||
model=SPACY_MODEL,
|
||||
target_feature='VorgangsBeschreibung',
|
||||
build_map=False,
|
||||
logging_graph=False,
|
||||
@@ -496,10 +574,58 @@ def display_candidates_as_graph(index, obj_id):
|
||||
Input('bt-reset', 'n_clicks'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def func(n_clicks):
|
||||
def download_graph(_):
|
||||
return dcc.send_file(path=PTH_RENDERED_GRAPH)
|
||||
|
||||
|
||||
@callback(
    Output('dl-timeline', 'data'),
    Input('bt-dl-timeline', 'n_clicks'),
    State('figure-occurrences', 'figure'),
    prevent_initial_call=True,
)
def download_timeline(_, fig: dict):
    """Write the current timeline figure to disk and offer it as a download.

    Args:
        _: click count of the 'bt-dl-timeline' button; unused, it only
            triggers the callback.
        fig: the 'figure' property of the 'figure-occurrences' graph as
            handed over by Dash (a plain dict).

    Returns:
        The result of ``dcc.send_file`` for ``PTH_RENDERED_TIMELINE``.
    """
    # Workaround: the 'yaxis' entry below the x-axis range slider is removed
    # before the dict is turned into a ``Figure``.
    # NOTE(review): presumably Figure() rejects that entry -- confirm.
    # ``pop`` with a default (instead of ``del``) keeps the callback from
    # raising KeyError when the range slider carries no 'yaxis' entry or the
    # layout has no 'xaxis' at all.
    xaxis = fig.get('layout', {}).get('xaxis', {})
    rangeslider = xaxis.get('rangeslider')
    if isinstance(rangeslider, dict):
        rangeslider.pop('yaxis', None)

    figure = Figure(fig)
    figure.write_image(PTH_RENDERED_TIMELINE)
    return dcc.send_file(path=PTH_RENDERED_TIMELINE)
|
||||
|
||||
|
||||
@callback(
    Output('dl-deltarepair', 'data'),
    Input('bt-dl-deltarepair', 'n_clicks'),
    State('figure-delta', 'figure'),
    prevent_initial_call=True,
)
def download_delta_repair(_, fig: dict):
    """Write the current delta/repair figure to disk and offer it as a download.

    Args:
        _: click count of the 'bt-dl-deltarepair' button; unused, it only
            triggers the callback.
        fig: the 'figure' property of the 'figure-delta' graph as handed
            over by Dash (a plain dict).

    Returns:
        The result of ``dcc.send_file`` for ``PTH_RENDERED_DELTA_REPAIR``.
    """
    # Workaround: the 'yaxis' entry below the x-axis range slider is removed
    # before the dict is turned into a ``Figure``.
    # NOTE(review): presumably Figure() rejects that entry -- confirm.
    # ``pop`` with a default (instead of ``del``) keeps the callback from
    # raising KeyError when the range slider carries no 'yaxis' entry or the
    # layout has no 'xaxis' at all.
    xaxis = fig.get('layout', {}).get('xaxis', {})
    rangeslider = xaxis.get('rangeslider')
    if isinstance(rangeslider, dict):
        rangeslider.pop('yaxis', None)

    figure = Figure(fig)
    figure.write_image(PTH_RENDERED_DELTA_REPAIR)
    return dcc.send_file(path=PTH_RENDERED_DELTA_REPAIR)
|
||||
|
||||
|
||||
@callback(
    Output('dl-table-timeline', 'data'),
    Input('bt-table-timeline', 'n_clicks'),
    prevent_initial_call=True,
)
def download_table_timeline(_):
    """Send the file at ``PTH_TABLE_TIMELINE`` to the 'dl-table-timeline' download.

    The single argument is the click count of 'bt-table-timeline'; it is
    ignored and only serves as the trigger.
    """
    download_payload = dcc.send_file(path=PTH_TABLE_TIMELINE)
    return download_payload
|
||||
|
||||
|
||||
@callback(
    Output('dl-table-deltarepair', 'data'),
    Input('bt-table-deltarepair', 'n_clicks'),
    prevent_initial_call=True,
)
def download_table_delta_repair(_):
    """Send the file at ``PTH_TABLE_DELTA_REPAIR`` to the 'dl-table-deltarepair' download.

    The single argument is the click count of 'bt-table-deltarepair'; it is
    ignored and only serves as the trigger.
    """
    download_payload = dcc.send_file(path=PTH_TABLE_DELTA_REPAIR)
    return download_payload
|
||||
|
||||
|
||||
def _start_webbrowser():
|
||||
host = '127.0.0.1'
|
||||
port = '8050'
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_20240619/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
results = './results/dummy_N_1000/'
|
||||
dataset = '../data/Dummy_Dataset_N_1000.csv'
|
||||
# results = './results/test_20240807/'
|
||||
# dataset = '../data/02_202307/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
@@ -12,12 +14,12 @@ dataset = '../data/02_202307/Export4.csv'
|
||||
# debugging-only switches; production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing_skip = true
|
||||
graph_rescaling_skip = true
|
||||
preprocessing_skip = false
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
graph_rescaling_skip = false
|
||||
graph_static_rendering_skip = false
|
||||
time_analysis_skip = true
|
||||
time_analysis_skip = false
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
@@ -34,7 +36,7 @@ threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
threshold_edge_weight = 1
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
|
||||
Reference in New Issue
Block a user