improved dashboard, fixed language tags, tests graph plotting

This commit is contained in:
Florian Förster
2024-06-26 16:13:53 +02:00
parent fb4437a3a2
commit 2656780907
11 changed files with 541 additions and 1714 deletions

View File

@@ -16,8 +16,8 @@ target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.
p = Path(target).resolve()
ret = lang_main.io.load_pickle(p)
tk_graph = cast(graphs.TokenGraph, ret[0])
tk_graph_filtered = tk_graph.filter_by_edge_weight(150, None)
tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
cyto_data_base, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
MIN_WEIGHT = weight_data['min']
@@ -235,8 +235,16 @@ def update_edge_weight(weight_min, weight_max):
weight_min = MIN_WEIGHT
if weight_max is None:
weight_max = MAX_WEIGHT
tk_graph_filtered = tk_graph.filter_by_edge_weight(weight_min, weight_max)
tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
tk_graph_filtered = graphs.filter_graph_by_edge_weight(
tk_graph,
weight_min,
weight_max,
)
tk_graph_filtered = graphs.filter_graph_by_node_degree(
tk_graph_filtered,
1,
None,
)
cyto_data, _ = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
return cyto_data

View File

@@ -2,10 +2,9 @@ import time
import webbrowser
from pathlib import Path
from threading import Thread
from typing import cast
from typing import Any, Final, cast
import dash_cytoscape as cyto
import pandas as pd
import plotly.express as px
from dash import (
Dash,
@@ -19,45 +18,33 @@ from dash import (
)
from pandas import DataFrame
from lang_main.analysis import graphs
from lang_main.io import load_pickle
from lang_main.types import ObjectID, TimelineCandidates
from lang_main.analysis import tokens
import lang_main.io
from lang_main.analysis import graphs, tokens
from lang_main.constants import SPCY_MODEL
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
from lang_main.types import ObjectID, TimelineCandidates
# ** data
# p_df = Path(r'../Pipe-TargetFeature_Step-3_remove_NA.pkl').resolve()
p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
# p_tl = Path(r'/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl').resolve()
(data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))
p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
ret = cast(tuple[DataFrame], load_pickle(p_df))
data = ret[0]
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
cands = ret[0]
texts = ret[1]
cands, texts = cast(
tuple[TimelineCandidates, dict[ObjectID, str]], lang_main.io.load_pickle(p_tl)
)
# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
# data = cast(DataFrame, load_pickle(p_df))
# cands = cast(TimelineCandidates, load_pickle(p_cands))
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
table_feats = [
TABLE_FEATS: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
'VorgangsTypName',
'VorgangsBeschreibung',
]
table_feats_dates = [
TABLE_FEATS_DATES: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
]
# ** figure config
markers = {
MARKERS: Final[dict[str, Any]] = {
'size': 12,
'color': 'yellow',
'line': {
@@ -65,15 +52,15 @@ markers = {
'color': 'red',
},
}
hover_data = {
HOVER_DATA: Final[dict[str, Any]] = {
'ErstellungsDatum': '|%d.%m.%Y',
'VorgangsBeschreibung': True,
}
# ** graphs
# ** graph
target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
p = Path(target).resolve()
ret = load_pickle(p)
ret = lang_main.io.load_pickle(p)
tk_graph = cast(graphs.TokenGraph, ret[0])
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
@@ -142,14 +129,12 @@ my_stylesheet = [
# {'selector': '.triangle', 'style': {'shape': 'triangle'}},
]
# ** app
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)
graph_layout = html.Div(
[
html.Button('Trigger JS Weight', id='test_js_weight'),
html.Button('Trigger Candidate Graph', id='cand_graph'),
html.Button('Trigger Candidate Graph', id='graph-build-btn'),
dcc.Store(id='graph-store', storage_type='memory'),
dcc.Store(id='graph-store-cyto-curr_cands', storage_type='memory'),
html.Div(id='output'),
html.Div(
[
@@ -184,7 +169,7 @@ graph_layout = html.Div(
[
html.H3('Graph Filter'),
dcc.Input(
id='weight_min',
id='graph-weight_min',
type='number',
min=MIN_WEIGHT,
max=MAX_WEIGHT,
@@ -194,7 +179,7 @@ graph_layout = html.Div(
style={'width': '40%'},
),
dcc.Input(
id='weight_max',
id='graph-weight_max',
type='number',
min=MIN_WEIGHT,
max=MAX_WEIGHT,
@@ -204,7 +189,7 @@ graph_layout = html.Div(
style={'width': '40%'},
),
html.H3('Graph'),
html.Button('Re-Layout', id='trigger_relayout'),
html.Button('Re-Layout', id='graph-trigger_relayout'),
html.Div(
[
cyto.Cytoscape(
@@ -230,6 +215,12 @@ graph_layout = html.Div(
],
)
# ** app
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div(
[
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
@@ -238,16 +229,16 @@ app.layout = html.Div(
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
dcc.Dropdown(
list(cands.keys()),
id='dropdown-selection',
id='selector-obj_id',
placeholder='ObjektID auswählen...',
),
]
),
html.Div(
children=[
html.H3(id='object_text'),
dcc.Dropdown(id='choice-candidates'),
dcc.Graph(id='graph-output'),
html.H3(id='object-text'),
dcc.Dropdown(id='selector-candidates'),
dcc.Graph(id='graph-candidates'),
]
),
html.Div(
@@ -260,8 +251,8 @@ app.layout = html.Div(
@callback(
Output('object_text', 'children'),
Input('dropdown-selection', 'value'),
Output('object-text', 'children'),
Input('selector-obj_id', 'value'),
prevent_initial_call=True,
)
def update_obj_text(obj_id):
@@ -272,21 +263,24 @@ def update_obj_text(obj_id):
@callback(
    [Output('selector-candidates', 'options'), Output('selector-candidates', 'value')],
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_choice_candidates(obj_id):
    """Populate the candidate selector for the chosen object ID.

    Args:
        obj_id: Current value of the ``selector-obj_id`` dropdown. It is
            ``None`` when the user clears the selection.

    Returns:
        A tuple ``(options, value)``: the 1-based candidate indices available
        for ``obj_id`` and the first index as the pre-selected value.

    Raises:
        PreventUpdate: If no object ID is selected, so the dependent
            components keep their current state.
    """
    # Local import keeps this block self-contained; ``dash`` is already a
    # dependency of this module.
    from dash.exceptions import PreventUpdate

    # A cleared dropdown reports ``None``; ``int(None)`` would raise.
    if obj_id is None:
        raise PreventUpdate

    obj_id = int(obj_id)
    cands_obj_id = cands[obj_id]
    # Candidates are offered to the user with 1-based numbering.
    choices = list(range(1, len(cands_obj_id) + 1))
    return choices, choices[0]
# TODO: check whether the pre-filtered result can be stored
# TODO: change the input of ``update_table_candidates`` and
#       ``display_candidates_as_graph`` to a storage component
@callback(
Output('graph-output', 'figure'),
Input('choice-candidates', 'value'),
State('dropdown-selection', 'value'),
Output('graph-candidates', 'figure'),
Input('selector-candidates', 'value'),
State('selector-obj_id', 'value'),
prevent_initial_call=True,
)
def update_timeline(index, obj_id):
@@ -295,19 +289,20 @@ def update_timeline(index, obj_id):
obj_text = texts[obj_id]
title = f'HObjektText: {obj_text}'
# cands
cands_obj_id = cands[obj_id]
cands_choice = cands_obj_id[int(index) - 1]
# cands_per_obj_id = cands[obj_id]
# cands_similar = cands_per_obj_id[int(index) - 1]
# data
df = data.loc[list(cands_choice)].sort_index() # type: ignore
# df = data.loc[list(cands_similar)].sort_index() # type: ignore
df = pre_filter_data(data, idx=index, obj_id=obj_id)
# figure
fig = px.line(
data_frame=df,
x='ErstellungsDatum',
y='ObjektID',
title=title,
hover_data=hover_data,
hover_data=HOVER_DATA,
)
fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
fig.update_traces(mode='markers+lines', marker=MARKERS, marker_symbol='diamond')
fig.update_xaxes(
tickformat='%B\n%Y',
rangeslider_visible=True,
@@ -319,24 +314,18 @@ def update_timeline(index, obj_id):
@callback(
[Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
Input('choice-candidates', 'value'),
State('dropdown-selection', 'value'),
Input('selector-candidates', 'value'),
State('selector-obj_id', 'value'),
prevent_initial_call=True,
)
def update_table_candidates(index, obj_id):
# obj_id = int(obj_id)
# # cands
# cands_obj_id = cands[obj_id]
# cands_choice = cands_obj_id[int(index) - 1]
# # data
# df = data.loc[list(cands_choice)].sort_index() # type: ignore
df = pre_filter_data(data, idx=index, obj_id=obj_id)
df = df.filter(items=table_feats, axis=1).sort_values(
df = df.filter(items=TABLE_FEATS, axis=1).sort_values(
by='ErstellungsDatum', ascending=True
)
cols = [{'name': i, 'id': i} for i in df.columns]
# convert dates to strings
for col in table_feats_dates:
for col in TABLE_FEATS_DATES:
df[col] = df[col].dt.strftime(r'%Y-%m-%d')
table_data = df.to_dict('records')
@@ -348,6 +337,7 @@ def pre_filter_data(
idx: int,
obj_id: ObjectID,
) -> DataFrame:
idx = int(idx)
obj_id = int(obj_id)
data = data.copy()
# cands
@@ -359,33 +349,53 @@ def pre_filter_data(
return data
# ** graph
# ** graph callbacks
# TODO store pre-calculated graph
@app.callback(
Output('cytoscape-graph', 'elements', allow_duplicate=True),
Output('weight_min', 'min', allow_duplicate=True),
Output('weight_min', 'max', allow_duplicate=True),
Output('weight_min', 'placeholder', allow_duplicate=True),
Output('weight_max', 'min', allow_duplicate=True),
Output('weight_max', 'max', allow_duplicate=True),
Output('weight_max', 'placeholder', allow_duplicate=True),
Input('cand_graph', 'n_clicks'),
State('choice-candidates', 'value'),
State('dropdown-selection', 'value'),
Output('graph-weight_min', 'min', allow_duplicate=True),
Output('graph-weight_min', 'max', allow_duplicate=True),
Output('graph-weight_min', 'placeholder', allow_duplicate=True),
Output('graph-weight_max', 'min', allow_duplicate=True),
Output('graph-weight_max', 'max', allow_duplicate=True),
Output('graph-weight_max', 'placeholder', allow_duplicate=True),
Output('graph-store', 'data'),
Output('graph-store-cyto-curr_cands', 'data'),
# Input('graph-build-btn', 'n_clicks'),
Input('selector-candidates', 'value'),
State('selector-obj_id', 'value'),
prevent_initial_call=True,
)
def update_graph_candidates(_, index, obj_id):
def display_candidates_as_graph(index, obj_id):
t1 = time.perf_counter()
df = pre_filter_data(data, idx=index, obj_id=obj_id)
t2 = time.perf_counter()
print(f'Time for filtering: {t2 - t1} s')
t1 = time.perf_counter()
tk_graph_cands, _ = tokens.build_token_graph(
data=df,
model=SPCY_MODEL,
target_feature='VorgangsBeschreibung',
build_map=False,
logging_graph=False,
)
t2 = time.perf_counter()
print(f'Time for graph building: {t2 - t1} s')
t1 = time.perf_counter()
cyto_data, weight_info = graphs.convert_graph_to_cytoscape(tk_graph_cands)
weight_min = weight_info['min']
weight_max = weight_info['max']
placeholder_min = f'Minimum edge weight: {weight_min} - {weight_max}'
placeholder_max = f'Minimum edge weight: {weight_min} - {weight_max}'
placeholder_max = f'Maximum edge weight: {weight_min} - {weight_max}'
t2 = time.perf_counter()
print(f'Time for graph metadata and conversion: {t2 - t1} s')
t1 = time.perf_counter()
graph_to_store = lang_main.io.encode_to_base64_str(tk_graph_cands)
t2 = time.perf_counter()
print(f'Time for encoding: {t2 - t1} s')
return (
cyto_data,
weight_min,
@@ -394,6 +404,8 @@ def update_graph_candidates(_, index, obj_id):
weight_min,
weight_max,
placeholder_max,
graph_to_store,
cyto_data,
)
@@ -412,30 +424,44 @@ def update_layout_internal(layout_choice):
@app.callback(
    Output('cytoscape-graph', 'zoom'),
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Output('graph-weight_min', 'value'),
    Output('graph-weight_max', 'value'),
    Input('bt-reset', 'n_clicks'),
    State('graph-store-cyto-curr_cands', 'data'),
    prevent_initial_call=True,
)
def reset_layout(_, current_cands_cyto_elements):
    """Reset the zoom, the graph elements and both edge-weight inputs.

    Args:
        _: Click count of the ``bt-reset`` button; only used as the trigger.
        current_cands_cyto_elements: Content of the
            ``graph-store-cyto-curr_cands`` store, passed through unchanged
            as the new graph elements.

    Returns:
        A 4-tuple in the order of the declared outputs: zoom ``1``, the
        stored elements, and ``None`` for the minimum and maximum weight
        input values.
    """
    return (1, current_cands_cyto_elements, None, None)
# update edge weight
@app.callback(
Output('cytoscape-graph', 'elements', allow_duplicate=True),
Input('weight_min', 'value'),
Input('weight_max', 'value'),
Input('graph-weight_min', 'value'),
Input('graph-weight_max', 'value'),
State('graph-store', 'data'),
State('graph-store-cyto-curr_cands', 'data'),
State('graph-weight_min', 'min'),
State('graph-weight_min', 'max'),
prevent_initial_call=True,
)
def update_edge_weight(weight_min, weight_max):
if not any([weight_min, weight_max]):
return cyto_data_base
def update_edge_weight(
weight_min,
weight_max,
current_graph,
current_cands_cyto_elements,
current_min,
current_max,
):
if not any((weight_min, weight_max)):
return current_cands_cyto_elements
if weight_min is None:
weight_min = MIN_WEIGHT
weight_min = current_min
if weight_max is None:
weight_max = MAX_WEIGHT
weight_max = current_max
tk_graph = cast(graphs.TokenGraph, lang_main.io.decode_from_base64_str(current_graph))
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, weight_min, weight_max)
# tk_graph_filtered = tk_graph.filter_by_edge_weight(weight_min, weight_max)
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
@@ -444,6 +470,7 @@ def update_edge_weight(weight_min, weight_max):
return cyto_data
# ** graph: layout with edge weight
app.clientside_callback(
"""
function(n_clicks, layout) {
@@ -458,11 +485,12 @@ app.clientside_callback(
}
""",
Output('cytoscape-graph', 'layout', allow_duplicate=True),
Input('trigger_relayout', 'n_clicks'),
Input('graph-trigger_relayout', 'n_clicks'),
State('cytoscape-graph', 'layout'),
prevent_initial_call=True,
)
# ** graph: display edge weight (line thickness)
app.clientside_callback(
"""
function(n_clicks, stylesheet) {