improved dashboard, fixed language tags, tests graph plotting
This commit is contained in:
@@ -30,11 +30,11 @@ from lang_main.types import (
|
||||
)
|
||||
|
||||
# ** build pipelines
|
||||
pipe_merge = build_merge_duplicates_pipe()
|
||||
pipe_target_feat = build_base_target_feature_pipe()
|
||||
pipe_timeline = build_timeline_pipe()
|
||||
pipe_merge = build_merge_duplicates_pipe()
|
||||
pipe_token_analysis = build_tk_graph_pipe()
|
||||
pipe_graph_postprocessing = build_tk_graph_post_pipe()
|
||||
pipe_timeline = build_timeline_pipe()
|
||||
|
||||
|
||||
# ** preprocessing pipeline
|
||||
@@ -76,8 +76,6 @@ def run_graph_postprocessing() -> None:
|
||||
# filter graph by edge weight and remove single nodes (no connection)
|
||||
ret = cast(tuple[TokenGraph], pipe_graph_postprocessing.run(starting_values=(tk_graph,)))
|
||||
tk_graph_filtered = ret[0]
|
||||
# tk_graph_filtered = tk_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT, None)
|
||||
# tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
|
||||
tk_graph_filtered.to_GraphML(
|
||||
SAVE_PATH_FOLDER, filename='TokenGraph-filtered', directed=False
|
||||
)
|
||||
|
||||
@@ -16,8 +16,8 @@ target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.
|
||||
p = Path(target).resolve()
|
||||
ret = lang_main.io.load_pickle(p)
|
||||
tk_graph = cast(graphs.TokenGraph, ret[0])
|
||||
tk_graph_filtered = tk_graph.filter_by_edge_weight(150, None)
|
||||
tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
|
||||
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
|
||||
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
|
||||
cyto_data_base, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
|
||||
|
||||
MIN_WEIGHT = weight_data['min']
|
||||
@@ -235,8 +235,16 @@ def update_edge_weight(weight_min, weight_max):
|
||||
weight_min = MIN_WEIGHT
|
||||
if weight_max is None:
|
||||
weight_max = MAX_WEIGHT
|
||||
tk_graph_filtered = tk_graph.filter_by_edge_weight(weight_min, weight_max)
|
||||
tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
|
||||
tk_graph_filtered = graphs.filter_graph_by_edge_weight(
|
||||
tk_graph,
|
||||
weight_min,
|
||||
weight_max,
|
||||
)
|
||||
tk_graph_filtered = graphs.filter_graph_by_node_degree(
|
||||
tk_graph_filtered,
|
||||
1,
|
||||
None,
|
||||
)
|
||||
cyto_data, _ = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
|
||||
return cyto_data
|
||||
|
||||
|
||||
@@ -2,10 +2,9 @@ import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import cast
|
||||
from typing import Any, Final, cast
|
||||
|
||||
import dash_cytoscape as cyto
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
from dash import (
|
||||
Dash,
|
||||
@@ -19,45 +18,33 @@ from dash import (
|
||||
)
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.analysis import graphs
|
||||
from lang_main.io import load_pickle
|
||||
from lang_main.types import ObjectID, TimelineCandidates
|
||||
from lang_main.analysis import tokens
|
||||
import lang_main.io
|
||||
from lang_main.analysis import graphs, tokens
|
||||
from lang_main.constants import SPCY_MODEL
|
||||
|
||||
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
|
||||
from lang_main.types import ObjectID, TimelineCandidates
|
||||
|
||||
# ** data
|
||||
# p_df = Path(r'../Pipe-TargetFeature_Step-3_remove_NA.pkl').resolve()
|
||||
p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
|
||||
# p_tl = Path(r'/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl').resolve()
|
||||
(data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))
|
||||
p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
|
||||
ret = cast(tuple[DataFrame], load_pickle(p_df))
|
||||
data = ret[0]
|
||||
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
|
||||
cands = ret[0]
|
||||
texts = ret[1]
|
||||
cands, texts = cast(
|
||||
tuple[TimelineCandidates, dict[ObjectID, str]], lang_main.io.load_pickle(p_tl)
|
||||
)
|
||||
|
||||
# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
|
||||
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
|
||||
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
|
||||
# data = cast(DataFrame, load_pickle(p_df))
|
||||
# cands = cast(TimelineCandidates, load_pickle(p_cands))
|
||||
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
|
||||
|
||||
table_feats = [
|
||||
TABLE_FEATS: Final[list[str]] = [
|
||||
'ErstellungsDatum',
|
||||
'ErledigungsDatum',
|
||||
'VorgangsTypName',
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
table_feats_dates = [
|
||||
TABLE_FEATS_DATES: Final[list[str]] = [
|
||||
'ErstellungsDatum',
|
||||
'ErledigungsDatum',
|
||||
]
|
||||
|
||||
# ** figure config
|
||||
markers = {
|
||||
MARKERS: Final[dict[str, Any]] = {
|
||||
'size': 12,
|
||||
'color': 'yellow',
|
||||
'line': {
|
||||
@@ -65,15 +52,15 @@ markers = {
|
||||
'color': 'red',
|
||||
},
|
||||
}
|
||||
hover_data = {
|
||||
HOVER_DATA: Final[dict[str, Any]] = {
|
||||
'ErstellungsDatum': '|%d.%m.%Y',
|
||||
'VorgangsBeschreibung': True,
|
||||
}
|
||||
|
||||
# ** graphs
|
||||
# ** graph
|
||||
target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
|
||||
p = Path(target).resolve()
|
||||
ret = load_pickle(p)
|
||||
ret = lang_main.io.load_pickle(p)
|
||||
tk_graph = cast(graphs.TokenGraph, ret[0])
|
||||
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
|
||||
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
|
||||
@@ -142,14 +129,12 @@ my_stylesheet = [
|
||||
# {'selector': '.triangle', 'style': {'shape': 'triangle'}},
|
||||
]
|
||||
|
||||
# ** app
|
||||
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
|
||||
app = Dash(__name__, external_stylesheets=external_stylesheets)
|
||||
|
||||
graph_layout = html.Div(
|
||||
[
|
||||
html.Button('Trigger JS Weight', id='test_js_weight'),
|
||||
html.Button('Trigger Candidate Graph', id='cand_graph'),
|
||||
html.Button('Trigger Candidate Graph', id='graph-build-btn'),
|
||||
dcc.Store(id='graph-store', storage_type='memory'),
|
||||
dcc.Store(id='graph-store-cyto-curr_cands', storage_type='memory'),
|
||||
html.Div(id='output'),
|
||||
html.Div(
|
||||
[
|
||||
@@ -184,7 +169,7 @@ graph_layout = html.Div(
|
||||
[
|
||||
html.H3('Graph Filter'),
|
||||
dcc.Input(
|
||||
id='weight_min',
|
||||
id='graph-weight_min',
|
||||
type='number',
|
||||
min=MIN_WEIGHT,
|
||||
max=MAX_WEIGHT,
|
||||
@@ -194,7 +179,7 @@ graph_layout = html.Div(
|
||||
style={'width': '40%'},
|
||||
),
|
||||
dcc.Input(
|
||||
id='weight_max',
|
||||
id='graph-weight_max',
|
||||
type='number',
|
||||
min=MIN_WEIGHT,
|
||||
max=MAX_WEIGHT,
|
||||
@@ -204,7 +189,7 @@ graph_layout = html.Div(
|
||||
style={'width': '40%'},
|
||||
),
|
||||
html.H3('Graph'),
|
||||
html.Button('Re-Layout', id='trigger_relayout'),
|
||||
html.Button('Re-Layout', id='graph-trigger_relayout'),
|
||||
html.Div(
|
||||
[
|
||||
cyto.Cytoscape(
|
||||
@@ -230,6 +215,12 @@ graph_layout = html.Div(
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
# ** app
|
||||
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
|
||||
app = Dash(__name__, external_stylesheets=external_stylesheets)
|
||||
|
||||
|
||||
app.layout = html.Div(
|
||||
[
|
||||
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
|
||||
@@ -238,16 +229,16 @@ app.layout = html.Div(
|
||||
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
|
||||
dcc.Dropdown(
|
||||
list(cands.keys()),
|
||||
id='dropdown-selection',
|
||||
id='selector-obj_id',
|
||||
placeholder='ObjektID auswählen...',
|
||||
),
|
||||
]
|
||||
),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H3(id='object_text'),
|
||||
dcc.Dropdown(id='choice-candidates'),
|
||||
dcc.Graph(id='graph-output'),
|
||||
html.H3(id='object-text'),
|
||||
dcc.Dropdown(id='selector-candidates'),
|
||||
dcc.Graph(id='graph-candidates'),
|
||||
]
|
||||
),
|
||||
html.Div(
|
||||
@@ -260,8 +251,8 @@ app.layout = html.Div(
|
||||
|
||||
|
||||
@callback(
|
||||
Output('object_text', 'children'),
|
||||
Input('dropdown-selection', 'value'),
|
||||
Output('object-text', 'children'),
|
||||
Input('selector-obj_id', 'value'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def update_obj_text(obj_id):
|
||||
@@ -272,21 +263,24 @@ def update_obj_text(obj_id):
|
||||
|
||||
|
||||
@callback(
|
||||
Output('choice-candidates', 'options'),
|
||||
Input('dropdown-selection', 'value'),
|
||||
[Output('selector-candidates', 'options'), Output('selector-candidates', 'value')],
|
||||
Input('selector-obj_id', 'value'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def update_choice_candidates(obj_id):
|
||||
obj_id = int(obj_id)
|
||||
cands_obj_id = cands[obj_id]
|
||||
choices = list(range(1, len(cands_obj_id) + 1))
|
||||
return choices
|
||||
return choices, choices[0]
|
||||
|
||||
|
||||
# TODO check possible storage of pre-filtered result
|
||||
# TODO change input of ``update_table_candidates`` and ``display_candidates_as_graph``
|
||||
# TODO to storage component
|
||||
@callback(
|
||||
Output('graph-output', 'figure'),
|
||||
Input('choice-candidates', 'value'),
|
||||
State('dropdown-selection', 'value'),
|
||||
Output('graph-candidates', 'figure'),
|
||||
Input('selector-candidates', 'value'),
|
||||
State('selector-obj_id', 'value'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def update_timeline(index, obj_id):
|
||||
@@ -295,19 +289,20 @@ def update_timeline(index, obj_id):
|
||||
obj_text = texts[obj_id]
|
||||
title = f'HObjektText: {obj_text}'
|
||||
# cands
|
||||
cands_obj_id = cands[obj_id]
|
||||
cands_choice = cands_obj_id[int(index) - 1]
|
||||
# cands_per_obj_id = cands[obj_id]
|
||||
# cands_similar = cands_per_obj_id[int(index) - 1]
|
||||
# data
|
||||
df = data.loc[list(cands_choice)].sort_index() # type: ignore
|
||||
# df = data.loc[list(cands_similar)].sort_index() # type: ignore
|
||||
df = pre_filter_data(data, idx=index, obj_id=obj_id)
|
||||
# figure
|
||||
fig = px.line(
|
||||
data_frame=df,
|
||||
x='ErstellungsDatum',
|
||||
y='ObjektID',
|
||||
title=title,
|
||||
hover_data=hover_data,
|
||||
hover_data=HOVER_DATA,
|
||||
)
|
||||
fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
|
||||
fig.update_traces(mode='markers+lines', marker=MARKERS, marker_symbol='diamond')
|
||||
fig.update_xaxes(
|
||||
tickformat='%B\n%Y',
|
||||
rangeslider_visible=True,
|
||||
@@ -319,24 +314,18 @@ def update_timeline(index, obj_id):
|
||||
|
||||
@callback(
|
||||
[Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
|
||||
Input('choice-candidates', 'value'),
|
||||
State('dropdown-selection', 'value'),
|
||||
Input('selector-candidates', 'value'),
|
||||
State('selector-obj_id', 'value'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def update_table_candidates(index, obj_id):
|
||||
# obj_id = int(obj_id)
|
||||
# # cands
|
||||
# cands_obj_id = cands[obj_id]
|
||||
# cands_choice = cands_obj_id[int(index) - 1]
|
||||
# # data
|
||||
# df = data.loc[list(cands_choice)].sort_index() # type: ignore
|
||||
df = pre_filter_data(data, idx=index, obj_id=obj_id)
|
||||
df = df.filter(items=table_feats, axis=1).sort_values(
|
||||
df = df.filter(items=TABLE_FEATS, axis=1).sort_values(
|
||||
by='ErstellungsDatum', ascending=True
|
||||
)
|
||||
cols = [{'name': i, 'id': i} for i in df.columns]
|
||||
# convert dates to strings
|
||||
for col in table_feats_dates:
|
||||
for col in TABLE_FEATS_DATES:
|
||||
df[col] = df[col].dt.strftime(r'%Y-%m-%d')
|
||||
|
||||
table_data = df.to_dict('records')
|
||||
@@ -348,6 +337,7 @@ def pre_filter_data(
|
||||
idx: int,
|
||||
obj_id: ObjectID,
|
||||
) -> DataFrame:
|
||||
idx = int(idx)
|
||||
obj_id = int(obj_id)
|
||||
data = data.copy()
|
||||
# cands
|
||||
@@ -359,33 +349,53 @@ def pre_filter_data(
|
||||
return data
|
||||
|
||||
|
||||
# ** graph
|
||||
# ** graph callbacks
|
||||
# TODO store pre-calculated graph
|
||||
@app.callback(
|
||||
Output('cytoscape-graph', 'elements', allow_duplicate=True),
|
||||
Output('weight_min', 'min', allow_duplicate=True),
|
||||
Output('weight_min', 'max', allow_duplicate=True),
|
||||
Output('weight_min', 'placeholder', allow_duplicate=True),
|
||||
Output('weight_max', 'min', allow_duplicate=True),
|
||||
Output('weight_max', 'max', allow_duplicate=True),
|
||||
Output('weight_max', 'placeholder', allow_duplicate=True),
|
||||
Input('cand_graph', 'n_clicks'),
|
||||
State('choice-candidates', 'value'),
|
||||
State('dropdown-selection', 'value'),
|
||||
Output('graph-weight_min', 'min', allow_duplicate=True),
|
||||
Output('graph-weight_min', 'max', allow_duplicate=True),
|
||||
Output('graph-weight_min', 'placeholder', allow_duplicate=True),
|
||||
Output('graph-weight_max', 'min', allow_duplicate=True),
|
||||
Output('graph-weight_max', 'max', allow_duplicate=True),
|
||||
Output('graph-weight_max', 'placeholder', allow_duplicate=True),
|
||||
Output('graph-store', 'data'),
|
||||
Output('graph-store-cyto-curr_cands', 'data'),
|
||||
# Input('graph-build-btn', 'n_clicks'),
|
||||
Input('selector-candidates', 'value'),
|
||||
State('selector-obj_id', 'value'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def update_graph_candidates(_, index, obj_id):
|
||||
def display_candidates_as_graph(index, obj_id):
|
||||
t1 = time.perf_counter()
|
||||
df = pre_filter_data(data, idx=index, obj_id=obj_id)
|
||||
t2 = time.perf_counter()
|
||||
print(f'Time for filtering: {t2 - t1} s')
|
||||
|
||||
t1 = time.perf_counter()
|
||||
tk_graph_cands, _ = tokens.build_token_graph(
|
||||
data=df,
|
||||
model=SPCY_MODEL,
|
||||
target_feature='VorgangsBeschreibung',
|
||||
build_map=False,
|
||||
logging_graph=False,
|
||||
)
|
||||
t2 = time.perf_counter()
|
||||
print(f'Time for graph building: {t2 - t1} s')
|
||||
|
||||
t1 = time.perf_counter()
|
||||
cyto_data, weight_info = graphs.convert_graph_to_cytoscape(tk_graph_cands)
|
||||
weight_min = weight_info['min']
|
||||
weight_max = weight_info['max']
|
||||
placeholder_min = f'Minimum edge weight: {weight_min} - {weight_max}'
|
||||
placeholder_max = f'Minimum edge weight: {weight_min} - {weight_max}'
|
||||
placeholder_max = f'Maximum edge weight: {weight_min} - {weight_max}'
|
||||
t2 = time.perf_counter()
|
||||
print(f'Time for graph metadata and conversion: {t2 - t1} s')
|
||||
|
||||
t1 = time.perf_counter()
|
||||
graph_to_store = lang_main.io.encode_to_base64_str(tk_graph_cands)
|
||||
t2 = time.perf_counter()
|
||||
print(f'Time for encoding: {t2 - t1} s')
|
||||
return (
|
||||
cyto_data,
|
||||
weight_min,
|
||||
@@ -394,6 +404,8 @@ def update_graph_candidates(_, index, obj_id):
|
||||
weight_min,
|
||||
weight_max,
|
||||
placeholder_max,
|
||||
graph_to_store,
|
||||
cyto_data,
|
||||
)
|
||||
|
||||
|
||||
@@ -412,30 +424,44 @@ def update_layout_internal(layout_choice):
|
||||
@app.callback(
|
||||
Output('cytoscape-graph', 'zoom'),
|
||||
Output('cytoscape-graph', 'elements', allow_duplicate=True),
|
||||
Output('weight_min', 'value'),
|
||||
Output('weight_max', 'value'),
|
||||
Output('graph-weight_min', 'value'),
|
||||
Output('graph-weight_max', 'value'),
|
||||
Input('bt-reset', 'n_clicks'),
|
||||
State('graph-store-cyto-curr_cands', 'data'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def reset_layout(n_clicks):
|
||||
return (1, cyto_data_base, None, None)
|
||||
def reset_layout(_, current_cands_cyto_elements):
|
||||
return (1, current_cands_cyto_elements, None, None)
|
||||
|
||||
|
||||
# update edge weight
|
||||
@app.callback(
|
||||
Output('cytoscape-graph', 'elements', allow_duplicate=True),
|
||||
Input('weight_min', 'value'),
|
||||
Input('weight_max', 'value'),
|
||||
Input('graph-weight_min', 'value'),
|
||||
Input('graph-weight_max', 'value'),
|
||||
State('graph-store', 'data'),
|
||||
State('graph-store-cyto-curr_cands', 'data'),
|
||||
State('graph-weight_min', 'min'),
|
||||
State('graph-weight_min', 'max'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def update_edge_weight(weight_min, weight_max):
|
||||
if not any([weight_min, weight_max]):
|
||||
return cyto_data_base
|
||||
def update_edge_weight(
|
||||
weight_min,
|
||||
weight_max,
|
||||
current_graph,
|
||||
current_cands_cyto_elements,
|
||||
current_min,
|
||||
current_max,
|
||||
):
|
||||
if not any((weight_min, weight_max)):
|
||||
return current_cands_cyto_elements
|
||||
|
||||
if weight_min is None:
|
||||
weight_min = MIN_WEIGHT
|
||||
weight_min = current_min
|
||||
if weight_max is None:
|
||||
weight_max = MAX_WEIGHT
|
||||
weight_max = current_max
|
||||
|
||||
tk_graph = cast(graphs.TokenGraph, lang_main.io.decode_from_base64_str(current_graph))
|
||||
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, weight_min, weight_max)
|
||||
# tk_graph_filtered = tk_graph.filter_by_edge_weight(weight_min, weight_max)
|
||||
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
|
||||
@@ -444,6 +470,7 @@ def update_edge_weight(weight_min, weight_max):
|
||||
return cyto_data
|
||||
|
||||
|
||||
# ** graph: layout with edge weight
|
||||
app.clientside_callback(
|
||||
"""
|
||||
function(n_clicks, layout) {
|
||||
@@ -458,11 +485,12 @@ app.clientside_callback(
|
||||
}
|
||||
""",
|
||||
Output('cytoscape-graph', 'layout', allow_duplicate=True),
|
||||
Input('trigger_relayout', 'n_clicks'),
|
||||
Input('graph-trigger_relayout', 'n_clicks'),
|
||||
State('cytoscape-graph', 'layout'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
|
||||
# ** graph: display edge weight (line thickness)
|
||||
app.clientside_callback(
|
||||
"""
|
||||
function(n_clicks, stylesheet) {
|
||||
Reference in New Issue
Block a user