new pipeline management, proto graph display timeline
This commit is contained in:
@@ -1,42 +1,44 @@
|
||||
import typing
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
from lang_main.analysis.graphs import TokenGraph
|
||||
from lang_main.constants import (
|
||||
DO_GRAPH_POSTPROCESSING,
|
||||
DO_PREPROCESSING,
|
||||
DO_TIME_ANALYSIS,
|
||||
DO_TOKEN_ANALYSIS,
|
||||
INPUT_PATH_FOLDER,
|
||||
PATH_TO_DATASET,
|
||||
SAVE_PATH_FOLDER,
|
||||
SKIP_GRAPH_POSTPROCESSING,
|
||||
SKIP_PREPROCESSING,
|
||||
SKIP_TIME_ANALYSIS,
|
||||
SKIP_TOKEN_ANALYSIS,
|
||||
THRESHOLD_AMOUNT_CHARACTERS,
|
||||
THRESHOLD_EDGE_WEIGHT,
|
||||
)
|
||||
from lang_main.io import create_saving_folder, load_pickle
|
||||
from lang_main.io import create_saving_folder, get_entry_point, load_pickle
|
||||
from lang_main.pipelines.base import PipelineContainer
|
||||
from lang_main.pipelines.predefined import (
|
||||
pipe_merge,
|
||||
pipe_target_feat,
|
||||
pipe_timeline,
|
||||
pipe_token_analysis,
|
||||
build_base_target_feature_pipe,
|
||||
build_merge_duplicates_pipe,
|
||||
build_timeline_pipe,
|
||||
build_tk_graph_pipe,
|
||||
build_tk_graph_post_pipe,
|
||||
)
|
||||
from lang_main.types import (
|
||||
EntryPoints,
|
||||
ObjectID,
|
||||
PandasIndex,
|
||||
SpacyDoc,
|
||||
TimelineCandidates,
|
||||
)
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
# ** build pipelines
|
||||
pipe_merge = build_merge_duplicates_pipe()
|
||||
pipe_target_feat = build_base_target_feature_pipe()
|
||||
pipe_timeline = build_timeline_pipe()
|
||||
pipe_token_analysis = build_tk_graph_pipe()
|
||||
pipe_graph_postprocessing = build_tk_graph_post_pipe()
|
||||
|
||||
|
||||
# ** processing pipeline
|
||||
def run_preprocessing() -> DataFrame:
|
||||
# ** preprocessing pipeline
|
||||
def run_preprocessing() -> None:
|
||||
create_saving_folder(
|
||||
saving_path_folder=SAVE_PATH_FOLDER,
|
||||
overwrite_existing=False,
|
||||
@@ -46,134 +48,69 @@ def run_preprocessing() -> DataFrame:
|
||||
tuple[DataFrame], pipe_target_feat.run(starting_values=(PATH_TO_DATASET,))
|
||||
)
|
||||
target_feat_data = ret[0]
|
||||
# only entries with more than threshold amount of characters
|
||||
data_filter = typing.cast(Series, (target_feat_data['len'] > THRESHOLD_AMOUNT_CHARACTERS))
|
||||
subset_data = target_feat_data.loc[data_filter].copy()
|
||||
# merge duplicates, results saved separately
|
||||
ret = typing.cast(tuple[DataFrame], pipe_merge.run(starting_values=(subset_data,)))
|
||||
preprocessed_data = ret[0]
|
||||
|
||||
return preprocessed_data
|
||||
_ = typing.cast(tuple[DataFrame], pipe_merge.run(starting_values=(target_feat_data,)))
|
||||
|
||||
|
||||
def run_token_analysis(
|
||||
preprocessed_data: DataFrame,
|
||||
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
|
||||
# ** token analysis
|
||||
def run_token_analysis() -> None:
|
||||
# load entry point
|
||||
entry_point_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TOKEN_ANALYSIS)
|
||||
loaded_results = cast(tuple[DataFrame], load_pickle(entry_point_path))
|
||||
preprocessed_data = loaded_results[0]
|
||||
# build token graph
|
||||
(tk_graph, docs_mapping) = typing.cast(
|
||||
tuple[TokenGraph, dict[PandasIndex, SpacyDoc]],
|
||||
tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
|
||||
pipe_token_analysis.run(starting_values=(preprocessed_data,)),
|
||||
)
|
||||
tk_graph.save_graph(SAVE_PATH_FOLDER, directed=False)
|
||||
tk_graph.to_pickle(SAVE_PATH_FOLDER, filename=f'{pipe_token_analysis.name}-TokenGraph')
|
||||
|
||||
return tk_graph, docs_mapping
|
||||
tk_graph.to_GraphML(SAVE_PATH_FOLDER, filename='TokenGraph', directed=False)
|
||||
|
||||
|
||||
def run_graph_postprocessing(
|
||||
tk_graph: TokenGraph,
|
||||
) -> TokenGraph:
|
||||
def run_graph_postprocessing() -> None:
|
||||
# load entry point
|
||||
entry_point_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
|
||||
loaded_results = cast(
|
||||
tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
|
||||
load_pickle(entry_point_path),
|
||||
)
|
||||
tk_graph = loaded_results[0]
|
||||
# filter graph by edge weight and remove single nodes (no connection)
|
||||
tk_graph_filtered = tk_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT)
|
||||
tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1)
|
||||
tk_graph_filtered.save_graph(
|
||||
ret = cast(tuple[TokenGraph], pipe_graph_postprocessing.run(starting_values=(tk_graph,)))
|
||||
tk_graph_filtered = ret[0]
|
||||
# tk_graph_filtered = tk_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT, None)
|
||||
# tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
|
||||
tk_graph_filtered.to_GraphML(
|
||||
SAVE_PATH_FOLDER, filename='TokenGraph-filtered', directed=False
|
||||
)
|
||||
tk_graph_filtered.to_pickle(
|
||||
SAVE_PATH_FOLDER, filename=f'{pipe_token_analysis.name}-TokenGraph-filtered'
|
||||
)
|
||||
|
||||
return tk_graph_filtered
|
||||
|
||||
|
||||
def run_time_analysis() -> tuple[TimelineCandidates, dict[ObjectID, str]]:
|
||||
filename = 'without_nan'
|
||||
loading_path = INPUT_PATH_FOLDER.joinpath(filename).with_suffix('.pkl')
|
||||
verify_path(loading_path)
|
||||
ret = load_pickle(loading_path)
|
||||
preprocessed_data = ret[0]
|
||||
# ** time analysis
|
||||
def run_time_analysis() -> None:
|
||||
# load entry point
|
||||
entry_point_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
|
||||
loaded_results = cast(tuple[DataFrame], load_pickle(entry_point_path))
|
||||
preprocessed_data = loaded_results[0]
|
||||
|
||||
ret = cast(
|
||||
_ = cast(
|
||||
tuple[TimelineCandidates, dict[ObjectID, str]],
|
||||
pipe_timeline.run(starting_values=(preprocessed_data,)),
|
||||
)
|
||||
return ret
|
||||
|
||||
|
||||
def verify_path(
    loading_path: Path,
) -> None:
    """Make sure a result file is present before it gets loaded.

    Parameters
    ----------
    loading_path : Path
        location that is about to be read

    Raises
    ------
    FileNotFoundError
        if nothing exists at ``loading_path``
    """
    # guard clause: an existing path needs no further handling
    if loading_path.exists():
        return
    raise FileNotFoundError(f'Could not load results. File not found: {loading_path}')
|
||||
def build_pipeline_container() -> PipelineContainer:
    """Create the pipeline container and register every processing stage.

    The stages are added in the order preprocessing, token analysis,
    graph postprocessing, time analysis. Each one is registered together
    with its ``SKIP_*`` constant, which is handed to the container as
    the ``skip`` argument.

    Returns
    -------
    PipelineContainer
        container with all four stages registered
    """
    # (stage function, skip flag) pairs in registration order
    stages = (
        (run_preprocessing, SKIP_PREPROCESSING),
        (run_token_analysis, SKIP_TOKEN_ANALYSIS),
        (run_graph_postprocessing, SKIP_GRAPH_POSTPROCESSING),
        (run_time_analysis, SKIP_TIME_ANALYSIS),
    )
    procedure = PipelineContainer(
        name='Pipeline-Container-Base',
        working_dir=SAVE_PATH_FOLDER,
    )
    for stage, skip_stage in stages:
        procedure.add(stage, skip=skip_stage)

    return procedure
|
||||
|
||||
|
||||
def main() -> None:
|
||||
pre_step_skipped: bool = False
|
||||
# ** preprocess
|
||||
if DO_PREPROCESSING and not SKIP_PREPROCESSING:
|
||||
preprocessed_data = run_preprocessing()
|
||||
elif not SKIP_PREPROCESSING:
|
||||
# !! hardcoded result filenames
|
||||
target_pattern: str = r'*Pipe-Merge_Duplicates_Step-1*'
|
||||
loading_path = list(SAVE_PATH_FOLDER.glob(target_pattern))[0]
|
||||
verify_path(loading_path)
|
||||
ret = typing.cast(tuple[DataFrame], load_pickle(loading_path))
|
||||
preprocessed_data = ret[0]
|
||||
else:
|
||||
pre_step_skipped = True
|
||||
warnings.warn('No preprocessing action selected. Skipped.')
|
||||
# sys.exit(0)
|
||||
# ** token analysis
|
||||
if DO_TOKEN_ANALYSIS and not SKIP_TOKEN_ANALYSIS:
|
||||
if pre_step_skipped:
|
||||
raise RuntimeError(
|
||||
'Preprocessing step skipped. Token analysis cannot be performed.'
|
||||
)
|
||||
preprocessed_data_trunc = typing.cast(
|
||||
DataFrame, preprocessed_data[['batched_idxs', 'entry', 'num_occur']].copy()
|
||||
) # type: ignore
|
||||
tk_graph, docs_mapping = run_token_analysis(preprocessed_data_trunc)
|
||||
elif not SKIP_TOKEN_ANALYSIS:
|
||||
# !! hardcoded result filenames
|
||||
# whole graph
|
||||
filename: str = f'{pipe_token_analysis.name}-TokenGraph'
|
||||
loading_path = SAVE_PATH_FOLDER.joinpath(filename).with_suffix('.pkl')
|
||||
verify_path(loading_path)
|
||||
# tk_graph = typing.cast(TokenGraph, load_pickle(loading_path))
|
||||
tk_graph = TokenGraph.from_pickle(loading_path)
|
||||
pre_step_skipped = False
|
||||
else:
|
||||
pre_step_skipped = True
|
||||
warnings.warn('No token analysis action selected. Skipped.')
|
||||
# ** graph postprocessing
|
||||
if DO_GRAPH_POSTPROCESSING and not SKIP_GRAPH_POSTPROCESSING:
|
||||
if pre_step_skipped:
|
||||
raise RuntimeError(
|
||||
(
|
||||
'Preprocessing or token analysis step skipped. '
|
||||
'Graph postprocessing cannot be performed.'
|
||||
)
|
||||
)
|
||||
tk_graph_filtered = run_graph_postprocessing(tk_graph)
|
||||
elif not SKIP_GRAPH_POSTPROCESSING:
|
||||
# !! hardcoded result filenames
|
||||
# filtered graph
|
||||
filename: str = f'{pipe_token_analysis.name}-TokenGraph-filtered'
|
||||
loading_path = SAVE_PATH_FOLDER.joinpath(filename).with_suffix('.pkl')
|
||||
verify_path(loading_path)
|
||||
# tk_graph_filtered = typing.cast(TokenGraph, load_pickle(loading_path))
|
||||
tk_graph_filtered = TokenGraph.from_pickle(loading_path)
|
||||
pre_step_skipped = False
|
||||
else:
|
||||
warnings.warn('No graph postprocessing action selected. Skipped.')
|
||||
# ** time analysis
|
||||
if DO_TIME_ANALYSIS and not SKIP_TIME_ANALYSIS:
|
||||
# no check for fails, runs separately
|
||||
ret = run_time_analysis()
|
||||
elif not SKIP_TIME_ANALYSIS:
|
||||
...
|
||||
else:
|
||||
warnings.warn('No time analysis action selected. Skipped.')
|
||||
procedure = build_pipeline_container()
|
||||
procedure.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,190 +0,0 @@
|
||||
import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import cast
|
||||
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
from dash import (
|
||||
Dash,
|
||||
Input,
|
||||
Output,
|
||||
State,
|
||||
callback,
|
||||
dash_table,
|
||||
dcc,
|
||||
html,
|
||||
)
|
||||
from lang_main.io import load_pickle
|
||||
from lang_main.types import ObjectID, TimelineCandidates
|
||||
from pandas import DataFrame
|
||||
|
||||
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
|
||||
|
||||
# ** data
|
||||
# location of the pickled target feature result
p_df = Path(r'./Pipe-TargetFeature_Step-3_remove_NA.pkl').resolve()
# NOTE(review): the leading '/' anchors this path at the filesystem root,
# whereas `p_df` is relative ('./') - confirm that this is intended
p_tl = Path(r'/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl').resolve()
# the loaded object is indexed like a tuple below, so it is declared as a
# one-element tuple wrapping the DataFrame rather than as a bare DataFrame
ret = cast(tuple[DataFrame], load_pickle(p_df))
data = ret[0]
# second pickle: (timeline candidates, mapping ObjectID -> text)
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
cands = ret[0]
texts = ret[1]
|
||||
|
||||
# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
|
||||
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
|
||||
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
|
||||
# data = cast(DataFrame, load_pickle(p_df))
|
||||
# cands = cast(TimelineCandidates, load_pickle(p_cands))
|
||||
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
|
||||
|
||||
table_feats = [
|
||||
'ErstellungsDatum',
|
||||
'ErledigungsDatum',
|
||||
'VorgangsTypName',
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
table_feats_dates = [
|
||||
'ErstellungsDatum',
|
||||
'ErledigungsDatum',
|
||||
]
|
||||
|
||||
# ** graph config
|
||||
markers = {
|
||||
'size': 12,
|
||||
'color': 'yellow',
|
||||
'line': {
|
||||
'width': 2,
|
||||
'color': 'red',
|
||||
},
|
||||
}
|
||||
hover_data = {
|
||||
'ErstellungsDatum': '|%d.%m.%Y',
|
||||
'VorgangsBeschreibung': True,
|
||||
}
|
||||
|
||||
|
||||
app = Dash(prevent_initial_callbacks=True)
|
||||
|
||||
app.layout = [
|
||||
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
|
||||
dcc.Dropdown(
|
||||
list(cands.keys()),
|
||||
id='dropdown-selection',
|
||||
placeholder='ObjektID auswählen...',
|
||||
),
|
||||
]
|
||||
),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H3(id='object_text'),
|
||||
dcc.Dropdown(id='choice-candidates'),
|
||||
dcc.Graph(id='graph-output'),
|
||||
]
|
||||
),
|
||||
html.Div(children=[dash_table.DataTable(id='table-candidates')]),
|
||||
]
|
||||
|
||||
|
||||
@callback(
    Output('object_text', 'children'),
    Input('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_obj_text(obj_id):
    """Build the headline text for the currently selected object.

    Parameters
    ----------
    obj_id
        value of the object dropdown; ``None`` when the selection was cleared

    Returns
    -------
    str
        headline containing the text stored in ``texts`` for the object

    Raises
    ------
    PreventUpdate
        if no object is selected, so the headline is left untouched
    """
    from dash.exceptions import PreventUpdate

    # a cleared dropdown delivers None, which int() can not convert
    if obj_id is None:
        raise PreventUpdate
    obj_id = int(obj_id)
    obj_text = texts[obj_id]
    headline = f'HObjektText: {obj_text}'
    return headline
|
||||
|
||||
|
||||
@callback(
    Output('choice-candidates', 'options'),
    Input('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_choice_candidates(obj_id):
    """Provide the selectable candidate numbers for the chosen object.

    Parameters
    ----------
    obj_id
        value of the object dropdown; ``None`` when the selection was cleared

    Returns
    -------
    list[int]
        1-based positions, one per entry of ``cands[obj_id]``

    Raises
    ------
    PreventUpdate
        if no object is selected, so the options are left untouched
    """
    from dash.exceptions import PreventUpdate

    # a cleared dropdown delivers None, which int() can not convert
    if obj_id is None:
        raise PreventUpdate
    obj_id = int(obj_id)
    cands_obj_id = cands[obj_id]
    # options are shown 1-based; consumers subtract 1 again before indexing
    choices = list(range(1, len(cands_obj_id) + 1))
    return choices
|
||||
|
||||
|
||||
@callback(
    Output('graph-output', 'figure'),
    Input('choice-candidates', 'value'),
    State('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_timeline(index, obj_id):
    """Draw the timeline figure for one candidate group of an object.

    Parameters
    ----------
    index
        1-based candidate number chosen in the candidate dropdown;
        ``None`` when that dropdown was cleared
    obj_id
        value of the object dropdown; ``None`` when nothing is selected

    Returns
    -------
    plotly figure
        line plot of the candidate entries over 'ErstellungsDatum'

    Raises
    ------
    PreventUpdate
        if either selection is missing, so the graph is left untouched
    """
    from dash.exceptions import PreventUpdate

    # cleared dropdowns deliver None, which int() can not convert
    if index is None or obj_id is None:
        raise PreventUpdate
    obj_id = int(obj_id)
    # title
    obj_text = texts[obj_id]
    title = f'HObjektText: {obj_text}'
    # cands: dropdown values are 1-based, the candidate list is 0-based
    cands_obj_id = cands[obj_id]
    cands_choice = cands_obj_id[int(index) - 1]
    # data: rows of the chosen candidate group, ordered by index
    df = data.loc[list(cands_choice)].sort_index()  # type: ignore
    # figure
    fig = px.line(
        data_frame=df,
        x='ErstellungsDatum',
        y='ObjektID',
        title=title,
        hover_data=hover_data,
    )
    fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
    fig.update_xaxes(
        tickformat='%B\n%Y',
        rangeslider_visible=True,
    )
    fig.update_yaxes(type='category')
    fig.update_layout(hovermode='x unified')
    return fig
|
||||
|
||||
|
||||
@callback(
    [Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
    Input('choice-candidates', 'value'),
    State('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_table_candidates(index, obj_id):
    """Fill the data table with the entries of one candidate group.

    Parameters
    ----------
    index
        1-based candidate number chosen in the candidate dropdown;
        ``None`` when that dropdown was cleared
    obj_id
        value of the object dropdown; ``None`` when nothing is selected

    Returns
    -------
    tuple
        table records and the matching column definitions

    Raises
    ------
    PreventUpdate
        if either selection is missing, so the table is left untouched
    """
    from dash.exceptions import PreventUpdate

    # cleared dropdowns deliver None, which int() can not convert
    if index is None or obj_id is None:
        raise PreventUpdate
    obj_id = int(obj_id)
    # cands: dropdown values are 1-based, the candidate list is 0-based
    cands_obj_id = cands[obj_id]
    cands_choice = cands_obj_id[int(index) - 1]
    # data: keep only the table columns, ordered by creation date
    df = data.loc[list(cands_choice)].sort_index()  # type: ignore
    df = df.filter(items=table_feats, axis=1).sort_values(
        by='ErstellungsDatum', ascending=True
    )
    cols = [{'name': i, 'id': i} for i in df.columns]
    # convert dates to strings
    for col in table_feats_dates:
        df[col] = df[col].dt.strftime(r'%Y-%m-%d')

    table_data = df.to_dict('records')
    return table_data, cols
|
||||
|
||||
|
||||
def _start_webbrowser():
    """Open the local dashboard address in a new browser window.

    Waits two seconds before opening (presumably so the server, which is
    started separately, is already reachable - confirm against caller).
    """
    host, port = '127.0.0.1', '8050'
    time.sleep(2)
    webbrowser.open_new(f'http://{host}:{port}/')
|
||||
|
||||
|
||||
def main():
    """Start the browser opener in a daemon thread, then run the Dash app."""
    # daemon thread: it must not keep the process alive once the app stops
    Thread(target=_start_webbrowser, daemon=True).start()
    app.run(debug=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,9 +1,9 @@
|
||||
import copy
|
||||
import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import cast
|
||||
import copy
|
||||
|
||||
import dash_cytoscape as cyto
|
||||
from dash import Dash, Input, Output, State, dcc, html
|
||||
@@ -30,20 +30,20 @@ app = Dash(__name__, external_stylesheets=external_stylesheets)
|
||||
|
||||
cose_layout = {
|
||||
'name': 'cose',
|
||||
'nodeOverlap': 20,
|
||||
'nodeOverlap': 500,
|
||||
'refresh': 20,
|
||||
'fit': True,
|
||||
'padding': 30,
|
||||
'randomize': True,
|
||||
'componentSpacing': 40,
|
||||
'nodeRepulsion': 2000,
|
||||
'padding': 20,
|
||||
'randomize': False,
|
||||
'componentSpacing': 1.2,
|
||||
'nodeRepulsion': 1000,
|
||||
'edgeElasticity': 1000,
|
||||
'idealEdgeLength': 100,
|
||||
'nestingFactor': 1.2,
|
||||
'gravity': 50,
|
||||
'numIter': 2000,
|
||||
'initialTemp': 1000,
|
||||
'coolingFactor': 0.95,
|
||||
'numIter': 3000,
|
||||
'initialTemp': 2000,
|
||||
'coolingFactor': 0.7,
|
||||
'minTemp': 1.0,
|
||||
'nodeDimensionsIncludeLabels': True,
|
||||
}
|
||||
@@ -108,9 +108,8 @@ my_stylesheet = [
|
||||
# {'selector': '.triangle', 'style': {'shape': 'triangle'}},
|
||||
]
|
||||
|
||||
app.layout = html.Div(
|
||||
layout = html.Div(
|
||||
[
|
||||
html.Button('Trigger JS Layout', id='test_js'),
|
||||
html.Button('Trigger JS Weight', id='test_js_weight'),
|
||||
html.Div(id='output'),
|
||||
html.Div(
|
||||
@@ -166,11 +165,13 @@ app.layout = html.Div(
|
||||
style={'width': '40%'},
|
||||
),
|
||||
html.H3('Graph'),
|
||||
html.Button('Re-Layout', id='trigger_relayout'),
|
||||
html.Div(
|
||||
[
|
||||
cyto.Cytoscape(
|
||||
id='cytoscape-graph',
|
||||
style={'width': '100%', 'height': '600px'},
|
||||
layout=cose_layout,
|
||||
stylesheet=my_stylesheet,
|
||||
elements=cyto_data_base,
|
||||
zoom=1,
|
||||
@@ -192,6 +193,9 @@ app.layout = html.Div(
|
||||
)
|
||||
|
||||
|
||||
app.layout = layout
|
||||
|
||||
|
||||
@app.callback(
|
||||
Output('cytoscape-graph', 'layout', allow_duplicate=True),
|
||||
Input('layout_choice', 'value'),
|
||||
@@ -266,17 +270,17 @@ app.clientside_callback(
|
||||
"""
|
||||
function(n_clicks, layout) {
|
||||
layout.edgeElasticity = function(edge) {
|
||||
return edge.data().weight * 4;
|
||||
return edge.data().weight * 0.05;
|
||||
};
|
||||
layout.idealEdgeLength = function(edge) {
|
||||
return edge.data().weight * 0.8;
|
||||
return edge.data().weight * 0.4;
|
||||
};
|
||||
cy.layout(layout).run();
|
||||
return layout;
|
||||
}
|
||||
""",
|
||||
Output('cytoscape-graph', 'layout', allow_duplicate=True),
|
||||
Input('test_js', 'n_clicks'),
|
||||
Input('trigger_relayout', 'n_clicks'),
|
||||
State('cytoscape-graph', 'layout'),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
|
||||
@@ -1,368 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import dash
|
||||
import dash_cytoscape as cyto
|
||||
from dash import Input, Output, State, callback, dcc, html
|
||||
|
||||
# Load extra layouts
|
||||
cyto.load_extra_layouts()
|
||||
|
||||
|
||||
# Display utility functions
|
||||
def _merge(a, b):
|
||||
return dict(a, **b)
|
||||
|
||||
|
||||
def _omit(omitted_keys, d):
|
||||
return {k: v for k, v in d.items() if k not in omitted_keys}
|
||||
|
||||
|
||||
# Custom Display Components
|
||||
def Card(children, **kwargs):
    """Wrap ``children`` in an ``html.Section`` with the default card look.

    A ``style`` keyword is merged on top of the defaults; all remaining
    keyword arguments are passed on to ``html.Section`` unchanged.
    """
    base_style = {
        'padding': 20,
        'margin': 5,
        'borderRadius': 5,
        'border': 'thin lightgrey solid',
        'background-color': 'white',
        # Remove possibility to select the text for better UX
        'user-select': 'none',
        '-moz-user-select': 'none',
        '-webkit-user-select': 'none',
        '-ms-user-select': 'none',
    }
    style_overrides = kwargs.get('style', {})
    # 'style' is handled above and must not be forwarded a second time
    passthrough = _omit(['style'], kwargs)
    return html.Section(
        children,
        style=_merge(base_style, style_overrides),
        **passthrough,
    )
|
||||
|
||||
|
||||
def SectionTitle(title, size, align='center', color='#222'):
    """Render ``title`` as a Markdown heading inside a styled ``html.Div``.

    ``size`` is the number of leading '#' characters, ``align`` and
    ``color`` are applied to the surrounding Div.
    """
    heading = '#' * size + ' ' + title
    wrapper_style = {'text-align': align, 'color': color}
    return html.Div(style=wrapper_style, children=dcc.Markdown(heading))
|
||||
|
||||
|
||||
def NamedCard(title, size, children, **kwargs):
    """Build a ``Card`` headed by a left-aligned title, wrapped in a Div.

    ``size`` is clamped to the range 1..6 before it is used as heading
    level; ``children`` is appended after the title and therefore has to
    be a list. Extra keyword arguments are forwarded to ``Card``.
    """
    level = max(min(size, 6), 1)
    header = SectionTitle(title, level, align='left')
    card = Card([header] + children, **kwargs)
    return html.Div([card])
|
||||
|
||||
|
||||
def NamedSlider(name, **kwargs):
    """Return a Div with a '<name>:' caption above a ``dcc.Slider``.

    All keyword arguments are forwarded to the slider.
    """
    caption = html.P(f'{name}:')
    slider_box = html.Div(style={'margin-left': '6px'}, children=dcc.Slider(**kwargs))
    return html.Div(
        style={'padding': '20px 10px 25px 4px'},
        children=[caption, slider_box],
    )
|
||||
|
||||
|
||||
def NamedDropdown(name, **kwargs):
    """Return a Div with a '<name>:' caption above a ``dcc.Dropdown``.

    All keyword arguments are forwarded to the dropdown.
    """
    caption = html.P(children=f'{name}:', style={'margin-left': '3px'})
    dropdown = dcc.Dropdown(**kwargs)
    return html.Div(style={'margin': '10px 0px'}, children=[caption, dropdown])
|
||||
|
||||
|
||||
def NamedRadioItems(name, **kwargs):
    """Return a Div with a '<name>:' caption above ``dcc.RadioItems``.

    All keyword arguments are forwarded to the radio items.
    """
    caption = html.P(children=f'{name}:')
    radio_items = dcc.RadioItems(**kwargs)
    return html.Div(
        style={'padding': '20px 10px 25px 4px'},
        children=[caption, radio_items],
    )
|
||||
|
||||
|
||||
def NamedInput(name, **kwargs):
    """Return a Div with a '<name>:' caption above a ``dcc.Input``.

    All keyword arguments are forwarded to the input.
    """
    caption = html.P(children=f'{name}:')
    input_field = dcc.Input(**kwargs)
    return html.Div(children=[caption, input_field])
|
||||
|
||||
|
||||
# Utils
|
||||
def DropdownOptionsList(*args):
    """Turn the given values into a list of label/value option dicts.

    The label is the capitalized value, the value is passed through as is.
    """
    options = []
    for val in args:
        options.append({'label': val.capitalize(), 'value': val})
    return options
|
||||
|
||||
|
||||
asset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'assets')
|
||||
|
||||
app = dash.Dash(__name__, assets_folder=asset_path)
|
||||
server = app.server
|
||||
|
||||
|
||||
# ###################### DATA PREPROCESSING ######################
|
||||
# Load data
|
||||
with open('sample_network.txt', 'r', encoding='utf-8') as f:
    network_data = f.read().split('\n')

# We select the first 750 edges and associated nodes for an easier visualization
edges = network_data[:750]
nodes = set()  # ids of all nodes already registered in `cy_nodes`

following_node_di = {}  # user id -> list of users they are following
following_edges_di = {}  # user id -> list of cy edges starting from user id

followers_node_di = {}  # user id -> list of followers (cy_node format)
followers_edges_di = {}  # user id -> list of cy edges ending at user id

cy_edges = []
cy_nodes = []

for edge in edges:
    # lines without a separator (e.g. a trailing empty line) carry no edge
    if ' ' not in edge:
        continue

    source, target = edge.split(' ')

    cy_edge = {'data': {'id': source + target, 'source': source, 'target': target}}
    cy_target = {'data': {'id': target, 'label': 'User #' + str(target[-5:])}}
    cy_source = {'data': {'id': source, 'label': 'User #' + str(source[-5:])}}

    # register each endpoint once, source first
    for node_id, cy_node in ((source, cy_source), (target, cy_target)):
        if node_id not in nodes:
            nodes.add(node_id)
            cy_nodes.append(cy_node)

    # Process dictionary of following
    following_node_di.setdefault(source, []).append(cy_target)
    following_edges_di.setdefault(source, []).append(cy_edge)

    # Process dictionary of followers
    followers_node_di.setdefault(target, []).append(cy_source)
    followers_edges_di.setdefault(target, []).append(cy_edge)

# the first registered node is the only element shown initially
genesis_node = cy_nodes[0]
genesis_node['classes'] = 'genesis'
default_elements = [genesis_node]
|
||||
|
||||
default_stylesheet = [
|
||||
{'selector': 'node', 'style': {'opacity': 0.65, 'z-index': 9999}},
|
||||
{
|
||||
'selector': 'edge',
|
||||
'style': {'curve-style': 'bezier', 'opacity': 0.45, 'z-index': 5000},
|
||||
},
|
||||
{'selector': '.followerNode', 'style': {'background-color': '#0074D9'}},
|
||||
{
|
||||
'selector': '.followerEdge',
|
||||
'style': {
|
||||
'mid-target-arrow-color': 'blue',
|
||||
'mid-target-arrow-shape': 'vee',
|
||||
'line-color': '#0074D9',
|
||||
},
|
||||
},
|
||||
{'selector': '.followingNode', 'style': {'background-color': '#FF4136'}},
|
||||
{
|
||||
'selector': '.followingEdge',
|
||||
'style': {
|
||||
'mid-target-arrow-color': 'red',
|
||||
'mid-target-arrow-shape': 'vee',
|
||||
'line-color': '#FF4136',
|
||||
},
|
||||
},
|
||||
{
|
||||
'selector': '.genesis',
|
||||
'style': {
|
||||
'background-color': '#B10DC9',
|
||||
'border-width': 2,
|
||||
'border-color': 'purple',
|
||||
'border-opacity': 1,
|
||||
'opacity': 1,
|
||||
'label': 'data(label)',
|
||||
'color': '#B10DC9',
|
||||
'text-opacity': 1,
|
||||
'font-size': 12,
|
||||
'z-index': 9999,
|
||||
},
|
||||
},
|
||||
{
|
||||
'selector': ':selected',
|
||||
'style': {
|
||||
'border-width': 2,
|
||||
'border-color': 'black',
|
||||
'border-opacity': 1,
|
||||
'opacity': 1,
|
||||
'label': 'data(label)',
|
||||
'color': 'black',
|
||||
'font-size': 12,
|
||||
'z-index': 9999,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
# ################################# APP LAYOUT ################################
|
||||
styles = {
|
||||
'json-output': {
|
||||
'overflow-y': 'scroll',
|
||||
'height': 'calc(50% - 25px)',
|
||||
'border': 'thin lightgrey solid',
|
||||
},
|
||||
'tab': {'height': 'calc(98vh - 80px)'},
|
||||
}
|
||||
|
||||
app.layout = html.Div(
|
||||
[
|
||||
html.Div(
|
||||
className='eight columns',
|
||||
children=[
|
||||
cyto.Cytoscape(
|
||||
id='cytoscape',
|
||||
elements=default_elements,
|
||||
stylesheet=default_stylesheet,
|
||||
style={'height': '95vh', 'width': '100%'},
|
||||
)
|
||||
],
|
||||
),
|
||||
html.Div(
|
||||
className='four columns',
|
||||
children=[
|
||||
dcc.Tabs(
|
||||
id='tabs',
|
||||
children=[
|
||||
dcc.Tab(
|
||||
label='Control Panel',
|
||||
children=[
|
||||
NamedDropdown(
|
||||
name='Layout',
|
||||
id='dropdown-layout',
|
||||
options=DropdownOptionsList(
|
||||
'random',
|
||||
'grid',
|
||||
'circle',
|
||||
'concentric',
|
||||
'breadthfirst',
|
||||
'cose',
|
||||
'cose-bilkent',
|
||||
'dagre',
|
||||
'cola',
|
||||
'klay',
|
||||
'spread',
|
||||
'euler',
|
||||
),
|
||||
value='grid',
|
||||
clearable=False,
|
||||
),
|
||||
NamedRadioItems(
|
||||
name='Expand',
|
||||
id='radio-expand',
|
||||
options=DropdownOptionsList('followers', 'following'),
|
||||
value='followers',
|
||||
),
|
||||
],
|
||||
),
|
||||
dcc.Tab(
|
||||
label='JSON',
|
||||
children=[
|
||||
html.Div(
|
||||
style=styles['tab'],
|
||||
children=[
|
||||
html.P('Node Object JSON:'),
|
||||
html.Pre(
|
||||
id='tap-node-json-output',
|
||||
style=styles['json-output'],
|
||||
),
|
||||
html.P('Edge Object JSON:'),
|
||||
html.Pre(
|
||||
id='tap-edge-json-output',
|
||||
style=styles['json-output'],
|
||||
),
|
||||
],
|
||||
)
|
||||
],
|
||||
),
|
||||
],
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
# ############################## CALLBACKS ####################################
|
||||
@callback(
    Output('tap-node-json-output', 'children'),
    Input('cytoscape', 'tapNode'),
)
def display_tap_node(data):
    """Return the tapped node object as indented JSON text."""
    node_json = json.dumps(data, indent=2)
    return node_json
|
||||
|
||||
|
||||
@callback(
    Output('tap-edge-json-output', 'children'),
    Input('cytoscape', 'tapEdge'),
)
def display_tap_edge(data):
    """Return the tapped edge object as indented JSON text."""
    edge_json = json.dumps(data, indent=2)
    return edge_json
|
||||
|
||||
|
||||
@callback(
    Output('cytoscape', 'layout'),
    Input('dropdown-layout', 'value'),
)
def update_cytoscape_layout(layout):
    """Return a layout dict that only carries the chosen layout name."""
    layout_config = {'name': layout}
    return layout_config
|
||||
|
||||
|
||||
@callback(
    Output('cytoscape', 'elements'),
    Input('cytoscape', 'tapNodeData'),
    State('cytoscape', 'elements'),
    State('radio-expand', 'value'),
)
def generate_elements(nodeData, elements, expansion_mode):
    """Expand the tapped node by its followers or by the users it follows.

    Without tap data the default elements are returned. A node that is
    already flagged as expanded leaves the elements unchanged. Otherwise
    the tapped element is flagged and, depending on ``expansion_mode``
    ('followers' or 'following'), the related nodes and edges are tagged
    with a class and added to ``elements``.
    """
    if not nodeData:
        return default_elements

    # If the node has already been expanded, we don't expand it again
    if nodeData.get('expanded'):
        return elements

    # This retrieves the currently selected element, and tag it as expanded
    tapped = next(
        (el for el in elements if nodeData['id'] == el.get('data').get('id')),
        None,
    )
    if tapped is not None:
        tapped['data']['expanded'] = True

    if expansion_mode == 'followers':
        # a missing entry is treated like an empty one: nothing is added
        new_nodes = followers_node_di.get(nodeData['id']) or []
        new_edges = followers_edges_di.get(nodeData['id']) or []

        for node in new_nodes:
            node['classes'] = 'followerNode'
        elements.extend(new_nodes)

        for follower_edge in new_edges:
            follower_edge['classes'] = 'followerEdge'
        elements.extend(new_edges)

    elif expansion_mode == 'following':
        new_nodes = following_node_di.get(nodeData['id']) or []
        new_edges = following_edges_di.get(nodeData['id']) or []

        for node in new_nodes:
            # the genesis node is neither re-tagged nor added again
            if node['data']['id'] == genesis_node['data']['id']:
                continue
            node['classes'] = 'followingNode'
            elements.append(node)

        for follower_edge in new_edges:
            follower_edge['classes'] = 'followingEdge'
        elements.extend(new_edges)

    return elements
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run_server(debug=True)
|
||||
File diff suppressed because it is too large
Load Diff
507
scripts/dashboard/timeline.py
Normal file
507
scripts/dashboard/timeline.py
Normal file
@@ -0,0 +1,507 @@
|
||||
import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import cast
|
||||
|
||||
import dash_cytoscape as cyto
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
from dash import (
|
||||
Dash,
|
||||
Input,
|
||||
Output,
|
||||
State,
|
||||
callback,
|
||||
dash_table,
|
||||
dcc,
|
||||
html,
|
||||
)
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.analysis import graphs
|
||||
from lang_main.io import load_pickle
|
||||
from lang_main.types import ObjectID, TimelineCandidates
|
||||
from lang_main.analysis import tokens
|
||||
from lang_main.constants import SPCY_MODEL
|
||||
|
||||
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')

# ** data
# Pickled results that back the timeline view. The paths are relative, so
# `resolve()` anchors them to the current working directory of the process.
# p_df = Path(r'../Pipe-TargetFeature_Step-3_remove_NA.pkl').resolve()
p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
# p_tl = Path(r'/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl').resolve()
p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
# Both pickles are read as tuples; `ret` is only a temporary holder.
ret = cast(tuple[DataFrame], load_pickle(p_df))
# Source table; the callbacks select rows from it with `data.loc[...]`.
data = ret[0]
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
# Timeline candidates, looked up by object ID in the callbacks.
cands = ret[0]
# Text per object ID, shown as headline and figure title.
texts = ret[1]

# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
# data = cast(DataFrame, load_pickle(p_df))
# cands = cast(TimelineCandidates, load_pickle(p_cands))
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
|
||||
|
||||
# Date columns; the candidate table renders them as formatted strings.
table_feats_dates = [
    'ErstellungsDatum',
    'ErledigungsDatum',
]
# Columns shown in the candidate table: the date columns first, followed by
# the descriptive columns.
table_feats = [
    *table_feats_dates,
    'VorgangsTypName',
    'VorgangsBeschreibung',
]
|
||||
|
||||
# ** figure config
|
||||
# Marker styling handed to `fig.update_traces(marker=...)`.
_marker_outline = {'width': 2, 'color': 'red'}
markers = {'size': 12, 'color': 'yellow', 'line': _marker_outline}

# Per-column hover settings handed to `px.line(hover_data=...)`.
hover_data = {}
hover_data['ErstellungsDatum'] = '|%d.%m.%Y'
hover_data['VorgangsBeschreibung'] = True
|
||||
|
||||
# ** graphs
|
||||
# Pickled token graph; the relative path is resolved against the current
# working directory.
# NOTE(review): this reads from `test_20240529`, while the timeline pickles in
# this file are read from `test_20240619` — confirm that this is intended.
target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
p = Path(target).resolve()
ret = load_pickle(p)
# Only the first element of the pickled result is used.
tk_graph = cast(graphs.TokenGraph, ret[0])
# Base view shown on page load: filter by edge weight (lower bound 150, no
# upper bound), then by node degree (arguments 1 and None; presumably also a
# lower and an upper bound — verify against `graphs`).
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
# tk_graph_filtered = tk_graph.filter_by_edge_weight(150, None)
# tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
# Cytoscape elements of the base view plus a mapping with the keys 'min' and
# 'max' that is read below.
cyto_data_base, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)

# Weight bounds of the base view; used as limits, placeholders and fallback
# values of the edge-weight inputs.
MIN_WEIGHT = weight_data['min']
MAX_WEIGHT = weight_data['max']
|
||||
|
||||
# Load dash-cytoscape's additional layouts (presumably required for the
# 'cola' and 'euler' entries offered in the layout dropdown — confirm).
cyto.load_extra_layouts()

# Layout options for the graph component; used as its initial `layout` and
# returned by the layout-choice callback. The "Re-Layout" clientside callback
# replaces `edgeElasticity` and `idealEdgeLength` with per-edge functions.
cose_layout = {
    'name': 'cose',
    'nodeOverlap': 500,
    'refresh': 20,
    'fit': True,
    'padding': 20,
    'randomize': False,
    'componentSpacing': 1.2,
    'nodeRepulsion': 1000,
    'edgeElasticity': 1000,
    'idealEdgeLength': 100,
    'nestingFactor': 1.2,
    'gravity': 50,
    'numIter': 3000,
    'initialTemp': 2000,
    'coolingFactor': 0.7,
    'minTemp': 1.0,
    'nodeDimensionsIncludeLabels': True,
}
|
||||
|
||||
# Stylesheet of the graph component.
# NOTE: the order of the entries matters — the edge-width clientside callback
# writes to `stylesheet[1].style.width`, so the 'edge' entry must stay at
# index 1.
my_stylesheet = [
    # Group selectors
    {
        'selector': 'node',
        'style': {
            'shape': 'circle',
            'content': 'data(label)',
            'background-color': '#B10DC9',
            'border-width': 2,
            'border-color': 'black',
            'border-opacity': 1,
            'opacity': 1,
            'color': 'black',
            'text-opacity': 1,
            'font-size': 12,
            'z-index': 9999,
        },
    },
    {
        'selector': 'edge',
        'style': {
            # No 'width' here: it is assigned at runtime by the edge-width
            # clientside callback.
            #'width': f'mapData(weight, {MIN_WEIGHT}, {MAX_WEIGHT}, 1, 10)',
            # 'width': """function(ele) {
            #     return ele.data('weight');
            # """,
            'curve-style': 'bezier',
            'line-color': 'grey',
            'line-style': 'solid',
            'line-opacity': 1,
        },
    },
    # Class selectors
    # {'selector': '.red', 'style': {'background-color': 'red', 'line-color': 'red'}},
    # {'selector': '.triangle', 'style': {'shape': 'triangle'}},
]
|
||||
|
||||
# ** app
|
||||
# The stylesheet is fetched from an external URL by the browser.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# Application object; the layout and all callbacks below are attached to it.
app = Dash(__name__, external_stylesheets=external_stylesheets)
|
||||
|
||||
# Token-graph section of the page, assembled from named parts.

# Headline with the reset button pushed to the right edge.
_graph_title_bar = html.Div(
    [
        html.H2('Token Graph', style={'margin': 0}),
        html.Button(
            'Reset Default',
            id='bt-reset',
            style={'marginLeft': 'auto', 'width': '300px'},
        ),
    ],
    style={'display': 'flex', 'marginBottom': '1em'},
)

# Selector for the graph layout algorithm.
_layout_dropdown = dcc.Dropdown(
    id='layout_choice',
    options=['cose', 'cola', 'euler', 'random'],
    value='cose',
    clearable=False,
)

# Numeric inputs for the lower and upper edge-weight bound.
_weight_min_input = dcc.Input(
    id='weight_min',
    type='number',
    min=MIN_WEIGHT,
    max=MAX_WEIGHT,
    step=1,
    placeholder=f'Minimum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
    debounce=True,
    style={'width': '40%'},
)
_weight_max_input = dcc.Input(
    id='weight_max',
    type='number',
    min=MIN_WEIGHT,
    max=MAX_WEIGHT,
    step=1,
    placeholder=f'Maximum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
    debounce=True,
    style={'width': '40%'},
)

# Framed canvas holding the Cytoscape component.
_graph_canvas = html.Div(
    [
        cyto.Cytoscape(
            id='cytoscape-graph',
            style={'width': '100%', 'height': '600px'},
            layout=cose_layout,
            stylesheet=my_stylesheet,
            elements=cyto_data_base,
            zoom=1,
        ),
    ],
    style={
        'border': '3px solid black',
        'borderRadius': '25px',
        'marginTop': '1em',
        'marginBottom': '2em',
        'padding': '7px',
    },
)

# Filter inputs, re-layout button and the graph itself.
_graph_filter_section = html.Div(
    [
        html.H3('Graph Filter'),
        _weight_min_input,
        _weight_max_input,
        html.H3('Graph'),
        html.Button('Re-Layout', id='trigger_relayout'),
        _graph_canvas,
    ],
    style={'marginTop': '1em'},
)

graph_layout = html.Div(
    [
        html.Button('Trigger JS Weight', id='test_js_weight'),
        html.Button('Trigger Candidate Graph', id='cand_graph'),
        html.Div(id='output'),
        _graph_title_bar,
        html.H3('Layout'),
        _layout_dropdown,
        _graph_filter_section,
    ],
)
|
||||
|
||||
# Page layout, assembled from named parts.

# Dropdown for choosing the object whose timeline candidates are inspected.
_object_picker = html.Div(
    children=[
        html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
        dcc.Dropdown(
            list(cands.keys()),
            id='dropdown-selection',
            placeholder='ObjektID auswählen...',
        ),
    ]
)

# Object headline, candidate selector and timeline figure.
_timeline_panel = html.Div(
    children=[
        html.H3(id='object_text'),
        dcc.Dropdown(id='choice-candidates'),
        dcc.Graph(id='graph-output'),
    ]
)

# Table listing the entries of the chosen candidate.
_candidate_table = html.Div(
    [dash_table.DataTable(id='table-candidates')],
    style={'marginBottom': '2em'},
)

app.layout = html.Div(
    [
        html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
        _object_picker,
        _timeline_panel,
        _candidate_table,
        graph_layout,
    ],
    style={'margin': '2em'},
)
|
||||
|
||||
|
||||
@callback(
    Output('object_text', 'children'),
    Input('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_obj_text(obj_id):
    """Build the headline for the object selected in the dropdown.

    Args:
        obj_id: Value of the `dropdown-selection` dropdown, or `None` once
            the selection has been cleared.

    Returns:
        The headline containing the text stored in `texts` for the object,
        or an empty string when nothing is selected.
    """
    if obj_id is None:
        # The dropdown can be cleared, which fires this callback with `None`;
        # `int(None)` would raise, so the headline is emptied instead.
        return ''
    return f'HObjektText: {texts[int(obj_id)]}'
|
||||
|
||||
|
||||
@callback(
    Output('choice-candidates', 'options'),
    Input('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_choice_candidates(obj_id):
    """Offer one numbered choice per timeline candidate of the object.

    Args:
        obj_id: Value of the `dropdown-selection` dropdown, or `None` once
            the selection has been cleared.

    Returns:
        The list `1..n`, where `n` is the number of candidates stored in
        `cands` for the object; an empty list when nothing is selected.
    """
    if obj_id is None:
        # A cleared object selection has no candidates to offer.
        return []
    candidate_count = len(cands[int(obj_id)])
    # Choices are 1-based; the other callbacks subtract 1 again.
    return list(range(1, candidate_count + 1))
|
||||
|
||||
|
||||
@callback(
    Output('graph-output', 'figure'),
    Input('choice-candidates', 'value'),
    State('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_timeline(index, obj_id):
    """Draw the timeline figure for the chosen candidate of an object.

    Args:
        index: 1-based candidate number from `choice-candidates`, or `None`
            when no candidate is chosen.
        obj_id: Value of the `dropdown-selection` dropdown, or `None` when
            no object is selected.

    Returns:
        A line figure with 'ErstellungsDatum' on the x-axis and 'ObjektID'
        on the y-axis for the rows of the chosen candidate.

    Raises:
        PreventUpdate: If no candidate or no object is selected, so the
            current figure is left untouched.
    """
    from dash.exceptions import PreventUpdate

    if index is None or obj_id is None:
        # Without this guard `int(None)` would raise inside the callback.
        raise PreventUpdate

    title = f'HObjektText: {texts[int(obj_id)]}'
    # Same row selection as used for the candidate table.
    df = pre_filter_data(data, idx=index, obj_id=obj_id)

    fig = px.line(
        data_frame=df,
        x='ErstellungsDatum',
        y='ObjektID',
        title=title,
        hover_data=hover_data,
    )
    fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
    fig.update_xaxes(
        tickformat='%B\n%Y',
        rangeslider_visible=True,
    )
    fig.update_yaxes(type='category')
    fig.update_layout(hovermode='x unified')
    return fig
|
||||
|
||||
|
||||
@callback(
    [Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
    Input('choice-candidates', 'value'),
    State('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_table_candidates(index, obj_id):
    """Fill the candidate table with the rows of the chosen candidate.

    Args:
        index: 1-based candidate number from `choice-candidates`, or `None`
            when no candidate is chosen.
        obj_id: Value of the `dropdown-selection` dropdown, or `None` when
            no object is selected.

    Returns:
        A tuple `(records, columns)`: the table rows as a list of dicts and
        the matching column definitions.

    Raises:
        PreventUpdate: If no candidate or no object is selected, so the
            current table is left untouched.
    """
    from dash.exceptions import PreventUpdate

    if index is None or obj_id is None:
        # Without this guard `int(None)` would raise in `pre_filter_data`.
        raise PreventUpdate

    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    df = df.filter(items=table_feats, axis=1).sort_values(
        by='ErstellungsDatum', ascending=True
    )
    cols = [{'name': name, 'id': name} for name in df.columns]
    # Convert the date columns to strings before exporting the records.
    for col in table_feats_dates:
        df[col] = df[col].dt.strftime(r'%Y-%m-%d')

    table_data = df.to_dict('records')
    return table_data, cols
|
||||
|
||||
|
||||
def pre_filter_data(
    data: DataFrame,
    idx: int,
    obj_id: ObjectID,
) -> DataFrame:
    """Select the rows of one timeline candidate of an object.

    Args:
        data: Table to select from; it is not modified.
        idx: 1-based number of the candidate within the object's candidates.
        obj_id: Object whose candidates are looked up in `cands`.

    Returns:
        A new DataFrame holding the candidate's rows, sorted by index.
    """
    candidates = cands[int(obj_id)]
    # The UI numbers candidates from 1, the stored sequence from 0.
    chosen = candidates[int(idx) - 1]
    # Selecting with a list of labels already yields a new DataFrame, so the
    # full table does not have to be copied beforehand.
    return data.loc[list(chosen)].sort_index()  # type: ignore
|
||||
|
||||
|
||||
# ** graph
|
||||
@app.callback(
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Output('weight_min', 'min', allow_duplicate=True),
    Output('weight_min', 'max', allow_duplicate=True),
    Output('weight_min', 'placeholder', allow_duplicate=True),
    Output('weight_max', 'min', allow_duplicate=True),
    Output('weight_max', 'max', allow_duplicate=True),
    Output('weight_max', 'placeholder', allow_duplicate=True),
    Input('cand_graph', 'n_clicks'),
    State('choice-candidates', 'value'),
    State('dropdown-selection', 'value'),
    prevent_initial_call=True,
)
def update_graph_candidates(_, index, obj_id):
    """Show the token graph built from the rows of the chosen candidate.

    Args:
        _: Click count of the trigger button (unused).
        index: 1-based candidate number from `choice-candidates`, or `None`
            when no candidate is chosen.
        obj_id: Value of the `dropdown-selection` dropdown, or `None` when
            no object is selected.

    Returns:
        A 7-tuple: the Cytoscape elements, followed by `min`, `max` and
        `placeholder` for the `weight_min` input and the same three values
        for the `weight_max` input.

    Raises:
        PreventUpdate: If no candidate or no object is selected, so the
            current graph is left untouched.
    """
    from dash.exceptions import PreventUpdate

    if index is None or obj_id is None:
        # Without this guard `int(None)` would raise in `pre_filter_data`.
        raise PreventUpdate

    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    tk_graph_cands, _ = tokens.build_token_graph(
        data=df,
        model=SPCY_MODEL,
        target_feature='VorgangsBeschreibung',
        build_map=False,
    )
    cyto_data, weight_info = graphs.convert_graph_to_cytoscape(tk_graph_cands)
    weight_min = weight_info['min']
    weight_max = weight_info['max']
    placeholder_min = f'Minimum edge weight: {weight_min} - {weight_max}'
    # Fixed: this label previously read "Minimum" for the maximum input,
    # unlike the placeholder used in the page layout.
    placeholder_max = f'Maximum edge weight: {weight_min} - {weight_max}'
    return (
        cyto_data,
        weight_min,
        weight_max,
        placeholder_min,
        weight_min,
        weight_max,
        placeholder_max,
    )
|
||||
|
||||
|
||||
@app.callback(
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('layout_choice', 'value'),
    prevent_initial_call=True,
)
def update_layout_internal(layout_choice):
    """Return the layout options for the layout chosen in the dropdown.

    Args:
        layout_choice: Layout name selected in the `layout_choice` dropdown.

    Returns:
        The tuned `cose_layout` options for 'cose'; for any other choice a
        layout dict that only carries the layout name.
    """
    # Previously the selection was ignored and `cose_layout` was always
    # returned, which made the dropdown a no-op.
    if layout_choice == 'cose':
        return cose_layout
    return {'name': layout_choice}
|
||||
|
||||
|
||||
@app.callback(
    Output('cytoscape-graph', 'zoom'),
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Output('weight_min', 'value'),
    Output('weight_max', 'value'),
    Input('bt-reset', 'n_clicks'),
    prevent_initial_call=True,
)
def reset_layout(n_clicks):
    """Restore the default graph view.

    Args:
        n_clicks: Click count of the reset button (unused).

    Returns:
        A 4-tuple: zoom `1`, the base graph elements, and `None` for each of
        the two edge-weight inputs.
    """
    default_zoom = 1
    cleared_input = None
    return default_zoom, cyto_data_base, cleared_input, cleared_input
|
||||
|
||||
|
||||
# update edge weight
|
||||
@app.callback(
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Input('weight_min', 'value'),
    Input('weight_max', 'value'),
    prevent_initial_call=True,
)
def update_edge_weight(weight_min, weight_max):
    """Re-filter the token graph by the entered edge-weight bounds.

    Args:
        weight_min: Value of the `weight_min` input, possibly `None`.
        weight_max: Value of the `weight_max` input, possibly `None`.

    Returns:
        The base graph elements when neither input holds a truthy value;
        otherwise the elements of `tk_graph` filtered by the bounds, with a
        missing bound replaced by `MIN_WEIGHT` or `MAX_WEIGHT`.
    """
    if not (weight_min or weight_max):
        return cyto_data_base

    lower = MIN_WEIGHT if weight_min is None else weight_min
    upper = MAX_WEIGHT if weight_max is None else weight_max

    # Filter by edge weight first, then by node degree, as for the base view.
    by_weight = graphs.filter_graph_by_edge_weight(tk_graph, lower, upper)
    by_degree = graphs.filter_graph_by_node_degree(by_weight, 1, None)
    elements, _ = graphs.convert_graph_to_cytoscape(by_degree)
    return elements
|
||||
|
||||
|
||||
# Clientside handler for the "Re-Layout" button. It replaces the
# `edgeElasticity` and `idealEdgeLength` entries of the current layout with
# functions of each edge's `weight`, runs that layout on `cy` and returns
# the modified layout object as the new `layout` property.
# NOTE(review): `cy` is not defined in this file — presumably a global
# Cytoscape instance exposed by dash-cytoscape; confirm.
app.clientside_callback(
    """
    function(n_clicks, layout) {
        layout.edgeElasticity = function(edge) {
            return edge.data().weight * 0.05;
        };
        layout.idealEdgeLength = function(edge) {
            return edge.data().weight * 0.4;
        };
        cy.layout(layout).run();
        return layout;
    }
    """,
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('trigger_relayout', 'n_clicks'),
    State('cytoscape-graph', 'layout'),
    prevent_initial_call=True,
)
|
||||
|
||||
# Clientside handler that derives the edge width from the edge's `weight`:
# weights above the threshold of 1000 get width 12, all others are scaled to
# `weight / 1000 * 10` with a lower limit of 1. The function is stored as
# `width` of the stylesheet entry at index 1 and applied through `cy`.
# `prevent_initial_call=False` means it also runs once on page load, not
# only when the 'Trigger JS Weight' button is clicked.
# NOTE(review): `cy` is not defined in this file — presumably a global
# Cytoscape instance exposed by dash-cytoscape; confirm.
app.clientside_callback(
    """
    function(n_clicks, stylesheet) {
        function edge_weight(ele) {
            let threshold = 1000;
            let weight = ele.data('weight');
            if (weight > threshold) {
                weight = 12;
            } else {
                weight = weight / threshold * 10;
                weight = Math.max(1, weight);
            }
            return weight;
        }
        stylesheet[1].style.width = edge_weight;
        cy.style(stylesheet).update();
        return stylesheet;
    }
    """,
    Output('cytoscape-graph', 'stylesheet'),
    Input('test_js_weight', 'n_clicks'),
    State('cytoscape-graph', 'stylesheet'),
    prevent_initial_call=False,
)
|
||||
|
||||
|
||||
def _start_webbrowser(host='127.0.0.1', port=8050, delay=2.0):
    """Open the dashboard URL in a new browser window after a delay.

    Args:
        host: Host part of the dashboard URL.
        port: Port part of the dashboard URL.
        delay: Seconds to wait before opening the browser, so the server has
            time to start.
    """
    address = f'http://{host}:{port}/'
    time.sleep(delay)
    webbrowser.open_new(address)
|
||||
|
||||
|
||||
def main():
    """Start the dashboard in debug mode and open it in the browser."""
    import os

    # In debug mode the development server's reloader runs this script again
    # in a child process, marked by WERKZEUG_RUN_MAIN=true. Opening the
    # browser only outside that child avoids a duplicate window on start and
    # a new one on every reload.
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        webbrowser_thread = Thread(target=_start_webbrowser, daemon=True)
        webbrowser_thread.start()
    app.run(debug=True)


if __name__ == '__main__':
    main()
|
||||
@@ -1,38 +0,0 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = 'A:/Arbeitsaufgaben/lang-main/scripts'
|
||||
results = 'A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/'
|
||||
dataset = 'A:/Arbeitsaufgaben/lang-main/data/02_202307/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
[control]
|
||||
preprocessing = true
|
||||
preprocessing_skip = false
|
||||
token_analysis = false
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing = false
|
||||
graph_postprocessing_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
[preprocess]
|
||||
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
|
||||
[time_analysis]
|
||||
threshold_unique_texts = 5
|
||||
@@ -2,22 +2,20 @@
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_20240529/'
|
||||
results = './results/test_20240619/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
# only debugging features, production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing = false
|
||||
preprocessing_skip = false
|
||||
token_analysis = true
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing = false
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing_skip = true
|
||||
time_analysis = false
|
||||
time_analysis_skip = true
|
||||
time_analysis_skip = false
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
Reference in New Issue
Block a user