restructuring project
This commit is contained in:
@@ -1,193 +0,0 @@
|
||||
import cProfile
|
||||
import pstats
|
||||
import typing
|
||||
from pathlib import Path
|
||||
from typing import Final, cast
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.analysis.graphs import (
|
||||
Graph,
|
||||
TokenGraph,
|
||||
save_to_GraphML,
|
||||
)
|
||||
from lang_main.constants import (
|
||||
CYTO_BASE_NETWORK_NAME,
|
||||
PATH_TO_DATASET,
|
||||
SAVE_PATH_FOLDER,
|
||||
SKIP_GRAPH_POSTPROCESSING,
|
||||
SKIP_GRAPH_RESCALING,
|
||||
SKIP_GRAPH_STATIC_RENDERING,
|
||||
SKIP_PREPROCESSING,
|
||||
SKIP_TIME_ANALYSIS,
|
||||
SKIP_TOKEN_ANALYSIS,
|
||||
)
|
||||
from lang_main.io import create_saving_folder, get_entry_point, load_pickle
|
||||
from lang_main.pipelines.base import PipelineContainer
|
||||
from lang_main.pipelines.predefined import (
|
||||
build_base_target_feature_pipe,
|
||||
build_merge_duplicates_pipe,
|
||||
build_timeline_pipe,
|
||||
build_tk_graph_pipe,
|
||||
build_tk_graph_post_pipe,
|
||||
build_tk_graph_render_pipe,
|
||||
build_tk_graph_rescaling_pipe,
|
||||
)
|
||||
from lang_main.types import (
|
||||
EntryPoints,
|
||||
ObjectID,
|
||||
PandasIndex,
|
||||
SpacyDoc,
|
||||
TimelineCandidates,
|
||||
)
|
||||
|
||||
# ** profiling
# Switches read by the ``__main__`` guard of this script:
#   USE_PROFILING          run ``main()`` under cProfile and print the stats
#   ONLY_PROFILING_REPORT  only print the stats of an existing report file
#   PROFILE_REPORT_NAME    file name of the cProfile stats dump
USE_PROFILING: Final[bool] = False
ONLY_PROFILING_REPORT: Final[bool] = False
PROFILE_REPORT_NAME: Final[str] = 'prof_report.profdata'
|
||||
|
||||
# ** build pipelines
# Every pipeline is built once at import time; the ``run_*`` step functions of
# this script only call ``.run(...)`` on these module-level objects.
pipe_target_feat = build_base_target_feature_pipe()
pipe_merge = build_merge_duplicates_pipe()
pipe_token_analysis = build_tk_graph_pipe()
pipe_graph_postprocessing = build_tk_graph_post_pipe()
pipe_graph_rescaling = build_tk_graph_rescaling_pipe(
    exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    save_result=True,
)
pipe_static_graph_rendering = build_tk_graph_render_pipe(
    base_network_name=CYTO_BASE_NETWORK_NAME,
    with_subgraphs=True,
)
pipe_timeline = build_timeline_pipe()
|
||||
|
||||
|
||||
# ** preprocessing pipeline
|
||||
def run_preprocessing() -> None:
    """Run the target-feature pipeline followed by the duplicate-merge pipeline.

    The saving folder is created first (``overwrite_existing=False``). The
    first element returned by the target-feature pipeline is handed to the
    merge pipeline; the merge result itself is not used here.
    """
    create_saving_folder(
        overwrite_existing=False,
        saving_path_folder=SAVE_PATH_FOLDER,
    )
    feature_results = cast(
        tuple[DataFrame],
        pipe_target_feat.run(starting_values=(PATH_TO_DATASET,)),
    )
    pipe_merge.run(starting_values=(feature_results[0],))
|
||||
|
||||
|
||||
# ** token analysis
|
||||
def run_token_analysis() -> None:
    """Build the token graph from the stored ``TOKEN_ANALYSIS`` entry point.

    Loads the pickled entry-point result, runs the token-analysis pipeline on
    its first element and writes the graph as ``TokenGraph`` GraphML
    (``directed=False``) into ``SAVE_PATH_FOLDER``.
    """
    source_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TOKEN_ANALYSIS)
    stored = cast(tuple[DataFrame], load_pickle(source_path))
    analysis_result = cast(
        tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
        pipe_token_analysis.run(starting_values=(stored[0],)),
    )
    # The pipeline yields exactly two values; only the graph is needed.
    tk_graph, _unused_docs = analysis_result
    tk_graph.to_GraphML(SAVE_PATH_FOLDER, filename='TokenGraph', directed=False)
|
||||
|
||||
|
||||
def run_graph_postprocessing() -> None:
    """Post-process the stored token graph and export the filtered version.

    Loads the pickled ``TK_GRAPH_POST`` entry point, runs the post-processing
    pipeline on the contained graph (per the original comment: filter by edge
    weight and remove unconnected nodes) and writes the first pipeline result
    as ``TokenGraph-filtered`` GraphML (``directed=False``).
    """
    source_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
    stored = cast(
        tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
        load_pickle(source_path),
    )
    post_result = cast(
        tuple[TokenGraph],
        pipe_graph_postprocessing.run(starting_values=(stored[0],)),
    )
    post_result[0].to_GraphML(
        SAVE_PATH_FOLDER,
        filename='TokenGraph-filtered',
        directed=False,
    )
|
||||
|
||||
|
||||
def run_graph_edge_rescaling() -> None:
    """Rescale the stored token graph and export both resulting graphs.

    Loads the pickled ``TK_GRAPH_ANALYSIS`` entry point, runs the rescaling
    pipeline on its first element and writes two GraphML files: one from the
    rescaled ``TokenGraph`` and one from the rescaled undirected ``Graph``.
    """
    source_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_ANALYSIS)
    stored = cast(tuple[TokenGraph], load_pickle(source_path))
    rescaling_result = cast(
        tuple[TokenGraph, Graph],
        pipe_graph_rescaling.run(starting_values=(stored[0],)),
    )
    tk_graph_rescaled, tk_graph_rescaled_undirected = rescaling_result

    # NOTE(review): the file name says "directed" while ``directed=False`` is
    # passed -- confirm which graph variant is meant to be exported here.
    tk_graph_rescaled.to_GraphML(
        SAVE_PATH_FOLDER,
        filename='TokenGraph-directed-rescaled',
        directed=False,
    )
    save_to_GraphML(
        tk_graph_rescaled_undirected,
        saving_path=SAVE_PATH_FOLDER,
        filename='TokenGraph-undirected-rescaled',
    )
|
||||
|
||||
|
||||
def run_static_graph_rendering() -> None:
    """Render the rescaled undirected graph with the static rendering pipeline.

    Loads the pickled ``TK_GRAPH_ANALYSIS_RESCALED`` entry point, which holds a
    ``(TokenGraph, Graph)`` pair, and passes only the second element (the
    undirected graph) to the rendering pipeline. The pipeline result is not
    used.
    """
    entry_point_path = get_entry_point(
        SAVE_PATH_FOLDER,
        EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )
    loaded_results = cast(
        tuple[TokenGraph, Graph],
        load_pickle(entry_point_path),
    )
    # Only the undirected graph is rendered; the first element was previously
    # bound to an unused local and is therefore no longer extracted.
    tk_graph_rescaled_undirected = loaded_results[1]

    pipe_static_graph_rendering.run(starting_values=(tk_graph_rescaled_undirected,))
|
||||
|
||||
|
||||
# ** time analysis
|
||||
def run_time_analysis() -> None:
    """Run the timeline pipeline on the stored ``TIMELINE`` entry point.

    Loads the pickled entry-point result and feeds its first element to the
    timeline pipeline. The pipeline result is not used here.
    """
    source_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
    stored = cast(tuple[DataFrame], load_pickle(source_path))
    pipe_timeline.run(starting_values=(stored[0],))
|
||||
|
||||
|
||||
def build_pipeline_container() -> PipelineContainer:
    """Assemble all processing steps into one ``PipelineContainer``.

    Each step is registered together with its ``SKIP_*`` constant, in the same
    fixed order as before: preprocessing, token analysis, graph
    post-processing, edge rescaling, static rendering, time analysis.
    """
    container = PipelineContainer(
        name='Pipeline-Container-Base',
        working_dir=SAVE_PATH_FOLDER,
    )
    steps = (
        (run_preprocessing, SKIP_PREPROCESSING),
        (run_token_analysis, SKIP_TOKEN_ANALYSIS),
        (run_graph_postprocessing, SKIP_GRAPH_POSTPROCESSING),
        (run_graph_edge_rescaling, SKIP_GRAPH_RESCALING),
        (run_static_graph_rendering, SKIP_GRAPH_STATIC_RENDERING),
        (run_time_analysis, SKIP_TIME_ANALYSIS),
    )
    for action, skip_step in steps:
        container.add(action, skip=skip_step)

    return container
|
||||
|
||||
|
||||
def main() -> None:
    """Build the pipeline container and run it."""
    build_pipeline_container().run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The profiling folder below the current working directory is created in
    # every mode, including the plain (non-profiling) run.
    report_path = Path.cwd() / 'profiling'
    if not report_path.exists():
        report_path.mkdir(parents=True, exist_ok=True)
    report_file = report_path / PROFILE_REPORT_NAME

    if not (ONLY_PROFILING_REPORT or USE_PROFILING):
        main()
    else:
        if ONLY_PROFILING_REPORT:
            # Report-only mode: read an existing stats dump, print 60 rows.
            n_rows = 60
        else:
            # Profiling mode: record a fresh stats dump, print 30 rows.
            cProfile.run('main()', str(report_file))
            n_rows = 30
        p_stats = pstats.Stats(str(report_file))
        for sort_key in (pstats.SortKey.CUMULATIVE, 'tottime'):
            p_stats.sort_stats(sort_key).print_stats(n_rows)
|
||||
1
scripts/build.ps1
Normal file
1
scripts/build.ps1
Normal file
@@ -0,0 +1 @@
|
||||
# Build the project with PDM, passing `build/` via the `-d` option.
pdm build -d build/
|
||||
2
scripts/bump_prerelease_num.ps1
Normal file
2
scripts/bump_prerelease_num.ps1
Normal file
@@ -0,0 +1,2 @@
|
||||
# Bump the `pre_n` version part with bump-my-version, run through PDM
# (presumably the pre-release number, going by the script name -- confirm in the bump-my-version config).
pdm run bump-my-version bump pre_n
|
||||
# Print the current version after the bump.
pdm run bump-my-version show current_version
|
||||
2
scripts/bump_release_type.ps1
Normal file
2
scripts/bump_release_type.ps1
Normal file
@@ -0,0 +1,2 @@
|
||||
# Bump the `pre_l` version part with bump-my-version, run through PDM
# (presumably the pre-release label/type, going by the script name -- confirm in the bump-my-version config).
pdm run bump-my-version bump pre_l
|
||||
# Print the current version after the bump.
pdm run bump-my-version show current_version
|
||||
337
scripts/cyto.py
337
scripts/cyto.py
@@ -1,337 +0,0 @@
|
||||
import copy
|
||||
import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import cast
|
||||
|
||||
import dash_cytoscape as cyto
|
||||
from dash import Dash, Input, Output, State, dcc, html
|
||||
from dash.exceptions import PreventUpdate
|
||||
|
||||
import lang_main.io
|
||||
from lang_main.analysis import graphs
|
||||
|
||||
# Hard-coded, relative location of a pickled pipeline result; ``resolve()``
# makes it absolute relative to the current working directory.
target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
p = Path(target).resolve()
ret = lang_main.io.load_pickle(p)
tk_graph = cast(graphs.TokenGraph, ret[0])

# Initial view: edge-weight filter with bounds (150, None), followed by a
# node-degree filter with bounds (1, None).
tk_graph_filtered = graphs.filter_graph_by_node_degree(
    graphs.filter_graph_by_edge_weight(tk_graph, 150, None),
    1,
    None,
)
cyto_data_base, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)

# Weight range of the initial view; used as limits of the filter inputs.
MIN_WEIGHT, MAX_WEIGHT = weight_data['min'], weight_data['max']

cyto.load_extra_layouts()
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)
|
||||
|
||||
# Layout option sets for the Cytoscape component. The keys are passed through
# unchanged as the component's ``layout`` property.
cose_layout = dict(
    name='cose',
    nodeOverlap=500,
    refresh=20,
    fit=True,
    padding=20,
    randomize=False,
    componentSpacing=1.2,
    nodeRepulsion=1000,
    edgeElasticity=1000,
    idealEdgeLength=100,
    nestingFactor=1.2,
    gravity=50,
    numIter=3000,
    initialTemp=2000,
    coolingFactor=0.7,
    minTemp=1.0,
    nodeDimensionsIncludeLabels=True,
)

cose_bilkent_layout = dict(
    name='cose-bilkent',
    nodeDimensionsIncludeLabels=True,
    idealEdgeLength=100,
    edgeElasticity=0.45,
    nodeRepulsion=10000,
    nestingFactor=0.1,
    gravity=0.25,
    numIter=2500,
    initialTemp=1000,
    coolingFactor=0.95,
    minTemp=1.0,
)

cola_layout = dict(
    name='cola',
    nodeDimensionsIncludeLabels=True,
    nodeSpacing=30,
    edgeLength=45,
    animate=True,
    centerGraph=True,
    randomize=False,
)
|
||||
|
||||
# Style applied to every node (group selector ``node``).
_node_style = {
    'shape': 'circle',
    'content': 'data(label)',
    'background-color': '#B10DC9',
    'border-width': 2,
    'border-color': 'black',
    'border-opacity': 1,
    'opacity': 1,
    'color': 'black',
    'text-opacity': 1,
    'font-size': 12,
    'z-index': 9999,
}

# Style applied to every edge (group selector ``edge``). No ``width`` is set
# here; a clientside callback in this script assigns it at runtime.
_edge_style = {
    'curve-style': 'bezier',
    'line-color': 'grey',
    'line-style': 'solid',
    'line-opacity': 1,
}

# The edge rule must stay at index 1: the clientside callback addresses it as
# ``stylesheet[1]``.
my_stylesheet = [
    {'selector': 'node', 'style': _node_style},
    {'selector': 'edge', 'style': _edge_style},
]
|
||||
|
||||
def _weight_input(component_id, placeholder_text):
    """Create one numeric edge-weight input limited to MIN_WEIGHT..MAX_WEIGHT."""
    return dcc.Input(
        id=component_id,
        type='number',
        min=MIN_WEIGHT,
        max=MAX_WEIGHT,
        step=1,
        placeholder=placeholder_text,
        debounce=True,
        style={'width': '40%'},
    )


# Headline row: title on the left, reset button pushed to the right.
_title_row = html.Div(
    [
        html.H2('Token Graph', style={'margin': 0}),
        html.Button(
            'Reset Default',
            id='bt-reset',
            style={'marginLeft': 'auto', 'width': '300px'},
        ),
    ],
    style={'display': 'flex', 'marginBottom': '1em'},
)

_layout_dropdown = dcc.Dropdown(
    id='layout_choice',
    options=['cose', 'cola', 'euler', 'random'],
    value='cose',
    clearable=False,
)

# Bordered frame that holds the Cytoscape component itself.
_graph_frame = html.Div(
    [
        cyto.Cytoscape(
            id='cytoscape-graph',
            style={'width': '100%', 'height': '600px'},
            layout=cose_layout,
            stylesheet=my_stylesheet,
            elements=cyto_data_base,
            zoom=1,
        ),
    ],
    style={
        'border': '3px solid black',
        'borderRadius': '25px',
        'marginTop': '1em',
        'marginBottom': '2em',
        'padding': '7px',
    },
)

_filter_and_graph = html.Div(
    [
        html.H3('Graph Filter'),
        _weight_input(
            'weight_min',
            f'Minimum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
        ),
        _weight_input(
            'weight_max',
            f'Maximum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
        ),
        html.H3('Graph'),
        html.Button('Re-Layout', id='trigger_relayout'),
        _graph_frame,
    ],
    style={'marginTop': '1em'},
)

layout = html.Div(
    [
        html.Button('Trigger JS Weight', id='test_js_weight'),
        html.Div(id='output'),
        _title_row,
        html.H3('Layout'),
        _layout_dropdown,
        _filter_and_graph,
    ],
    style={'margin': '2em'},
)

app.layout = layout
|
||||
|
||||
|
||||
@app.callback(
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('layout_choice', 'value'),
    prevent_initial_call=True,
)
def update_layout_internal(layout_choice):
    """Return the Cytoscape layout options for the selected dropdown entry.

    Previously the selection was ignored and ``cose_layout`` was always
    returned, so the layout dropdown had no effect. Layouts with a tuned
    option set in this module use it; any other choice falls back to the bare
    ``{'name': ...}`` form.

    Args:
        layout_choice: Value of the ``layout_choice`` dropdown.

    Returns:
        A layout dictionary for the ``cytoscape-graph`` component.
    """
    tuned_layouts = {
        'cose': cose_layout,
        'cola': cola_layout,
    }
    return tuned_layouts.get(layout_choice, {'name': layout_choice})
|
||||
|
||||
|
||||
@app.callback(
    Output('cytoscape-graph', 'zoom'),
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Output('weight_min', 'value'),
    Output('weight_max', 'value'),
    Input('bt-reset', 'n_clicks'),
    prevent_initial_call=True,
)
def reset_layout(n_clicks):
    """Restore zoom 1 and the base elements, and clear both weight inputs.

    The click counter only triggers the callback; its value is not used.
    """
    default_zoom = 1
    cleared_input = None
    return (default_zoom, cyto_data_base, cleared_input, cleared_input)
|
||||
|
||||
|
||||
# update edge weight
|
||||
@app.callback(
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Input('weight_min', 'value'),
    Input('weight_max', 'value'),
    prevent_initial_call=True,
)
def update_edge_weight(weight_min, weight_max):
    """Re-filter the token graph by the entered edge-weight bounds.

    Args:
        weight_min: Lower bound from the ``weight_min`` input or ``None``.
        weight_max: Upper bound from the ``weight_max`` input or ``None``.

    Returns:
        The base elements when neither bound is set, otherwise the Cytoscape
        elements of the newly filtered graph.
    """
    # Explicit ``None`` checks: a plain truthiness test would treat an entered
    # bound of 0 as "not set".
    if weight_min is None and weight_max is None:
        return cyto_data_base

    lower = MIN_WEIGHT if weight_min is None else weight_min
    upper = MAX_WEIGHT if weight_max is None else weight_max

    filtered = graphs.filter_graph_by_edge_weight(tk_graph, lower, upper)
    filtered = graphs.filter_graph_by_node_degree(filtered, 1, None)
    cyto_data, _ = graphs.convert_graph_to_cytoscape(filtered)
    return cyto_data
|
||||
|
||||
|
||||
# app.clientside_callback(
|
||||
# """
|
||||
# function(n_clicks, layout) {
|
||||
# let threshold = 1000;
|
||||
# layout.edgeLength = function(edge) {
|
||||
# let weight = edge.data().weight;
|
||||
# let length;
|
||||
# if (weight > threshold) {
|
||||
# length = 10;
|
||||
# } else {
|
||||
# length = 1000 / edge.data().weight;
|
||||
# length = Math.max(20, length);
|
||||
# }
|
||||
# return length;
|
||||
# };
|
||||
# cy.layout(layout).run();
|
||||
# return layout;
|
||||
# }
|
||||
# """,
|
||||
# Output('cytoscape-graph', 'layout', allow_duplicate=True),
|
||||
# Input('test_js', 'n_clicks'),
|
||||
# State('cytoscape-graph', 'layout'),
|
||||
# prevent_initial_call=True,
|
||||
# )
|
||||
|
||||
# Clientside (JavaScript) handler of the "Re-Layout" button: derives edge
# elasticity and ideal edge length from each edge's ``weight`` and re-runs the
# layout in the browser.
_RELAYOUT_JS = """
function(n_clicks, layout) {
    layout.edgeElasticity = function(edge) {
        return edge.data().weight * 0.05;
    };
    layout.idealEdgeLength = function(edge) {
        return edge.data().weight * 0.4;
    };
    cy.layout(layout).run();
    return layout;
}
"""

app.clientside_callback(
    _RELAYOUT_JS,
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('trigger_relayout', 'n_clicks'),
    State('cytoscape-graph', 'layout'),
    prevent_initial_call=True,
)
|
||||
|
||||
# Clientside (JavaScript) handler that sets the edge width from the edge
# weight: weights above 1000 get width 12, all others ``weight / 1000 * 10``
# with a minimum of 1. It is registered with ``prevent_initial_call=False``.
_EDGE_WIDTH_JS = """
function(n_clicks, stylesheet) {
    function edge_weight(ele) {
        let threshold = 1000;
        let weight = ele.data('weight');
        if (weight > threshold) {
            weight = 12;
        } else {
            weight = weight / threshold * 10;
            weight = Math.max(1, weight);
        }
        return weight;
    }
    stylesheet[1].style.width = edge_weight;
    cy.style(stylesheet).update();
    return stylesheet;
}
"""

app.clientside_callback(
    _EDGE_WIDTH_JS,
    Output('cytoscape-graph', 'stylesheet'),
    Input('test_js_weight', 'n_clicks'),
    State('cytoscape-graph', 'stylesheet'),
    prevent_initial_call=False,
)
|
||||
|
||||
|
||||
def _start_webbrowser():
    """Wait two seconds, then open the local app URL in a new browser window."""
    host, port = '127.0.0.1', '8050'
    time.sleep(2)
    webbrowser.open_new(f'http://{host}:{port}/')
|
||||
|
||||
|
||||
def main():
    """Start the browser-opener thread, then run the Dash app in debug mode."""
    # Daemon thread: it must not keep the process alive after the server stops.
    Thread(target=_start_webbrowser, daemon=True).start()
    app.run(debug=True)


if __name__ == '__main__':
    main()
|
||||
@@ -1,538 +0,0 @@
|
||||
import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import Any, Final, cast
|
||||
|
||||
import dash_cytoscape as cyto
|
||||
import plotly.express as px
|
||||
from dash import (
|
||||
Dash,
|
||||
Input,
|
||||
Output,
|
||||
State,
|
||||
callback,
|
||||
dash_table,
|
||||
dcc,
|
||||
html,
|
||||
)
|
||||
from pandas import DataFrame
|
||||
|
||||
import lang_main.io
|
||||
from lang_main.analysis import graphs, tokens
|
||||
from lang_main.constants import SAVE_PATH_FOLDER, SPCY_MODEL
|
||||
from lang_main.types import EntryPoints, ObjectID, TimelineCandidates
|
||||
|
||||
# ** data
# Timeline data: the pickled TIMELINE entry point must hold exactly one
# DataFrame (single-element unpacking).
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
(data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))

# Timeline candidates and the texts per object ID from the TIMELINE_POST
# entry point.
p_tl = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
_timeline_post = cast(
    tuple[TimelineCandidates, dict[ObjectID, str]],
    lang_main.io.load_pickle(p_tl),
)
cands, texts = _timeline_post
|
||||
|
||||
|
||||
# Columns shown in the candidates table.
TABLE_FEATS: Final[list[str]] = [
    'ErstellungsDatum',
    'ErledigungsDatum',
    'VorgangsTypName',
    'VorgangsBeschreibung',
]
# Subset of the table columns that is converted to date strings for display.
TABLE_FEATS_DATES: Final[list[str]] = ['ErstellungsDatum', 'ErledigungsDatum']

# ** figure config
# Marker styling passed to ``update_traces`` of the timeline figure.
MARKERS: Final[dict[str, Any]] = dict(
    size=12,
    color='yellow',
    line=dict(width=2, color='red'),
)
# ``hover_data`` argument of the timeline figure.
HOVER_DATA: Final[dict[str, Any]] = {
    'ErstellungsDatum': '|%d.%m.%Y',
    'VorgangsBeschreibung': True,
}
|
||||
|
||||
# ** graph
# Token graph from the TK_GRAPH_POST entry point; only the first element of
# the pickled result is used.
p_tk_graph = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
ret = lang_main.io.load_pickle(p_tk_graph)
tk_graph = cast(graphs.TokenGraph, ret[0])

# Initial view: edge-weight filter with bounds (150, None), followed by a
# node-degree filter with bounds (1, None).
tk_graph_filtered = graphs.filter_graph_by_node_degree(
    graphs.filter_graph_by_edge_weight(tk_graph, 150, None),
    1,
    None,
)
cyto_data_base, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)

# Weight range of the initial view; used as limits of the filter inputs.
MIN_WEIGHT, MAX_WEIGHT = weight_data['min'], weight_data['max']

cyto.load_extra_layouts()
|
||||
|
||||
# Option set for the ``cose`` layout of the Cytoscape component; the keys are
# passed through unchanged as the component's ``layout`` property.
cose_layout = dict(
    name='cose',
    nodeOverlap=500,
    refresh=20,
    fit=True,
    padding=20,
    randomize=False,
    componentSpacing=1.2,
    nodeRepulsion=1000,
    edgeElasticity=1000,
    idealEdgeLength=100,
    nestingFactor=1.2,
    gravity=50,
    numIter=3000,
    initialTemp=2000,
    coolingFactor=0.7,
    minTemp=1.0,
    nodeDimensionsIncludeLabels=True,
)
|
||||
|
||||
# Style applied to every node (group selector ``node``).
_node_style = {
    'shape': 'circle',
    'content': 'data(label)',
    'background-color': '#B10DC9',
    'border-width': 2,
    'border-color': 'black',
    'border-opacity': 1,
    'opacity': 1,
    'color': 'black',
    'text-opacity': 1,
    'font-size': 12,
    'z-index': 9999,
}

# Style applied to every edge (group selector ``edge``). No ``width`` is set
# here; a clientside callback in this script assigns it at runtime.
_edge_style = {
    'curve-style': 'bezier',
    'line-color': 'grey',
    'line-style': 'solid',
    'line-opacity': 1,
}

# The edge rule must stay at index 1: the clientside callback addresses it as
# ``stylesheet[1]``.
my_stylesheet = [
    {'selector': 'node', 'style': _node_style},
    {'selector': 'edge', 'style': _edge_style},
]
|
||||
|
||||
def _weight_input(component_id, placeholder_text):
    """Create one numeric edge-weight input limited to MIN_WEIGHT..MAX_WEIGHT."""
    return dcc.Input(
        id=component_id,
        type='number',
        min=MIN_WEIGHT,
        max=MAX_WEIGHT,
        step=1,
        placeholder=placeholder_text,
        debounce=True,
        style={'width': '40%'},
    )


# Headline row: title on the left, reset button pushed to the right.
_title_row = html.Div(
    [
        html.H2('Token Graph', style={'margin': 0}),
        html.Button(
            'Reset Default',
            id='bt-reset',
            style={'marginLeft': 'auto', 'width': '300px'},
        ),
    ],
    style={'display': 'flex', 'marginBottom': '1em'},
)

_layout_dropdown = dcc.Dropdown(
    id='layout_choice',
    options=['cose', 'cola', 'euler', 'random'],
    value='cose',
    clearable=False,
)

# Bordered frame that holds the Cytoscape component itself.
_graph_frame = html.Div(
    [
        cyto.Cytoscape(
            id='cytoscape-graph',
            style={'width': '100%', 'height': '600px'},
            layout=cose_layout,
            stylesheet=my_stylesheet,
            elements=cyto_data_base,
            zoom=1,
        ),
    ],
    style={
        'border': '3px solid black',
        'borderRadius': '25px',
        'marginTop': '1em',
        'marginBottom': '2em',
        'padding': '7px',
    },
)

_filter_and_graph = html.Div(
    [
        html.H3('Graph Filter'),
        _weight_input(
            'graph-weight_min',
            f'Minimum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
        ),
        _weight_input(
            'graph-weight_max',
            f'Maximum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
        ),
        html.H3('Graph'),
        html.Button('Re-Layout', id='graph-trigger_relayout'),
        _graph_frame,
    ],
    style={'marginTop': '1em'},
)

# Graph section of the page. The two in-memory stores hold the graph of the
# current candidate selection and its Cytoscape elements.
graph_layout = html.Div(
    [
        html.Button('Trigger JS Weight', id='test_js_weight'),
        html.Button('Trigger Candidate Graph', id='graph-build-btn'),
        dcc.Store(id='graph-store', storage_type='memory'),
        dcc.Store(id='graph-store-cyto-curr_cands', storage_type='memory'),
        html.Div(id='output'),
        _title_row,
        html.H3('Layout'),
        _layout_dropdown,
        _filter_and_graph,
    ],
)
|
||||
|
||||
|
||||
# ** app
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)

# Object selection: dropdown offering every key of ``cands``.
_object_selection = html.Div(
    children=[
        html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
        dcc.Dropdown(
            list(cands.keys()),
            id='selector-obj_id',
            placeholder='ObjektID auswählen...',
        ),
    ]
)

# Candidate selection and the timeline figure of the chosen candidate set.
_candidate_view = html.Div(
    children=[
        html.H3(id='object-text'),
        dcc.Dropdown(id='selector-candidates'),
        dcc.Graph(id='graph-candidates'),
    ]
)

_candidate_table = html.Div(
    [dash_table.DataTable(id='table-candidates')],
    style={'marginBottom': '2em'},
)

app.layout = html.Div(
    [
        html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
        _object_selection,
        _candidate_view,
        _candidate_table,
        graph_layout,
    ],
    style={'margin': '2em'},
)
|
||||
|
||||
|
||||
@callback(
    Output('object-text', 'children'),
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_obj_text(obj_id):
    """Return the headline text for the selected object ID.

    Args:
        obj_id: Value of the ``selector-obj_id`` dropdown; ``None`` when the
            dropdown has been cleared.

    Returns:
        The headline string built from ``texts[obj_id]``.

    Raises:
        PreventUpdate: If no object is selected. Previously ``int(None)``
            raised a ``TypeError`` in that case.
    """
    from dash.exceptions import PreventUpdate

    if obj_id is None:
        raise PreventUpdate

    obj_text = texts[int(obj_id)]
    return f'HObjektText: {obj_text}'
|
||||
|
||||
|
||||
@callback(
    [Output('selector-candidates', 'options'), Output('selector-candidates', 'value')],
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_choice_candidates(obj_id):
    """Offer the 1-based candidate numbers of the selected object.

    Args:
        obj_id: Value of the ``selector-obj_id`` dropdown; ``None`` when the
            dropdown has been cleared.

    Returns:
        ``(choices, selected)``: the numbers ``1..len(cands[obj_id])`` and the
        first of them as the pre-selected value, or ``None`` as selection when
        the object has no candidates (previously an ``IndexError``).

    Raises:
        PreventUpdate: If no object is selected. Previously ``int(None)``
            raised a ``TypeError`` in that case.
    """
    from dash.exceptions import PreventUpdate

    if obj_id is None:
        raise PreventUpdate

    cands_obj_id = cands[int(obj_id)]
    choices = list(range(1, len(cands_obj_id) + 1))
    selected = choices[0] if choices else None
    return choices, selected
|
||||
|
||||
|
||||
# TODO check possible storage of pre-filtered result
|
||||
# TODO change input of ``update_table_candidates`` and ``display_candidates_as_graph``
|
||||
# TODO to storage component
|
||||
@callback(
    Output('graph-candidates', 'figure'),
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_timeline(index, obj_id):
    """Build the timeline figure for the chosen candidate set.

    Args:
        index: 1-based candidate number from ``selector-candidates``.
        obj_id: Currently selected object ID.

    Returns:
        A plotly line figure of ``ErstellungsDatum`` vs. ``ObjektID`` for the
        pre-filtered rows.

    Raises:
        PreventUpdate: If either selection is empty. Previously a cleared
            dropdown led to ``int(None)`` raising a ``TypeError``.
    """
    from dash.exceptions import PreventUpdate

    if index is None or obj_id is None:
        raise PreventUpdate

    obj_id = int(obj_id)
    title = f'HObjektText: {texts[obj_id]}'
    df = pre_filter_data(data, idx=index, obj_id=obj_id)

    fig = px.line(
        data_frame=df,
        x='ErstellungsDatum',
        y='ObjektID',
        title=title,
        hover_data=HOVER_DATA,
    )
    fig.update_traces(mode='markers+lines', marker=MARKERS, marker_symbol='diamond')
    fig.update_xaxes(
        tickformat='%B\n%Y',
        rangeslider_visible=True,
    )
    # Treat object IDs as categories, not as a numeric axis.
    fig.update_yaxes(type='category')
    fig.update_layout(hovermode='x unified')
    return fig
|
||||
|
||||
|
||||
@callback(
    [Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_table_candidates(index, obj_id):
    """Fill the candidates table for the chosen candidate set.

    Args:
        index: 1-based candidate number from ``selector-candidates``.
        obj_id: Currently selected object ID.

    Returns:
        ``(records, columns)``: the rows restricted to ``TABLE_FEATS`` and
        sorted by ``ErstellungsDatum``, with the date columns formatted as
        ``YYYY-MM-DD`` strings, plus the matching column definitions.

    Raises:
        PreventUpdate: If either selection is empty. Previously a cleared
            dropdown led to ``int(None)`` raising a ``TypeError``.
    """
    from dash.exceptions import PreventUpdate

    if index is None or obj_id is None:
        raise PreventUpdate

    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    df = df.filter(items=TABLE_FEATS, axis=1).sort_values(
        by='ErstellungsDatum', ascending=True
    )
    cols = [{'name': col, 'id': col} for col in df.columns]
    # Dates are converted to plain strings before the frame is turned into
    # table records.
    for col in TABLE_FEATS_DATES:
        df[col] = df[col].dt.strftime(r'%Y-%m-%d')

    return df.to_dict('records'), cols
|
||||
|
||||
|
||||
def pre_filter_data(
    data: DataFrame,
    idx: int,
    obj_id: ObjectID,
) -> DataFrame:
    """Select the rows of one candidate set of one object.

    Args:
        data: Frame whose index contains the candidate row labels.
        idx: 1-based number of the candidate set within ``cands[obj_id]``.
        obj_id: Object ID used as key into the module-level ``cands``.

    Returns:
        A new frame with the rows of the chosen candidate set, sorted by
        index. The input frame is not modified.
    """
    cands_obj_id = cands[int(obj_id)]
    cands_choice = cands_obj_id[int(idx) - 1]
    # Selecting with a list of labels already yields a new frame, so the
    # earlier copy of the complete input frame was unnecessary work.
    return data.loc[list(cands_choice)].sort_index()  # type: ignore
|
||||
|
||||
|
||||
# ** graph callbacks
|
||||
# TODO store pre-calculated graph
|
||||
@app.callback(
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Output('graph-weight_min', 'min', allow_duplicate=True),
    Output('graph-weight_min', 'max', allow_duplicate=True),
    Output('graph-weight_min', 'placeholder', allow_duplicate=True),
    Output('graph-weight_max', 'min', allow_duplicate=True),
    Output('graph-weight_max', 'max', allow_duplicate=True),
    Output('graph-weight_max', 'placeholder', allow_duplicate=True),
    Output('graph-store', 'data'),
    Output('graph-store-cyto-curr_cands', 'data'),
    # Input('graph-build-btn', 'n_clicks'),
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def display_candidates_as_graph(index, obj_id):
    """Build and show the token graph of the chosen candidate set.

    The rows of the candidate set are filtered, a token graph is built from
    their ``VorgangsBeschreibung`` texts, converted to Cytoscape elements and
    stored base64-encoded. The duration of each step is printed.

    Args:
        index: 1-based candidate number from ``selector-candidates``.
        obj_id: Currently selected object ID.

    Returns:
        A 9-tuple matching the declared outputs: elements, then min / max /
        placeholder for both weight inputs, the encoded graph and the
        elements again for the second store.

    Raises:
        PreventUpdate: If either selection is empty. Previously a cleared
            dropdown led to ``int(None)`` raising a ``TypeError``.
    """
    from dash.exceptions import PreventUpdate

    if index is None or obj_id is None:
        raise PreventUpdate

    started = time.perf_counter()
    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    print(f'Time for filtering: {time.perf_counter() - started} s')

    started = time.perf_counter()
    tk_graph_cands, _ = tokens.build_token_graph(
        data=df,
        model=SPCY_MODEL,
        target_feature='VorgangsBeschreibung',
        build_map=False,
        logging_graph=False,
    )
    print(f'Time for graph building: {time.perf_counter() - started} s')

    started = time.perf_counter()
    cyto_data, weight_info = graphs.convert_graph_to_cytoscape(tk_graph_cands)
    weight_min = weight_info['min']
    weight_max = weight_info['max']
    placeholder_min = f'Minimum edge weight: {weight_min} - {weight_max}'
    placeholder_max = f'Maximum edge weight: {weight_min} - {weight_max}'
    print(f'Time for graph metadata and conversion: {time.perf_counter() - started} s')

    started = time.perf_counter()
    graph_to_store = lang_main.io.encode_to_base64_str(tk_graph_cands)
    print(f'Time for encoding: {time.perf_counter() - started} s')

    return (
        cyto_data,
        weight_min,
        weight_max,
        placeholder_min,
        weight_min,
        weight_max,
        placeholder_max,
        graph_to_store,
        cyto_data,
    )
|
||||
|
||||
|
||||
@app.callback(
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('layout_choice', 'value'),
    prevent_initial_call=True,
)
def update_layout_internal(layout_choice):
    """Return the Cytoscape layout options for the selected dropdown entry.

    Previously the selection was ignored and ``cose_layout`` was always
    returned, so the layout dropdown had no effect. ``'cose'`` keeps the tuned
    option set of this module; any other choice uses the bare
    ``{'name': ...}`` form.

    Args:
        layout_choice: Value of the ``layout_choice`` dropdown.

    Returns:
        A layout dictionary for the ``cytoscape-graph`` component.
    """
    if layout_choice == 'cose':
        return cose_layout
    return {'name': layout_choice}
|
||||
|
||||
|
||||
@app.callback(
    Output('cytoscape-graph', 'zoom'),
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Output('graph-weight_min', 'value'),
    Output('graph-weight_max', 'value'),
    Input('bt-reset', 'n_clicks'),
    State('graph-store-cyto-curr_cands', 'data'),
    prevent_initial_call=True,
)
def reset_layout(_, current_cands_cyto_elements):
    """Restore zoom 1 and the unfiltered elements, and clear both weight inputs.

    Args:
        _: Click counter of the reset button (only triggers the callback).
        current_cands_cyto_elements: Elements stored for the current
            candidate selection, or ``None`` while nothing has been stored.

    Returns:
        ``(zoom, elements, weight_min_value, weight_max_value)``.
    """
    # Before any candidate graph has been stored, fall back to the elements
    # the component was created with instead of pushing ``None`` to it.
    if current_cands_cyto_elements is None:
        elements = cyto_data_base
    else:
        elements = current_cands_cyto_elements
    return (1, elements, None, None)
|
||||
|
||||
|
||||
# update edge weight
|
||||
@app.callback(
    Output('cytoscape-graph', 'elements', allow_duplicate=True),
    Input('graph-weight_min', 'value'),
    Input('graph-weight_max', 'value'),
    State('graph-store', 'data'),
    State('graph-store-cyto-curr_cands', 'data'),
    State('graph-weight_min', 'min'),
    State('graph-weight_min', 'max'),
    prevent_initial_call=True,
)
def update_edge_weight(
    weight_min,
    weight_max,
    current_graph,
    current_cands_cyto_elements,
    current_min,
    current_max,
):
    """Re-filter the stored candidate graph by the entered weight bounds.

    Args:
        weight_min: Lower bound from the input or ``None``.
        weight_max: Upper bound from the input or ``None``.
        current_graph: Base64-encoded graph from ``graph-store`` or ``None``
            while nothing has been stored.
        current_cands_cyto_elements: Unfiltered elements of the current
            candidate selection.
        current_min: Current ``min`` limit of the weight input; replaces a
            missing lower bound.
        current_max: Current ``max`` limit of the weight input; replaces a
            missing upper bound.

    Returns:
        The stored elements when neither bound is set, otherwise the
        Cytoscape elements of the newly filtered graph.

    Raises:
        PreventUpdate: If no graph has been stored yet. Previously ``None``
            was passed on to the decoder in that case.
    """
    from dash.exceptions import PreventUpdate

    if current_graph is None:
        raise PreventUpdate

    # Explicit ``None`` checks: a plain truthiness test would treat an entered
    # bound of 0 as "not set".
    if weight_min is None and weight_max is None:
        return current_cands_cyto_elements

    lower = current_min if weight_min is None else weight_min
    upper = current_max if weight_max is None else weight_max

    graph = cast(graphs.TokenGraph, lang_main.io.decode_from_base64_str(current_graph))
    filtered = graphs.filter_graph_by_edge_weight(graph, lower, upper)
    filtered = graphs.filter_graph_by_node_degree(filtered, 1, None)
    cyto_data, _ = graphs.convert_graph_to_cytoscape(filtered)
    return cyto_data
|
||||
|
||||
|
||||
# ** graph: layout with edge weight
|
||||
# Clientside (JavaScript) handler of the "Re-Layout" button: derives edge
# elasticity and ideal edge length from each edge's ``weight`` and re-runs the
# layout in the browser.
_RELAYOUT_JS = """
function(n_clicks, layout) {
    layout.edgeElasticity = function(edge) {
        return edge.data().weight * 0.05;
    };
    layout.idealEdgeLength = function(edge) {
        return edge.data().weight * 0.4;
    };
    cy.layout(layout).run();
    return layout;
}
"""

app.clientside_callback(
    _RELAYOUT_JS,
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('graph-trigger_relayout', 'n_clicks'),
    State('cytoscape-graph', 'layout'),
    prevent_initial_call=True,
)
|
||||
|
||||
# ** graph: display edge weight (line thickness)
|
||||
# Clientside (JavaScript) handler that sets the edge width from the edge
# weight: weights above 1000 get width 12, all others ``weight / 1000 * 10``
# with a minimum of 1. It is registered with ``prevent_initial_call=False``.
_EDGE_WIDTH_JS = """
function(n_clicks, stylesheet) {
    function edge_weight(ele) {
        let threshold = 1000;
        let weight = ele.data('weight');
        if (weight > threshold) {
            weight = 12;
        } else {
            weight = weight / threshold * 10;
            weight = Math.max(1, weight);
        }
        return weight;
    }
    stylesheet[1].style.width = edge_weight;
    cy.style(stylesheet).update();
    return stylesheet;
}
"""

app.clientside_callback(
    _EDGE_WIDTH_JS,
    Output('cytoscape-graph', 'stylesheet'),
    Input('test_js_weight', 'n_clicks'),
    State('cytoscape-graph', 'stylesheet'),
    prevent_initial_call=False,
)
|
||||
|
||||
|
||||
def _start_webbrowser():
    """Wait two seconds, then open the local dashboard URL in a new browser window."""
    url = 'http://{}:{}/'.format('127.0.0.1', '8050')
    # presumably gives the Dash server time to come up before the page is requested
    time.sleep(2)
    webbrowser.open_new(url)
|
||||
|
||||
|
||||
def main():
    """Launch the browser opener in a daemon thread, then run the Dash app in debug mode."""
    # daemon thread: it must not keep the process alive once the server exits
    Thread(target=_start_webbrowser, daemon=True).start()
    app.run(debug=True)


if __name__ == '__main__':
    main()
|
||||
@@ -1,644 +0,0 @@
|
||||
import time
|
||||
import webbrowser
|
||||
from collections.abc import Collection, Iterable
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import Any, Final, cast
|
||||
|
||||
# import dash_cytoscape as cyto
|
||||
import plotly.express as px
|
||||
import plotly.io
|
||||
from dash import (
|
||||
Dash,
|
||||
Input,
|
||||
Output,
|
||||
State,
|
||||
callback,
|
||||
dash_table,
|
||||
dcc,
|
||||
html,
|
||||
)
|
||||
from pandas import DataFrame
|
||||
from plotly.graph_objects import Figure
|
||||
|
||||
import lang_main.io
|
||||
from lang_main import model_loader as m_load
|
||||
from lang_main.analysis import graphs, tokens
|
||||
from lang_main.analysis.timeline import (
|
||||
calc_delta_to_next_failure,
|
||||
filter_timeline_cands,
|
||||
)
|
||||
from lang_main.constants import (
|
||||
MODEL_LOADER_MAP,
|
||||
NAME_DELTA_FEAT_TO_NEXT_FAILURE,
|
||||
NAME_DELTA_FEAT_TO_REPAIR,
|
||||
SAVE_PATH_FOLDER,
|
||||
)
|
||||
from lang_main.errors import EmptyEdgesError, EmptyGraphError
|
||||
from lang_main.pipelines.predefined import (
|
||||
build_tk_graph_render_pipe,
|
||||
build_tk_graph_rescaling_pipe,
|
||||
)
|
||||
from lang_main.types import (
|
||||
DataFrameTLFiltered,
|
||||
EntryPoints,
|
||||
HTMLColumns,
|
||||
HTMLTable,
|
||||
LanguageModels,
|
||||
ObjectID,
|
||||
TimelineCandidates,
|
||||
)
|
||||
|
||||
# ** model
|
||||
# spaCy model instance, shared by every token-graph build in this module
SPACY_MODEL = m_load.instantiate_model(
    model=LanguageModels.SPACY,
    model_load_map=MODEL_LOADER_MAP,
)

# ** data
# timeline DataFrame: the pickle holds a one-element tuple
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
[data] = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))

# timeline candidates and the text belonging to each object ID
p_tl = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_CANDS)
cands, texts = cast(
    tuple[TimelineCandidates, dict[ObjectID, str]],
    lang_main.io.load_pickle(p_tl),
)
|
||||
# ** necessary pipelines
|
||||
# base name of the rendered network; also used for the exported graph image path
BASE_NETWORK_NAME: Final[str] = 'timeline_candidates'

# rescaling pipeline, run in-memory only (results are not saved)
rescaling_pipe = build_tk_graph_rescaling_pipe(
    save_result=False,
    exit_point=EntryPoints.TIMELINE_TK_GRAPH_RESCALED,
)

# rendering pipeline for the candidate graph, without subgraphs
graph_render_pipe = build_tk_graph_render_pipe(
    base_network_name=BASE_NETWORK_NAME,
    with_subgraphs=False,
)
|
||||
# PTH_RENDERED_GRAPH = f'assets/{BASE_NETWORK_NAME}.svg'
|
||||
def _export_path(name, file_ext):
    """Resolve an export target in the results folder without requiring it to exist yet."""
    return lang_main.io.get_entry_point(
        SAVE_PATH_FOLDER,
        name,
        file_ext=file_ext,
        check_existence=False,
    )


# export targets for the downloadable charts, tables and the rendered graph
PTH_RENDERED_TIMELINE = _export_path('chart_timeline', '.svg')
PTH_TABLE_TIMELINE = _export_path('table_timeline', '.xlsx')
PTH_RENDERED_DELTA_REPAIR = _export_path('chart_delta_repair', '.svg')
PTH_TABLE_DELTA_REPAIR = _export_path('table_delta_repair', '.xlsx')
PTH_RENDERED_GRAPH = _export_path(BASE_NETWORK_NAME, '.svg')
|
||||
|
||||
# NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
|
||||
# date columns; rendered as '%Y-%m-%d' strings in the HTML tables
TABLE_FEATS_DATES: Final[list[str]] = [
    'ErstellungsDatum',
    'ErledigungsDatum',
]
# columns of the overview table
TABLE_FEATS_OVERVIEW: Final[list[str]] = [
    *TABLE_FEATS_DATES,
    NAME_DELTA_FEAT_TO_REPAIR,
    'VorgangsTypName',
    'VorgangsBeschreibung',
    'ErledigungsBeschreibung',
]
# columns of the "best actions" table
TABLE_FEATS_BEST_ACTIONS: Final[list[str]] = [
    *TABLE_FEATS_DATES,
    'VorgangsTypName',
    'VorgangsBeschreibung',
    'ErledigungsBeschreibung',
    NAME_DELTA_FEAT_TO_NEXT_FAILURE,
]
|
||||
|
||||
# ** figure config
|
||||
# hover format used for every date column
_HOVER_DATE_FORMAT = '|%d.%m.%Y'

# marker look of the occurrences chart: yellow markers with a red outline
MARKERS_OCCURRENCES: Final[dict[str, Any]] = {
    'size': 12,
    'color': 'yellow',
    'line': {'width': 2, 'color': 'red'},
}
# marker look of the delta chart: small red crosses
MARKERS_DELTA: Final[dict[str, Any]] = {'size': 8, 'color': 'red', 'symbol': 'cross'}
# hover columns of the occurrences chart
HOVER_DATA: Final[dict[str, Any]] = {
    'ErstellungsDatum': _HOVER_DATE_FORMAT,
    'ErledigungsDatum': _HOVER_DATE_FORMAT,
    'VorgangsBeschreibung': True,
    'ErledigungsBeschreibung': True,
}
# hover columns of the delta chart (adds 'VorgangsDatum' and the repair delta)
HOVER_DATA_DELTA: Final[dict[str, Any]] = {
    'ErstellungsDatum': _HOVER_DATE_FORMAT,
    'ErledigungsDatum': _HOVER_DATE_FORMAT,
    'VorgangsDatum': _HOVER_DATE_FORMAT,
    NAME_DELTA_FEAT_TO_REPAIR: True,
    'VorgangsBeschreibung': True,
    'ErledigungsBeschreibung': True,
}
|
||||
|
||||
# ** graph
|
||||
# load the post-processed token graph; it is the first element of the pickled result
p_tk_graph = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
ret = lang_main.io.load_pickle(p_tk_graph)
tk_graph = cast(graphs.TokenGraph, ret[0])
# edge-weight filter (bounds 150 / None) first, then node-degree filter (bounds 1 / None)
tk_graph_filtered = graphs.filter_graph_by_node_degree(
    graphs.filter_graph_by_edge_weight(tk_graph, 150, None),
    1,
    None,
)
|
||||
|
||||
|
||||
# headline row of the graph section
_graph_headline = html.Div(
    children=[html.H2('Token Graph', style={'margin': 0})],
    style={'display': 'flex', 'marginBottom': '1em'},
)

# bordered frame holding the rendered graph image and the error message slot
_graph_frame = html.Div(
    children=[
        html.Img(
            id='static-graph-img',
            alt='static rendered graph',
            style={'width': '900px', 'height': 'auto'},
        ),
        html.P(id='info-graph-errors', children=[]),
    ],
    style={
        'border': '3px solid black',
        'borderRadius': '25px',
        'marginTop': '1em',
        'marginBottom': '2em',
        'padding': '7px',
    },
)

# download button, download target and the frame wrapped in a loading indicator
_graph_body = html.Div(
    children=[
        html.H3('Graph'),
        html.Button(
            'Download Bild',
            id='bt-reset',
            style={'marginLeft': 'auto', 'width': '300px'},
        ),
        dcc.Download(id='static-graph-download'),
        dcc.Loading(id='loading-graph-render', children=_graph_frame),
    ],
    style={'marginTop': '1em'},
)

# complete graph section, embedded at the end of the page layout
graph_layout = html.Div(
    [
        dcc.Store(id='graph-store', storage_type='memory'),
        html.Div(id='output'),
        _graph_headline,
        _graph_body,
    ],
)
|
||||
|
||||
|
||||
# ** app
|
||||
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)


def _download_button(label, component_id):
    """Create a download button with the shared styling; each button gets its own style dict."""
    return html.Button(
        label,
        id=component_id,
        style={'marginLeft': 'auto', 'width': '300px', 'marginTop': '1em'},
    )


# object selection: one dropdown entry per key of the candidates mapping
_object_selection = html.Div(
    children=[
        html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
        dcc.Dropdown(
            list(cands),
            id='selector-obj_id',
            placeholder='ObjektID auswählen...',
        ),
    ]
)

# candidate selection plus the two charts with their download controls
_charts = html.Div(
    children=[
        html.H3(id='object-text'),
        dcc.Dropdown(id='selector-candidates'),
        _download_button('Download Diagramm', 'bt-dl-timeline'),
        dcc.Download(id='dl-timeline'),
        dcc.Graph(id='figure-occurrences'),
        _download_button('Download Diagramm', 'bt-dl-deltarepair'),
        dcc.Download(id='dl-deltarepair'),
        dcc.Graph(id='figure-delta'),
    ]
)

# overview table
_table_overview = html.Div(
    [
        html.H5('Überblick ähnlicher Vorgänge'),
        dcc.Download(id='dl-table-timeline'),
        _download_button('Download Table', 'bt-table-timeline'),
        dash_table.DataTable(id='table-candidates'),
    ],
    style={'paddingBottom': '1em'},
)

# "best actions" table
_table_best_actions = html.Div(
    [
        html.H5('Maßnahmen sortiert nach längstem Zeitraum bis zum nächsten Ereignis'),
        dcc.Download(id='dl-table-deltarepair'),
        _download_button('Download Table', 'bt-table-deltarepair'),
        dash_table.DataTable(id='table-best-actions'),
    ]
)

_tables = html.Div(
    children=[_table_overview, _table_best_actions],
    style={'marginBottom': '2em', 'padding': '2em'},
)

# page layout: title, selectors and charts, tables, graph section
app.layout = html.Div(
    [
        html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
        _object_selection,
        _charts,
        _tables,
        graph_layout,
    ],
    style={'margin': '2em'},
)
|
||||
|
||||
|
||||
# ** selectors of candidates
|
||||
@callback(
    Output('object-text', 'children'),
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_obj_text(obj_id):
    """Return the headline text for the object chosen in 'selector-obj_id'.

    Args:
        obj_id: value of the object dropdown; ``None`` when the selection
            was cleared.

    Returns:
        ``'HObjektText: <text>'`` for the selected object, or an empty string
        when nothing is selected.
    """
    # a cleared dropdown delivers None; int(None) would raise a TypeError
    if obj_id is None:
        return ''

    # the text lookup is keyed by int, so coerce whatever Dash delivered
    obj_text = texts[int(obj_id)]
    return f'HObjektText: {obj_text}'
|
||||
|
||||
|
||||
@callback(
    [
        Output('selector-candidates', 'options'),
        Output('selector-candidates', 'value'),
    ],
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_choice_candidates(obj_id):
    """Fill the candidate dropdown for the selected object.

    Args:
        obj_id: value of the object dropdown; ``None`` when the selection
            was cleared.

    Returns:
        A tuple ``(options, value)``: the 1-based candidate numbers of the
        object and the preselected first one. Without a selection, or when
        the object has no candidates, this is ``([], None)``.
    """
    # a cleared dropdown delivers None; reset the dependent dropdown instead of crashing
    if obj_id is None:
        return [], None

    # candidates are presented 1-based in the dashboard
    choices = list(range(1, len(cands[int(obj_id)]) + 1))
    # guard against an object without candidates (choices[0] would raise IndexError)
    preselected = choices[0] if choices else None
    return choices, preselected
|
||||
|
||||
|
||||
# ** helpers to filter DataFrame
|
||||
def filter_candidates(
    data: DataFrame,
    idx: int,
    obj_id: ObjectID,
) -> DataFrameTLFiltered:
    """Filter ``data`` down to one candidate group of one object.

    Args:
        data: DataFrame to filter.
        idx: 1-based candidate number as shown in the dashboard.
        obj_id: object ID whose candidates are looked up.

    Returns:
        The result of ``filter_timeline_cands`` for the module-level
        candidates mapping.
    """
    # Dash callbacks may hand over other types, so both values are coerced to int;
    # the dashboard counts from 1 while the candidate entries are addressed from 0
    entry_idx = int(idx) - 1
    object_id = int(obj_id)

    return filter_timeline_cands(
        data=data,
        cands=cands,
        obj_id=object_id,
        entry_idx=entry_idx,
    )
|
||||
|
||||
|
||||
# ** figure generation
|
||||
# TODO check possible storage of pre-filtered result
|
||||
# TODO change input of ``update_table_candidates`` and ``display_candidates_as_graph``
|
||||
# TODO to storage component
|
||||
@callback(
    [
        Output('figure-occurrences', 'figure'),
        Output('figure-delta', 'figure'),
    ],
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_timeline(index, obj_id):
    """Rebuild both timeline charts for the selected candidate group.

    Args:
        index: 1-based candidate number from 'selector-candidates'.
        obj_id: object ID from 'selector-obj_id'.

    Returns:
        A tuple ``(occurrences figure, delta figure)``.

    Raises:
        PreventUpdate: when either selection is empty, so the charts are
            left unchanged instead of the callback failing on ``int(None)``.
    """
    # imported locally so the block does not depend on the file's import section
    from dash.exceptions import PreventUpdate

    # clearing either dropdown delivers None
    if index is None or obj_id is None:
        raise PreventUpdate

    obj_text = texts[int(obj_id)]
    title_occurrences = f'HObjektText: {obj_text}'
    title_delta = f'HObjektText: {obj_text}, Differenz Erstellung und Erledigung'

    df = filter_candidates(data, idx=index, obj_id=obj_id)
    fig_occurrences = fig_timeline_occurrences(df, title_occurrences)
    fig_delta = fig_timeline_delta(df, title_delta, delta_feature=NAME_DELTA_FEAT_TO_REPAIR)

    return fig_occurrences, fig_delta
|
||||
|
||||
|
||||
def fig_timeline_occurrences(
    df: DataFrame,
    title: str,
) -> Figure:
    """Build the occurrences chart: 'ObjektID' over 'ErstellungsDatum' as a marked line.

    Args:
        df: data to plot.
        title: chart title.

    Returns:
        The configured plotly figure.
    """
    figure = px.line(
        df,
        x='ErstellungsDatum',
        y='ObjektID',
        title=title,
        hover_data=HOVER_DATA,
    )
    # diamond markers on top of the connecting line
    figure.update_traces(
        mode='markers+lines',
        marker=MARKERS_OCCURRENCES,
        marker_symbol='diamond',
    )
    # month/year ticks plus a range slider below the chart
    figure.update_xaxes(tickformat='%B\n%Y', rangeslider_visible=True)
    # categorical y axis for the object IDs
    figure.update_yaxes(type='category')
    figure.update_layout(hovermode='x unified')

    return figure
|
||||
|
||||
|
||||
def fig_timeline_delta(
    df: DataFrame,
    title: str,
    delta_feature: str,
) -> Figure:
    """Build the delta chart: ``delta_feature`` over 'ErstellungsDatum' as a scatter plot.

    Args:
        df: data to plot.
        title: chart title.
        delta_feature: name of the column shown on the y axis.

    Returns:
        The configured plotly figure.
    """
    figure = px.scatter(
        df,
        x='ErstellungsDatum',
        y=delta_feature,
        title=title,
        hover_data=HOVER_DATA_DELTA,
    )
    figure.update_traces(marker=MARKERS_DELTA)
    # month/year ticks on x, a tick at every whole unit on y
    figure.update_xaxes(tickformat='%B\n%Y')
    figure.update_yaxes(dtick=1)
    figure.update_layout(hovermode='x unified')

    return figure
|
||||
|
||||
|
||||
def transform_to_HTML_table(
    data: DataFrame,
    target_features: Collection[str],
    date_cols: Iterable[str] | None = None,
    sorting_feature: str | None = None,
    sorting_ascending: bool = True,
    save_path: Path | None = None,
) -> tuple[HTMLColumns, HTMLTable]:
    """Convert a DataFrame into column definitions and records for a Dash DataTable.

    Args:
        data: source data; it is not modified.
        target_features: columns to keep, in display order.
        date_cols: datetime columns to render as ``'%Y-%m-%d'`` strings.
        sorting_feature: column to sort the rows by; ``None`` keeps the
            incoming row order. The column does not have to be part of
            ``target_features``.
        sorting_ascending: sort direction used with ``sorting_feature``.
        save_path: if given, the resulting table is also written to this
            path as an Excel file.

    Returns:
        A tuple ``(columns, table_data)``: one ``{'name', 'id'}`` dict per
        column and one record dict per row.
    """
    # Sort by the requested feature. The sort used to be hard-coded to
    # 'ErstellungsDatum', which silently ignored ``sorting_feature``.
    # Sorting before the column filter allows sorting by a hidden column;
    # sort_values returns a new frame, so the caller's data stays untouched.
    if sorting_feature is not None:
        data = data.sort_values(by=sorting_feature, ascending=sorting_ascending)

    # copy so the date formatting below never writes into the caller's frame
    table = data.filter(items=list(target_features), axis=1).copy()

    if date_cols is not None:
        for col in date_cols:
            table[col] = table[col].dt.strftime(r'%Y-%m-%d')

    columns = [{'name': col, 'id': col} for col in table.columns]
    table_data = table.to_dict('records')

    if save_path is not None:
        table.to_excel(save_path)

    return columns, table_data
|
||||
|
||||
|
||||
# 'table-best-actions'
|
||||
# ** HTML table
|
||||
@callback(
    [
        Output('table-candidates', 'columns'),
        Output('table-candidates', 'data'),
        Output('table-best-actions', 'columns'),
        Output('table-best-actions', 'data'),
    ],
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_tables_candidates(
    index,
    obj_id,
) -> tuple[HTMLColumns, HTMLTable, HTMLColumns, HTMLTable]:
    """Rebuild the overview table and the "best actions" table.

    Both tables are also written to their Excel export paths
    (``PTH_TABLE_TIMELINE`` and ``PTH_TABLE_DELTA_REPAIR``) as a side effect.

    Args:
        index: 1-based candidate number from 'selector-candidates'.
        obj_id: object ID from 'selector-obj_id'.

    Returns:
        Columns and records of the overview table, followed by columns and
        records of the best-actions table.

    Raises:
        PreventUpdate: when either selection is empty, so the tables are
            left unchanged instead of the callback failing on ``int(None)``.
    """
    # imported locally so the block does not depend on the file's import section
    from dash.exceptions import PreventUpdate

    # clearing either dropdown delivers None
    if index is None or obj_id is None:
        raise PreventUpdate

    # named df_cands so it does not shadow the module-level candidates mapping
    df_cands = filter_candidates(data, idx=index, obj_id=obj_id)
    overview_cols, overview_table = transform_to_HTML_table(
        data=df_cands,
        target_features=TABLE_FEATS_OVERVIEW,
        date_cols=TABLE_FEATS_DATES,
        sorting_feature='ErstellungsDatum',
        sorting_ascending=True,
        save_path=PTH_TABLE_TIMELINE,
    )

    df_best_actions = calc_delta_to_next_failure(
        data=df_cands,
        date_feature='ErstellungsDatum',
        name_delta_feature=NAME_DELTA_FEAT_TO_NEXT_FAILURE,
    )
    # NOTE(review): no sorting_feature is passed here, so the row order is whatever
    # calc_delta_to_next_failure returns — confirm it matches the table headline
    best_actions_cols, best_actions_table = transform_to_HTML_table(
        data=df_best_actions,
        target_features=TABLE_FEATS_BEST_ACTIONS,
        date_cols=TABLE_FEATS_DATES,
        save_path=PTH_TABLE_DELTA_REPAIR,
    )

    return overview_cols, overview_table, best_actions_cols, best_actions_table
|
||||
|
||||
|
||||
# ** graph callbacks
|
||||
@app.callback(
    [
        Output('graph-store', 'data'),
        Output('static-graph-img', 'src'),
        Output('info-graph-errors', 'children'),
    ],
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def display_candidates_as_graph(index, obj_id):
    """Build, rescale and render the token graph of the selected candidates.

    Args:
        index: 1-based candidate number from 'selector-candidates'.
        obj_id: object ID from 'selector-obj_id'.

    Returns:
        A tuple ``(graph_to_store, static_img, error_msg)``: the base64
        encoded token graph, the rendered image as an SVG data URI and an
        error text. When the graph is empty, the first two are empty
        strings and the error text is set.

    Raises:
        PreventUpdate: when either selection is empty.
    """
    # imported locally so the block does not depend on the file's import section
    from dash.exceptions import PreventUpdate

    # clearing either dropdown delivers None; int(None) would raise further down
    if index is None or obj_id is None:
        raise PreventUpdate

    t1 = time.perf_counter()
    df = filter_candidates(data, idx=index, obj_id=obj_id)
    t2 = time.perf_counter()
    print(f'Time for filtering: {t2 - t1} s')

    t1 = time.perf_counter()
    tk_graph_cands, _ = tokens.build_token_graph(
        data=df,
        model=SPACY_MODEL,
        target_feature='VorgangsBeschreibung',
        build_map=False,
        logging_graph=False,
    )
    t2 = time.perf_counter()
    print(f'Time for graph building: {t2 - t1} s')

    # ** now start rendering pipeline in Cytoscape
    try:
        t1 = time.perf_counter()
        # rescale graph
        _, tk_graph_rescaled_undirected = cast(
            tuple[graphs.TokenGraph, graphs.Graph],
            rescaling_pipe.run(starting_values=(tk_graph_cands,)),
        )
        # render graph in Cytoscape and export image
        _ = graph_render_pipe.run(starting_values=(tk_graph_rescaled_undirected,))
        # load the exported image as a base64 string for the <img> data URI
        b64_img = lang_main.io.encode_file_to_base64_str(PTH_RENDERED_GRAPH)
        static_img = f'data:image/svg+xml;base64,{b64_img}'
        graph_to_store = lang_main.io.encode_to_base64_str(tk_graph_cands)
        t2 = time.perf_counter()
        print(f'Time for graph rescaling and rendering: {t2 - t1} s')
    except (EmptyGraphError, EmptyEdgesError):
        # an empty graph is an expected outcome: clear the image and inform the user
        return '', '', 'Graph ist leer und konnte nicht generiert werden!'

    # Deliberately no ``return`` inside a ``finally`` clause: that would discard
    # any unexpected exception and fail with an UnboundLocalError instead.
    return graph_to_store, static_img, ''
|
||||
|
||||
|
||||
@callback(
    Output('static-graph-download', 'data'),
    Input('bt-reset', 'n_clicks'),
    prevent_initial_call=True,
)
def download_graph(_):
    """Send the rendered graph image file to the browser on a click of 'bt-reset'."""
    download = dcc.send_file(path=PTH_RENDERED_GRAPH)
    return download
|
||||
|
||||
|
||||
@callback(
    Output('dl-timeline', 'data'),
    Input('bt-dl-timeline', 'n_clicks'),
    State('figure-occurrences', 'figure'),
    prevent_initial_call=True,
)
def download_timeline(_, fig: dict):
    """Export the occurrences chart as an image file and send it to the browser.

    Args:
        _: click counter of the download button (unused).
        fig: figure of 'figure-occurrences' as the dict delivered by Dash.

    Returns:
        The download payload for the file written to ``PTH_RENDERED_TIMELINE``.
    """
    # The 'yaxis' entry of the range slider has to be removed before the dict is
    # turned back into a Figure (presumably Figure() rejects it — confirm).
    # Lookups are tolerant: a figure without layout/xaxis/rangeslider, e.g. when
    # nothing has been plotted yet, must not raise a KeyError.
    xaxis = ((fig or {}).get('layout') or {}).get('xaxis') or {}
    rangeslider = xaxis.get('rangeslider')
    if isinstance(rangeslider, dict):
        rangeslider.pop('yaxis', None)

    figure = Figure(fig)
    figure.write_image(PTH_RENDERED_TIMELINE)
    return dcc.send_file(path=PTH_RENDERED_TIMELINE)
|
||||
|
||||
|
||||
@callback(
    Output('dl-deltarepair', 'data'),
    Input('bt-dl-deltarepair', 'n_clicks'),
    State('figure-delta', 'figure'),
    prevent_initial_call=True,
)
def download_delta_repair(_, fig: dict):
    """Export the delta chart as an image file and send it to the browser.

    Args:
        _: click counter of the download button (unused).
        fig: figure of 'figure-delta' as the dict delivered by Dash.

    Returns:
        The download payload for the file written to ``PTH_RENDERED_DELTA_REPAIR``.
    """
    # The 'yaxis' entry of a range slider has to be removed before the dict is
    # turned back into a Figure (presumably Figure() rejects it — confirm).
    # Lookups are tolerant: a figure without layout/xaxis/rangeslider, e.g. when
    # nothing has been plotted yet, must not raise a KeyError.
    xaxis = ((fig or {}).get('layout') or {}).get('xaxis') or {}
    rangeslider = xaxis.get('rangeslider')
    if isinstance(rangeslider, dict):
        rangeslider.pop('yaxis', None)

    figure = Figure(fig)
    figure.write_image(PTH_RENDERED_DELTA_REPAIR)
    return dcc.send_file(path=PTH_RENDERED_DELTA_REPAIR)
|
||||
|
||||
|
||||
@callback(
    Output('dl-table-timeline', 'data'),
    Input('bt-table-timeline', 'n_clicks'),
    prevent_initial_call=True,
)
def download_table_timeline(_):
    """Send the overview table file to the browser on a click of 'bt-table-timeline'."""
    download = dcc.send_file(path=PTH_TABLE_TIMELINE)
    return download
|
||||
|
||||
|
||||
@callback(
    Output('dl-table-deltarepair', 'data'),
    Input('bt-table-deltarepair', 'n_clicks'),
    prevent_initial_call=True,
)
def download_table_delta_repair(_):
    """Send the best-actions table file to the browser on a click of 'bt-table-deltarepair'."""
    download = dcc.send_file(path=PTH_TABLE_DELTA_REPAIR)
    return download
|
||||
|
||||
|
||||
def _start_webbrowser():
    """Wait two seconds, then open the local dashboard URL in a new browser window."""
    url = 'http://{}:{}/'.format('127.0.0.1', '8050')
    # presumably gives the Dash server time to come up before the page is requested
    time.sleep(2)
    webbrowser.open_new(url)
|
||||
|
||||
|
||||
def main():
    """Launch the browser opener in a daemon thread, then run the Dash app in debug mode."""
    # daemon thread: it must not keep the process alive once the server exits
    Thread(target=_start_webbrowser, daemon=True).start()
    app.run(debug=True)


if __name__ == '__main__':
    main()
|
||||
Binary file not shown.
@@ -1,58 +0,0 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
# results = './results/dummy_N_1000/'
|
||||
# dataset = '../data/Dummy_Dataset_N_1000.csv'
|
||||
results = './results/test_20240807/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
|
||||
# only debugging features, production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing_skip = false
|
||||
graph_rescaling_skip = false
|
||||
graph_static_rendering_skip = true
|
||||
time_analysis_skip = true
|
||||
|
||||
[preprocess]
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_number = 330
|
||||
# threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.preparation]
|
||||
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
|
||||
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
|
||||
|
||||
[time_analysis.model_input]
|
||||
# input_features = [
|
||||
# 'VorgangsTypName',
|
||||
# 'VorgangsArtText',
|
||||
# 'VorgangsBeschreibung',
|
||||
# ]
|
||||
input_features = [
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
@@ -1,63 +0,0 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
# results = './results/dummy_N_1000/'
|
||||
# dataset = '../data/Dummy_Dataset_N_1000.csv'
|
||||
results = './results/test_20240807/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
|
||||
[logging]
|
||||
enabled = true
|
||||
stderr = true
|
||||
file = true
|
||||
|
||||
# only debugging features, production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = false
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing_skip = true
|
||||
graph_rescaling_skip = true
|
||||
graph_static_rendering_skip = true
|
||||
time_analysis_skip = true
|
||||
|
||||
[preprocess]
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_number = 330
|
||||
# threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.preparation]
|
||||
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
|
||||
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
|
||||
|
||||
[time_analysis.model_input]
|
||||
# input_features = [
|
||||
# 'VorgangsTypName',
|
||||
# 'VorgangsArtText',
|
||||
# 'VorgangsBeschreibung',
|
||||
# ]
|
||||
input_features = [
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
@@ -1,15 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
from lang_main.constants import (
|
||||
INPUT_PATH_FOLDER,
|
||||
PATH_TO_DATASET,
|
||||
SAVE_PATH_FOLDER,
|
||||
input_path_cfg,
|
||||
)
|
||||
|
||||
# Print the configured results, inputs and dataset paths, one per call
for configured_path in (SAVE_PATH_FOLDER, INPUT_PATH_FOLDER, PATH_TO_DATASET):
    print(configured_path, '\n')

print('------------------------')
# current working directory next to the path of the config file in use
print(Path.cwd(), '\n', input_path_cfg)
|
||||
Binary file not shown.
Binary file not shown.
1
scripts/publish.ps1
Normal file
1
scripts/publish.ps1
Normal file
@@ -0,0 +1 @@
|
||||
pdm publish -r local --skip-existing
|
||||
2
scripts/run_test_wo_models+cyto.ps1
Normal file
2
scripts/run_test_wo_models+cyto.ps1
Normal file
@@ -0,0 +1,2 @@
|
||||
# Remove logs of previous runs; skipped on a fresh checkout where the folder
# does not exist yet (Remove-Item would report an error otherwise)
if (Test-Path "./logs") {
    Remove-Item "./logs" -Force -Recurse
}
# Run the tests under coverage, deselecting tests marked "mload" or "cyto"
pdm run coverage run -m pytest -m "not mload and not cyto"
|
||||
10
scripts/run_tests_all.ps1
Normal file
10
scripts/run_tests_all.ps1
Normal file
@@ -0,0 +1,10 @@
|
||||
# Remove logs of previous runs; skipped on a fresh checkout where the folder
# does not exist yet (Remove-Item would report an error otherwise)
if (Test-Path "./logs") {
    Remove-Item "./logs" -Force -Recurse
}
# Run the test suite with coverage in 4 parallel workers
pdm run pytest --cov -n 4
# run docker desktop
docker desktop start
# . "C:\Program Files\Docker\Docker\Docker Desktop.exe"
# start the "cyrest" container used by the Cytoscape tests
docker start cyrest
# run Cytoscape tests in single process, appending to the existing coverage data
pdm run coverage run -a -m pytest -m "cyto"
docker stop cyrest
# build the HTML coverage report
pdm run coverage html
|
||||
@@ -1 +0,0 @@
|
||||
import py4cytoscape
|
||||
Reference in New Issue
Block a user