new pipeline management (PipelineContainer), prototype graph display in timeline dashboard

2024-06-19 16:58:26 +02:00
parent c2714b8060
commit fb4437a3a2
21 changed files with 2838 additions and 11383 deletions
--- a/scripts/analyse_dataset.py
+++ b/scripts/analyse_dataset.py
@@ -1,42 +1,44 @@
 import typing
-import warnings
-from pathlib import Path
 from typing import cast

+from pandas import DataFrame, Series
+
 from lang_main.analysis.graphs import TokenGraph
 from lang_main.constants import (
-    DO_GRAPH_POSTPROCESSING,
-    DO_PREPROCESSING,
-    DO_TIME_ANALYSIS,
-    DO_TOKEN_ANALYSIS,
-    INPUT_PATH_FOLDER,
    PATH_TO_DATASET,
    SAVE_PATH_FOLDER,
    SKIP_GRAPH_POSTPROCESSING,
    SKIP_PREPROCESSING,
    SKIP_TIME_ANALYSIS,
    SKIP_TOKEN_ANALYSIS,
-    THRESHOLD_AMOUNT_CHARACTERS,
-    THRESHOLD_EDGE_WEIGHT,
 )
-from lang_main.io import create_saving_folder, load_pickle
+from lang_main.io import create_saving_folder, get_entry_point, load_pickle
+from lang_main.pipelines.base import PipelineContainer
 from lang_main.pipelines.predefined import (
-    pipe_merge,
-    pipe_target_feat,
-    pipe_timeline,
-    pipe_token_analysis,
+    build_base_target_feature_pipe,
+    build_merge_duplicates_pipe,
+    build_timeline_pipe,
+    build_tk_graph_pipe,
+    build_tk_graph_post_pipe,
 )
 from lang_main.types import (
+    EntryPoints,
    ObjectID,
    PandasIndex,
    SpacyDoc,
    TimelineCandidates,
 )
-from pandas import DataFrame, Series
+
+# ** build pipelines
+pipe_merge = build_merge_duplicates_pipe()
+pipe_target_feat = build_base_target_feature_pipe()
+pipe_timeline = build_timeline_pipe()
+pipe_token_analysis = build_tk_graph_pipe()
+pipe_graph_postprocessing = build_tk_graph_post_pipe()


-# ** processing pipeline
-def run_preprocessing() -> DataFrame:
+# ** preprocessing pipeline
+def run_preprocessing() -> None:
    create_saving_folder(
        saving_path_folder=SAVE_PATH_FOLDER,
        overwrite_existing=False,
@@ -46,134 +48,69 @@ def run_preprocessing() -> DataFrame:
        tuple[DataFrame], pipe_target_feat.run(starting_values=(PATH_TO_DATASET,))
    )
    target_feat_data = ret[0]
-    # only entries with more than threshold amount of characters
-    data_filter = typing.cast(Series, (target_feat_data['len'] > THRESHOLD_AMOUNT_CHARACTERS))
-    subset_data = target_feat_data.loc[data_filter].copy()
-    # merge duplicates, results saved separately
-    ret = typing.cast(tuple[DataFrame], pipe_merge.run(starting_values=(subset_data,)))
-    preprocessed_data = ret[0]
-
-    return preprocessed_data
+    _ = typing.cast(tuple[DataFrame], pipe_merge.run(starting_values=(target_feat_data,)))


-def run_token_analysis(
-    preprocessed_data: DataFrame,
-) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
+# ** token analysis
+def run_token_analysis() -> None:
+    # load entry point
+    entry_point_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TOKEN_ANALYSIS)
+    loaded_results = cast(tuple[DataFrame], load_pickle(entry_point_path))
+    preprocessed_data = loaded_results[0]
    # build token graph
    (tk_graph, docs_mapping) = typing.cast(
-        tuple[TokenGraph, dict[PandasIndex, SpacyDoc]],
+        tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
        pipe_token_analysis.run(starting_values=(preprocessed_data,)),
    )
-    tk_graph.save_graph(SAVE_PATH_FOLDER, directed=False)
-    tk_graph.to_pickle(SAVE_PATH_FOLDER, filename=f'{pipe_token_analysis.name}-TokenGraph')
-
-    return tk_graph, docs_mapping
+    tk_graph.to_GraphML(SAVE_PATH_FOLDER, filename='TokenGraph', directed=False)


-def run_graph_postprocessing(
-    tk_graph: TokenGraph,
-) -> TokenGraph:
+def run_graph_postprocessing() -> None:
+    # load entry point
+    entry_point_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
+    loaded_results = cast(
+        tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
+        load_pickle(entry_point_path),
+    )
+    tk_graph = loaded_results[0]
    # filter graph by edge weight and remove single nodes (no connection)
-    tk_graph_filtered = tk_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT)
-    tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1)
-    tk_graph_filtered.save_graph(
+    ret = cast(tuple[TokenGraph], pipe_graph_postprocessing.run(starting_values=(tk_graph,)))
+    tk_graph_filtered = ret[0]
+    # legacy (presumably now done by the pipe above; THRESHOLD_EDGE_WEIGHT is no longer imported):
+    # tk_graph.filter_by_edge_weight(THRESHOLD_EDGE_WEIGHT, None).filter_by_node_degree(1, None)
+    tk_graph_filtered.to_GraphML(
        SAVE_PATH_FOLDER, filename='TokenGraph-filtered', directed=False
    )
-    tk_graph_filtered.to_pickle(
-        SAVE_PATH_FOLDER, filename=f'{pipe_token_analysis.name}-TokenGraph-filtered'
-    )
-
-    return tk_graph_filtered


-def run_time_analysis() -> tuple[TimelineCandidates, dict[ObjectID, str]]:
-    filename = 'without_nan'
-    loading_path = INPUT_PATH_FOLDER.joinpath(filename).with_suffix('.pkl')
-    verify_path(loading_path)
-    ret = load_pickle(loading_path)
-    preprocessed_data = ret[0]
+# ** time analysis
+def run_time_analysis() -> None:
+    # load entry point
+    entry_point_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
+    loaded_results = cast(tuple[DataFrame], load_pickle(entry_point_path))
+    preprocessed_data = loaded_results[0]

-    ret = cast(
+    _ = cast(
        tuple[TimelineCandidates, dict[ObjectID, str]],
        pipe_timeline.run(starting_values=(preprocessed_data,)),
    )
-    return ret


-def verify_path(
-    loading_path: Path,
-) -> None:
-    if not loading_path.exists():
-        raise FileNotFoundError(f'Could not load results. File not found: {loading_path}')
+def build_pipeline_container() -> PipelineContainer:
+    container = PipelineContainer(
+        name='Pipeline-Container-Base', working_dir=SAVE_PATH_FOLDER
+    )
+    container.add(run_preprocessing, skip=SKIP_PREPROCESSING)
+    container.add(run_token_analysis, skip=SKIP_TOKEN_ANALYSIS)
+    container.add(run_graph_postprocessing, skip=SKIP_GRAPH_POSTPROCESSING)
+    container.add(run_time_analysis, skip=SKIP_TIME_ANALYSIS)
+
+    return container


 def main() -> None:
-    pre_step_skipped: bool = False
-    # ** preprocess
-    if DO_PREPROCESSING and not SKIP_PREPROCESSING:
-        preprocessed_data = run_preprocessing()
-    elif not SKIP_PREPROCESSING:
-        # !! hardcoded result filenames
-        target_pattern: str = r'*Pipe-Merge_Duplicates_Step-1*'
-        loading_path = list(SAVE_PATH_FOLDER.glob(target_pattern))[0]
-        verify_path(loading_path)
-        ret = typing.cast(tuple[DataFrame], load_pickle(loading_path))
-        preprocessed_data = ret[0]
-    else:
-        pre_step_skipped = True
-        warnings.warn('No preprocessing action selected. Skipped.')
-    # sys.exit(0)
-    # ** token analysis
-    if DO_TOKEN_ANALYSIS and not SKIP_TOKEN_ANALYSIS:
-        if pre_step_skipped:
-            raise RuntimeError(
-                'Preprocessing step skipped. Token analysis cannot be performed.'
-            )
-        preprocessed_data_trunc = typing.cast(
-            DataFrame, preprocessed_data[['batched_idxs', 'entry', 'num_occur']].copy()
-        )  # type: ignore
-        tk_graph, docs_mapping = run_token_analysis(preprocessed_data_trunc)
-    elif not SKIP_TOKEN_ANALYSIS:
-        # !! hardcoded result filenames
-        # whole graph
-        filename: str = f'{pipe_token_analysis.name}-TokenGraph'
-        loading_path = SAVE_PATH_FOLDER.joinpath(filename).with_suffix('.pkl')
-        verify_path(loading_path)
-        # tk_graph = typing.cast(TokenGraph, load_pickle(loading_path))
-        tk_graph = TokenGraph.from_pickle(loading_path)
-        pre_step_skipped = False
-    else:
-        pre_step_skipped = True
-        warnings.warn('No token analysis action selected. Skipped.')
-    # ** graph postprocessing
-    if DO_GRAPH_POSTPROCESSING and not SKIP_GRAPH_POSTPROCESSING:
-        if pre_step_skipped:
-            raise RuntimeError(
-                (
-                    'Preprocessing or token analysis step skipped. '
-                    'Graph postprocessing cannot be performed.'
-                )
-            )
-        tk_graph_filtered = run_graph_postprocessing(tk_graph)
-    elif not SKIP_GRAPH_POSTPROCESSING:
-        # !! hardcoded result filenames
-        # filtered graph
-        filename: str = f'{pipe_token_analysis.name}-TokenGraph-filtered'
-        loading_path = SAVE_PATH_FOLDER.joinpath(filename).with_suffix('.pkl')
-        verify_path(loading_path)
-        # tk_graph_filtered = typing.cast(TokenGraph, load_pickle(loading_path))
-        tk_graph_filtered = TokenGraph.from_pickle(loading_path)
-        pre_step_skipped = False
-    else:
-        warnings.warn('No graph postprocessing action selected. Skipped.')
-    # ** time analysis
-    if DO_TIME_ANALYSIS and not SKIP_TIME_ANALYSIS:
-        # no check for fails, runs separately
-        ret = run_time_analysis()
-    elif not SKIP_TIME_ANALYSIS:
-        ...
-    else:
-        warnings.warn('No time analysis action selected. Skipped.')
+    procedure = build_pipeline_container()
+    procedure.run()


 if __name__ == '__main__':
--- a/scripts/dashboard/Pipe-TargetFeature_Step-3_remove_NA.pkl
+++ b/scripts/dashboard/Pipe-TargetFeature_Step-3_remove_NA.pkl
--- a/scripts/dashboard/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl
+++ b/scripts/dashboard/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl
--- a/scripts/dashboard/app.py
+++ b/scripts/dashboard/app.py
@@ -1,190 +0,0 @@
-import time
-import webbrowser
-from pathlib import Path
-from threading import Thread
-from typing import cast
-
-import pandas as pd
-import plotly.express as px
-from dash import (
-    Dash,
-    Input,
-    Output,
-    State,
-    callback,
-    dash_table,
-    dcc,
-    html,
-)
-from lang_main.io import load_pickle
-from lang_main.types import ObjectID, TimelineCandidates
-from pandas import DataFrame
-
-# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
-
-# ** data
-p_df = Path(r'./Pipe-TargetFeature_Step-3_remove_NA.pkl').resolve()
-p_tl = Path(r'/Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl').resolve()
-ret = cast(DataFrame, load_pickle(p_df))
-data = ret[0]
-ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
-cands = ret[0]
-texts = ret[1]
-
-# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
-# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
-# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
-# data = cast(DataFrame, load_pickle(p_df))
-# cands = cast(TimelineCandidates, load_pickle(p_cands))
-# texts = cast(dict[ObjectID, str], load_pickle(p_map))
-
-table_feats = [
-    'ErstellungsDatum',
-    'ErledigungsDatum',
-    'VorgangsTypName',
-    'VorgangsBeschreibung',
-]
-table_feats_dates = [
-    'ErstellungsDatum',
-    'ErledigungsDatum',
-]
-
-# ** graph config
-markers = {
-    'size': 12,
-    'color': 'yellow',
-    'line': {
-        'width': 2,
-        'color': 'red',
-    },
-}
-hover_data = {
-    'ErstellungsDatum': '|%d.%m.%Y',
-    'VorgangsBeschreibung': True,
-}
-
-
-app = Dash(prevent_initial_callbacks=True)
-
-app.layout = [
-    html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
-    html.Div(
-        children=[
-            html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
-            dcc.Dropdown(
-                list(cands.keys()),
-                id='dropdown-selection',
-                placeholder='ObjektID auswählen...',
-            ),
-        ]
-    ),
-    html.Div(
-        children=[
-            html.H3(id='object_text'),
-            dcc.Dropdown(id='choice-candidates'),
-            dcc.Graph(id='graph-output'),
-        ]
-    ),
-    html.Div(children=[dash_table.DataTable(id='table-candidates')]),
-]
-
-
-@callback(
-    Output('object_text', 'children'),
-    Input('dropdown-selection', 'value'),
-    prevent_initial_call=True,
-)
-def update_obj_text(obj_id):
-    obj_id = int(obj_id)
-    obj_text = texts[obj_id]
-    headline = f'HObjektText: {obj_text}'
-    return headline
-
-
-@callback(
-    Output('choice-candidates', 'options'),
-    Input('dropdown-selection', 'value'),
-    prevent_initial_call=True,
-)
-def update_choice_candidates(obj_id):
-    obj_id = int(obj_id)
-    cands_obj_id = cands[obj_id]
-    choices = list(range(1, len(cands_obj_id) + 1))
-    return choices
-
-
-@callback(
-    Output('graph-output', 'figure'),
-    Input('choice-candidates', 'value'),
-    State('dropdown-selection', 'value'),
-    prevent_initial_call=True,
-)
-def update_timeline(index, obj_id):
-    obj_id = int(obj_id)
-    # title
-    obj_text = texts[obj_id]
-    title = f'HObjektText: {obj_text}'
-    # cands
-    cands_obj_id = cands[obj_id]
-    cands_choice = cands_obj_id[int(index) - 1]
-    # data
-    df = data.loc[list(cands_choice)].sort_index()  # type: ignore
-    # figure
-    fig = px.line(
-        data_frame=df,
-        x='ErstellungsDatum',
-        y='ObjektID',
-        title=title,
-        hover_data=hover_data,
-    )
-    fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
-    fig.update_xaxes(
-        tickformat='%B\n%Y',
-        rangeslider_visible=True,
-    )
-    fig.update_yaxes(type='category')
-    fig.update_layout(hovermode='x unified')
-    return fig
-
-
-@callback(
-    [Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
-    Input('choice-candidates', 'value'),
-    State('dropdown-selection', 'value'),
-    prevent_initial_call=True,
-)
-def update_table_candidates(index, obj_id):
-    obj_id = int(obj_id)
-    # cands
-    cands_obj_id = cands[obj_id]
-    cands_choice = cands_obj_id[int(index) - 1]
-    # data
-    df = data.loc[list(cands_choice)].sort_index()  # type: ignore
-    df = df.filter(items=table_feats, axis=1).sort_values(
-        by='ErstellungsDatum', ascending=True
-    )
-    cols = [{'name': i, 'id': i} for i in df.columns]
-    # convert dates to strings
-    for col in table_feats_dates:
-        df[col] = df[col].dt.strftime(r'%Y-%m-%d')
-
-    table_data = df.to_dict('records')
-    return table_data, cols
-
-
-def _start_webbrowser():
-    host = '127.0.0.1'
-    port = '8050'
-    adress = f'http://{host}:{port}/'
-    time.sleep(2)
-    webbrowser.open_new(adress)
-
-
-def main():
-    webbrowser_thread = Thread(target=_start_webbrowser, daemon=True)
-    webbrowser_thread.start()
-    app.run(debug=True)
-
-
-if __name__ == '__main__':
-    main()
--- a/scripts/dashboard/cyto.py
+++ b/scripts/dashboard/cyto.py
@@ -1,9 +1,9 @@
+import copy
 import time
 import webbrowser
 from pathlib import Path
 from threading import Thread
 from typing import cast
-import copy

 import dash_cytoscape as cyto
 from dash import Dash, Input, Output, State, dcc, html
@@ -30,20 +30,20 @@ app = Dash(__name__, external_stylesheets=external_stylesheets)

 cose_layout = {
    'name': 'cose',
-    'nodeOverlap': 20,
+    'nodeOverlap': 500,
    'refresh': 20,
    'fit': True,
-    'padding': 30,
-    'randomize': True,
-    'componentSpacing': 40,
-    'nodeRepulsion': 2000,
+    'padding': 20,
+    'randomize': False,
+    'componentSpacing': 1.2,
+    'nodeRepulsion': 1000,
    'edgeElasticity': 1000,
    'idealEdgeLength': 100,
    'nestingFactor': 1.2,
    'gravity': 50,
-    'numIter': 2000,
-    'initialTemp': 1000,
-    'coolingFactor': 0.95,
+    'numIter': 3000,
+    'initialTemp': 2000,
+    'coolingFactor': 0.7,
    'minTemp': 1.0,
    'nodeDimensionsIncludeLabels': True,
 }
@@ -108,9 +108,8 @@ my_stylesheet = [
    # {'selector': '.triangle', 'style': {'shape': 'triangle'}},
 ]

-app.layout = html.Div(
+layout = html.Div(
    [
-        html.Button('Trigger JS Layout', id='test_js'),
        html.Button('Trigger JS Weight', id='test_js_weight'),
        html.Div(id='output'),
        html.Div(
@@ -166,11 +165,13 @@ app.layout = html.Div(
                    style={'width': '40%'},
                ),
                html.H3('Graph'),
+                html.Button('Re-Layout', id='trigger_relayout'),
                html.Div(
                    [
                        cyto.Cytoscape(
                            id='cytoscape-graph',
                            style={'width': '100%', 'height': '600px'},
+                            layout=cose_layout,
                            stylesheet=my_stylesheet,
                            elements=cyto_data_base,
                            zoom=1,
@@ -192,6 +193,9 @@ app.layout = html.Div(
 )


+app.layout = layout
+
+
@app.callback(
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
    Input('layout_choice', 'value'),
@@ -266,17 +270,17 @@ app.clientside_callback(
    """
    function(n_clicks, layout) {
        layout.edgeElasticity = function(edge) {
-            return edge.data().weight * 4;
+            return edge.data().weight * 0.05;
        };
        layout.idealEdgeLength = function(edge) {
-            return edge.data().weight * 0.8;
+            return edge.data().weight * 0.4;
        };
        cy.layout(layout).run();
        return layout;
    }
    """,
    Output('cytoscape-graph', 'layout', allow_duplicate=True),
-    Input('test_js', 'n_clicks'),
+    Input('trigger_relayout', 'n_clicks'),
    State('cytoscape-graph', 'layout'),
    prevent_initial_call=True,
 )
--- a/scripts/dashboard/cyto_2.py
+++ b/scripts/dashboard/cyto_2.py
@@ -1,368 +0,0 @@
-import json
-import os
-
-import dash
-import dash_cytoscape as cyto
-from dash import Input, Output, State, callback, dcc, html
-
-# Load extra layouts
-cyto.load_extra_layouts()
-
-
-# Display utility functions
-def _merge(a, b):
-    return dict(a, **b)
-
-
-def _omit(omitted_keys, d):
-    return {k: v for k, v in d.items() if k not in omitted_keys}
-
-
-# Custom Display Components
-def Card(children, **kwargs):
-    return html.Section(
-        children,
-        style=_merge(
-            {
-                'padding': 20,
-                'margin': 5,
-                'borderRadius': 5,
-                'border': 'thin lightgrey solid',
-                'background-color': 'white',
-                # Remove possibility to select the text for better UX
-                'user-select': 'none',
-                '-moz-user-select': 'none',
-                '-webkit-user-select': 'none',
-                '-ms-user-select': 'none',
-            },
-            kwargs.get('style', {}),
-        ),
-        **_omit(['style'], kwargs),
-    )
-
-
-def SectionTitle(title, size, align='center', color='#222'):
-    return html.Div(
-        style={'text-align': align, 'color': color},
-        children=dcc.Markdown('#' * size + ' ' + title),
-    )
-
-
-def NamedCard(title, size, children, **kwargs):
-    size = min(size, 6)
-    size = max(size, 1)
-
-    return html.Div([Card([SectionTitle(title, size, align='left')] + children, **kwargs)])
-
-
-def NamedSlider(name, **kwargs):
-    return html.Div(
-        style={'padding': '20px 10px 25px 4px'},
-        children=[
-            html.P(f'{name}:'),
-            html.Div(style={'margin-left': '6px'}, children=dcc.Slider(**kwargs)),
-        ],
-    )
-
-
-def NamedDropdown(name, **kwargs):
-    return html.Div(
-        style={'margin': '10px 0px'},
-        children=[
-            html.P(children=f'{name}:', style={'margin-left': '3px'}),
-            dcc.Dropdown(**kwargs),
-        ],
-    )
-
-
-def NamedRadioItems(name, **kwargs):
-    return html.Div(
-        style={'padding': '20px 10px 25px 4px'},
-        children=[html.P(children=f'{name}:'), dcc.RadioItems(**kwargs)],
-    )
-
-
-def NamedInput(name, **kwargs):
-    return html.Div(children=[html.P(children=f'{name}:'), dcc.Input(**kwargs)])
-
-
-# Utils
-def DropdownOptionsList(*args):
-    return [{'label': val.capitalize(), 'value': val} for val in args]
-
-
-asset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'assets')
-
-app = dash.Dash(__name__, assets_folder=asset_path)
-server = app.server
-
-
-# ###################### DATA PREPROCESSING ######################
-# Load data
-with open('sample_network.txt', 'r', encoding='utf-8') as f:
-    network_data = f.read().split('\n')
-
-# We select the first 750 edges and associated nodes for an easier visualization
-edges = network_data[:750]
-nodes = set()
-
-following_node_di = {}  # user id -> list of users they are following
-following_edges_di = {}  # user id -> list of cy edges starting from user id
-
-followers_node_di = {}  # user id -> list of followers (cy_node format)
-followers_edges_di = {}  # user id -> list of cy edges ending at user id
-
-cy_edges = []
-cy_nodes = []
-
-for edge in edges:
-    if ' ' not in edge:
-        continue
-
-    source, target = edge.split(' ')
-
-    cy_edge = {'data': {'id': source + target, 'source': source, 'target': target}}
-    cy_target = {'data': {'id': target, 'label': 'User #' + str(target[-5:])}}
-    cy_source = {'data': {'id': source, 'label': 'User #' + str(source[-5:])}}
-
-    if source not in nodes:
-        nodes.add(source)
-        cy_nodes.append(cy_source)
-    if target not in nodes:
-        nodes.add(target)
-        cy_nodes.append(cy_target)
-
-    # Process dictionary of following
-    if not following_node_di.get(source):
-        following_node_di[source] = []
-    if not following_edges_di.get(source):
-        following_edges_di[source] = []
-
-    following_node_di[source].append(cy_target)
-    following_edges_di[source].append(cy_edge)
-
-    # Process dictionary of followers
-    if not followers_node_di.get(target):
-        followers_node_di[target] = []
-    if not followers_edges_di.get(target):
-        followers_edges_di[target] = []
-
-    followers_node_di[target].append(cy_source)
-    followers_edges_di[target].append(cy_edge)
-
-genesis_node = cy_nodes[0]
-genesis_node['classes'] = 'genesis'
-default_elements = [genesis_node]
-
-default_stylesheet = [
-    {'selector': 'node', 'style': {'opacity': 0.65, 'z-index': 9999}},
-    {
-        'selector': 'edge',
-        'style': {'curve-style': 'bezier', 'opacity': 0.45, 'z-index': 5000},
-    },
-    {'selector': '.followerNode', 'style': {'background-color': '#0074D9'}},
-    {
-        'selector': '.followerEdge',
-        'style': {
-            'mid-target-arrow-color': 'blue',
-            'mid-target-arrow-shape': 'vee',
-            'line-color': '#0074D9',
-        },
-    },
-    {'selector': '.followingNode', 'style': {'background-color': '#FF4136'}},
-    {
-        'selector': '.followingEdge',
-        'style': {
-            'mid-target-arrow-color': 'red',
-            'mid-target-arrow-shape': 'vee',
-            'line-color': '#FF4136',
-        },
-    },
-    {
-        'selector': '.genesis',
-        'style': {
-            'background-color': '#B10DC9',
-            'border-width': 2,
-            'border-color': 'purple',
-            'border-opacity': 1,
-            'opacity': 1,
-            'label': 'data(label)',
-            'color': '#B10DC9',
-            'text-opacity': 1,
-            'font-size': 12,
-            'z-index': 9999,
-        },
-    },
-    {
-        'selector': ':selected',
-        'style': {
-            'border-width': 2,
-            'border-color': 'black',
-            'border-opacity': 1,
-            'opacity': 1,
-            'label': 'data(label)',
-            'color': 'black',
-            'font-size': 12,
-            'z-index': 9999,
-        },
-    },
-]
-
-# ################################# APP LAYOUT ################################
-styles = {
-    'json-output': {
-        'overflow-y': 'scroll',
-        'height': 'calc(50% - 25px)',
-        'border': 'thin lightgrey solid',
-    },
-    'tab': {'height': 'calc(98vh - 80px)'},
-}
-
-app.layout = html.Div(
-    [
-        html.Div(
-            className='eight columns',
-            children=[
-                cyto.Cytoscape(
-                    id='cytoscape',
-                    elements=default_elements,
-                    stylesheet=default_stylesheet,
-                    style={'height': '95vh', 'width': '100%'},
-                )
-            ],
-        ),
-        html.Div(
-            className='four columns',
-            children=[
-                dcc.Tabs(
-                    id='tabs',
-                    children=[
-                        dcc.Tab(
-                            label='Control Panel',
-                            children=[
-                                NamedDropdown(
-                                    name='Layout',
-                                    id='dropdown-layout',
-                                    options=DropdownOptionsList(
-                                        'random',
-                                        'grid',
-                                        'circle',
-                                        'concentric',
-                                        'breadthfirst',
-                                        'cose',
-                                        'cose-bilkent',
-                                        'dagre',
-                                        'cola',
-                                        'klay',
-                                        'spread',
-                                        'euler',
-                                    ),
-                                    value='grid',
-                                    clearable=False,
-                                ),
-                                NamedRadioItems(
-                                    name='Expand',
-                                    id='radio-expand',
-                                    options=DropdownOptionsList('followers', 'following'),
-                                    value='followers',
-                                ),
-                            ],
-                        ),
-                        dcc.Tab(
-                            label='JSON',
-                            children=[
-                                html.Div(
-                                    style=styles['tab'],
-                                    children=[
-                                        html.P('Node Object JSON:'),
-                                        html.Pre(
-                                            id='tap-node-json-output',
-                                            style=styles['json-output'],
-                                        ),
-                                        html.P('Edge Object JSON:'),
-                                        html.Pre(
-                                            id='tap-edge-json-output',
-                                            style=styles['json-output'],
-                                        ),
-                                    ],
-                                )
-                            ],
-                        ),
-                    ],
-                ),
-            ],
-        ),
-    ]
-)
-
-
-# ############################## CALLBACKS ####################################
-@callback(Output('tap-node-json-output', 'children'), Input('cytoscape', 'tapNode'))
-def display_tap_node(data):
-    return json.dumps(data, indent=2)
-
-
-@callback(Output('tap-edge-json-output', 'children'), Input('cytoscape', 'tapEdge'))
-def display_tap_edge(data):
-    return json.dumps(data, indent=2)
-
-
-@callback(Output('cytoscape', 'layout'), Input('dropdown-layout', 'value'))
-def update_cytoscape_layout(layout):
-    return {'name': layout}
-
-
-@callback(
-    Output('cytoscape', 'elements'),
-    Input('cytoscape', 'tapNodeData'),
-    State('cytoscape', 'elements'),
-    State('radio-expand', 'value'),
-)
-def generate_elements(nodeData, elements, expansion_mode):
-    if not nodeData:
-        return default_elements
-
-    # If the node has already been expanded, we don't expand it again
-    if nodeData.get('expanded'):
-        return elements
-
-    # This retrieves the currently selected element, and tag it as expanded
-    for element in elements:
-        if nodeData['id'] == element.get('data').get('id'):
-            element['data']['expanded'] = True
-            break
-
-    if expansion_mode == 'followers':
-        followers_nodes = followers_node_di.get(nodeData['id'])
-        followers_edges = followers_edges_di.get(nodeData['id'])
-
-        if followers_nodes:
-            for node in followers_nodes:
-                node['classes'] = 'followerNode'
-            elements.extend(followers_nodes)
-
-        if followers_edges:
-            for follower_edge in followers_edges:
-                follower_edge['classes'] = 'followerEdge'
-            elements.extend(followers_edges)
-
-    elif expansion_mode == 'following':
-        following_nodes = following_node_di.get(nodeData['id'])
-        following_edges = following_edges_di.get(nodeData['id'])
-
-        if following_nodes:
-            for node in following_nodes:
-                if node['data']['id'] != genesis_node['data']['id']:
-                    node['classes'] = 'followingNode'
-                    elements.append(node)
-
-        if following_edges:
-            for follower_edge in following_edges:
-                follower_edge['classes'] = 'followingEdge'
-            elements.extend(following_edges)
-
-    return elements
-
-
-if __name__ == '__main__':
-    app.run_server(debug=True)
--- a/scripts/dashboard/sample_network.txt
+++ b/scripts/dashboard/sample_network.txt
--- a/scripts/dashboard/timeline.py
+++ b/scripts/dashboard/timeline.py
@@ -0,0 +1,507 @@
+import time
+import webbrowser
+from pathlib import Path
+from threading import Thread
+from typing import cast
+
+import dash_cytoscape as cyto
+import pandas as pd
+import plotly.express as px
+from dash import (
+    Dash,
+    Input,
+    Output,
+    State,
+    callback,
+    dash_table,
+    dcc,
+    html,
+)
+from pandas import DataFrame
+
+from lang_main.analysis import graphs
+from lang_main.io import load_pickle
+from lang_main.types import ObjectID, TimelineCandidates
+from lang_main.analysis import tokens
+from lang_main.constants import SPCY_MODEL
+
+# NOTE: all relative paths in this module are resolved against the current
+# working directory (`Path.resolve`), not against the location of this file
+# ** data
+# pickle that is cast to a 1-tuple holding the dataset as DataFrame
+p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
+# pickle that is cast to a tuple of (timeline candidates, ObjectID -> text)
+p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
+ret = cast(tuple[DataFrame], load_pickle(p_df))
+data = ret[0]
+ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
+cands = ret[0]
+texts = ret[1]
+
+# `data`, `cands` and `texts` are module-level state read by the callbacks:
+# - `data`: rows are selected via `data.loc[...]` with the entries of a candidate
+# - `cands`: indexed by object ID first, then by candidate position
+# - `texts`: indexed by object ID, yields the text shown in the headline
+# NOTE(review): the hard-coded result folder presumably has to match `results`
+# in lang_main_config.toml — confirm whenever one of them is changed
+
+table_feats = [  # columns kept for the candidates table
+    'ErstellungsDatum',
+    'ErledigungsDatum',
+    'VorgangsTypName',
+    'VorgangsBeschreibung',
+]
+table_feats_dates = [  # date columns that are formatted as strings for the table
+    'ErstellungsDatum',
+    'ErledigungsDatum',
+]
+
+# ** figure config
+markers = {  # marker style applied to the timeline trace
+    'size': 12,
+    'color': 'yellow',
+    'line': {
+        'width': 2,
+        'color': 'red',
+    },
+}
+hover_data = {  # passed as `hover_data` to `px.line`
+    'ErstellungsDatum': '|%d.%m.%Y',
+    'VorgangsBeschreibung': True,
+}
+
+# ** graphs
+target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
+p = Path(target).resolve()
+ret = load_pickle(p)
+tk_graph = cast(graphs.TokenGraph, ret[0])
+tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
+tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
+# NOTE(review): the token graph is read from `test_20240529`, the timeline data
+# from `test_20240619` — confirm that mixing both result folders is intended
+cyto_data_base, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
+# weight range of the default graph; used as limits of the weight inputs
+MIN_WEIGHT = weight_data['min']
+MAX_WEIGHT = weight_data['max']
+
+cyto.load_extra_layouts()  # presumably needed for the 'cola'/'euler' choices — confirm
+
+cose_layout = {  # options of the 'cose' layout, used as initial graph layout
+    'name': 'cose',
+    'nodeOverlap': 500,
+    'refresh': 20,
+    'fit': True,
+    'padding': 20,
+    'randomize': False,
+    'componentSpacing': 1.2,
+    'nodeRepulsion': 1000,
+    'edgeElasticity': 1000,  # replaced by a weight-based function on 'Re-Layout'
+    'idealEdgeLength': 100,  # replaced by a weight-based function on 'Re-Layout'
+    'nestingFactor': 1.2,
+    'gravity': 50,
+    'numIter': 3000,
+    'initialTemp': 2000,
+    'coolingFactor': 0.7,
+    'minTemp': 1.0,
+    'nodeDimensionsIncludeLabels': True,
+}
+
+my_stylesheet = [
+    # Group selectors (apply to every node / every edge)
+    {
+        'selector': 'node',
+        'style': {
+            'shape': 'circle',
+            'content': 'data(label)',
+            'background-color': '#B10DC9',
+            'border-width': 2,
+            'border-color': 'black',
+            'border-opacity': 1,
+            'opacity': 1,
+            'color': 'black',
+            'text-opacity': 1,
+            'font-size': 12,
+            'z-index': 9999,
+        },
+    },
+    {
+        'selector': 'edge',
+        'style': {
+            # no 'width' here: it is assigned in the browser by the clientside
+            # callback triggered via 'test_js_weight', which writes to
+            # `stylesheet[1].style.width` — this entry therefore has to stay
+            # the second element of the list
+            'curve-style': 'bezier',
+            'line-color': 'grey',
+            'line-style': 'solid',
+            'line-opacity': 1,
+        },
+    },
+    # Class selectors
+    # {'selector': '.red', 'style': {'background-color': 'red', 'line-color': 'red'}},
+    # {'selector': '.triangle', 'style': {'shape': 'triangle'}},
+]
+
+# ** app
+external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
+app = Dash(__name__, external_stylesheets=external_stylesheets)
+
+graph_layout = html.Div(  # token graph section: controls and the Cytoscape view
+    [
+        html.Button('Trigger JS Weight', id='test_js_weight'),  # edge-width JS callback
+        html.Button('Trigger Candidate Graph', id='cand_graph'),  # update_graph_candidates
+        html.Div(id='output'),  # NOTE(review): not targeted by any callback in this file
+        html.Div(
+            [
+                html.H2('Token Graph', style={'margin': 0}),
+                html.Button(
+                    'Reset Default',
+                    id='bt-reset',
+                    style={
+                        'marginLeft': 'auto',
+                        'width': '300px',
+                    },
+                ),
+            ],
+            style={
+                'display': 'flex',
+                'marginBottom': '1em',
+            },
+        ),
+        html.H3('Layout'),
+        dcc.Dropdown(
+            id='layout_choice',
+            options=[
+                'cose',
+                'cola',
+                'euler',
+                'random',
+            ],
+            value='cose',
+            clearable=False,
+        ),
+        html.Div(
+            [
+                html.H3('Graph Filter'),  # both inputs are read by update_edge_weight
+                dcc.Input(
+                    id='weight_min',
+                    type='number',
+                    min=MIN_WEIGHT,
+                    max=MAX_WEIGHT,
+                    step=1,
+                    placeholder=f'Minimum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
+                    debounce=True,
+                    style={'width': '40%'},
+                ),
+                dcc.Input(
+                    id='weight_max',
+                    type='number',
+                    min=MIN_WEIGHT,
+                    max=MAX_WEIGHT,
+                    step=1,
+                    placeholder=f'Maximum edge weight: {MIN_WEIGHT} - {MAX_WEIGHT}',
+                    debounce=True,
+                    style={'width': '40%'},
+                ),
+                html.H3('Graph'),
+                html.Button('Re-Layout', id='trigger_relayout'),  # relayout JS callback
+                html.Div(
+                    [
+                        cyto.Cytoscape(
+                            id='cytoscape-graph',
+                            style={'width': '100%', 'height': '600px'},
+                            layout=cose_layout,
+                            stylesheet=my_stylesheet,
+                            elements=cyto_data_base,
+                            zoom=1,
+                        ),
+                    ],
+                    style={
+                        'border': '3px solid black',
+                        'borderRadius': '25px',
+                        'marginTop': '1em',
+                        'marginBottom': '2em',
+                        'padding': '7px',
+                    },
+                ),
+            ],
+            style={'marginTop': '1em'},
+        ),
+    ],
+)
+
+app.layout = html.Div(  # page: object picker, timeline, candidates table, token graph
+    [
+        html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
+        html.Div(
+            children=[
+                html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
+                dcc.Dropdown(
+                    list(cands.keys()),  # selectable object IDs
+                    id='dropdown-selection',
+                    placeholder='ObjektID auswählen...',
+                ),
+            ]
+        ),
+        html.Div(
+            children=[
+                html.H3(id='object_text'),  # headline set by update_obj_text
+                dcc.Dropdown(id='choice-candidates'),  # filled by update_choice_candidates
+                dcc.Graph(id='graph-output'),  # figure set by update_timeline
+            ]
+        ),
+        html.Div(
+            [dash_table.DataTable(id='table-candidates')], style={'marginBottom': '2em'}
+        ),
+        graph_layout,
+    ],
+    style={'margin': '2em'},
+)
+
+
+@callback(
+    Output('object_text', 'children'),
+    Input('dropdown-selection', 'value'),
+    prevent_initial_call=True,
+)
+def update_obj_text(obj_id):
+    """Return the headline for the selected object ID, or '' once it is cleared."""
+    if obj_id is None:  # a cleared dropdown sends None, which `int()` rejects
+        return ''
+    return f'HObjektText: {texts[int(obj_id)]}'
+
+
+@callback(
+    Output('choice-candidates', 'options'),
+    Input('dropdown-selection', 'value'),
+    prevent_initial_call=True,
+)
+def update_choice_candidates(obj_id):
+    """List the 1-based candidate numbers selectable for the chosen object ID."""
+    if obj_id is None:  # a cleared dropdown sends None: offer no candidates
+        return []
+    return list(range(1, len(cands[int(obj_id)]) + 1))
+
+
+@callback(
+    Output('graph-output', 'figure'),
+    Input('choice-candidates', 'value'),
+    State('dropdown-selection', 'value'),
+    prevent_initial_call=True,
+)
+def update_timeline(index, obj_id):
+    """Plot the rows of the chosen timeline candidate over 'ErstellungsDatum'."""
+    if index is None or obj_id is None:
+        # a cleared dropdown sends None: show an empty figure instead of failing
+        return {}
+
+    # title
+    title = f'HObjektText: {texts[int(obj_id)]}'
+    # data: rows of the chosen candidate (same helper as the candidates table)
+    df = pre_filter_data(data, idx=index, obj_id=obj_id)
+    # figure
+    fig = px.line(
+        data_frame=df,
+        x='ErstellungsDatum',
+        y='ObjektID',
+        title=title,
+        hover_data=hover_data,
+    )
+    fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
+    fig.update_xaxes(
+        tickformat='%B\n%Y',
+        rangeslider_visible=True,
+    )
+    fig.update_yaxes(type='category')
+    fig.update_layout(hovermode='x unified')
+    return fig
+
+
+@callback(
+    [Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
+    Input('choice-candidates', 'value'),
+    State('dropdown-selection', 'value'),
+    prevent_initial_call=True,
+)
+def update_table_candidates(index, obj_id):
+    """Return the rows and column specs of the chosen candidate for the table."""
+    if index is None or obj_id is None:
+        # a cleared dropdown sends None: empty the table instead of failing
+        return [], []
+
+    df = pre_filter_data(data, idx=index, obj_id=obj_id)
+    # keep only the displayed columns, ordered by ascending 'ErstellungsDatum'
+    df = df.filter(items=table_feats, axis=1).sort_values(
+        by='ErstellungsDatum', ascending=True
+    )
+    cols = [{'name': i, 'id': i} for i in df.columns]
+    # convert dates to strings
+    for col in table_feats_dates:
+        df[col] = df[col].dt.strftime(r'%Y-%m-%d')
+
+    table_data = df.to_dict('records')
+    return table_data, cols
+
+
+def pre_filter_data(
+    data: DataFrame,
+    idx: int,
+    obj_id: ObjectID,
+) -> DataFrame:
+    """Return the rows of `data` belonging to candidate number `idx` of `obj_id`.
+
+    `idx` is 1-based (as offered in the candidate dropdown); the candidate's
+    index labels are looked up in the module-level `cands`.
+    """
+    cands_choice = cands[int(obj_id)][int(idx) - 1]
+    # list-based `.loc` already returns new data, so the source frame is no
+    # longer copied as a whole beforehand
+    return data.loc[list(cands_choice)].sort_index()  # type: ignore
+
+
+# ** graph
+@app.callback(
+    Output('cytoscape-graph', 'elements', allow_duplicate=True),
+    Output('weight_min', 'min', allow_duplicate=True),
+    Output('weight_min', 'max', allow_duplicate=True),
+    Output('weight_min', 'placeholder', allow_duplicate=True),
+    Output('weight_max', 'min', allow_duplicate=True),
+    Output('weight_max', 'max', allow_duplicate=True),
+    Output('weight_max', 'placeholder', allow_duplicate=True),
+    Input('cand_graph', 'n_clicks'),
+    State('choice-candidates', 'value'),
+    State('dropdown-selection', 'value'),
+    prevent_initial_call=True,
+)
+def update_graph_candidates(_, index, obj_id):
+    """Build the token graph of the chosen candidate and reset the weight inputs.
+
+    Returns the Cytoscape elements followed by (min, max, placeholder) for the
+    'weight_min' input and the same triple for the 'weight_max' input.
+    """
+    df = pre_filter_data(data, idx=index, obj_id=obj_id)
+    tk_graph_cands, _ = tokens.build_token_graph(
+        data=df,
+        model=SPCY_MODEL,
+        target_feature='VorgangsBeschreibung',
+        build_map=False,
+    )
+    cyto_data, weight_info = graphs.convert_graph_to_cytoscape(tk_graph_cands)
+    weight_min, weight_max = weight_info['min'], weight_info['max']
+    weight_range = f'{weight_min} - {weight_max}'
+    # the upper-bound input has to be labelled 'Maximum', as in the initial layout
+    placeholder_min = f'Minimum edge weight: {weight_range}'
+    placeholder_max = f'Maximum edge weight: {weight_range}'
+    limits = (weight_min, weight_max)
+
+    return (cyto_data, *limits, placeholder_min, *limits, placeholder_max)
+
+
+@app.callback(
+    Output('cytoscape-graph', 'layout', allow_duplicate=True),
+    Input('layout_choice', 'value'),
+    prevent_initial_call=True,
+)
+def update_layout_internal(layout_choice):
+    """Return the layout config for the algorithm picked in the layout dropdown."""
+    if layout_choice == 'cose':  # tuned options exist only for 'cose'
+        return cose_layout
+    return {'name': layout_choice}
+
+
+@app.callback(
+    Output('cytoscape-graph', 'zoom'),
+    Output('cytoscape-graph', 'elements', allow_duplicate=True),
+    Output('weight_min', 'value'),
+    Output('weight_max', 'value'),
+    Input('bt-reset', 'n_clicks'),
+    prevent_initial_call=True,
+)
+def reset_layout(n_clicks):  # restores zoom 1, the default graph and empty weights
+    return 1, cyto_data_base, None, None
+
+
+# edge weight filter
+@app.callback(
+    Output('cytoscape-graph', 'elements', allow_duplicate=True),
+    Input('weight_min', 'value'),
+    Input('weight_max', 'value'),
+    prevent_initial_call=True,
+)
+def update_edge_weight(weight_min, weight_max):
+    """Re-filter the full token graph to the edge weight range typed by the user."""
+    if not weight_min and not weight_max:
+        # nothing entered: show the pre-filtered default graph again
+        return cyto_data_base
+
+    # an open bound falls back to the weight limit of the default graph
+    lower = MIN_WEIGHT if weight_min is None else weight_min
+    upper = MAX_WEIGHT if weight_max is None else weight_max
+    graph_in_range = graphs.filter_graph_by_edge_weight(tk_graph, lower, upper)
+    # presumably drops the nodes left without any edge — confirm in `graphs`
+    graph_connected = graphs.filter_graph_by_node_degree(graph_in_range, 1, None)
+    cyto_data, _ = graphs.convert_graph_to_cytoscape(graph_connected)
+    return cyto_data
+
+
+app.clientside_callback(  # 'Re-Layout' button: re-run the layout in the browser
+    """
+    function(n_clicks, layout) {
+        layout.edgeElasticity = function(edge) {
+            return edge.data().weight * 0.05;
+        };
+        layout.idealEdgeLength = function(edge) {
+            return edge.data().weight * 0.4;
+        };
+        cy.layout(layout).run();
+        return layout;
+    }
+    """,  # JS sets weight-based edgeElasticity/idealEdgeLength, then runs the layout
+    Output('cytoscape-graph', 'layout', allow_duplicate=True),
+    Input('trigger_relayout', 'n_clicks'),
+    State('cytoscape-graph', 'layout'),  # current layout, modified by the JS above
+    prevent_initial_call=True,  # NOTE(review): JS relies on a global `cy` — confirm
+)
+
+app.clientside_callback(  # derives the edge width from the edge weight in the browser
+    """
+    function(n_clicks, stylesheet) {
+        function edge_weight(ele) {
+            let threshold = 1000;
+            let weight = ele.data('weight');
+            if (weight > threshold) {
+                weight = 12;
+            } else {
+                weight = weight / threshold * 10;
+                weight = Math.max(1, weight);
+            }
+            return weight;
+        }
+        stylesheet[1].style.width = edge_weight;
+        cy.style(stylesheet).update();
+        return stylesheet;
+    }
+    """,  # JS: width = weight / 1000 * 10, at least 1; weights above 1000 get 12
+    Output('cytoscape-graph', 'stylesheet'),
+    Input('test_js_weight', 'n_clicks'),
+    State('cytoscape-graph', 'stylesheet'),  # entry [1] has to be the 'edge' selector
+    prevent_initial_call=False,  # not suppressed on page load, unlike the other callbacks
+)
+
+
+def _start_webbrowser():
+    """Open the dashboard URL in a new browser window after a two-second delay."""
+    host, port = '127.0.0.1', '8050'
+    address = f'http://{host}:{port}/'
+    time.sleep(2)
+    webbrowser.open_new(address)
+
+
+def main():
+    """Start the browser-opener thread, then serve the dashboard in debug mode."""
+    Thread(target=_start_webbrowser, daemon=True).start()
+    app.run(debug=True)
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/lang_main_config
+++ b/scripts/lang_main_config
@@ -1,38 +0,0 @@
-# lang_main: Config file
-
-[paths]
-inputs = 'A:/Arbeitsaufgaben/lang-main/scripts'
-results = 'A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/'
-dataset = 'A:/Arbeitsaufgaben/lang-main/data/02_202307/Export4.csv'
-#results = './results/Export7/'
-#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
-#results = './results/Export7_trunc/'
-#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
-
-[control]
-preprocessing = true
-preprocessing_skip = false
-token_analysis = false
-token_analysis_skip = true
-graph_postprocessing = false
-graph_postprocessing_skip = true
-
-#[export_filenames]
-#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
-
-[preprocess]
-filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
-date_cols = [
-    "VorgangsDatum", 
-    "ErledigungsDatum", 
-    "Arbeitsbeginn", 
-    "ErstellungsDatum",
-]
-threshold_amount_characters = 5
-threshold_similarity = 0.8
-
-[graph_postprocessing]
-threshold_edge_weight = 150
-
-[time_analysis]
-threshold_unique_texts = 5
--- a/scripts/lang_main_config.toml
+++ b/scripts/lang_main_config.toml
@@ -2,22 +2,20 @@

 [paths]
 inputs = './inputs/'
-results = './results/test_20240529/'
+results = './results/test_20240619/'
 dataset = '../data/02_202307/Export4.csv'
 #results = './results/Export7/'
 #dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
 #results = './results/Export7_trunc/'
 #dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'

+# debugging switches only; production-ready pipelines should always
+# be executed in full
 [control]
-preprocessing = false
-preprocessing_skip = false
-token_analysis = true
-token_analysis_skip = false
-graph_postprocessing = false
+preprocessing_skip = true
+token_analysis_skip = true
 graph_postprocessing_skip = true
-time_analysis = false
-time_analysis_skip = true
+time_analysis_skip = false

 #[export_filenames]
 #filename_cossim_filter_candidates = 'CosSim-FilterCandidates'