using dash-cytoscape

Florian Förster 2024-06-05 16:37:23 +02:00
parent b3cc012791
commit b3e35e7dd1
33 changed files with 12332 additions and 110 deletions

pdm.lock (generated; 15 lines changed)

@ -5,7 +5,7 @@
groups = ["default", "notebooks", "trials"]
strategy = ["cross_platform", "inherit_metadata"]
lock_version = "4.4.1"
content_hash = "sha256:7574154c6728ede3eaf76a8b1a3b5d4339fcc8f2dc8c41042401004b6583e151"
content_hash = "sha256:8781981bde2786c60273cd73599f4ab6a388d0b435484d5ba0afa0656723dd98"
[[package]]
name = "annotated-types"
@ -432,6 +432,19 @@ files = [
{file = "dash_core_components-2.0.0.tar.gz", hash = "sha256:c6733874af975e552f95a1398a16c2ee7df14ce43fa60bb3718a3c6e0b63ffee"},
]
[[package]]
name = "dash-cytoscape"
version = "1.0.1"
requires_python = ">=3.8"
summary = "A Component Library for Dash aimed at facilitating network visualization in Python, wrapped around Cytoscape.js"
groups = ["trials"]
dependencies = [
"dash",
]
files = [
{file = "dash_cytoscape-1.0.1.tar.gz", hash = "sha256:1bcd1587b2d8b432945585e2295e76393d3eb829f606c198693cd2b45bea6adc"},
]
[[package]]
name = "dash-html-components"
version = "2.0.0"


@ -33,6 +33,7 @@ notebooks = [
trials = [
"plotly>=5.22.0",
"dash>=2.17.0",
"dash-cytoscape>=1.0.1",
]
[tool.ruff]
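The trials group now pins dash-cytoscape, which the new dashboard scripts below build on. For reference, a minimal dash-cytoscape app follows the pattern sketched here (component id, element values and debug settings are illustrative, not part of this commit):

import dash_cytoscape as cyto
from dash import Dash, html

# Smallest useful setup: one Cytoscape component with two nodes and one edge.
app = Dash(__name__)
app.layout = html.Div(
    [
        cyto.Cytoscape(
            id='minimal-graph',
            layout={'name': 'cose'},  # force-directed layout bundled with Cytoscape.js
            style={'width': '100%', 'height': '400px'},
            elements=[
                {'data': {'id': 'a', 'label': 'Node A'}},
                {'data': {'id': 'b', 'label': 'Node B'}},
                {'data': {'source': 'a', 'target': 'b', 'weight': 1}},
            ],
        )
    ]
)

if __name__ == '__main__':
    app.run(debug=True)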


@ -28,6 +28,8 @@ from lang_main.pipelines.predefined import (
)
from lang_main.types import (
ObjectID,
PandasIndex,
SpacyDoc,
TimelineCandidates,
)
from pandas import DataFrame, Series
@ -37,7 +39,7 @@ from pandas import DataFrame, Series
def run_preprocessing() -> DataFrame:
create_saving_folder(
saving_path_folder=SAVE_PATH_FOLDER,
overwrite_existing=True,
overwrite_existing=False,
)
# run pipelines
ret = typing.cast(
@ -56,15 +58,16 @@ def run_preprocessing() -> DataFrame:
def run_token_analysis(
preprocessed_data: DataFrame,
) -> TokenGraph:
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
# build token graph
(tk_graph,) = typing.cast(
tuple[TokenGraph], pipe_token_analysis.run(starting_values=(preprocessed_data,))
(tk_graph, docs_mapping) = typing.cast(
tuple[TokenGraph, dict[PandasIndex, SpacyDoc]],
pipe_token_analysis.run(starting_values=(preprocessed_data,)),
)
tk_graph.save_graph(SAVE_PATH_FOLDER, directed=False)
tk_graph.to_pickle(SAVE_PATH_FOLDER, filename=f'{pipe_token_analysis.name}-TokenGraph')
return tk_graph
return tk_graph, docs_mapping
def run_graph_postprocessing(
@ -127,9 +130,9 @@ def main() -> None:
'Preprocessing step skipped. Token analysis cannot be performed.'
)
preprocessed_data_trunc = typing.cast(
DataFrame, preprocessed_data[['entry', 'num_occur']].copy()
DataFrame, preprocessed_data[['batched_idxs', 'entry', 'num_occur']].copy()
) # type: ignore
tk_graph = run_token_analysis(preprocessed_data_trunc)
tk_graph, docs_mapping = run_token_analysis(preprocessed_data_trunc)
elif not SKIP_TOKEN_ANALYSIS:
# !! hardcoded result filenames
# whole graph


@ -16,7 +16,6 @@ from dash import (
dcc,
html,
)
from lang_main import CALLER_PATH
from lang_main.io import load_pickle
from lang_main.types import ObjectID, TimelineCandidates
from pandas import DataFrame
@ -24,12 +23,8 @@ from pandas import DataFrame
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
# ** data
# p_df = Path(r'.\test-notebooks\dashboard\Pipe-TargetFeature_Step-3_remove_NA.pkl')
p_df = CALLER_PATH.joinpath('./Pipe-TargetFeature_Step-3_remove_NA.pkl')
# p_tl = Path(
# r'.\test-notebooks\dashboard\Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl'
# )
p_tl = CALLER_PATH.joinpath('./Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl')
p_df = Path(r'./Pipe-TargetFeature_Step-3_remove_NA.pkl').resolve()
p_tl = Path(r'./Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl').resolve()
ret = cast(DataFrame, load_pickle(p_df))
data = ret[0]
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
@ -133,7 +128,7 @@ def update_timeline(index, obj_id):
cands_obj_id = cands[obj_id]
cands_choice = cands_obj_id[int(index) - 1]
# data
df = data.loc[list(cands_choice)].sort_index()
df = data.loc[list(cands_choice)].sort_index() # type: ignore
# figure
fig = px.line(
data_frame=df,
@ -164,7 +159,7 @@ def update_table_candidates(index, obj_id):
cands_obj_id = cands[obj_id]
cands_choice = cands_obj_id[int(index) - 1]
# data
df = data.loc[list(cands_choice)].sort_index()
df = data.loc[list(cands_choice)].sort_index() # type: ignore
df = df.filter(items=table_feats, axis=1).sort_values(
by='ErstellungsDatum', ascending=True
)

scripts/dashboard/cyto.py (new file, 203 lines)

@ -0,0 +1,203 @@
import time
import webbrowser
from pathlib import Path
from threading import Thread
from typing import cast
import dash_cytoscape as cyto
import lang_main.io
from dash import Dash, Input, Output, State, dcc, html
from lang_main.analysis import graphs
target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
p = Path(target).resolve()
ret = lang_main.io.load_pickle(p)
tk_graph = cast(graphs.TokenGraph, ret[0])
tk_graph_filtered = tk_graph.filter_by_edge_weight(150)
tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1)
cyto_data, weight_data = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
MIN_WEIGHT = weight_data['min']
MAX_WEIGHT = weight_data['max']
cyto.load_extra_layouts()
app = Dash(__name__)
my_stylesheet = [
# Group selectors
{
'selector': 'node',
'style': {
'shape': 'circle',
'content': 'data(label)',
'background-color': '#B10DC9',
'border-width': 2,
'border-color': 'black',
'border-opacity': 1,
'opacity': 1,
'color': 'black',
'text-opacity': 1,
'font-size': 12,
'z-index': 9999,
},
},
{
'selector': 'edge',
'style': {
'width': 2,
'curve-style': 'bezier',
'line-color': 'grey',
'line-style': 'solid',
'line-opacity': 1,
},
},
# Class selectors
# {'selector': '.red', 'style': {'background-color': 'red', 'line-color': 'red'}},
# {'selector': '.triangle', 'style': {'shape': 'triangle'}},
]
app.layout = html.Div(
[
html.Button('Reset', id='bt-reset'),
dcc.Dropdown(
id='layout_choice_internal',
options=[
'random',
'grid',
'circle',
'concentric',
'breadthfirst',
'cose',
],
value='cose',
clearable=False,
),
dcc.Dropdown(
id='layout_choice_external',
options=[
'cose-bilkent',
'cola',
'euler',
'spread',
'dagre',
'klay',
],
clearable=False,
),
dcc.RangeSlider(
id='weight_slider',
min=MIN_WEIGHT,
max=MAX_WEIGHT,
step=1000,
),
cyto.Cytoscape(
id='cytoscape-graph',
layout={'name': 'cose'},
style={'width': '100%', 'height': '600px'},
stylesheet=my_stylesheet,
elements=cyto_data,
zoom=1,
),
]
)
@app.callback(
Output('cytoscape-graph', 'layout', allow_duplicate=True),
Input('layout_choice_internal', 'value'),
prevent_initial_call=True,
)
def update_layout_internal(layout_choice):
return {'name': layout_choice}
@app.callback(
Output('cytoscape-graph', 'layout', allow_duplicate=True),
Input('layout_choice_external', 'value'),
prevent_initial_call=True,
)
def update_layout_external(layout_choice):
return {'name': layout_choice}
@app.callback(
Output('cytoscape-graph', 'zoom'),
Output('cytoscape-graph', 'elements'),
Input('bt-reset', 'n_clicks'),
prevent_initial_call=True,
)
def reset_layout(n_clicks):
return (1, cyto_data)
# @app.callback(
# Output('cytoscape-graph', 'stylesheet'),
# Input('weight_slider', 'value'),
# State('cytoscape-graph', 'stylesheet'),
# prevent_initial_call=True,
# )
# def select_weight(range_chosen, stylesheet):
# min_weight, max_weight = range_chosen
# new_stylesheet = stylesheet.copy()
# new_stylesheet.append(
# {
# 'selector': f'[weight >= {min_weight}]',
# 'style': {'line-color': 'blue', 'line-style': 'dashed'},
# }
# )
# new_stylesheet.append(
# {
# 'selector': f'[weight <= {max_weight}]',
# 'style': {'line-color': 'blue', 'line-style': 'dashed'},
# }
# )
# return new_stylesheet
# app.layout = html.Div(
# [
# cyto.Cytoscape(
# id='cytoscape-two-nodes',
# layout={'name': 'preset'},
# style={'width': '100%', 'height': '400px'},
# stylesheet=my_stylesheet,
# elements=[
# {
# 'data': {
# 'id': 'one',
# 'label': 'Titel 1',
# },
# 'position': {'x': 75, 'y': 75},
# 'grabbable': False,
# #'locked': True,
# 'classes': 'red',
# },
# {
# 'data': {'id': 'two', 'label': 'Title 2'},
# 'position': {'x': 200, 'y': 200},
# 'classes': 'triangle',
# },
# {'data': {'source': 'one', 'target': 'two', 'weight': 2000}},
# ],
# )
# ]
# )
def _start_webbrowser():
host = '127.0.0.1'
port = '8050'
address = f'http://{host}:{port}/'
time.sleep(2)
webbrowser.open_new(address)
def main():
webbrowser_thread = Thread(target=_start_webbrowser, daemon=True)
webbrowser_thread.start()
app.run(debug=True)
if __name__ == '__main__':
main()

scripts/dashboard/cyto_2.py (new file, 368 lines)

@ -0,0 +1,368 @@
import json
import os
import dash
import dash_cytoscape as cyto
from dash import Input, Output, State, callback, dcc, html
# Load extra layouts
cyto.load_extra_layouts()
# Display utility functions
def _merge(a, b):
return dict(a, **b)
def _omit(omitted_keys, d):
return {k: v for k, v in d.items() if k not in omitted_keys}
# Custom Display Components
def Card(children, **kwargs):
return html.Section(
children,
style=_merge(
{
'padding': 20,
'margin': 5,
'borderRadius': 5,
'border': 'thin lightgrey solid',
'background-color': 'white',
# Remove possibility to select the text for better UX
'user-select': 'none',
'-moz-user-select': 'none',
'-webkit-user-select': 'none',
'-ms-user-select': 'none',
},
kwargs.get('style', {}),
),
**_omit(['style'], kwargs),
)
def SectionTitle(title, size, align='center', color='#222'):
return html.Div(
style={'text-align': align, 'color': color},
children=dcc.Markdown('#' * size + ' ' + title),
)
def NamedCard(title, size, children, **kwargs):
size = min(size, 6)
size = max(size, 1)
return html.Div([Card([SectionTitle(title, size, align='left')] + children, **kwargs)])
def NamedSlider(name, **kwargs):
return html.Div(
style={'padding': '20px 10px 25px 4px'},
children=[
html.P(f'{name}:'),
html.Div(style={'margin-left': '6px'}, children=dcc.Slider(**kwargs)),
],
)
def NamedDropdown(name, **kwargs):
return html.Div(
style={'margin': '10px 0px'},
children=[
html.P(children=f'{name}:', style={'margin-left': '3px'}),
dcc.Dropdown(**kwargs),
],
)
def NamedRadioItems(name, **kwargs):
return html.Div(
style={'padding': '20px 10px 25px 4px'},
children=[html.P(children=f'{name}:'), dcc.RadioItems(**kwargs)],
)
def NamedInput(name, **kwargs):
return html.Div(children=[html.P(children=f'{name}:'), dcc.Input(**kwargs)])
# Utils
def DropdownOptionsList(*args):
return [{'label': val.capitalize(), 'value': val} for val in args]
asset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'assets')
app = dash.Dash(__name__, assets_folder=asset_path)
server = app.server
# ###################### DATA PREPROCESSING ######################
# Load data
with open('sample_network.txt', 'r', encoding='utf-8') as f:
network_data = f.read().split('\n')
# We select the first 750 edges and associated nodes for an easier visualization
edges = network_data[:750]
nodes = set()
following_node_di = {} # user id -> list of users they are following
following_edges_di = {} # user id -> list of cy edges starting from user id
followers_node_di = {} # user id -> list of followers (cy_node format)
followers_edges_di = {} # user id -> list of cy edges ending at user id
cy_edges = []
cy_nodes = []
for edge in edges:
if ' ' not in edge:
continue
source, target = edge.split(' ')
cy_edge = {'data': {'id': source + target, 'source': source, 'target': target}}
cy_target = {'data': {'id': target, 'label': 'User #' + str(target[-5:])}}
cy_source = {'data': {'id': source, 'label': 'User #' + str(source[-5:])}}
if source not in nodes:
nodes.add(source)
cy_nodes.append(cy_source)
if target not in nodes:
nodes.add(target)
cy_nodes.append(cy_target)
# Process dictionary of following
if not following_node_di.get(source):
following_node_di[source] = []
if not following_edges_di.get(source):
following_edges_di[source] = []
following_node_di[source].append(cy_target)
following_edges_di[source].append(cy_edge)
# Process dictionary of followers
if not followers_node_di.get(target):
followers_node_di[target] = []
if not followers_edges_di.get(target):
followers_edges_di[target] = []
followers_node_di[target].append(cy_source)
followers_edges_di[target].append(cy_edge)
genesis_node = cy_nodes[0]
genesis_node['classes'] = 'genesis'
default_elements = [genesis_node]
default_stylesheet = [
{'selector': 'node', 'style': {'opacity': 0.65, 'z-index': 9999}},
{
'selector': 'edge',
'style': {'curve-style': 'bezier', 'opacity': 0.45, 'z-index': 5000},
},
{'selector': '.followerNode', 'style': {'background-color': '#0074D9'}},
{
'selector': '.followerEdge',
'style': {
'mid-target-arrow-color': 'blue',
'mid-target-arrow-shape': 'vee',
'line-color': '#0074D9',
},
},
{'selector': '.followingNode', 'style': {'background-color': '#FF4136'}},
{
'selector': '.followingEdge',
'style': {
'mid-target-arrow-color': 'red',
'mid-target-arrow-shape': 'vee',
'line-color': '#FF4136',
},
},
{
'selector': '.genesis',
'style': {
'background-color': '#B10DC9',
'border-width': 2,
'border-color': 'purple',
'border-opacity': 1,
'opacity': 1,
'label': 'data(label)',
'color': '#B10DC9',
'text-opacity': 1,
'font-size': 12,
'z-index': 9999,
},
},
{
'selector': ':selected',
'style': {
'border-width': 2,
'border-color': 'black',
'border-opacity': 1,
'opacity': 1,
'label': 'data(label)',
'color': 'black',
'font-size': 12,
'z-index': 9999,
},
},
]
# ################################# APP LAYOUT ################################
styles = {
'json-output': {
'overflow-y': 'scroll',
'height': 'calc(50% - 25px)',
'border': 'thin lightgrey solid',
},
'tab': {'height': 'calc(98vh - 80px)'},
}
app.layout = html.Div(
[
html.Div(
className='eight columns',
children=[
cyto.Cytoscape(
id='cytoscape',
elements=default_elements,
stylesheet=default_stylesheet,
style={'height': '95vh', 'width': '100%'},
)
],
),
html.Div(
className='four columns',
children=[
dcc.Tabs(
id='tabs',
children=[
dcc.Tab(
label='Control Panel',
children=[
NamedDropdown(
name='Layout',
id='dropdown-layout',
options=DropdownOptionsList(
'random',
'grid',
'circle',
'concentric',
'breadthfirst',
'cose',
'cose-bilkent',
'dagre',
'cola',
'klay',
'spread',
'euler',
),
value='grid',
clearable=False,
),
NamedRadioItems(
name='Expand',
id='radio-expand',
options=DropdownOptionsList('followers', 'following'),
value='followers',
),
],
),
dcc.Tab(
label='JSON',
children=[
html.Div(
style=styles['tab'],
children=[
html.P('Node Object JSON:'),
html.Pre(
id='tap-node-json-output',
style=styles['json-output'],
),
html.P('Edge Object JSON:'),
html.Pre(
id='tap-edge-json-output',
style=styles['json-output'],
),
],
)
],
),
],
),
],
),
]
)
# ############################## CALLBACKS ####################################
@callback(Output('tap-node-json-output', 'children'), Input('cytoscape', 'tapNode'))
def display_tap_node(data):
return json.dumps(data, indent=2)
@callback(Output('tap-edge-json-output', 'children'), Input('cytoscape', 'tapEdge'))
def display_tap_edge(data):
return json.dumps(data, indent=2)
@callback(Output('cytoscape', 'layout'), Input('dropdown-layout', 'value'))
def update_cytoscape_layout(layout):
return {'name': layout}
@callback(
Output('cytoscape', 'elements'),
Input('cytoscape', 'tapNodeData'),
State('cytoscape', 'elements'),
State('radio-expand', 'value'),
)
def generate_elements(nodeData, elements, expansion_mode):
if not nodeData:
return default_elements
# If the node has already been expanded, we don't expand it again
if nodeData.get('expanded'):
return elements
# This retrieves the currently selected element and tags it as expanded
for element in elements:
if nodeData['id'] == element.get('data').get('id'):
element['data']['expanded'] = True
break
if expansion_mode == 'followers':
followers_nodes = followers_node_di.get(nodeData['id'])
followers_edges = followers_edges_di.get(nodeData['id'])
if followers_nodes:
for node in followers_nodes:
node['classes'] = 'followerNode'
elements.extend(followers_nodes)
if followers_edges:
for follower_edge in followers_edges:
follower_edge['classes'] = 'followerEdge'
elements.extend(followers_edges)
elif expansion_mode == 'following':
following_nodes = following_node_di.get(nodeData['id'])
following_edges = following_edges_di.get(nodeData['id'])
if following_nodes:
for node in following_nodes:
if node['data']['id'] != genesis_node['data']['id']:
node['classes'] = 'followingNode'
elements.append(node)
if following_edges:
for follower_edge in following_edges:
follower_edge['classes'] = 'followingEdge'
elements.extend(following_edges)
return elements
if __name__ == '__main__':
app.run_server(debug=True)

(File diff suppressed because it is too large.)


@ -10,14 +10,14 @@ dataset = '../data/02_202307/Export4.csv'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
[control]
preprocessing = true
preprocessing_skip = true
token_analysis = false
token_analysis_skip = true
preprocessing = false
preprocessing_skip = false
token_analysis = true
token_analysis_skip = false
graph_postprocessing = false
graph_postprocessing_skip = true
time_analysis = true
time_analysis_skip = false
time_analysis = false
time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'


@ -1,9 +1,15 @@
from pathlib import Path
from lang_main.constants import (
INPUT_PATH_FOLDER,
PATH_TO_DATASET,
SAVE_PATH_FOLDER,
input_path_conf,
)
print(SAVE_PATH_FOLDER, '\n')
print(INPUT_PATH_FOLDER, '\n')
print(PATH_TO_DATASET, '\n')
print('------------------------')
print(Path.cwd(), '\n', input_path_conf)


@ -0,0 +1,51 @@
import inspect
import logging
import shutil
import sys
from pathlib import Path
from time import gmtime
from typing import Any, Final
import warnings
from lang_main.io import load_toml_config
__all__ = [
'CALLER_PATH',
]
logging.Formatter.converter = gmtime
LOG_FMT: Final[str] = '%(asctime)s | %(module)s:%(levelname)s | %(message)s'
LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
logging.basicConfig(
stream=sys.stdout,
format=LOG_FMT,
datefmt=LOG_DATE_FMT,
)
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
USE_INTERNAL_CONFIG: Final[bool] = True
pkg_dir = Path(__file__).parent
cfg_path_internal = pkg_dir / CONFIG_FILENAME
caller_file = Path(inspect.stack()[-1].filename)
CALLER_PATH: Final[Path] = caller_file.parent.resolve()
# load config data: internal/external
if USE_INTERNAL_CONFIG:
loaded_cfg = load_toml_config(path_to_toml=cfg_path_internal)
else:
cfg_path_external = CALLER_PATH / CONFIG_FILENAME
if not caller_file.exists():
warnings.warn('Caller file could not be correctly retrieved.')
if not cfg_path_external.exists():
shutil.copy(cfg_path_internal, cfg_path_external)
sys.exit(
(
'No config file was found. A new one with default values was created '
'in the execution path. Please fill in the necessary values and '
'restart the programm.'
)
)
# raise NotImplementedError("External config data not implemented yet.")
loaded_cfg = load_toml_config(path_to_toml=cfg_path_external)
CONFIG: Final[dict[str, Any]] = loaded_cfg.copy()


@ -1,4 +1,3 @@
import inspect
import logging
import shutil
import sys
@ -8,10 +7,6 @@ from typing import Any, Final
from lang_main.io import load_toml_config
__all__ = [
'CALLER_PATH',
]
logging.Formatter.converter = gmtime
LOG_FMT: Final[str] = '%(asctime)s | %(module)s:%(levelname)s | %(message)s'
LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
@ -24,17 +19,15 @@ logging.basicConfig(
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
USE_INTERNAL_CONFIG: Final[bool] = False
pkg_dir = Path(__file__).parent
cfg_path_internal = pkg_dir / CONFIG_FILENAME
caller_file = Path(inspect.stack()[-1].filename)
CALLER_PATH: Final[Path] = caller_file.parent.resolve()
cfg_path_internal = (pkg_dir / CONFIG_FILENAME).resolve()
# caller_file = Path(inspect.stack()[-1].filename)
# CALLER_PATH: Final[Path] = caller_file.parent.resolve()
# load config data: internal/external
if USE_INTERNAL_CONFIG:
loaded_cfg = load_toml_config(path_to_toml=cfg_path_internal)
else:
cfg_path_external = CALLER_PATH / CONFIG_FILENAME
if not caller_file.exists():
raise FileNotFoundError('Caller file could not be correctly retrieved.')
cfg_path_external = (Path.cwd() / CONFIG_FILENAME).resolve()
if not cfg_path_external.exists():
shutil.copy(cfg_path_internal, cfg_path_external)
sys.exit(


@ -3,7 +3,7 @@ import sys
import typing
from collections.abc import Hashable, Iterable
from pathlib import Path
from typing import Any, Final, Literal, Self, overload
from typing import Any, Final, Literal, Self, cast, overload
import networkx as nx
import numpy as np
@ -13,6 +13,12 @@ from pandas import DataFrame
from lang_main.io import load_pickle, save_pickle
from lang_main.loggers import logger_graphs as logger
from lang_main.types import (
CytoscapeData,
EdgeWeight,
NodeTitle,
WeightData,
)
# TODO change logging behaviour, add logging to file
LOGGING_DEFAULT: Final[bool] = False
@ -67,7 +73,7 @@ def update_graph(
batch: Iterable[tuple[Hashable, Hashable]] | None = None,
parent: Hashable | None = None,
child: Hashable | None = None,
weight_connection: int = 1,
weight_connection: int | None = None,
) -> None:
# !! not necessary to check for existence of nodes
# !! feature already implemented in NetworkX ``add_edge``
@ -78,6 +84,8 @@ def update_graph(
if child not in graph:
graph.add_node(child)
"""
if weight_connection is None:
weight_connection = 1
# check if edge not in Graph
if batch is not None:
graph.add_edges_from(batch, weight=weight_connection)
@ -116,6 +124,51 @@ def convert_graph_to_undirected(
return graph_undir
def convert_graph_to_cytoscape(
graph: Graph | DiGraph,
) -> tuple[list[CytoscapeData], WeightData]:
cyto_data: list[CytoscapeData] = []
# iterate over nodes
nodes = cast(Iterable[NodeTitle], graph.nodes)
for i, node in enumerate(nodes):
node_data: CytoscapeData = {
'data': {
'id': node,
'label': node,
}
}
cyto_data.append(node_data)
# iterate over edges
weights: set[int] = set()
edges = cast(
Iterable[
tuple[
NodeTitle,
NodeTitle,
EdgeWeight,
]
],
graph.edges.data('weight', default=1), # type: ignore
)
for i, (source, target, weight) in enumerate(edges):
weights.add(weight)
edge_data: CytoscapeData = {
'data': {
'source': source,
'target': target,
'weight': weight,
}
}
cyto_data.append(edge_data)
min_weight = min(weights)
max_weight = max(weights)
weight_metadata: WeightData = {'min': min_weight, 'max': max_weight}
return cyto_data, weight_metadata
class TokenGraph(DiGraph):
def __init__(
self,
@ -200,7 +253,9 @@ class TokenGraph(DiGraph):
@overload
def to_undirected(
self, inplace: bool = ..., logging: bool | None = ...
self,
inplace: bool = ...,
logging: bool | None = ...,
) -> Graph | None: ...
def to_undirected(
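For orientation: convert_graph_to_cytoscape above flattens the token graph into the element list that scripts/dashboard/cyto.py passes to cyto.Cytoscape. A hedged sketch of the returned shape and its use (node titles and weights are illustrative):

# Node records and edge records share one flat list, matching the
# CytoscapeData TypedDict added in lang_main.types.
cyto_data = [
    {'data': {'id': 'Pumpe', 'label': 'Pumpe'}},
    {'data': {'id': 'defekt', 'label': 'defekt'}},
    {'data': {'source': 'Pumpe', 'target': 'defekt', 'weight': 150}},
]
weight_data = {'min': 150, 'max': 150}
# In the dashboard the element list feeds the component directly and the
# weight metadata bounds the RangeSlider:
#   cyto.Cytoscape(id='cytoscape-graph', elements=cyto_data, ...)
#   dcc.RangeSlider(min=weight_data['min'], max=weight_data['max'], ...)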


@ -214,20 +214,23 @@ def analyse_feature(
unique_feature_entries = feature_entries.unique()
# prepare result DataFrame
cols = ['entry', 'len', 'num_occur', 'assoc_obj_ids', 'num_assoc_obj_ids']
cols = ['batched_idxs', 'entry', 'len', 'num_occur', 'assoc_obj_ids', 'num_assoc_obj_ids']
result_df = pd.DataFrame(columns=cols)
for entry in tqdm(unique_feature_entries, mininterval=1.0):
len_entry = len(entry)
filt = data[target_feature] == entry
temp = data[filt]
batched_idxs = temp.index.to_numpy()
assoc_obj_ids = temp['ObjektID'].unique()
assoc_obj_ids = np.sort(assoc_obj_ids, kind='stable')
num_assoc_obj_ids = len(assoc_obj_ids)
num_dupl = filt.sum()
conc_df = pd.DataFrame(
data=[[entry, len_entry, num_dupl, assoc_obj_ids, num_assoc_obj_ids]],
data=[
[batched_idxs, entry, len_entry, num_dupl, assoc_obj_ids, num_assoc_obj_ids]
],
columns=cols,
)


@ -10,7 +10,6 @@ from networkx import Graph
from pandas import Series
from sentence_transformers import SentenceTransformer
from torch import Tensor
from tqdm.auto import tqdm
from lang_main.analysis.graphs import get_graph_metadata, update_graph
from lang_main.types import PandasIndex
@ -40,9 +39,8 @@ def candidates_by_index(
Yields
------
Iterator[tuple[ObjectID, tuple[PandasIndex, PandasIndex]]]
ObjectID and tuple of index pairs which meet the cosine
similarity threshold
Iterator[tuple[PandasIndex, PandasIndex]]
tuple of index pairs which meet the cosine similarity threshold
"""
# embeddings
batch = cast(list[str], data_model_input.to_list())


@ -1,11 +1,11 @@
import re
from collections.abc import Iterator
from itertools import combinations
from typing import cast
from typing import Literal, cast, overload
from dateutil.parser import parse
from pandas import DataFrame
from spacy.lang.de import German as GermanSpacyModel
from spacy.language import Language as GermanSpacyModel
from spacy.tokens.doc import Doc as SpacyDoc
from spacy.tokens.token import Token as SpacyToken
from tqdm.auto import tqdm
@ -15,6 +15,7 @@ from lang_main.analysis.graphs import (
update_graph,
)
from lang_main.loggers import logger_token_analysis as logger
from lang_main.types import PandasIndex
# ** POS
# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'ADJ', 'VERB', 'AUX'])
@ -104,7 +105,7 @@ def obtain_relevant_descendants(
def add_doc_info_to_graph(
graph: TokenGraph,
doc: SpacyDoc,
weight: int,
weight: int | None,
) -> None:
# iterate over sentences
for sent in doc.sents:
@ -142,9 +143,121 @@ def add_doc_info_to_graph(
)
@overload
def build_token_graph(
data: DataFrame,
model: GermanSpacyModel,
*,
target_feature: str = ...,
weights_feature: str | None = ...,
batch_idx_feature: str = ...,
build_map: Literal[False],
batch_size_model: int = ...,
) -> tuple[TokenGraph, None]: ...
@overload
def build_token_graph(
data: DataFrame,
model: GermanSpacyModel,
*,
target_feature: str = ...,
weights_feature: str | None = ...,
batch_idx_feature: str = ...,
build_map: Literal[True] = ...,
batch_size_model: int = ...,
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]: ...
def build_token_graph(
data: DataFrame,
model: GermanSpacyModel,
*,
target_feature: str = 'entry',
weights_feature: str | None = None,
batch_idx_feature: str = 'batched_idxs',
build_map: bool = True,
batch_size_model: int = 50,
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None]:
graph = TokenGraph()
model_input = cast(tuple[str], tuple(data[target_feature].to_list()))
if weights_feature is not None:
weights = cast(tuple[int], tuple(data[weights_feature].to_list()))
else:
weights = None
docs_mapping: dict[PandasIndex, SpacyDoc] | None
if build_map:
indices = cast(tuple[list[PandasIndex]], tuple(data[batch_idx_feature].to_list()))
docs_mapping = {}
else:
indices = None
docs_mapping = None
index: int = 0
for doc in tqdm(
model.pipe(model_input, batch_size=batch_size_model), total=len(model_input)
):
if weights is not None:
weight = weights[index]
else:
weight = None
add_doc_info_to_graph(
graph=graph,
doc=doc,
weight=weight,
)
# build map if option chosen
if indices is not None and docs_mapping is not None:
corresponding_indices = indices[index]
for idx in corresponding_indices:
docs_mapping[idx] = doc
index += 1
# metadata
graph.update_metadata()
# convert to undirected
graph.to_undirected()
return graph, docs_mapping
def build_token_graph_simple(
data: DataFrame,
model: GermanSpacyModel,
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
graph = TokenGraph()
model_input = cast(tuple[str], tuple(data['entry'].to_list()))
weights = cast(tuple[int], tuple(data['num_occur'].to_list()))
indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list()))
index: int = 0
docs_mapping: dict[PandasIndex, SpacyDoc] = {}
for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)):
add_doc_info_to_graph(
graph=graph,
doc=doc,
weight=weights[index],
)
corresponding_indices = indices[index]
for idx in corresponding_indices:
docs_mapping[idx] = doc
index += 1
# metadata
graph.update_metadata()
# convert to undirected
graph.to_undirected()
return graph, docs_mapping
def build_token_graph_old(
data: DataFrame,
model: GermanSpacyModel,
) -> tuple[TokenGraph]:
# empty NetworkX directed graph
# graph = nx.DiGraph()
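A usage sketch for the reworked build_token_graph (the DataFrame name is a placeholder; the column names match those produced by analyse_feature and wired up in the predefined pipeline below):

# preprocessed_df is assumed to carry 'entry', 'num_occur' and 'batched_idxs'
# columns; model is the loaded German spaCy pipeline.
graph, docs_mapping = build_token_graph(
    preprocessed_df,
    model,
    target_feature='entry',
    weights_feature='num_occur',
    batch_idx_feature='batched_idxs',
    build_map=True,
    batch_size_model=50,
)
# docs_mapping maps every original row index back to its spaCy Doc.
# With build_map=False the overloads narrow the second element to None:
graph_only, _ = build_token_graph(preprocessed_df, model, build_map=False)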


@ -1,15 +1,28 @@
from pathlib import Path
from typing import Final
from lang_main import CALLER_PATH, CONFIG
import spacy
from sentence_transformers import SentenceTransformer
from spacy.language import Language as GermanSpacyModel
from lang_main import CONFIG
from lang_main.types import STFRDeviceTypes
# ** paths
input_path_conf = Path(CONFIG['paths']['inputs'])
INPUT_PATH_FOLDER: Final[Path] = (CALLER_PATH / input_path_conf).resolve()
save_path_conf = Path(CONFIG['paths']['results'])
SAVE_PATH_FOLDER: Final[Path] = (CALLER_PATH / save_path_conf).resolve()
path_dataset_conf = Path(CONFIG['paths']['dataset'])
PATH_TO_DATASET: Final[Path] = (CALLER_PATH / path_dataset_conf).resolve()
input_path_conf = Path.cwd() / Path(CONFIG['paths']['inputs'])
INPUT_PATH_FOLDER: Final[Path] = input_path_conf.resolve()
# INPUT_PATH_FOLDER: Final[Path] = (CALLER_PATH / input_path_conf).resolve()
# TODO reactivate later
# if not INPUT_PATH_FOLDER.exists():
# raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.')
save_path_conf = Path.cwd() / Path(CONFIG['paths']['results'])
SAVE_PATH_FOLDER: Final[Path] = save_path_conf.resolve()
# SAVE_PATH_FOLDER: Final[Path] = (CALLER_PATH / save_path_conf).resolve()
path_dataset_conf = Path.cwd() / Path(CONFIG['paths']['dataset'])
PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
# PATH_TO_DATASET: Final[Path] = (CALLER_PATH / path_dataset_conf).resolve()
# if not PATH_TO_DATASET.exists():
# raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
# ** control
DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
@ -19,8 +32,18 @@ DO_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing']
SKIP_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing_skip']
DO_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis']
SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
# ** export
# ** models
# ** sentence_transformers
STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU
STFR_MODEL: Final[SentenceTransformer] = SentenceTransformer(
'sentence-transformers/all-mpnet-base-v2', device=STFR_DEVICE
)
# ** spacy
SPCY_MODEL: Final[GermanSpacyModel] = spacy.load('de_dep_news_trf')
# ** export
# ** preprocessing
FILENAME_COSSIM_FILTER_CANDIDATES: Final[str] = CONFIG['preprocess'][
'filename_cossim_filter_candidates'


@ -1,6 +1,3 @@
import spacy
from sentence_transformers import SentenceTransformer
from lang_main.analysis.preprocessing import (
analyse_feature,
clean_string_slim,
@ -24,6 +21,8 @@ from lang_main.constants import (
FEATURE_NAME_OBJ_ID,
MODEL_INPUT_FEATURES,
SAVE_PATH_FOLDER,
SPCY_MODEL,
STFR_MODEL,
THRESHOLD_NUM_ACTIVITIES,
THRESHOLD_SIMILARITY,
THRESHOLD_TIMELINE_SIMILARITY,
@ -49,6 +48,7 @@ pipe_target_feat.add(
'target_feature': 'VorgangsBeschreibung',
'cleansing_func': clean_string_slim,
},
save_result=True,
)
pipe_target_feat.add(
analyse_feature,
@ -64,8 +64,7 @@ pipe_target_feat.add(
# ?? still needed?
# using similarity between entries to catch duplicates with typo or similar content
# pipe_embds = BasePipeline(name='Embedding1', working_dir=SAVE_PATH_FOLDER)
model_spacy = spacy.load('de_dep_news_trf')
model_stfr = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
# pipe_embds.add(build_cosSim_matrix, {'model': model_stfr}, save_result=True)
# pipe_embds.add(
@ -88,7 +87,7 @@ pipe_merge = BasePipeline(name='Merge_Duplicates', working_dir=SAVE_PATH_FOLDER)
pipe_merge.add(
merge_similarity_dupl,
{
'model': model_stfr,
'model': STFR_MODEL,
'cos_sim_threshold': THRESHOLD_SIMILARITY,
},
save_result=True,
@ -99,7 +98,12 @@ pipe_token_analysis = BasePipeline(name='Token_Analysis', working_dir=SAVE_PATH_
pipe_token_analysis.add(
build_token_graph,
{
'model': model_spacy,
'model': SPCY_MODEL,
'target_feature': 'entry',
'weights_feature': 'num_occur',
'batch_idx_feature': 'batched_idxs',
'build_map': True,
'batch_size_model': 50,
},
save_result=True,
)
@ -135,7 +139,7 @@ pipe_timeline.add(
pipe_timeline.add(
get_timeline_candidates,
{
'model': model_stfr,
'model': STFR_MODEL,
'cos_sim_threshold': THRESHOLD_TIMELINE_SIMILARITY,
'feature_obj_id': FEATURE_NAME_OBJ_ID,
'model_input_feature': 'nlp_model_input',


@ -1,11 +1,12 @@
import enum
from typing import TypeAlias
from typing import Required, TypeAlias, TypedDict
import numpy as np
from spacy.tokens.doc import Doc as SpacyDoc
from torch import Tensor
# ** logging
class LoggingLevels(enum.IntEnum):
DEBUG = 10
INFO = 20
@ -14,8 +15,50 @@ class LoggingLevels(enum.IntEnum):
CRITICAL = 50
# ** devices
class STFRDeviceTypes(enum.StrEnum):
CPU = 'cpu'
GPU = 'cuda'
# ** datasets
PandasIndex: TypeAlias = int | np.int64
ObjectID: TypeAlias = int
Embedding: TypeAlias = SpacyDoc | Tensor
# ** graphs
NodeTitle: TypeAlias = str
EdgeWeight: TypeAlias = int
class NodeData(TypedDict):
id: NodeTitle
label: NodeTitle
class EdgeData(TypedDict):
source: NodeTitle
target: NodeTitle
weight: EdgeWeight
class WeightData(TypedDict):
min: EdgeWeight
max: EdgeWeight
class CytoscapePosition(TypedDict):
x: int
y: int
class CytoscapeData(TypedDict, total=False):
data: Required[EdgeData | NodeData]
position: CytoscapePosition
grabbable: bool
locked: bool
classes: str
# ** timeline
TimelineCandidates: TypeAlias = dict[ObjectID, tuple[tuple[PandasIndex, ...], ...]]
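Illustrative instances of the new Cytoscape-related types (all values invented for the example):

node: CytoscapeData = {
    'data': {'id': 'Pumpe', 'label': 'Pumpe'},
    'position': {'x': 75, 'y': 75},
    'grabbable': False,
    'classes': 'red',
}
edge: CytoscapeData = {'data': {'source': 'Pumpe', 'target': 'defekt', 'weight': 3}}
weights: WeightData = {'min': 1, 'max': 150}
candidates: TimelineCandidates = {4711: ((12, 98, 105), (13, 44))}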


@ -3087,7 +3087,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.9"
}
},
"nbformat": 4,


@ -1077,7 +1077,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.9"
}
},
"nbformat": 4,


@ -2267,7 +2267,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.9"
}
},
"nbformat": 4,

(File diff suppressed because it is too large.)


@ -0,0 +1,56 @@
# lang_main: Config file
[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
[control]
preprocessing = true
preprocessing_skip = false
token_analysis = false
token_analysis_skip = false
graph_postprocessing = false
graph_postprocessing_skip = false
time_analysis = false
time_analysis_skip = false
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 150
[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.model_input]
input_features = [
'VorgangsTypName',
'VorgangsArtText',
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_similarity = 0.8
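A hedged sketch of how this default file is read (load_toml_config and its keyword argument are taken from src/lang_main/__init__.py above; the path is illustrative):

from pathlib import Path

from lang_main.io import load_toml_config

cfg = load_toml_config(path_to_toml=Path('lang_main_config.toml'))
# The control flags decide which pipeline stages run, e.g.:
do_preprocessing = cfg['control']['preprocessing']            # true in this default file
skip_token_analysis = cfg['control']['token_analysis_skip']   # false in this default file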


@ -2327,7 +2327,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.9"
}
},
"nbformat": 4,


@ -1,9 +1,9 @@
# lang_main: Config file
[paths]
inputs = '../inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
inputs = '../scripts/inputs/'
results = '../scripts/results/test_new2/'
dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'