prepare usage of cytoscape API

This commit is contained in:
Florian Förster 2024-07-10 16:52:16 +02:00
parent 0acce25243
commit 1b2d5597b0
17 changed files with 1258 additions and 263 deletions

6
.gitignore vendored
View File

@ -10,6 +10,12 @@ __pycache__/
*.py[cod]
*$py.class
# images
*.jpg
*.png
*.svg
*.bmp
# C extensions
*.so

View File

@ -1,13 +1,14 @@
import typing
from typing import cast
from pandas import DataFrame, Series
from pandas import DataFrame
from lang_main.analysis.graphs import TokenGraph
from lang_main.analysis.graphs import Graph, TokenGraph, save_to_GraphML
from lang_main.constants import (
PATH_TO_DATASET,
SAVE_PATH_FOLDER,
SKIP_GRAPH_POSTPROCESSING,
SKIP_GRAPH_RESCALING,
SKIP_PREPROCESSING,
SKIP_TIME_ANALYSIS,
SKIP_TOKEN_ANALYSIS,
@ -20,6 +21,7 @@ from lang_main.pipelines.predefined import (
build_timeline_pipe,
build_tk_graph_pipe,
build_tk_graph_post_pipe,
build_tk_graph_rescaling,
)
from lang_main.types import (
EntryPoints,
@ -34,6 +36,7 @@ pipe_target_feat = build_base_target_feature_pipe()
pipe_merge = build_merge_duplicates_pipe()
pipe_token_analysis = build_tk_graph_pipe()
pipe_graph_postprocessing = build_tk_graph_post_pipe()
pipe_graph_rescaling = build_tk_graph_rescaling()
pipe_timeline = build_timeline_pipe()
@ -81,6 +84,24 @@ def run_graph_postprocessing() -> None:
)
def run_graph_edge_rescaling() -> None:
    """Rescale the edge weights of the stored token graph and export it.

    The result stored for the ``TK_GRAPH_ANALYSIS`` entry point is loaded,
    pushed through the rescaling pipeline and the second element of the
    pipeline output (the undirected, rescaled graph) is written as a GraphML
    file into ``SAVE_PATH_FOLDER``.
    """
    # load entry point
    pickle_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_ANALYSIS)
    tk_graph = cast(tuple[TokenGraph], load_pickle(pickle_path))[0]

    # pipeline yields (directed token graph, undirected rescaled graph)
    pipeline_output = cast(
        tuple[TokenGraph, Graph],
        pipe_graph_rescaling.run(starting_values=(tk_graph,)),
    )

    save_to_GraphML(
        pipeline_output[1],
        saving_path=SAVE_PATH_FOLDER,
        filename='TokenGraph-undirected-rescaled',
    )
# ** time analysis
def run_time_analysis() -> None:
# load entry point
@ -101,6 +122,7 @@ def build_pipeline_container() -> PipelineContainer:
container.add(run_preprocessing, skip=SKIP_PREPROCESSING)
container.add(run_token_analysis, skip=SKIP_TOKEN_ANALYSIS)
container.add(run_graph_postprocessing, skip=SKIP_GRAPH_POSTPROCESSING)
container.add(run_graph_edge_rescaling, skip=SKIP_GRAPH_RESCALING)
container.add(run_time_analysis, skip=SKIP_TIME_ANALYSIS)
return container

View File

@ -13,9 +13,10 @@ dataset = '../data/02_202307/Export4.csv'
# be fully executed
[control]
preprocessing_skip = true
token_analysis_skip = true
graph_postprocessing_skip = true
time_analysis_skip = false
token_analysis_skip = false
graph_postprocessing_skip = false
graph_rescaling_skip = false
time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'

View File

@ -1,12 +1 @@
from lang_main.analysis.preprocessing import clean_string_slim
from lang_main.constants import SAVE_PATH_FOLDER
print(SAVE_PATH_FOLDER)
txt = """
Wir feiern den Jahrestag am 23.11.2023, olé!
tel:::: !!!!???? +++49 123 456 789
Doch leben wir länger.
"""
print(txt)
print(clean_string_slim(txt))
import py4cytoscape

View File

@ -14,6 +14,7 @@ from networkx import DiGraph, Graph
from pandas import DataFrame
from lang_main.constants import EDGE_WEIGHT_DECIMALS
from lang_main.errors import EdgePropertyNotContainedError
from lang_main.io import load_pickle, save_pickle
from lang_main.loggers import logger_graphs as logger
from lang_main.types import (
@ -27,6 +28,18 @@ from lang_main.types import (
LOGGING_DEFAULT: Final[bool] = False
def save_to_GraphML(
    graph: DiGraph | Graph,
    saving_path: Path,
    filename: str | None = None,
) -> None:
    """Write a graph to disk in GraphML format.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph to export
    saving_path : Path
        target path; treated as the base folder if ``filename`` is given,
        otherwise as the file path itself
    filename : str | None, optional
        file name joined onto ``saving_path``, by default None

    Notes
    -----
    The suffix of the resulting path is always replaced by ``.graphml``.
    """
    target = saving_path if filename is None else saving_path.joinpath(filename)
    target = target.with_suffix('.graphml')

    nx.write_graphml(G=graph, path=target)
    logger.info('Successfully saved graph as GraphML file under %s.', target)
def get_graph_metadata(
graph: Graph | DiGraph,
logging: bool = LOGGING_DEFAULT,
@ -270,6 +283,24 @@ def filter_graph_by_node_degree(
return filtered_graph
def apply_rescaling_to_graph(
    graph: TokenGraph,
) -> tuple[TokenGraph, Graph]:
    """Pipeline adapter that delegates to ``graph.rescale_edge_weights()``.

    Parameters
    ----------
    graph : TokenGraph
        token graph handed over by the pipeline

    Returns
    -------
    tuple[TokenGraph, Graph]
        directed token graph and its undirected counterpart, both with
        rescaled edge weights
    """
    rescaled_graphs = graph.rescale_edge_weights()

    return rescaled_graphs
def normalise_array_linear(
array: npt.NDArray[np.float_],
) -> npt.NDArray[np.float32]:
@ -323,22 +354,57 @@ def weight_scaling(
return np.round(adjusted_weights, decimals=EDGE_WEIGHT_DECIMALS)
def verify_property(
    graph: Graph | DiGraph,
    property: str,
) -> None:
    """Ensure that every edge of the graph carries the given property.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph whose edges are checked
    property : str
        name of the edge attribute that must be present on each edge

    Raises
    ------
    EdgePropertyNotContainedError
        raised for the first edge encountered that lacks the property
    """
    # NOTE: the parameter name shadows the builtin ``property``; it is kept
    # because callers pass it by keyword
    for node_1, node_2 in graph.edges:
        if property not in graph[node_1][node_2]:
            raise EdgePropertyNotContainedError(
                f'Edge property >>{property}<< not '
                f'available for edge >>({node_1}, {node_2})<<'
            )
# Typing overloads: the return type mirrors the type of the graph passed in.
# NOTE(review): the first overload below shows two signature endings
# (`) -> TokenGraph:` and `) -> TokenGraph: ...`); this looks like a pre- and a
# post-change line rendered together -- confirm which one is live.
@overload
def rescale_edge_weights(
graph: TokenGraph,
) -> TokenGraph:
weight_property: str = ...,
) -> TokenGraph: ...
@overload
def rescale_edge_weights(
graph: DiGraph,
weight_property: str = ...,
) -> DiGraph: ...
@overload
def rescale_edge_weights(
graph: Graph,
weight_property: str = ...,
) -> Graph: ...
# Implementation: rescales the edge weights on a copy of the given graph and
# returns that copy; the graph passed in is not modified by this function.
def rescale_edge_weights(
graph: Graph | DiGraph | TokenGraph,
weight_property: str = 'weight',
) -> Graph | DiGraph | TokenGraph:
# work on a copy so the caller's graph keeps its weights
graph = graph.copy()
# check if all edges contain weight property
# (verify_property raises EdgePropertyNotContainedError otherwise)
verify_property(graph, property=weight_property)
# NOTE(review): `weight_property` is only used for the verification above;
# the lookups below read and write the literal key 'weight', so a
# non-default `weight_property` is validated but then ignored.
weights = cast(list[int], [data['weight'] for data in graph.edges.values()])
# natural logarithm of the raw weights
# NOTE(review): assumes all weights are strictly positive -- TODO confirm
w_log = cast(npt.NDArray[np.float32], np.log(weights, dtype=np.float32))
# normalise the log-weights, then apply the weight scaling
weights_norm = normalise_array_linear(w_log)
weights_adjusted = weight_scaling(weights_norm)
# assign new weight values
# NOTE(review): relies on `graph.edges` iterating in the same order as
# `graph.edges.values()` above, since the new weights are matched by index.
# NOTE(review): two loop headers are visible here (with and without `list(...)`);
# presumably one is the pre-change line -- confirm which one is live.
for idx, (node_1, node_2) in enumerate(list(graph.edges)):
for idx, (node_1, node_2) in enumerate(graph.edges):
graph[node_1][node_2]['weight'] = weights_adjusted[idx]
# NOTE(review): the two statements below look TokenGraph-specific
# (`update_metadata`); confirm they are not reached for plain Graph/DiGraph
# inputs, which the signature also accepts.
graph.rescaled_weights = True
graph.update_metadata(logging=False)
return graph
@ -405,7 +471,10 @@ class TokenGraph(DiGraph):
return self._directed
# Undirected view of this token graph, built on first access.
# NOTE(review): two signatures are visible below (`Graph | None` and `Graph`);
# presumably the first is the pre-change line -- confirm which one is live.
@property
def undirected(self) -> Graph | None:
def undirected(self) -> Graph:
# create the undirected graph lazily and keep it in `self._undirected`
if self._undirected is None:
self._undirected = self.to_undirected(inplace=False, logging=False)
return self._undirected
@property
@ -464,6 +533,35 @@ class TokenGraph(DiGraph):
graph=self._undirected, logging=logging
)
def rescale_edge_weights(
    self,
) -> tuple[TokenGraph, Graph]:
    """Create rescaled copies of the directed and the undirected graph.

    The directed and the undirected graph are rescaled independently of each
    other and returned as new instances. This is the only way to obtain a
    properly scaled undirected graph: calling ``to_undirected`` on an already
    rescaled directed TokenGraph does not rescale the weights again, so the
    maximum edge weight may exceed the theoretical maximum of 1.

    Note that the returned directed TokenGraph holds its own undirected
    version whose weights were NOT rescaled. That internal version therefore
    differs from the undirected graph returned by this method.

    Returns
    -------
    tuple[TokenGraph, Graph]
        rescaled directed TokenGraph and rescaled undirected graph
    """
    # directed variant: rescale, flag it and refresh its derived state
    rescaled_directed = rescale_edge_weights(self.directed)
    rescaled_directed.rescaled_weights = True
    rescaled_directed.to_undirected(inplace=True, logging=False)
    rescaled_directed.update_metadata(logging=False)

    # undirected variant: rescaled separately from this graph's undirected view
    rescaled_undirected = rescale_edge_weights(self.undirected)

    return rescaled_directed, rescaled_undirected
def _save_prepare(
self,
path: Path,
@ -508,9 +606,10 @@ class TokenGraph(DiGraph):
else:
raise ValueError('No undirected graph available.')
saving_path = saving_path.with_suffix('.graphml')
nx.write_graphml(G=target_graph, path=saving_path)
logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
save_to_GraphML(graph=target_graph, saving_path=saving_path)
# saving_path = saving_path.with_suffix('.graphml')
# nx.write_graphml(G=target_graph, path=saving_path)
# logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
def to_pickle(
self,

View File

@ -24,13 +24,10 @@ PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
# if not PATH_TO_DATASET.exists():
# raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
# ** control
# DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
# DO_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis']
SKIP_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis_skip']
# DO_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing']
SKIP_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing_skip']
# DO_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis']
SKIP_GRAPH_RESCALING: Final[bool] = CONFIG['control']['graph_rescaling_skip']
SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
# ** models

View File

@ -0,0 +1,123 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<vizmap id="VizMap-2024_07_10-16_50" documentVersion="3.1">
<visualStyle name="template">
<network>
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
<visualProperty default="" name="NETWORK_TITLE"/>
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
</network>
<node>
<dependency value="true" name="nodeCustomGraphicsSizeSync"/>
<dependency value="true" name="nodeSizeLocked"/>
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
<visualProperty default="true" name="NODE_VISIBLE"/>
<visualProperty default="" name="NODE_TOOLTIP"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
<visualProperty default="0.0" name="NODE_DEPTH"/>
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
<visualProperty default="60.0" name="NODE_WIDTH"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
<visualProperty default="18.0" name="NODE_SIZE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
<visualProperty default="" name="NODE_LABEL">
<passthroughMapping attributeName="name" attributeType="string"/>
</visualProperty>
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
<visualProperty default="false" name="NODE_SELECTED"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
<visualProperty default="255" name="NODE_TRANSPARENCY"/>
<visualProperty default="40.0" name="NODE_HEIGHT"/>
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
</node>
<edge>
<dependency value="true" name="arrowColorMatchesEdge"/>
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
<visualProperty default="" name="EDGE_TOOLTIP"/>
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
<visualProperty default="false" name="EDGE_SELECTED"/>
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
<visualProperty default="true" name="EDGE_CURVED"/>
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
<visualProperty default="3.0" name="EDGE_WIDTH"/>
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
<visualProperty default="true" name="EDGE_VISIBLE"/>
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
<visualProperty default="" name="EDGE_LABEL"/>
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
<visualProperty default="0.0" name="EDGE_Z_ORDER"/>
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
</edge>
</visualStyle>
</vizmap>

Binary file not shown.

2
src/lang_main/errors.py Normal file
View File

@ -0,0 +1,2 @@
class EdgePropertyNotContainedError(Exception):
    """Raised when a required edge property is missing from a graph's edges."""

View File

@ -1,5 +1,5 @@
import pickle
import base64
import pickle
import shutil
import tomllib
from pathlib import Path

View File

@ -2,8 +2,8 @@
[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
results = './results/test_20240619/'
dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
@ -12,10 +12,11 @@ dataset = './01_2_Rohdaten_neu/Export4.csv'
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing_skip = false
preprocessing_skip = true
token_analysis_skip = false
graph_postprocessing_skip = false
time_analysis_skip = false
graph_rescaling_skip = false
time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'

View File

@ -0,0 +1,57 @@
# lang_main: Config file
[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing_skip = false
token_analysis_skip = false
graph_postprocessing_skip = false
time_analysis_skip = false
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 150
[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_similarity = 0.8

View File

@ -163,6 +163,17 @@ def build_tk_graph_post_pipe() -> Pipeline:
return pipe_graph_postprocessing
def build_tk_graph_rescaling() -> Pipeline:
    """Assemble the pipeline that rescales the edge weights of a token graph.

    The pipeline holds a single step, ``graphs.apply_rescaling_to_graph``,
    registered with ``save_result=True`` and the filename
    ``EntryPoints.TK_GRAPH_ANALYSIS_RESCALED``.

    Returns
    -------
    Pipeline
        configured rescaling pipeline working in ``SAVE_PATH_FOLDER``
    """
    pipeline = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
    pipeline.add(
        graphs.apply_rescaling_to_graph,
        save_result=True,
        filename=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )

    return pipeline
# ** timeline analysis
def build_timeline_pipe() -> Pipeline:
pipe_timeline = Pipeline(name='Timeline_Analysis', working_dir=SAVE_PATH_FOLDER)

View File

@ -30,6 +30,7 @@ class EntryPoints(enum.StrEnum):
TIMELINE_POST = 'TIMELINE_POSTPROCESSING'
TK_GRAPH_POST = 'TK-GRAPH_POSTPROCESSING'
TK_GRAPH_ANALYSIS = 'TK-GRAPH_ANALYSIS'
TK_GRAPH_ANALYSIS_RESCALED = 'TK-GRAPH_ANALYSIS_RESCALED'
TOKEN_ANALYSIS = 'TOKEN_ANALYSIS'

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 34 KiB

View File

@ -2,22 +2,21 @@
[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
results = './results/test_20240619/'
dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing = true
preprocessing_skip = false
token_analysis = false
preprocessing_skip = true
token_analysis_skip = false
graph_postprocessing = false
graph_postprocessing_skip = false
time_analysis = false
time_analysis_skip = false
graph_rescaling_skip = false
time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
@ -42,9 +41,12 @@ criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [
'VorgangsTypName',
'VorgangsArtText',
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'

File diff suppressed because one or more lines are too long