prepare usage of cytoscape API
This commit is contained in:
parent
0acce25243
commit
1b2d5597b0
6
.gitignore
vendored
6
.gitignore
vendored
@ -10,6 +10,12 @@ __pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# images
|
||||
*.jpg
|
||||
*.png
|
||||
*.svg
|
||||
*.bmp
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
import typing
|
||||
from typing import cast
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.analysis.graphs import TokenGraph
|
||||
from lang_main.analysis.graphs import Graph, TokenGraph, save_to_GraphML
|
||||
from lang_main.constants import (
|
||||
PATH_TO_DATASET,
|
||||
SAVE_PATH_FOLDER,
|
||||
SKIP_GRAPH_POSTPROCESSING,
|
||||
SKIP_GRAPH_RESCALING,
|
||||
SKIP_PREPROCESSING,
|
||||
SKIP_TIME_ANALYSIS,
|
||||
SKIP_TOKEN_ANALYSIS,
|
||||
@ -20,6 +21,7 @@ from lang_main.pipelines.predefined import (
|
||||
build_timeline_pipe,
|
||||
build_tk_graph_pipe,
|
||||
build_tk_graph_post_pipe,
|
||||
build_tk_graph_rescaling,
|
||||
)
|
||||
from lang_main.types import (
|
||||
EntryPoints,
|
||||
@ -34,6 +36,7 @@ pipe_target_feat = build_base_target_feature_pipe()
|
||||
pipe_merge = build_merge_duplicates_pipe()
|
||||
pipe_token_analysis = build_tk_graph_pipe()
|
||||
pipe_graph_postprocessing = build_tk_graph_post_pipe()
|
||||
pipe_graph_rescaling = build_tk_graph_rescaling()
|
||||
pipe_timeline = build_timeline_pipe()
|
||||
|
||||
|
||||
@ -81,6 +84,24 @@ def run_graph_postprocessing() -> None:
|
||||
)
|
||||
|
||||
|
||||
def run_graph_edge_rescaling() -> None:
    """Rescale the edge weights of the stored token graph and export the result.

    Loads the pickled token graph from the ``TK_GRAPH_ANALYSIS`` entry point,
    pushes it through the rescaling pipeline and writes the undirected graph
    returned by that pipeline as a GraphML file into ``SAVE_PATH_FOLDER``.
    """
    pickle_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_ANALYSIS)
    # the pickle holds a tuple whose first element is the token graph
    token_graph = cast(tuple[TokenGraph], load_pickle(pickle_path))[0]

    pipeline_output = cast(
        tuple[TokenGraph, Graph],
        pipe_graph_rescaling.run(starting_values=(token_graph,)),
    )
    # second element of the pipeline result is the undirected rescaled graph
    save_to_GraphML(
        pipeline_output[1],
        saving_path=SAVE_PATH_FOLDER,
        filename='TokenGraph-undirected-rescaled',
    )
|
||||
|
||||
|
||||
# ** time analysis
|
||||
def run_time_analysis() -> None:
|
||||
# load entry point
|
||||
@ -101,6 +122,7 @@ def build_pipeline_container() -> PipelineContainer:
|
||||
container.add(run_preprocessing, skip=SKIP_PREPROCESSING)
|
||||
container.add(run_token_analysis, skip=SKIP_TOKEN_ANALYSIS)
|
||||
container.add(run_graph_postprocessing, skip=SKIP_GRAPH_POSTPROCESSING)
|
||||
container.add(run_graph_edge_rescaling, skip=SKIP_GRAPH_RESCALING)
|
||||
container.add(run_time_analysis, skip=SKIP_TIME_ANALYSIS)
|
||||
|
||||
return container
|
||||
|
||||
@ -13,9 +13,10 @@ dataset = '../data/02_202307/Export4.csv'
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing_skip = true
|
||||
time_analysis_skip = false
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
graph_rescaling_skip = false
|
||||
time_analysis_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
@ -1,12 +1 @@
|
||||
from lang_main.analysis.preprocessing import clean_string_slim
|
||||
from lang_main.constants import SAVE_PATH_FOLDER
|
||||
|
||||
print(SAVE_PATH_FOLDER)
|
||||
txt = """
|
||||
Wir feiern den Jahrestag am 23.11.2023, olé!
|
||||
tel:::: !!!!???? +++49 123 456 789
|
||||
|
||||
Doch leben wir länger.
|
||||
"""
|
||||
print(txt)
|
||||
print(clean_string_slim(txt))
|
||||
import py4cytoscape
|
||||
|
||||
@ -14,6 +14,7 @@ from networkx import DiGraph, Graph
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.constants import EDGE_WEIGHT_DECIMALS
|
||||
from lang_main.errors import EdgePropertyNotContainedError
|
||||
from lang_main.io import load_pickle, save_pickle
|
||||
from lang_main.loggers import logger_graphs as logger
|
||||
from lang_main.types import (
|
||||
@ -27,6 +28,18 @@ from lang_main.types import (
|
||||
LOGGING_DEFAULT: Final[bool] = False
|
||||
|
||||
|
||||
def save_to_GraphML(
    graph: DiGraph | Graph,
    saving_path: Path,
    filename: str | None = None,
) -> None:
    """Write a graph to disk in GraphML format.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph to export
    saving_path : Path
        target path; if ``filename`` is given it is joined onto this path,
        otherwise this path itself is used as the file path
    filename : str | None, optional
        file name appended to ``saving_path``, by default None

    Notes
    -----
    The suffix of the final path is always set to ``.graphml`` via
    ``Path.with_suffix``.
    """
    target = saving_path if filename is None else saving_path.joinpath(filename)
    target = target.with_suffix('.graphml')
    nx.write_graphml(G=graph, path=target)
    logger.info('Successfully saved graph as GraphML file under %s.', target)
|
||||
|
||||
|
||||
def get_graph_metadata(
|
||||
graph: Graph | DiGraph,
|
||||
logging: bool = LOGGING_DEFAULT,
|
||||
@ -270,6 +283,24 @@ def filter_graph_by_node_degree(
|
||||
return filtered_graph
|
||||
|
||||
|
||||
def apply_rescaling_to_graph(
    graph: TokenGraph,
) -> tuple[TokenGraph, Graph]:
    """Pipeline-friendly wrapper around the graph's ``rescale_edge_weights`` method.

    Parameters
    ----------
    graph : TokenGraph
        token graph handed over by the pipeline

    Returns
    -------
    tuple[TokenGraph, Graph]
        directed token graph and its undirected counterpart, both with
        rescaled edge weights
    """
    rescaled_graphs = graph.rescale_edge_weights()
    return rescaled_graphs
|
||||
|
||||
|
||||
def normalise_array_linear(
|
||||
array: npt.NDArray[np.float_],
|
||||
) -> npt.NDArray[np.float32]:
|
||||
@ -323,22 +354,57 @@ def weight_scaling(
|
||||
return np.round(adjusted_weights, decimals=EDGE_WEIGHT_DECIMALS)
|
||||
|
||||
|
||||
def verify_property(
    graph: Graph | DiGraph,
    property: str,
) -> None:
    """Ensure that every edge of the graph carries the given property.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph whose edges are checked
    property : str
        name of the edge property that has to be present on each edge

    Raises
    ------
    EdgePropertyNotContainedError
        if at least one edge does not contain the property
    """
    # the edge index is not needed here, so iterate over the edges directly
    for node_1, node_2 in graph.edges:
        if property not in graph[node_1][node_2]:
            raise EdgePropertyNotContainedError(
                f'Edge property >>{property}<< not '
                f'available for edge >>({node_1}, {node_2})<<'
            )
|
||||
|
||||
|
||||
@overload
def rescale_edge_weights(
    graph: TokenGraph,
    weight_property: str = ...,
) -> TokenGraph: ...


@overload
def rescale_edge_weights(
    graph: DiGraph,
    weight_property: str = ...,
) -> DiGraph: ...


@overload
def rescale_edge_weights(
    graph: Graph,
    weight_property: str = ...,
) -> Graph: ...


def rescale_edge_weights(
    graph: Graph | DiGraph | TokenGraph,
    weight_property: str = 'weight',
) -> Graph | DiGraph | TokenGraph:
    """Return a copy of the graph with rescaled edge weights.

    The values of ``weight_property`` are log-transformed, passed through
    ``normalise_array_linear`` and ``weight_scaling`` and written back to the
    same edge property of the copied graph. The input graph is not modified.

    Parameters
    ----------
    graph : Graph | DiGraph | TokenGraph
        graph whose edge weights should be rescaled
    weight_property : str, optional
        name of the edge property holding the weights, by default 'weight'

    Returns
    -------
    Graph | DiGraph | TokenGraph
        copy of the graph with rescaled weights

    Raises
    ------
    EdgePropertyNotContainedError
        if any edge lacks ``weight_property`` (raised by ``verify_property``)
    """
    graph = graph.copy()
    # check if all edges contain weight property
    verify_property(graph, property=weight_property)

    # read the weights from the verified property instead of a fixed key
    weights = cast(
        list[int], [data[weight_property] for data in graph.edges.values()]
    )
    w_log = cast(npt.NDArray[np.float32], np.log(weights, dtype=np.float32))
    weights_norm = normalise_array_linear(w_log)
    weights_adjusted = weight_scaling(weights_norm)
    # assign new weight values; edge iteration order matches ``weights``
    for idx, (node_1, node_2) in enumerate(graph.edges):
        graph[node_1][node_2][weight_property] = weights_adjusted[idx]

    return graph
|
||||
|
||||
|
||||
@ -405,7 +471,10 @@ class TokenGraph(DiGraph):
|
||||
return self._directed
|
||||
|
||||
@property
def undirected(self) -> Graph:
    """Undirected version of this graph, created on first access.

    If no undirected graph is stored yet, it is built via ``to_undirected``
    (not in place, without logging) and stored before being returned.
    """
    if self._undirected is not None:
        return self._undirected

    self._undirected = self.to_undirected(inplace=False, logging=False)
    return self._undirected
|
||||
|
||||
@property
|
||||
@ -464,6 +533,35 @@ class TokenGraph(DiGraph):
|
||||
graph=self._undirected, logging=logging
|
||||
)
|
||||
|
||||
def rescale_edge_weights(
    self,
) -> tuple[TokenGraph, Graph]:
    """Create new directed and undirected graph instances with rescaled
    edge weights.

    Only this method ensures that the undirected graph is scaled properly.
    If ``to_undirected`` is called on the rescaled directed TokenGraph, the
    weights are not rescaled again, so the maximum edge weight can exceed
    the theoretical maximum value of 1. To keep the behaviour consistent
    across different ways of converting to an undirected graph, new
    instances are returned, especially for the undirected graph.

    In contrast, the new directed TokenGraph holds an undirected version
    whose weights were not rescaled. That undirected version therefore
    differs from the one returned by this method.

    Returns
    -------
    tuple[TokenGraph, Graph]
        directed and undirected instances
    """
    rescaled_directed = rescale_edge_weights(self.directed)
    rescaled_directed.rescaled_weights = True
    rescaled_directed.to_undirected(inplace=True, logging=False)
    rescaled_directed.update_metadata(logging=False)

    # the undirected graph is rescaled separately from the directed one
    return rescaled_directed, rescale_edge_weights(self.undirected)
|
||||
|
||||
def _save_prepare(
|
||||
self,
|
||||
path: Path,
|
||||
@ -508,9 +606,10 @@ class TokenGraph(DiGraph):
|
||||
else:
|
||||
raise ValueError('No undirected graph available.')
|
||||
|
||||
saving_path = saving_path.with_suffix('.graphml')
|
||||
nx.write_graphml(G=target_graph, path=saving_path)
|
||||
logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
|
||||
save_to_GraphML(graph=target_graph, saving_path=saving_path)
|
||||
# saving_path = saving_path.with_suffix('.graphml')
|
||||
# nx.write_graphml(G=target_graph, path=saving_path)
|
||||
# logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
|
||||
|
||||
def to_pickle(
|
||||
self,
|
||||
|
||||
@ -24,13 +24,10 @@ PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
|
||||
# if not PATH_TO_DATASET.exists():
|
||||
# raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
|
||||
# ** control
|
||||
# DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
|
||||
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
|
||||
# DO_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis']
|
||||
SKIP_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis_skip']
|
||||
# DO_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing']
|
||||
SKIP_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing_skip']
|
||||
# DO_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis']
|
||||
SKIP_GRAPH_RESCALING: Final[bool] = CONFIG['control']['graph_rescaling_skip']
|
||||
SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
|
||||
|
||||
# ** models
|
||||
|
||||
123
src/lang_main/cytoscape_config/styles_template.xml
Normal file
123
src/lang_main/cytoscape_config/styles_template.xml
Normal file
@ -0,0 +1,123 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<vizmap id="VizMap-2024_07_10-16_50" documentVersion="3.1">
|
||||
<visualStyle name="template">
|
||||
<network>
|
||||
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
|
||||
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
|
||||
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
|
||||
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
|
||||
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
|
||||
<visualProperty default="" name="NETWORK_TITLE"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
|
||||
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
|
||||
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
|
||||
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
|
||||
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
|
||||
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
|
||||
</network>
|
||||
<node>
|
||||
<dependency value="true" name="nodeCustomGraphicsSizeSync"/>
|
||||
<dependency value="true" name="nodeSizeLocked"/>
|
||||
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
|
||||
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
|
||||
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
|
||||
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
|
||||
<visualProperty default="true" name="NODE_VISIBLE"/>
|
||||
<visualProperty default="" name="NODE_TOOLTIP"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
|
||||
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
|
||||
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
|
||||
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_DEPTH"/>
|
||||
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
|
||||
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
|
||||
<visualProperty default="60.0" name="NODE_WIDTH"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
|
||||
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
|
||||
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
|
||||
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
|
||||
<visualProperty default="18.0" name="NODE_SIZE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
|
||||
<visualProperty default="" name="NODE_LABEL">
|
||||
<passthroughMapping attributeName="name" attributeType="string"/>
|
||||
</visualProperty>
|
||||
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
|
||||
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
|
||||
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
|
||||
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
|
||||
<visualProperty default="false" name="NODE_SELECTED"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
|
||||
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
|
||||
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
|
||||
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="255" name="NODE_TRANSPARENCY"/>
|
||||
<visualProperty default="40.0" name="NODE_HEIGHT"/>
|
||||
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
|
||||
</node>
|
||||
<edge>
|
||||
<dependency value="true" name="arrowColorMatchesEdge"/>
|
||||
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="" name="EDGE_TOOLTIP"/>
|
||||
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
|
||||
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
|
||||
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
|
||||
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
|
||||
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
|
||||
<visualProperty default="false" name="EDGE_SELECTED"/>
|
||||
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
|
||||
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="true" name="EDGE_CURVED"/>
|
||||
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
|
||||
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
|
||||
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
|
||||
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
|
||||
<visualProperty default="3.0" name="EDGE_WIDTH"/>
|
||||
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
|
||||
<visualProperty default="true" name="EDGE_VISIBLE"/>
|
||||
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="" name="EDGE_LABEL"/>
|
||||
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
|
||||
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
|
||||
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
|
||||
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
|
||||
<visualProperty default="0.0" name="EDGE_Z_ORDER"/>
|
||||
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
|
||||
</edge>
|
||||
</visualStyle>
|
||||
</vizmap>
|
||||
BIN
src/lang_main/cytoscape_config/template_test.cys
Normal file
BIN
src/lang_main/cytoscape_config/template_test.cys
Normal file
Binary file not shown.
2
src/lang_main/errors.py
Normal file
2
src/lang_main/errors.py
Normal file
@ -0,0 +1,2 @@
|
||||
class EdgePropertyNotContainedError(Exception):
    """Raised when a required edge property is missing from a graph's edges."""
|
||||
@ -1,5 +1,5 @@
|
||||
import pickle
|
||||
import base64
|
||||
import pickle
|
||||
import shutil
|
||||
import tomllib
|
||||
from pathlib import Path
|
||||
|
||||
@ -2,8 +2,8 @@
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_new2/'
|
||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
results = './results/test_20240619/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
@ -12,10 +12,11 @@ dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
# only debugging features, production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = false
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
time_analysis_skip = false
|
||||
graph_rescaling_skip = false
|
||||
time_analysis_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
57
src/lang_main/lang_main_config_old.toml
Normal file
57
src/lang_main/lang_main_config_old.toml
Normal file
@ -0,0 +1,57 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_new2/'
|
||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
# only debugging features, production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = false
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
time_analysis_skip = false
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
[preprocess]
|
||||
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.model_input]
|
||||
# input_features = [
|
||||
# 'VorgangsTypName',
|
||||
# 'VorgangsArtText',
|
||||
# 'VorgangsBeschreibung',
|
||||
# ]
|
||||
input_features = [
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
@ -163,6 +163,17 @@ def build_tk_graph_post_pipe() -> Pipeline:
|
||||
return pipe_graph_postprocessing
|
||||
|
||||
|
||||
def build_tk_graph_rescaling() -> Pipeline:
    """Build the pipeline that rescales the edge weights of the token graph.

    The pipeline has a single step, ``graphs.apply_rescaling_to_graph``,
    registered with ``save_result=True`` and the
    ``TK_GRAPH_ANALYSIS_RESCALED`` entry point as filename.

    Returns
    -------
    Pipeline
        configured rescaling pipeline working in ``SAVE_PATH_FOLDER``
    """
    pipeline = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
    pipeline.add(
        graphs.apply_rescaling_to_graph,
        save_result=True,
        filename=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )
    return pipeline
|
||||
|
||||
|
||||
# ** timeline analysis
|
||||
def build_timeline_pipe() -> Pipeline:
|
||||
pipe_timeline = Pipeline(name='Timeline_Analysis', working_dir=SAVE_PATH_FOLDER)
|
||||
|
||||
@ -30,6 +30,7 @@ class EntryPoints(enum.StrEnum):
|
||||
TIMELINE_POST = 'TIMELINE_POSTPROCESSING'
|
||||
TK_GRAPH_POST = 'TK-GRAPH_POSTPROCESSING'
|
||||
TK_GRAPH_ANALYSIS = 'TK-GRAPH_ANALYSIS'
|
||||
TK_GRAPH_ANALYSIS_RESCALED = 'TK-GRAPH_ANALYSIS_RESCALED'
|
||||
TOKEN_ANALYSIS = 'TOKEN_ANALYSIS'
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 34 KiB |
@ -2,22 +2,21 @@
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_new2/'
|
||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
results = './results/test_20240619/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
# only debugging features, production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing = true
|
||||
preprocessing_skip = false
|
||||
token_analysis = false
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing = false
|
||||
graph_postprocessing_skip = false
|
||||
time_analysis = false
|
||||
time_analysis_skip = false
|
||||
graph_rescaling_skip = false
|
||||
time_analysis_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
@ -42,9 +41,12 @@ criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.model_input]
|
||||
# input_features = [
|
||||
# 'VorgangsTypName',
|
||||
# 'VorgangsArtText',
|
||||
# 'VorgangsBeschreibung',
|
||||
# ]
|
||||
input_features = [
|
||||
'VorgangsTypName',
|
||||
'VorgangsArtText',
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
|
||||
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user