prepare usage of cytoscape API
This commit is contained in:
parent
0acce25243
commit
1b2d5597b0
6
.gitignore
vendored
6
.gitignore
vendored
@ -10,6 +10,12 @@ __pycache__/
|
|||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
|
|
||||||
|
# images
|
||||||
|
*.jpg
|
||||||
|
*.png
|
||||||
|
*.svg
|
||||||
|
*.bmp
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
*.so
|
*.so
|
||||||
|
|
||||||
|
|||||||
@ -1,13 +1,14 @@
|
|||||||
import typing
|
import typing
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
from pandas import DataFrame, Series
|
from pandas import DataFrame
|
||||||
|
|
||||||
from lang_main.analysis.graphs import TokenGraph
|
from lang_main.analysis.graphs import Graph, TokenGraph, save_to_GraphML
|
||||||
from lang_main.constants import (
|
from lang_main.constants import (
|
||||||
PATH_TO_DATASET,
|
PATH_TO_DATASET,
|
||||||
SAVE_PATH_FOLDER,
|
SAVE_PATH_FOLDER,
|
||||||
SKIP_GRAPH_POSTPROCESSING,
|
SKIP_GRAPH_POSTPROCESSING,
|
||||||
|
SKIP_GRAPH_RESCALING,
|
||||||
SKIP_PREPROCESSING,
|
SKIP_PREPROCESSING,
|
||||||
SKIP_TIME_ANALYSIS,
|
SKIP_TIME_ANALYSIS,
|
||||||
SKIP_TOKEN_ANALYSIS,
|
SKIP_TOKEN_ANALYSIS,
|
||||||
@ -20,6 +21,7 @@ from lang_main.pipelines.predefined import (
|
|||||||
build_timeline_pipe,
|
build_timeline_pipe,
|
||||||
build_tk_graph_pipe,
|
build_tk_graph_pipe,
|
||||||
build_tk_graph_post_pipe,
|
build_tk_graph_post_pipe,
|
||||||
|
build_tk_graph_rescaling,
|
||||||
)
|
)
|
||||||
from lang_main.types import (
|
from lang_main.types import (
|
||||||
EntryPoints,
|
EntryPoints,
|
||||||
@ -34,6 +36,7 @@ pipe_target_feat = build_base_target_feature_pipe()
|
|||||||
pipe_merge = build_merge_duplicates_pipe()
|
pipe_merge = build_merge_duplicates_pipe()
|
||||||
pipe_token_analysis = build_tk_graph_pipe()
|
pipe_token_analysis = build_tk_graph_pipe()
|
||||||
pipe_graph_postprocessing = build_tk_graph_post_pipe()
|
pipe_graph_postprocessing = build_tk_graph_post_pipe()
|
||||||
|
pipe_graph_rescaling = build_tk_graph_rescaling()
|
||||||
pipe_timeline = build_timeline_pipe()
|
pipe_timeline = build_timeline_pipe()
|
||||||
|
|
||||||
|
|
||||||
@ -81,6 +84,24 @@ def run_graph_postprocessing() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def run_graph_edge_rescaling() -> None:
    """Rescale the edge weights of the stored token graph and export the result.

    Loads the pickled result found under the ``TK_GRAPH_ANALYSIS`` entry point in
    ``SAVE_PATH_FOLDER``, uses its first element as the token graph and pushes it
    through ``pipe_graph_rescaling``. The second element of the pipeline result
    (the undirected, rescaled graph) is written as GraphML file into
    ``SAVE_PATH_FOLDER``.
    """
    # load entry point
    pickle_path = get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_ANALYSIS)
    token_graph = cast(tuple[TokenGraph], load_pickle(pickle_path))[0]
    # pipeline returns the directed token graph and its undirected counterpart
    pipeline_output = cast(
        tuple[TokenGraph, Graph],
        pipe_graph_rescaling.run(starting_values=(token_graph,)),
    )
    # only the undirected version is exported
    save_to_GraphML(
        pipeline_output[1],
        saving_path=SAVE_PATH_FOLDER,
        filename='TokenGraph-undirected-rescaled',
    )
|
||||||
|
|
||||||
|
|
||||||
# ** time analysis
|
# ** time analysis
|
||||||
def run_time_analysis() -> None:
|
def run_time_analysis() -> None:
|
||||||
# load entry point
|
# load entry point
|
||||||
@ -101,6 +122,7 @@ def build_pipeline_container() -> PipelineContainer:
|
|||||||
container.add(run_preprocessing, skip=SKIP_PREPROCESSING)
|
container.add(run_preprocessing, skip=SKIP_PREPROCESSING)
|
||||||
container.add(run_token_analysis, skip=SKIP_TOKEN_ANALYSIS)
|
container.add(run_token_analysis, skip=SKIP_TOKEN_ANALYSIS)
|
||||||
container.add(run_graph_postprocessing, skip=SKIP_GRAPH_POSTPROCESSING)
|
container.add(run_graph_postprocessing, skip=SKIP_GRAPH_POSTPROCESSING)
|
||||||
|
container.add(run_graph_edge_rescaling, skip=SKIP_GRAPH_RESCALING)
|
||||||
container.add(run_time_analysis, skip=SKIP_TIME_ANALYSIS)
|
container.add(run_time_analysis, skip=SKIP_TIME_ANALYSIS)
|
||||||
|
|
||||||
return container
|
return container
|
||||||
|
|||||||
@ -13,9 +13,10 @@ dataset = '../data/02_202307/Export4.csv'
|
|||||||
# be fully executed
|
# be fully executed
|
||||||
[control]
|
[control]
|
||||||
preprocessing_skip = true
|
preprocessing_skip = true
|
||||||
token_analysis_skip = true
|
token_analysis_skip = false
|
||||||
graph_postprocessing_skip = true
|
graph_postprocessing_skip = false
|
||||||
time_analysis_skip = false
|
graph_rescaling_skip = false
|
||||||
|
time_analysis_skip = true
|
||||||
|
|
||||||
#[export_filenames]
|
#[export_filenames]
|
||||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
|
|||||||
@ -1,12 +1 @@
|
|||||||
from lang_main.analysis.preprocessing import clean_string_slim
|
import py4cytoscape
|
||||||
from lang_main.constants import SAVE_PATH_FOLDER
|
|
||||||
|
|
||||||
print(SAVE_PATH_FOLDER)
|
|
||||||
txt = """
|
|
||||||
Wir feiern den Jahrestag am 23.11.2023, olé!
|
|
||||||
tel:::: !!!!???? +++49 123 456 789
|
|
||||||
|
|
||||||
Doch leben wir länger.
|
|
||||||
"""
|
|
||||||
print(txt)
|
|
||||||
print(clean_string_slim(txt))
|
|
||||||
|
|||||||
@ -14,6 +14,7 @@ from networkx import DiGraph, Graph
|
|||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
from lang_main.constants import EDGE_WEIGHT_DECIMALS
|
from lang_main.constants import EDGE_WEIGHT_DECIMALS
|
||||||
|
from lang_main.errors import EdgePropertyNotContainedError
|
||||||
from lang_main.io import load_pickle, save_pickle
|
from lang_main.io import load_pickle, save_pickle
|
||||||
from lang_main.loggers import logger_graphs as logger
|
from lang_main.loggers import logger_graphs as logger
|
||||||
from lang_main.types import (
|
from lang_main.types import (
|
||||||
@ -27,6 +28,18 @@ from lang_main.types import (
|
|||||||
LOGGING_DEFAULT: Final[bool] = False
|
LOGGING_DEFAULT: Final[bool] = False
|
||||||
|
|
||||||
|
|
||||||
|
def save_to_GraphML(
    graph: DiGraph | Graph,
    saving_path: Path,
    filename: str | None = None,
) -> None:
    """Write a graph to disk in the GraphML format.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph which should be exported
    saving_path : Path
        target path; if ``filename`` is provided, it is joined onto this path
    filename : str | None, optional
        file name appended to ``saving_path``, by default None

    Notes
    -----
    The suffix is set via ``Path.with_suffix``, so an already existing suffix
    (everything after the last dot of the final path component) is replaced
    by ``.graphml``.
    """
    target = saving_path if filename is None else saving_path.joinpath(filename)
    target = target.with_suffix('.graphml')
    nx.write_graphml(G=graph, path=target)
    logger.info('Successfully saved graph as GraphML file under %s.', target)
|
||||||
|
|
||||||
|
|
||||||
def get_graph_metadata(
|
def get_graph_metadata(
|
||||||
graph: Graph | DiGraph,
|
graph: Graph | DiGraph,
|
||||||
logging: bool = LOGGING_DEFAULT,
|
logging: bool = LOGGING_DEFAULT,
|
||||||
@ -270,6 +283,24 @@ def filter_graph_by_node_degree(
|
|||||||
return filtered_graph
|
return filtered_graph
|
||||||
|
|
||||||
|
|
||||||
|
def apply_rescaling_to_graph(
    graph: TokenGraph,
) -> tuple[TokenGraph, Graph]:
    """Pipeline-friendly wrapper around ``TokenGraph.rescale_edge_weights``.

    Parameters
    ----------
    graph : TokenGraph
        token graph handed over by the pipeline

    Returns
    -------
    tuple[TokenGraph, Graph]
        whatever ``graph.rescale_edge_weights()`` returns: the directed token
        graph and the undirected graph, both with rescaled edge weights
    """
    rescaled_graphs = graph.rescale_edge_weights()
    return rescaled_graphs
|
||||||
|
|
||||||
|
|
||||||
def normalise_array_linear(
|
def normalise_array_linear(
|
||||||
array: npt.NDArray[np.float_],
|
array: npt.NDArray[np.float_],
|
||||||
) -> npt.NDArray[np.float32]:
|
) -> npt.NDArray[np.float32]:
|
||||||
@ -323,22 +354,57 @@ def weight_scaling(
|
|||||||
return np.round(adjusted_weights, decimals=EDGE_WEIGHT_DECIMALS)
|
return np.round(adjusted_weights, decimals=EDGE_WEIGHT_DECIMALS)
|
||||||
|
|
||||||
|
|
||||||
|
def verify_property(
    graph: Graph | DiGraph,
    property: str,
) -> None:
    """Check that every edge of ``graph`` carries the given edge property.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph whose edges are inspected
    property : str
        name of the edge attribute which must be present on each edge

    Raises
    ------
    EdgePropertyNotContainedError
        raised for the first edge whose attribute dictionary does not
        contain ``property``
    """
    # NOTE: the parameter name shadows the builtin ``property``; it is kept
    # because the function is called with ``property=...`` as keyword argument
    for node_1, node_2 in graph.edges:
        if property not in graph[node_1][node_2]:
            raise EdgePropertyNotContainedError(
                f'Edge property >>{property}<< not '
                f'available for edge >>({node_1}, {node_2})<<'
            )
|
||||||
|
|
||||||
|
|
||||||
|
@overload
def rescale_edge_weights(
    graph: TokenGraph,
    weight_property: str = ...,
) -> TokenGraph: ...


@overload
def rescale_edge_weights(
    graph: DiGraph,
    weight_property: str = ...,
) -> DiGraph: ...


@overload
def rescale_edge_weights(
    graph: Graph,
    weight_property: str = ...,
) -> Graph: ...


def rescale_edge_weights(
    graph: Graph | DiGraph | TokenGraph,
    weight_property: str = 'weight',
) -> Graph | DiGraph | TokenGraph:
    """Return a copy of ``graph`` with rescaled edge weights.

    The weights are read from the edge attribute ``weight_property``,
    log-transformed, passed through ``normalise_array_linear`` and
    ``weight_scaling`` and written back to the same edge attribute of the copy.

    Parameters
    ----------
    graph : Graph | DiGraph | TokenGraph
        graph whose edge weights should be rescaled; all changes are applied
        to the object returned by ``graph.copy()``
    weight_property : str, optional
        name of the edge attribute holding the weights, by default 'weight'

    Returns
    -------
    Graph | DiGraph | TokenGraph
        copied graph with the rescaled edge weights

    Raises
    ------
    EdgePropertyNotContainedError
        raised by ``verify_property`` if any edge lacks ``weight_property``
    """
    graph = graph.copy()
    # check if all edges contain weight property
    verify_property(graph, property=weight_property)

    # read the weights from the verified property instead of a fixed
    # 'weight' key, otherwise a custom ``weight_property`` would be ignored
    weights = cast(
        list[int],
        [data[weight_property] for data in graph.edges.values()],
    )
    # NOTE(review): np.log yields -inf/NaN for non-positive weights --
    # confirm that upstream guarantees strictly positive edge weights
    w_log = cast(npt.NDArray[np.float32], np.log(weights, dtype=np.float32))
    weights_norm = normalise_array_linear(w_log)
    weights_adjusted = weight_scaling(weights_norm)
    # assign new weight values; the edge view is iterated in the same order
    # in which the weights were collected above
    for (node_1, node_2), new_weight in zip(
        graph.edges, weights_adjusted, strict=True
    ):
        graph[node_1][node_2][weight_property] = new_weight

    return graph
|
||||||
|
|
||||||
|
|
||||||
@ -405,7 +471,10 @@ class TokenGraph(DiGraph):
|
|||||||
return self._directed
|
return self._directed
|
||||||
|
|
||||||
@property
def undirected(self) -> Graph:
    """Undirected version of this graph, built on first access.

    If no undirected graph is stored yet, it is generated via
    ``to_undirected(inplace=False, logging=False)`` and kept in
    ``self._undirected`` for subsequent accesses.
    """
    cached = self._undirected
    if cached is None:
        cached = self.to_undirected(inplace=False, logging=False)
        self._undirected = cached

    return cached
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -464,6 +533,35 @@ class TokenGraph(DiGraph):
|
|||||||
graph=self._undirected, logging=logging
|
graph=self._undirected, logging=logging
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def rescale_edge_weights(
    self,
) -> tuple[TokenGraph, Graph]:
    """Create new directed and undirected graph instances with rescaled
    edge weights.

    Only this method ensures that the undirected graph is scaled properly.
    If ``to_undirected`` is called on the directed, already rescaled
    TokenGraph instance, the weights are not rescaled again, so the maximum
    edge weight can exceed the theoretical maximum value of 1. To obtain
    consistent behaviour regardless of how the conversion to an undirected
    graph is applied, new instances are returned, especially for the
    undirected graph.

    In contrast, the returned directed TokenGraph holds an undirected
    version whose weights were not rescaled separately. That internal
    undirected version therefore differs from the undirected graph
    returned by this method.

    Returns
    -------
    tuple[TokenGraph, Graph]
        directed and undirected instances
    """
    # directed graph: rescale first, then derive its internal undirected
    # version and refresh the metadata
    rescaled_directed = rescale_edge_weights(self.directed)
    rescaled_directed.rescaled_weights = True
    rescaled_directed.to_undirected(inplace=True, logging=False)
    rescaled_directed.update_metadata(logging=False)
    # undirected graph: rescaled independently from this instance's
    # undirected version
    rescaled_undirected = rescale_edge_weights(self.undirected)

    return rescaled_directed, rescaled_undirected
|
||||||
|
|
||||||
def _save_prepare(
|
def _save_prepare(
|
||||||
self,
|
self,
|
||||||
path: Path,
|
path: Path,
|
||||||
@ -508,9 +606,10 @@ class TokenGraph(DiGraph):
|
|||||||
else:
|
else:
|
||||||
raise ValueError('No undirected graph available.')
|
raise ValueError('No undirected graph available.')
|
||||||
|
|
||||||
saving_path = saving_path.with_suffix('.graphml')
|
save_to_GraphML(graph=target_graph, saving_path=saving_path)
|
||||||
nx.write_graphml(G=target_graph, path=saving_path)
|
# saving_path = saving_path.with_suffix('.graphml')
|
||||||
logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
|
# nx.write_graphml(G=target_graph, path=saving_path)
|
||||||
|
# logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
|
||||||
|
|
||||||
def to_pickle(
|
def to_pickle(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@ -24,13 +24,10 @@ PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
|
|||||||
# if not PATH_TO_DATASET.exists():
|
# if not PATH_TO_DATASET.exists():
|
||||||
# raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
|
# raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
|
||||||
# ** control
|
# ** control
|
||||||
# DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
|
|
||||||
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
|
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
|
||||||
# DO_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis']
|
|
||||||
SKIP_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis_skip']
|
SKIP_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis_skip']
|
||||||
# DO_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing']
|
|
||||||
SKIP_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing_skip']
|
SKIP_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing_skip']
|
||||||
# DO_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis']
|
SKIP_GRAPH_RESCALING: Final[bool] = CONFIG['control']['graph_rescaling_skip']
|
||||||
SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
|
SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
|
||||||
|
|
||||||
# ** models
|
# ** models
|
||||||
|
|||||||
123
src/lang_main/cytoscape_config/styles_template.xml
Normal file
123
src/lang_main/cytoscape_config/styles_template.xml
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<vizmap id="VizMap-2024_07_10-16_50" documentVersion="3.1">
|
||||||
|
<visualStyle name="template">
|
||||||
|
<network>
|
||||||
|
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
|
||||||
|
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
|
||||||
|
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
|
||||||
|
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
|
||||||
|
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
|
||||||
|
<visualProperty default="" name="NETWORK_TITLE"/>
|
||||||
|
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
|
||||||
|
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
|
||||||
|
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
|
||||||
|
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
|
||||||
|
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
|
||||||
|
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
|
||||||
|
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
|
||||||
|
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
|
||||||
|
</network>
|
||||||
|
<node>
|
||||||
|
<dependency value="true" name="nodeCustomGraphicsSizeSync"/>
|
||||||
|
<dependency value="true" name="nodeSizeLocked"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
|
||||||
|
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
|
||||||
|
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
|
||||||
|
<visualProperty default="true" name="NODE_VISIBLE"/>
|
||||||
|
<visualProperty default="" name="NODE_TOOLTIP"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
|
||||||
|
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
|
||||||
|
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
|
||||||
|
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_DEPTH"/>
|
||||||
|
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
|
||||||
|
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
|
||||||
|
<visualProperty default="60.0" name="NODE_WIDTH"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
|
||||||
|
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
|
||||||
|
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
|
||||||
|
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
|
||||||
|
<visualProperty default="18.0" name="NODE_SIZE"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
|
||||||
|
<visualProperty default="" name="NODE_LABEL">
|
||||||
|
<passthroughMapping attributeName="name" attributeType="string"/>
|
||||||
|
</visualProperty>
|
||||||
|
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
|
||||||
|
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
|
||||||
|
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
|
||||||
|
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
|
||||||
|
<visualProperty default="false" name="NODE_SELECTED"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
|
||||||
|
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
|
||||||
|
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
|
||||||
|
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
|
||||||
|
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
|
||||||
|
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="255" name="NODE_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="40.0" name="NODE_HEIGHT"/>
|
||||||
|
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
|
||||||
|
</node>
|
||||||
|
<edge>
|
||||||
|
<dependency value="true" name="arrowColorMatchesEdge"/>
|
||||||
|
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
|
||||||
|
<visualProperty default="" name="EDGE_TOOLTIP"/>
|
||||||
|
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
|
||||||
|
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
|
||||||
|
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
|
||||||
|
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
|
||||||
|
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
|
||||||
|
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
|
||||||
|
<visualProperty default="false" name="EDGE_SELECTED"/>
|
||||||
|
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
|
||||||
|
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
|
||||||
|
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
|
||||||
|
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="true" name="EDGE_CURVED"/>
|
||||||
|
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
|
||||||
|
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
|
||||||
|
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
|
||||||
|
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
|
||||||
|
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
|
||||||
|
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
|
||||||
|
<visualProperty default="3.0" name="EDGE_WIDTH"/>
|
||||||
|
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
|
||||||
|
<visualProperty default="true" name="EDGE_VISIBLE"/>
|
||||||
|
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
|
||||||
|
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
|
||||||
|
<visualProperty default="" name="EDGE_LABEL"/>
|
||||||
|
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
|
||||||
|
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
|
||||||
|
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
|
||||||
|
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
|
||||||
|
<visualProperty default="0.0" name="EDGE_Z_ORDER"/>
|
||||||
|
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
|
||||||
|
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
|
||||||
|
</edge>
|
||||||
|
</visualStyle>
|
||||||
|
</vizmap>
|
||||||
BIN
src/lang_main/cytoscape_config/template_test.cys
Normal file
BIN
src/lang_main/cytoscape_config/template_test.cys
Normal file
Binary file not shown.
2
src/lang_main/errors.py
Normal file
2
src/lang_main/errors.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
class EdgePropertyNotContainedError(Exception):
    """Raised when a required edge property is missing on a graph edge."""
|
||||||
@ -1,5 +1,5 @@
|
|||||||
import pickle
|
|
||||||
import base64
|
import base64
|
||||||
|
import pickle
|
||||||
import shutil
|
import shutil
|
||||||
import tomllib
|
import tomllib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|||||||
@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
[paths]
|
[paths]
|
||||||
inputs = './inputs/'
|
inputs = './inputs/'
|
||||||
results = './results/test_new2/'
|
results = './results/test_20240619/'
|
||||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
dataset = '../data/02_202307/Export4.csv'
|
||||||
#results = './results/Export7/'
|
#results = './results/Export7/'
|
||||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||||
#results = './results/Export7_trunc/'
|
#results = './results/Export7_trunc/'
|
||||||
@ -12,10 +12,11 @@ dataset = './01_2_Rohdaten_neu/Export4.csv'
|
|||||||
# only debugging features, production-ready pipelines should always
|
# only debugging features, production-ready pipelines should always
|
||||||
# be fully executed
|
# be fully executed
|
||||||
[control]
|
[control]
|
||||||
preprocessing_skip = false
|
preprocessing_skip = true
|
||||||
token_analysis_skip = false
|
token_analysis_skip = false
|
||||||
graph_postprocessing_skip = false
|
graph_postprocessing_skip = false
|
||||||
time_analysis_skip = false
|
graph_rescaling_skip = false
|
||||||
|
time_analysis_skip = true
|
||||||
|
|
||||||
#[export_filenames]
|
#[export_filenames]
|
||||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
|
|||||||
57
src/lang_main/lang_main_config_old.toml
Normal file
57
src/lang_main/lang_main_config_old.toml
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# lang_main: Config file
|
||||||
|
|
||||||
|
[paths]
|
||||||
|
inputs = './inputs/'
|
||||||
|
results = './results/test_new2/'
|
||||||
|
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||||
|
#results = './results/Export7/'
|
||||||
|
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||||
|
#results = './results/Export7_trunc/'
|
||||||
|
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||||
|
|
||||||
|
# only debugging features, production-ready pipelines should always
|
||||||
|
# be fully executed
|
||||||
|
[control]
|
||||||
|
preprocessing_skip = false
|
||||||
|
token_analysis_skip = false
|
||||||
|
graph_postprocessing_skip = false
|
||||||
|
time_analysis_skip = false
|
||||||
|
|
||||||
|
#[export_filenames]
|
||||||
|
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
|
|
||||||
|
[preprocess]
|
||||||
|
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
|
date_cols = [
|
||||||
|
"VorgangsDatum",
|
||||||
|
"ErledigungsDatum",
|
||||||
|
"Arbeitsbeginn",
|
||||||
|
"ErstellungsDatum",
|
||||||
|
]
|
||||||
|
threshold_amount_characters = 5
|
||||||
|
threshold_similarity = 0.8
|
||||||
|
|
||||||
|
[graph_postprocessing]
|
||||||
|
threshold_edge_weight = 150
|
||||||
|
|
||||||
|
[time_analysis.uniqueness]
|
||||||
|
threshold_unique_texts = 4
|
||||||
|
criterion_feature = 'HObjektText'
|
||||||
|
feature_name_obj_id = 'ObjektID'
|
||||||
|
|
||||||
|
[time_analysis.model_input]
|
||||||
|
# input_features = [
|
||||||
|
# 'VorgangsTypName',
|
||||||
|
# 'VorgangsArtText',
|
||||||
|
# 'VorgangsBeschreibung',
|
||||||
|
# ]
|
||||||
|
input_features = [
|
||||||
|
'VorgangsBeschreibung',
|
||||||
|
]
|
||||||
|
activity_feature = 'VorgangsTypName'
|
||||||
|
activity_types = [
|
||||||
|
'Reparaturauftrag (Portal)',
|
||||||
|
'Störungsmeldung',
|
||||||
|
]
|
||||||
|
threshold_num_acitivities = 1
|
||||||
|
threshold_similarity = 0.8
|
||||||
@ -163,6 +163,17 @@ def build_tk_graph_post_pipe() -> Pipeline:
|
|||||||
return pipe_graph_postprocessing
|
return pipe_graph_postprocessing
|
||||||
|
|
||||||
|
|
||||||
|
def build_tk_graph_rescaling() -> Pipeline:
    """Build the pipeline which rescales the edge weights of a token graph.

    The pipeline is named ``'Graph_Rescaling'``, works in ``SAVE_PATH_FOLDER``
    and consists of the single step ``graphs.apply_rescaling_to_graph``. The
    result of this step is saved under the entry point
    ``EntryPoints.TK_GRAPH_ANALYSIS_RESCALED``.

    Returns
    -------
    Pipeline
        configured rescaling pipeline
    """
    pipeline = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
    pipeline.add(
        graphs.apply_rescaling_to_graph,
        save_result=True,
        filename=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )

    return pipeline
|
||||||
|
|
||||||
|
|
||||||
# ** timeline analysis
|
# ** timeline analysis
|
||||||
def build_timeline_pipe() -> Pipeline:
|
def build_timeline_pipe() -> Pipeline:
|
||||||
pipe_timeline = Pipeline(name='Timeline_Analysis', working_dir=SAVE_PATH_FOLDER)
|
pipe_timeline = Pipeline(name='Timeline_Analysis', working_dir=SAVE_PATH_FOLDER)
|
||||||
|
|||||||
@ -30,6 +30,7 @@ class EntryPoints(enum.StrEnum):
|
|||||||
TIMELINE_POST = 'TIMELINE_POSTPROCESSING'
|
TIMELINE_POST = 'TIMELINE_POSTPROCESSING'
|
||||||
TK_GRAPH_POST = 'TK-GRAPH_POSTPROCESSING'
|
TK_GRAPH_POST = 'TK-GRAPH_POSTPROCESSING'
|
||||||
TK_GRAPH_ANALYSIS = 'TK-GRAPH_ANALYSIS'
|
TK_GRAPH_ANALYSIS = 'TK-GRAPH_ANALYSIS'
|
||||||
|
TK_GRAPH_ANALYSIS_RESCALED = 'TK-GRAPH_ANALYSIS_RESCALED'
|
||||||
TOKEN_ANALYSIS = 'TOKEN_ANALYSIS'
|
TOKEN_ANALYSIS = 'TOKEN_ANALYSIS'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 34 KiB |
@ -2,22 +2,21 @@
|
|||||||
|
|
||||||
[paths]
|
[paths]
|
||||||
inputs = './inputs/'
|
inputs = './inputs/'
|
||||||
results = './results/test_new2/'
|
results = './results/test_20240619/'
|
||||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
dataset = '../data/02_202307/Export4.csv'
|
||||||
#results = './results/Export7/'
|
#results = './results/Export7/'
|
||||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||||
#results = './results/Export7_trunc/'
|
#results = './results/Export7_trunc/'
|
||||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||||
|
|
||||||
|
# only debugging features, production-ready pipelines should always
|
||||||
|
# be fully executed
|
||||||
[control]
|
[control]
|
||||||
preprocessing = true
|
preprocessing_skip = true
|
||||||
preprocessing_skip = false
|
|
||||||
token_analysis = false
|
|
||||||
token_analysis_skip = false
|
token_analysis_skip = false
|
||||||
graph_postprocessing = false
|
|
||||||
graph_postprocessing_skip = false
|
graph_postprocessing_skip = false
|
||||||
time_analysis = false
|
graph_rescaling_skip = false
|
||||||
time_analysis_skip = false
|
time_analysis_skip = true
|
||||||
|
|
||||||
#[export_filenames]
|
#[export_filenames]
|
||||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
@ -42,9 +41,12 @@ criterion_feature = 'HObjektText'
|
|||||||
feature_name_obj_id = 'ObjektID'
|
feature_name_obj_id = 'ObjektID'
|
||||||
|
|
||||||
[time_analysis.model_input]
|
[time_analysis.model_input]
|
||||||
|
# input_features = [
|
||||||
|
# 'VorgangsTypName',
|
||||||
|
# 'VorgangsArtText',
|
||||||
|
# 'VorgangsBeschreibung',
|
||||||
|
# ]
|
||||||
input_features = [
|
input_features = [
|
||||||
'VorgangsTypName',
|
|
||||||
'VorgangsArtText',
|
|
||||||
'VorgangsBeschreibung',
|
'VorgangsBeschreibung',
|
||||||
]
|
]
|
||||||
activity_feature = 'VorgangsTypName'
|
activity_feature = 'VorgangsTypName'
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user