graph and subgraph generation
This commit is contained in:
@@ -6,8 +6,17 @@ from pathlib import Path
|
||||
from time import gmtime
|
||||
from typing import Any, Final
|
||||
|
||||
import py4cytoscape as p4c
|
||||
|
||||
from lang_main.io import load_toml_config
|
||||
|
||||
# ** py4cytoscape config
|
||||
p4c.set_summary_logger(False)
|
||||
p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
|
||||
p4c.py4cytoscape_logger.detail_logger.removeHandler(p4c.py4cytoscape_logger.detail_handler)
|
||||
p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
|
||||
|
||||
# ** lang-main config
|
||||
logging.Formatter.converter = gmtime
|
||||
LOG_FMT: Final[str] = '%(asctime)s | %(module)s:%(levelname)s | %(message)s'
|
||||
LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
|
||||
@@ -18,13 +27,13 @@ logging.basicConfig(
|
||||
)
|
||||
|
||||
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
|
||||
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
|
||||
USE_INTERNAL_CONFIG: Final[bool] = False
|
||||
pkg_dir = Path(__file__).parent
|
||||
cfg_path_internal = (pkg_dir / CONFIG_FILENAME).resolve()
|
||||
# caller_file = Path(inspect.stack()[-1].filename)
|
||||
# CALLER_PATH: Final[Path] = caller_file.parent.resolve()
|
||||
cyto_stylesheet_path = (pkg_dir / CYTO_STYLESHEET_FILENAME).resolve()
|
||||
|
||||
# load config data: internal/external
|
||||
# ** load config data: internal/external
|
||||
if USE_INTERNAL_CONFIG:
|
||||
loaded_cfg = load_toml_config(path_to_toml=cfg_path_internal)
|
||||
else:
|
||||
@@ -43,6 +52,17 @@ else:
|
||||
|
||||
CONFIG: Final[dict[str, Any]] = loaded_cfg.copy()
|
||||
|
||||
# ** Cytoscape configuration
|
||||
# stylesheet
|
||||
if not cyto_stylesheet_path.exists():
|
||||
raise FileNotFoundError(
|
||||
f'Visual stylesheet for Cytoscape not found under: >>{cyto_stylesheet_path}<<'
|
||||
)
|
||||
|
||||
CYTO_PATH_STYLESHEET: Final[Path] = cyto_stylesheet_path
|
||||
|
||||
|
||||
# TODO check removal
|
||||
# append Graphviz binary folder to system path if not already contained
|
||||
if sys.platform == 'win32':
|
||||
path = Path(r'C:\Program Files\Graphviz\bin')
|
||||
|
||||
@@ -13,7 +13,10 @@ import numpy.typing as npt
|
||||
from networkx import DiGraph, Graph
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.constants import EDGE_WEIGHT_DECIMALS
|
||||
from lang_main.constants import (
|
||||
EDGE_WEIGHT_DECIMALS,
|
||||
PROPERTY_NAME_DEGREE_WEIGHTED,
|
||||
)
|
||||
from lang_main.errors import EdgePropertyNotContainedError
|
||||
from lang_main.io import load_pickle, save_pickle
|
||||
from lang_main.loggers import logger_graphs as logger
|
||||
@@ -233,9 +236,7 @@ def filter_graph_by_edge_weight(
|
||||
if bound_upper is not None and weight > bound_upper:
|
||||
filtered_graph.remove_edge(edge[0], edge[1])
|
||||
|
||||
if filtered_graph._undirected is not None:
|
||||
filtered_graph.to_undirected(inplace=True, logging=False)
|
||||
|
||||
filtered_graph.to_undirected(inplace=True, logging=False)
|
||||
filtered_graph.update_metadata(logging=False)
|
||||
|
||||
return filtered_graph
|
||||
@@ -275,15 +276,77 @@ def filter_graph_by_node_degree(
|
||||
if bound_upper is not None and degree > bound_upper:
|
||||
filtered_graph.remove_node(node)
|
||||
|
||||
if filtered_graph._undirected is not None:
|
||||
filtered_graph.to_undirected(inplace=True, logging=False)
|
||||
|
||||
filtered_graph.to_undirected(inplace=True, logging=False)
|
||||
filtered_graph.update_metadata(logging=False)
|
||||
|
||||
return filtered_graph
|
||||
|
||||
|
||||
def apply_rescaling_to_graph(
|
||||
def add_weighted_degree(
    graph: DiGraph | Graph,
    edge_weight_property: str = 'weight',
    property_name: str = PROPERTY_NAME_DEGREE_WEIGHTED,
) -> None:
    """store the weighted degree of every node as a node property

    The graph is modified inplace, nothing is returned.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph whose nodes receive the weighted degree as property
    edge_weight_property : str, optional
        name of the edge property handed over as ``weight`` to the degree
        lookup, by default 'weight'
    property_name : str, optional
        name of the node property under which the weighted degree is stored,
        by default PROPERTY_NAME_DEGREE_WEIGHTED
    """
    # the degree lookup is converted to a plain mapping: node -> weighted degree
    weighted_degrees = graph.degree(weight=edge_weight_property)  # type: ignore
    degree_by_node = cast(dict[str, float], dict(weighted_degrees))

    nx.set_node_attributes(graph, degree_by_node, name=property_name)
|
||||
|
||||
|
||||
def static_graph_analysis(
    graph: TokenGraph,
) -> tuple[TokenGraph]:
    """helper function to allow the calculation of static metrics in pipelines

    The analysis is not run on the passed object itself, but on the object
    returned by its ``copy`` method.

    Parameters
    ----------
    graph : TokenGraph
        token graph for which the static analysis should be performed

    Returns
    -------
    tuple[TokenGraph]
        one-element tuple holding the copied token graph after
        ``perform_static_analysis`` was called on it
    """
    graph = graph.copy()
    graph.perform_static_analysis()

    # wrapped in a tuple so the result can be unpacked as positional
    # arguments by a following pipeline action
    return (graph,)
|
||||
|
||||
|
||||
def pipe_add_graph_metrics(
    *graphs: DiGraph | Graph,
) -> tuple[DiGraph | Graph, ...]:
    """helper function to add graph metrics to several graphs in pipelines

    Each given graph is deep-copied and the weighted degree is added to the
    copy, the passed graphs themselves are not handed to the metric function.

    Parameters
    ----------
    *graphs : DiGraph | Graph
        graphs for which the metrics should be calculated

    Returns
    -------
    tuple[DiGraph | Graph, ...]
        copies of the given graphs with added weighted degree, in the same
        order as provided
    """

    def _copy_with_metrics(source: DiGraph | Graph) -> DiGraph | Graph:
        # work on an independent duplicate of the incoming graph
        duplicate = copy.deepcopy(source)
        add_weighted_degree(duplicate)
        return duplicate

    return tuple(_copy_with_metrics(graph) for graph in graphs)
|
||||
|
||||
|
||||
def pipe_rescale_graph_edge_weights(
|
||||
graph: TokenGraph,
|
||||
) -> tuple[TokenGraph, Graph]:
|
||||
"""helper function to allow calls in pipelines
|
||||
@@ -298,6 +361,8 @@ def apply_rescaling_to_graph(
|
||||
tuple[TokenGraph, Graph]
|
||||
token graph (directed) and undirected version with rescaled edge weights
|
||||
"""
|
||||
graph = graph.copy()
|
||||
|
||||
return graph.rescale_edge_weights()
|
||||
|
||||
|
||||
@@ -542,7 +607,7 @@ class TokenGraph(DiGraph):
|
||||
the underlying `to_undirected` method of the directed and rescaled
|
||||
TokenGraph instance is called the weights are not rescaled again. Thus,
|
||||
the maximum edge weight can exceed the theoretical maximum value of 1. To
|
||||
ensure consistent behaviour across different application of the conversion to
|
||||
ensure consistent behaviour across different applications of the conversion to
|
||||
undirected graphs new instances are returned, especially for the undirected
|
||||
graph.
|
||||
In contrast, the new directed TokenGraph contains an undirected version without
|
||||
@@ -554,14 +619,24 @@ class TokenGraph(DiGraph):
|
||||
tuple[TokenGraph, Graph]
|
||||
directed and undirected instances
|
||||
"""
|
||||
self.to_undirected(inplace=True, logging=False)
|
||||
token_graph = rescale_edge_weights(self.directed)
|
||||
token_graph.rescaled_weights = True
|
||||
token_graph.to_undirected(inplace=True, logging=False)
|
||||
token_graph.update_metadata(logging=False)
|
||||
undirected = rescale_edge_weights(self.undirected)
|
||||
|
||||
return token_graph, undirected
|
||||
|
||||
def perform_static_analysis(self):
    """calculate static metrics on the data of the underlying graphs

    The metrics are added to this graph first and afterwards to the graph
    available as ``self.undirected``.

    Current operations:
    - adding weighted degree
    """
    add_weighted_degree(graph=self)
    # looked up only after this graph was processed, same order as before
    undirected_graph = self.undirected
    add_weighted_degree(graph=undirected_graph)
|
||||
|
||||
def _save_prepare(
|
||||
self,
|
||||
path: Path,
|
||||
|
||||
@@ -226,6 +226,7 @@ def build_token_graph(
|
||||
graph.update_metadata()
|
||||
# convert to undirected
|
||||
graph.to_undirected(logging=False)
|
||||
graph.perform_static_analysis()
|
||||
|
||||
return graph, docs_mapping
|
||||
|
||||
|
||||
@@ -5,8 +5,13 @@ import spacy
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from spacy.language import Language as GermanSpacyModel
|
||||
|
||||
from lang_main import CONFIG
|
||||
from lang_main.types import STFRDeviceTypes
|
||||
from lang_main import CONFIG, CYTO_PATH_STYLESHEET
|
||||
from lang_main.types import CytoLayoutProperties, CytoLayouts, STFRDeviceTypes
|
||||
|
||||
__all__ = [
|
||||
'CONFIG',
|
||||
'CYTO_PATH_STYLESHEET',
|
||||
]
|
||||
|
||||
# ** paths
|
||||
input_path_conf = Path.cwd() / Path(CONFIG['paths']['inputs'])
|
||||
@@ -28,8 +33,10 @@ SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
|
||||
SKIP_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis_skip']
|
||||
SKIP_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing_skip']
|
||||
SKIP_GRAPH_RESCALING: Final[bool] = CONFIG['control']['graph_rescaling_skip']
|
||||
SKIP_GRAPH_STATIC_RENDERING: Final[bool] = CONFIG['control']['graph_static_rendering_skip']
|
||||
SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
|
||||
|
||||
|
||||
# ** models
|
||||
# ** sentence_transformers
|
||||
STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU
|
||||
@@ -55,6 +62,26 @@ THRESHOLD_SIMILARITY: Final[float] = CONFIG['preprocess']['threshold_similarity'
|
||||
# ** graph postprocessing
|
||||
EDGE_WEIGHT_DECIMALS: Final[int] = 4
|
||||
THRESHOLD_EDGE_WEIGHT: Final[int] = CONFIG['graph_postprocessing']['threshold_edge_weight']
|
||||
PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted'
|
||||
|
||||
# ** graph exports (Cytoscape)
|
||||
CYTO_COLLECTION_NAME: Final[str] = 'lang_main'
|
||||
CYTO_BASE_NETWORK_NAME: Final[str] = 'token_graph'
|
||||
CYTO_LAYOUT_NAME: Final[CytoLayouts] = 'force-directed'
|
||||
CYTO_LAYOUT_PROPERTIES: Final[CytoLayoutProperties] = {
|
||||
'numIterations': 1000,
|
||||
'defaultSpringCoefficient': 1e-4,
|
||||
'defaultSpringLength': 45,
|
||||
'defaultNodeMass': 11,
|
||||
'isDeterministic': True,
|
||||
'singlePartition': False,
|
||||
}
|
||||
CYTO_STYLESHEET_NAME: Final[str] = 'lang_main'
|
||||
# name for property, on which selection is done
|
||||
CYTO_SELECTION_PROPERTY: Final[str] = 'node_selection'
|
||||
CYTO_NUMBER_SUBGRAPHS: Final[int] = 5
|
||||
CYTO_ITER_NEIGHBOUR_DEPTH: Final[int] = 2
|
||||
|
||||
# ** time analysis.uniqueness
|
||||
THRESHOLD_UNIQUE_TEXTS: Final[int] = CONFIG['time_analysis']['uniqueness'][
|
||||
'threshold_unique_texts'
|
||||
|
||||
128
src/lang_main/cytoscape_config/lang_main.xml
Normal file
128
src/lang_main/cytoscape_config/lang_main.xml
Normal file
@@ -0,0 +1,128 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<vizmap id="VizMap-2024_07_12-08_08" documentVersion="3.1">
|
||||
<visualStyle name="lang_main">
|
||||
<network>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
|
||||
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
|
||||
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
|
||||
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
|
||||
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
|
||||
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
|
||||
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
|
||||
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
|
||||
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
|
||||
<visualProperty default="" name="NETWORK_TITLE"/>
|
||||
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
|
||||
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
|
||||
</network>
|
||||
<node>
|
||||
<dependency value="true" name="nodeCustomGraphicsSizeSync"/>
|
||||
<dependency value="true" name="nodeSizeLocked"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
|
||||
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
|
||||
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
|
||||
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
|
||||
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
|
||||
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
|
||||
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
|
||||
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
|
||||
<visualProperty default="40.0" name="NODE_HEIGHT"/>
|
||||
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
|
||||
<visualProperty default="false" name="NODE_SELECTED"/>
|
||||
<visualProperty default="0.0" name="NODE_DEPTH"/>
|
||||
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
|
||||
<visualProperty default="" name="NODE_TOOLTIP"/>
|
||||
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
|
||||
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
|
||||
<visualProperty default="18.0" name="NODE_SIZE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
|
||||
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
|
||||
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
|
||||
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
|
||||
<visualProperty default="60.0" name="NODE_WIDTH"/>
|
||||
<visualProperty default="" name="NODE_LABEL">
|
||||
<passthroughMapping attributeName="name" attributeType="string"/>
|
||||
</visualProperty>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
|
||||
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
|
||||
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
|
||||
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
|
||||
<visualProperty default="true" name="NODE_VISIBLE"/>
|
||||
<visualProperty default="255" name="NODE_TRANSPARENCY"/>
|
||||
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
|
||||
</node>
|
||||
<edge>
|
||||
<dependency value="true" name="arrowColorMatchesEdge"/>
|
||||
<visualProperty default="false" name="EDGE_SELECTED"/>
|
||||
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
|
||||
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
|
||||
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="" name="EDGE_LABEL"/>
|
||||
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
|
||||
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
|
||||
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
|
||||
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
|
||||
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
|
||||
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
|
||||
<visualProperty default="true" name="EDGE_VISIBLE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
|
||||
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
|
||||
<visualProperty default="" name="EDGE_TOOLTIP"/>
|
||||
<visualProperty default="0.0" name="EDGE_Z_ORDER"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
|
||||
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
|
||||
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
|
||||
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
|
||||
<visualProperty default="true" name="EDGE_CURVED"/>
|
||||
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
|
||||
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
|
||||
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
|
||||
<visualProperty default="3.0" name="EDGE_WIDTH">
|
||||
<continuousMapping attributeName="weight" attributeType="float">
|
||||
<continuousMappingPoint attrValue="0.09520000219345093" equalValue="2.0" greaterValue="2.0" lesserValue="1.0"/>
|
||||
<continuousMappingPoint attrValue="1.0" equalValue="10.0" greaterValue="1.0" lesserValue="10.0"/>
|
||||
</continuousMapping>
|
||||
</visualProperty>
|
||||
</edge>
|
||||
</visualStyle>
|
||||
</vizmap>
|
||||
Binary file not shown.
@@ -16,6 +16,7 @@ preprocessing_skip = true
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
graph_rescaling_skip = false
|
||||
graph_static_rendering_skip = false
|
||||
time_analysis_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
|
||||
@@ -185,13 +185,16 @@ class Pipeline(BasePipeline):
|
||||
def add(
|
||||
self,
|
||||
action: Callable,
|
||||
action_kwargs: dict[str, Any] = {},
|
||||
action_kwargs: dict[str, Any] | None = None,
|
||||
save_result: bool = False,
|
||||
load_result: bool = False,
|
||||
filename: str | None = None,
|
||||
) -> None:
|
||||
# check explicitly for function type
|
||||
# if isinstance(action, FunctionType):
|
||||
if action_kwargs is None:
|
||||
action_kwargs = {}
|
||||
|
||||
if isinstance(action, Callable):
|
||||
self.actions.append(action)
|
||||
self.action_names.append(action.__name__)
|
||||
@@ -272,7 +275,7 @@ class Pipeline(BasePipeline):
|
||||
@override
|
||||
def logic(
|
||||
self,
|
||||
starting_values: tuple[Any, ...],
|
||||
starting_values: tuple[Any, ...] | None = None,
|
||||
) -> tuple[Any, ...]:
|
||||
for idx, (action, action_kwargs) in enumerate(zip(self.actions, self.actions_kwargs)):
|
||||
# loading
|
||||
@@ -287,11 +290,21 @@ class Pipeline(BasePipeline):
|
||||
continue
|
||||
# calculation
|
||||
if idx == 0:
|
||||
ret = action(*starting_values, **action_kwargs)
|
||||
args = starting_values
|
||||
# ret = action(*starting_values, **action_kwargs)
|
||||
else:
|
||||
ret = action(*ret, **action_kwargs)
|
||||
args = ret
|
||||
|
||||
if not isinstance(ret, tuple):
|
||||
if args is not None and action_kwargs:
|
||||
ret = action(*args, **action_kwargs)
|
||||
elif args is not None:
|
||||
ret = action(*args)
|
||||
elif args is None and action_kwargs:
|
||||
ret = action(**action_kwargs)
|
||||
else:
|
||||
ret = action()
|
||||
|
||||
if ret is not None and not isinstance(ret, tuple):
|
||||
ret = (ret,)
|
||||
ret = cast(tuple[Any, ...], ret)
|
||||
# save intermediate result
|
||||
|
||||
@@ -19,6 +19,7 @@ from lang_main.analysis.tokens import build_token_graph
|
||||
from lang_main.constants import (
|
||||
ACTIVITY_FEATURE,
|
||||
ACTIVITY_TYPES,
|
||||
CYTO_BASE_NETWORK_NAME,
|
||||
DATE_COLS,
|
||||
FEATURE_NAME_OBJ_ID,
|
||||
MODEL_INPUT_FEATURES,
|
||||
@@ -34,6 +35,7 @@ from lang_main.constants import (
|
||||
UNIQUE_CRITERION_FEATURE,
|
||||
)
|
||||
from lang_main.pipelines.base import Pipeline
|
||||
from lang_main.render import cytoscape as cyto
|
||||
from lang_main.types import EntryPoints
|
||||
|
||||
|
||||
@@ -156,6 +158,9 @@ def build_tk_graph_post_pipe() -> Pipeline:
|
||||
'bound_lower': 1,
|
||||
'bound_upper': None,
|
||||
},
|
||||
)
|
||||
pipe_graph_postprocessing.add(
|
||||
graphs.static_graph_analysis,
|
||||
save_result=True,
|
||||
filename=EntryPoints.TK_GRAPH_ANALYSIS,
|
||||
)
|
||||
@@ -163,10 +168,13 @@ def build_tk_graph_post_pipe() -> Pipeline:
|
||||
return pipe_graph_postprocessing
|
||||
|
||||
|
||||
def build_tk_graph_rescaling() -> Pipeline:
|
||||
def build_tk_graph_rescaling_pipe() -> Pipeline:
|
||||
pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
|
||||
pipe_graph_rescaling.add(
|
||||
graphs.apply_rescaling_to_graph,
|
||||
graphs.pipe_rescale_graph_edge_weights,
|
||||
)
|
||||
pipe_graph_rescaling.add(
|
||||
graphs.pipe_add_graph_metrics,
|
||||
save_result=True,
|
||||
filename=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
|
||||
)
|
||||
@@ -174,6 +182,27 @@ def build_tk_graph_rescaling() -> Pipeline:
|
||||
return pipe_graph_rescaling
|
||||
|
||||
|
||||
def build_tk_graph_rendering_pipe() -> Pipeline:
    """assemble the pipeline for the static rendering of the token graph

    Registered actions in order: import into Cytoscape, layout, styling,
    image export of the base network, selection of the nodes for subgraphs
    and building of the subnetworks (with image export enabled).

    Returns
    -------
    Pipeline
        pipeline named 'Graph_Static-Rendering' with all rendering actions added
    """
    pipeline = Pipeline(name='Graph_Static-Rendering', working_dir=SAVE_PATH_FOLDER)

    # ** base network
    pipeline.add(cyto.import_to_cytoscape)
    pipeline.add(cyto.layout_network)
    pipeline.add(cyto.apply_style_to_network)
    pipeline.add(
        cyto.export_network_to_image,
        action_kwargs={'filename': CYTO_BASE_NETWORK_NAME},
    )

    # ** subnetworks
    pipeline.add(cyto.get_subgraph_node_selection)
    pipeline.add(
        cyto.build_subnetworks,
        action_kwargs={'export_image': True},
    )

    return pipeline
|
||||
|
||||
|
||||
# ** timeline analysis
|
||||
def build_timeline_pipe() -> Pipeline:
|
||||
pipe_timeline = Pipeline(name='Timeline_Analysis', working_dir=SAVE_PATH_FOLDER)
|
||||
|
||||
0
src/lang_main/render/__init__.py
Normal file
0
src/lang_main/render/__init__.py
Normal file
159
src/lang_main/render/cytoscape.py
Normal file
159
src/lang_main/render/cytoscape.py
Normal file
@@ -0,0 +1,159 @@
|
||||
import time
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import py4cytoscape as p4c
|
||||
from networkx import DiGraph, Graph
|
||||
|
||||
from lang_main.constants import (
|
||||
CYTO_BASE_NETWORK_NAME,
|
||||
CYTO_COLLECTION_NAME,
|
||||
CYTO_ITER_NEIGHBOUR_DEPTH,
|
||||
CYTO_LAYOUT_NAME,
|
||||
CYTO_LAYOUT_PROPERTIES,
|
||||
CYTO_NUMBER_SUBGRAPHS,
|
||||
CYTO_PATH_STYLESHEET,
|
||||
CYTO_SELECTION_PROPERTY,
|
||||
CYTO_STYLESHEET_NAME,
|
||||
PROPERTY_NAME_DEGREE_WEIGHTED,
|
||||
SAVE_PATH_FOLDER,
|
||||
)
|
||||
from lang_main.types import (
|
||||
CytoExportFileTypes,
|
||||
CytoExportPageSizes,
|
||||
CytoLayoutProperties,
|
||||
CytoLayouts,
|
||||
CytoNodeID,
|
||||
)
|
||||
|
||||
|
||||
# ** Cytoscape API related, using py4cytoscape
|
||||
def import_to_cytoscape(
    graph: DiGraph | Graph,
) -> None:
    """transfer a NetworkX graph to Cytoscape as base network

    All networks present in Cytoscape are deleted beforehand. The graph is
    created with the title ``CYTO_BASE_NETWORK_NAME`` in the collection
    ``CYTO_COLLECTION_NAME``, afterwards Cytoscape's network analysis is
    triggered with ``directed=False``.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph which should be imported
    """
    # start from a clean state, drop everything which is currently loaded
    p4c.delete_all_networks()

    creation_options = {
        'title': CYTO_BASE_NETWORK_NAME,
        'collection': CYTO_COLLECTION_NAME,
    }
    p4c.create_network_from_networkx(graph, **creation_options)

    p4c.analyze_network(directed=False)
|
||||
|
||||
|
||||
def reset_current_network_to_base() -> None:
    """make the base network (``CYTO_BASE_NETWORK_NAME``) the current network in Cytoscape"""
    base_network = CYTO_BASE_NETWORK_NAME
    p4c.set_current_network(network=base_network)
|
||||
|
||||
|
||||
def export_network_to_image(
    filename: str,
    filetype: CytoExportFileTypes = 'SVG',
    network_name: str = CYTO_BASE_NETWORK_NAME,
    pdf_export_page_size: CytoExportPageSizes = 'A4',
) -> None:
    """export a Cytoscape network as image into ``SAVE_PATH_FOLDER``

    The target folder is created if it does not exist yet. An already existing
    file is overwritten.

    Parameters
    ----------
    filename : str
        name of the target file, joined with ``SAVE_PATH_FOLDER``
    filetype : CytoExportFileTypes, optional
        image format handed over to Cytoscape, by default 'SVG'
    network_name : str, optional
        name of the network to export, by default CYTO_BASE_NETWORK_NAME
    pdf_export_page_size : CytoExportPageSizes, optional
        page size handed over to the export as ``page_size``, by default 'A4'
    """
    target_folder = SAVE_PATH_FOLDER
    # ``exist_ok`` avoids the race between a separate existence check and
    # the folder creation
    target_folder.mkdir(parents=True, exist_ok=True)
    file_pth = target_folder / filename

    # text is exported as font for every format except SVG
    text_as_font = filetype != 'SVG'

    p4c.export_image(
        filename=str(file_pth),
        type=filetype,
        network=network_name,
        overwrite_file=True,
        all_graphics_details=True,
        export_text_as_font=text_as_font,
        page_size=pdf_export_page_size,
    )
|
||||
|
||||
|
||||
def layout_network(
    layout_name: CytoLayouts = CYTO_LAYOUT_NAME,
    layout_properties: CytoLayoutProperties = CYTO_LAYOUT_PROPERTIES,
    network_name: str = CYTO_BASE_NETWORK_NAME,
) -> None:
    """apply a layout algorithm to a Cytoscape network and fit it to the view

    Parameters
    ----------
    layout_name : CytoLayouts, optional
        name of the layout algorithm, by default CYTO_LAYOUT_NAME
    layout_properties : CytoLayoutProperties, optional
        properties which are set for the layout algorithm before it is
        applied, by default CYTO_LAYOUT_PROPERTIES
    network_name : str, optional
        name of the network to lay out, by default CYTO_BASE_NETWORK_NAME
    """
    # configure the algorithm first, then run it on the target network
    p4c.set_layout_properties(layout_name, layout_properties)
    p4c.layout_network(
        layout_name=layout_name,
        network=network_name,
    )
    # fit the whole network, not only selected elements
    p4c.fit_content(
        selected_only=False,
        network=network_name,
    )
|
||||
|
||||
|
||||
def apply_style_to_network(
    pth_to_stylesheet: Path = CYTO_PATH_STYLESHEET,
    network_name: str = CYTO_BASE_NETWORK_NAME,
) -> None:
    """apply the visual style ``CYTO_STYLESHEET_NAME`` to a Cytoscape network

    The stylesheet file is only imported if Cytoscape does not already list a
    style with this name.

    Parameters
    ----------
    pth_to_stylesheet : Path, optional
        path to the stylesheet file, by default CYTO_PATH_STYLESHEET
    network_name : str, optional
        name of the network to style, by default CYTO_BASE_NETWORK_NAME

    Raises
    ------
    FileNotFoundError
        if the style has to be imported, but the stylesheet file does not exist
    """
    known_styles = cast(list[str], p4c.get_visual_style_names())
    import_needed = CYTO_STYLESHEET_NAME not in known_styles

    # existence for standard path verified at import, but not for other
    # provided paths
    if import_needed and not pth_to_stylesheet.exists():
        raise FileNotFoundError(
            f'Visual stylesheet for Cytoscape not found under: >>{pth_to_stylesheet}<<'
        )
    if import_needed:
        p4c.import_visual_styles(str(pth_to_stylesheet))

    p4c.set_visual_style(CYTO_STYLESHEET_NAME, network=network_name)
    # if not waited image export could be without applied style
    time.sleep(1)
    p4c.fit_content(selected_only=False, network=network_name)
|
||||
|
||||
|
||||
def get_subgraph_node_selection(
    network_name: str = CYTO_BASE_NETWORK_NAME,
    property_degree_weighted: str = PROPERTY_NAME_DEGREE_WEIGHTED,
) -> list[CytoNodeID]:
    """choose the nodes around which subgraphs should be built

    For each node a selection value is calculated as product of the weighted
    degree, the column 'BetweennessCentrality' and the column 'Stress' divided
    by its maximum. The ``CYTO_NUMBER_SUBGRAPHS`` nodes with the highest value
    are chosen.

    NOTE(review): 'Stress' and 'BetweennessCentrality' are presumably provided
    by Cytoscape's network analysis - confirm that it was run beforehand.

    Parameters
    ----------
    network_name : str, optional
        name of the network whose table columns are read,
        by default CYTO_BASE_NETWORK_NAME
    property_degree_weighted : str, optional
        name of the column containing the weighted degree,
        by default PROPERTY_NAME_DEGREE_WEIGHTED

    Returns
    -------
    list[CytoNodeID]
        values of the column 'SUID' for the chosen nodes, highest selection
        value first
    """
    node_table = p4c.get_table_columns(network=network_name)

    # stress relative to the largest stress value in the table
    stress = node_table['Stress']
    node_table['stress_norm'] = stress / stress.max()

    selection_value = (
        node_table[property_degree_weighted]
        * node_table['BetweennessCentrality']
        * node_table['stress_norm']
    )
    node_table[CYTO_SELECTION_PROPERTY] = selection_value

    ranked_nodes = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False)
    chosen_nodes = ranked_nodes.iloc[:CYTO_NUMBER_SUBGRAPHS, :]

    return chosen_nodes['SUID'].to_list()
|
||||
|
||||
|
||||
def select_neighbours_of_node(
    node: CytoNodeID,
    network_name: str = CYTO_BASE_NETWORK_NAME,
) -> None:
    """select a node, its neighbourhood and all edges in between in Cytoscape

    The current selection of the network is cleared first. Starting from the
    given node, the selection is extended by the first neighbours
    ``CYTO_ITER_NEIGHBOUR_DEPTH`` times. Finally, all edges connecting the
    selected nodes are selected as well.

    Parameters
    ----------
    node : CytoNodeID
        node from which the selection starts
    network_name : str, optional
        name of the network in which the selection is performed,
        by default CYTO_BASE_NETWORK_NAME
    """
    p4c.clear_selection(network=network_name)
    p4c.select_nodes(node, network=network_name)

    for _ in range(CYTO_ITER_NEIGHBOUR_DEPTH):
        p4c.select_first_neighbors(network=network_name)

    # the network has to be named explicitly: without it the call acts on
    # Cytoscape's current network, which is not necessarily ``network_name``
    # (e.g. after a subnetwork was created and set as current network)
    p4c.select_edges_connecting_selected_nodes(network=network_name)
|
||||
|
||||
|
||||
def make_subnetwork(
    index: int,
    network_name: str = CYTO_BASE_NETWORK_NAME,
    export_image: bool = True,
) -> None:
    """create a subnetwork out of the currently selected nodes and edges

    The subnetwork is named ``<network_name>_sub_<index + 1>``. It is set as
    current network in Cytoscape and stays the current network after the call.

    Parameters
    ----------
    index : int
        zero-based index of the subnetwork, increased by one for the name
    network_name : str, optional
        name of the network from which the selection is taken,
        by default CYTO_BASE_NETWORK_NAME
    export_image : bool, optional
        whether the subnetwork should be exported as image with the subnetwork
        name as filename, by default True
    """
    name_suffix = f'_sub_{index+1}'
    subnetwork_name = network_name + name_suffix

    p4c.create_subnetwork(
        nodes='selected',
        edges='selected',
        subnetwork_name=subnetwork_name,
        network=network_name,
    )
    p4c.set_current_network(subnetwork_name)
    p4c.fit_content(selected_only=False, network=subnetwork_name)

    if not export_image:
        return

    export_network_to_image(filename=subnetwork_name, network_name=subnetwork_name)
|
||||
|
||||
|
||||
def build_subnetworks(
    nodes_to_analyse: Iterable[CytoNodeID],
    network_name: str = CYTO_BASE_NETWORK_NAME,
    export_image: bool = True,
) -> None:
    """build one subnetwork for each of the given nodes

    For every node its neighbourhood is selected in the given network and a
    subnetwork is created out of this selection. The subnetworks are numbered
    in iteration order.

    Parameters
    ----------
    nodes_to_analyse : Iterable[CytoNodeID]
        nodes around which the subnetworks are built
    network_name : str, optional
        name of the network the subnetworks are derived from,
        by default CYTO_BASE_NETWORK_NAME
    export_image : bool, optional
        whether each subnetwork should be exported as image, by default True
    """
    for position, centre_node in enumerate(nodes_to_analyse):
        # selection first, the subnetwork is created out of this selection
        select_neighbours_of_node(
            node=centre_node,
            network_name=network_name,
        )
        make_subnetwork(
            index=position,
            network_name=network_name,
            export_image=export_image,
        )
|
||||
@@ -1,5 +1,10 @@
|
||||
import enum
|
||||
from typing import Required, TypeAlias, TypedDict
|
||||
from typing import (
|
||||
Literal,
|
||||
Required,
|
||||
TypeAlias,
|
||||
TypedDict,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
from spacy.tokens.doc import Doc as SpacyDoc
|
||||
@@ -42,6 +47,43 @@ Embedding: TypeAlias = SpacyDoc | Tensor
|
||||
# ** graphs
|
||||
NodeTitle: TypeAlias = str
|
||||
EdgeWeight: TypeAlias = int
|
||||
CytoExportFileTypes: TypeAlias = Literal[
|
||||
'JPEG',
|
||||
'PDF',
|
||||
'PNG',
|
||||
'PS',
|
||||
'SVG',
|
||||
]
|
||||
CytoExportPageSizes: TypeAlias = Literal[
|
||||
'A0',
|
||||
'A1',
|
||||
'A2',
|
||||
'A3',
|
||||
'A4',
|
||||
'A5',
|
||||
'Auto',
|
||||
'Legal',
|
||||
'Letter',
|
||||
'Tabloid',
|
||||
]
|
||||
CytoLayouts: TypeAlias = Literal[
|
||||
'attribute-circle',
|
||||
'attribute-grid',
|
||||
'attributes-layout',
|
||||
'circular',
|
||||
'cose',
|
||||
'degree-circle',
|
||||
'force-directed',
|
||||
'force-directed-cl',
|
||||
'fruchterman-rheingold',
|
||||
'grid',
|
||||
'hierarchical',
|
||||
'isom',
|
||||
'kamada-kawai',
|
||||
'stacked-node-layout',
|
||||
]
|
||||
CytoLayoutProperties: TypeAlias = dict[str, float | bool]
|
||||
CytoNodeID: TypeAlias = int
|
||||
|
||||
|
||||
class NodeData(TypedDict):
|
||||
|
||||
Reference in New Issue
Block a user