enhanced timeline, improved handling of odd cases

This commit is contained in:
Florian Förster 2024-07-24 16:49:19 +02:00
parent 578c543a3e
commit 9197146d2c
21 changed files with 5900 additions and 234 deletions

View File

@ -9,6 +9,7 @@ from lang_main.analysis.graphs import (
save_to_GraphML, save_to_GraphML,
) )
from lang_main.constants import ( from lang_main.constants import (
CYTO_BASE_NETWORK_NAME,
PATH_TO_DATASET, PATH_TO_DATASET,
SAVE_PATH_FOLDER, SAVE_PATH_FOLDER,
SKIP_GRAPH_POSTPROCESSING, SKIP_GRAPH_POSTPROCESSING,
@ -26,7 +27,7 @@ from lang_main.pipelines.predefined import (
build_timeline_pipe, build_timeline_pipe,
build_tk_graph_pipe, build_tk_graph_pipe,
build_tk_graph_post_pipe, build_tk_graph_post_pipe,
build_tk_graph_rendering_pipe, build_tk_graph_render_pipe,
build_tk_graph_rescaling_pipe, build_tk_graph_rescaling_pipe,
) )
from lang_main.types import ( from lang_main.types import (
@ -42,8 +43,14 @@ pipe_target_feat = build_base_target_feature_pipe()
pipe_merge = build_merge_duplicates_pipe() pipe_merge = build_merge_duplicates_pipe()
pipe_token_analysis = build_tk_graph_pipe() pipe_token_analysis = build_tk_graph_pipe()
pipe_graph_postprocessing = build_tk_graph_post_pipe() pipe_graph_postprocessing = build_tk_graph_post_pipe()
pipe_graph_rescaling = build_tk_graph_rescaling_pipe() pipe_graph_rescaling = build_tk_graph_rescaling_pipe(
pipe_static_graph_rendering = build_tk_graph_rendering_pipe() save_result=True,
exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
)
pipe_static_graph_rendering = build_tk_graph_render_pipe(
with_subgraphs=True,
base_network_name=CYTO_BASE_NETWORK_NAME,
)
pipe_timeline = build_timeline_pipe() pipe_timeline = build_timeline_pipe()
@ -98,11 +105,11 @@ def run_graph_edge_rescaling() -> None:
load_pickle(entry_point_path), load_pickle(entry_point_path),
) )
tk_graph = loaded_results[0] tk_graph = loaded_results[0]
ret = cast( tk_graph_rescaled, tk_graph_rescaled_undirected = cast(
tuple[TokenGraph, Graph], pipe_graph_rescaling.run(starting_values=(tk_graph,)) tuple[TokenGraph, Graph], pipe_graph_rescaling.run(starting_values=(tk_graph,))
) )
tk_graph_rescaled = ret[0] # tk_graph_rescaled = ret[0]
tk_graph_rescaled_undirected = ret[1] # tk_graph_rescaled_undirected = ret[1]
tk_graph_rescaled.to_GraphML( tk_graph_rescaled.to_GraphML(
SAVE_PATH_FOLDER, filename='TokenGraph-directed-rescaled', directed=False SAVE_PATH_FOLDER, filename='TokenGraph-directed-rescaled', directed=False
) )

View File

@ -20,13 +20,15 @@ from pandas import DataFrame
import lang_main.io import lang_main.io
from lang_main.analysis import graphs, tokens from lang_main.analysis import graphs, tokens
from lang_main.constants import SPCY_MODEL from lang_main.constants import SAVE_PATH_FOLDER, SPCY_MODEL
from lang_main.types import ObjectID, TimelineCandidates from lang_main.types import EntryPoints, ObjectID, TimelineCandidates
# ** data # ** data
p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve() # p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
(data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df)) (data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))
p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve() # p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
p_tl = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
cands, texts = cast( cands, texts = cast(
tuple[TimelineCandidates, dict[ObjectID, str]], lang_main.io.load_pickle(p_tl) tuple[TimelineCandidates, dict[ObjectID, str]], lang_main.io.load_pickle(p_tl)
) )
@ -58,9 +60,10 @@ HOVER_DATA: Final[dict[str, Any]] = {
} }
# ** graph # ** graph
target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl' # target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
p = Path(target).resolve() # p = Path(target).resolve()
ret = lang_main.io.load_pickle(p) p_tk_graph = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
ret = lang_main.io.load_pickle(p_tk_graph)
tk_graph = cast(graphs.TokenGraph, ret[0]) tk_graph = cast(graphs.TokenGraph, ret[0])
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None) tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None) tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)

View File

@ -0,0 +1,413 @@
import time
import webbrowser
from pathlib import Path
from threading import Thread
from typing import Any, Final, cast
# import dash_cytoscape as cyto
import plotly.express as px
from dash import (
Dash,
Input,
Output,
State,
callback,
dash_table,
dcc,
html,
)
from pandas import DataFrame
from plotly.graph_objects import Figure
import lang_main.io
from lang_main.analysis import graphs, tokens
from lang_main.constants import SAVE_PATH_FOLDER, SPCY_MODEL
from lang_main.errors import EmptyEdgesError, EmptyGraphError
from lang_main.pipelines.predefined import (
build_tk_graph_render_pipe,
build_tk_graph_rescaling_pipe,
)
from lang_main.types import EntryPoints, ObjectID, TimelineCandidates
# ** data
# Module-level setup: everything below runs once at import time.
# The timeline DataFrame is read from the pickled TIMELINE entry point in
# SAVE_PATH_FOLDER; the loaded object is unpacked as a 1-tuple.
# p_df = Path(r'../results/test_20240619/TIMELINE.pkl').resolve()
p_df = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE)
(data,) = cast(tuple[DataFrame], lang_main.io.load_pickle(p_df))
# The TIMELINE_POST entry point is unpacked into the timeline candidates and
# a mapping from object ID to its text; both are read by the callbacks below.
# p_tl = Path(r'../results/test_20240619/TIMELINE_POSTPROCESSING.pkl').resolve()
p_tl = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TIMELINE_POST)
cands, texts = cast(
tuple[TimelineCandidates, dict[ObjectID, str]], lang_main.io.load_pickle(p_tl)
)
# ** necessary pipelines
# Rescaling pipeline used per callback invocation; ``save_result=False`` is
# passed, so presumably no intermediate result is written -- TODO confirm
# against ``build_tk_graph_rescaling_pipe``.
rescaling_pipe = build_tk_graph_rescaling_pipe(
exit_point=EntryPoints.TIMELINE_TK_GRAPH_RESCALED,
save_result=False,
)
# Name handed to the render pipeline and reused below as the file stem of the
# rendered image.
BASE_NETWORK_NAME: Final[str] = 'test_timeline'
# RENDER_FOLDER: Final[Path] = Path.cwd() / 'assets/'
graph_render_pipe = build_tk_graph_render_pipe(
with_subgraphs=False,
base_network_name=BASE_NETWORK_NAME,
)
# Path of the rendered SVG: SAVE_PATH_FOLDER / BASE_NETWORK_NAME with suffix
# '.svg'.
# NOTE(review): ``get_entry_point`` raises FileNotFoundError when the target
# does not exist, so this import-time lookup presumably requires an SVG from
# an earlier render to already be present -- confirm for first-time runs.
# PTH_RENDERED_GRAPH = f'assets/{BASE_NETWORK_NAME}.svg'
PTH_RENDERED_GRAPH = lang_main.io.get_entry_point(
SAVE_PATH_FOLDER,
BASE_NETWORK_NAME,
file_ext='.svg',
)
# Columns of the timeline DataFrame that are shown in the candidates table.
TABLE_FEATS: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
'VorgangsTypName',
'VorgangsBeschreibung',
]
# Subset of TABLE_FEATS holding datetimes; these are converted to strings
# before being handed to the table.
TABLE_FEATS_DATES: Final[list[str]] = [
'ErstellungsDatum',
'ErledigungsDatum',
]
# ** figure config
# Marker styling applied to the traces of the occurrences figure.
MARKERS_OCCURRENCES: Final[dict[str, Any]] = {
'size': 12,
'color': 'yellow',
'line': {
'width': 2,
'color': 'red',
},
}
# Marker styling applied to the traces of the delta figure.
MARKERS_DELTA: Final[dict[str, Any]] = {
'size': 8,
'color': 'red',
'symbol': 'cross',
}
# ``hover_data`` argument for the occurrences figure: string values are
# format specifiers for the column, ``True`` includes the column unformatted.
HOVER_DATA: Final[dict[str, Any]] = {
'ErstellungsDatum': '|%d.%m.%Y',
'ErledigungsDatum': '|%d.%m.%Y',
'VorgangsBeschreibung': True,
}
# ``hover_data`` argument for the delta figure; additionally exposes the
# 'VorgangsDatum' and computed 'delta' columns.
HOVER_DATA_DELTA: Final[dict[str, Any]] = {
'ErstellungsDatum': '|%d.%m.%Y',
'ErledigungsDatum': '|%d.%m.%Y',
'VorgangsDatum': '|%d.%m.%Y',
'delta': True,
'VorgangsBeschreibung': True,
}
# ** graph
# Load the token graph stored under the TK_GRAPH_POST entry point (first
# element of the pickled result) and filter it with an edge-weight bound of
# 150 and a node-degree bound of 1 (the second bound is left as ``None``).
# NOTE(review): neither ``tk_graph`` nor ``tk_graph_filtered`` appears to be
# read by the callbacks in this module -- confirm whether this import-time
# load is still needed.
p_tk_graph = lang_main.io.get_entry_point(SAVE_PATH_FOLDER, EntryPoints.TK_GRAPH_POST)
ret = lang_main.io.load_pickle(p_tk_graph)
tk_graph = cast(graphs.TokenGraph, ret[0])
tk_graph_filtered = graphs.filter_graph_by_edge_weight(tk_graph, 150, None)
tk_graph_filtered = graphs.filter_graph_by_node_degree(tk_graph_filtered, 1, None)
# Layout fragment for the token graph section: an in-memory store, a heading,
# a download button with its ``dcc.Download`` target and the rendered image
# wrapped in a loading indicator.
graph_layout = html.Div(
[
# receives the encoded token graph from ``display_candidates_as_graph``
dcc.Store(id='graph-store', storage_type='memory'),
# dcc.Store(id='graph-store-cyto-curr_cands', storage_type='memory'),
# NOTE(review): no callback in this module writes to 'output' -- confirm
# whether this placeholder is still required
html.Div(id='output'),
html.Div(
[
html.H2('Token Graph', style={'margin': 0}),
],
style={
'display': 'flex',
'marginBottom': '1em',
},
),
html.Div(
[
html.H3('Graph'),
# despite its id, this button is the input of the image download
# callback (labelled 'Download Bild')
html.Button(
'Download Bild',
id='bt-reset',
style={
'marginLeft': 'auto',
'width': '300px',
},
),
dcc.Download(id='static-graph-download'),
dcc.Loading(
id='loading-graph-render',
children=html.Div(
[
# ``src`` is filled with a base64 data URI of the rendered SVG
html.Img(
id='static-graph-img',
alt='static rendered graph',
# style={
# 'width': 'auto',
# 'height': 'auto',
# },
),
# shows the error message if the graph could not be generated
html.P(id='info-graph-errors', children=[]),
],
style={
'border': '3px solid black',
'borderRadius': '25px',
'marginTop': '1em',
'marginBottom': '2em',
'padding': '7px',
},
),
),
],
style={'marginTop': '1em'},
),
],
)
# ** app
# Dash application object and its page layout: object selector, candidate
# selector with the two timeline figures, the candidates table and the token
# graph section defined in ``graph_layout``.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div(
[
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
html.Div(
children=[
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
# options are the keys of the loaded timeline candidates
dcc.Dropdown(
list(cands.keys()),
id='selector-obj_id',
placeholder='ObjektID auswählen...',
),
]
),
html.Div(
children=[
html.H3(id='object-text'),
# options and value are filled by ``update_choice_candidates``
dcc.Dropdown(id='selector-candidates'),
dcc.Graph(id='figure-occurrences'),
dcc.Graph(id='figure-delta'),
]
),
html.Div(
[dash_table.DataTable(id='table-candidates')], style={'marginBottom': '2em'}
),
graph_layout,
],
style={'margin': '2em'},
)
# ** selectors of candidates
@callback(
    Output('object-text', 'children'),
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_obj_text(obj_id):
    """Build the headline shown for the currently selected object.

    Parameters
    ----------
    obj_id
        value of the ``selector-obj_id`` dropdown, ``None`` if the selection
        was cleared

    Returns
    -------
    str
        headline containing the entry of ``texts`` for the object, or an
        empty string if nothing is selected
    """
    # a cleared dropdown delivers ``None``, which ``int()`` cannot convert
    if obj_id is None:
        return ''
    obj_text = texts[int(obj_id)]
    return f'HObjektText: {obj_text}'
@callback(
    [
        Output('selector-candidates', 'options'),
        Output('selector-candidates', 'value'),
    ],
    Input('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_choice_candidates(obj_id):
    """Provide the selectable candidate numbers for the chosen object.

    Parameters
    ----------
    obj_id
        value of the ``selector-obj_id`` dropdown, ``None`` if the selection
        was cleared

    Returns
    -------
    tuple[list[int], int | None]
        the 1-based candidate numbers for the object and the preselected
        number (the first one); an empty list and ``None`` if no object is
        selected or the object has no candidates
    """
    # a cleared dropdown delivers ``None``: reset the dependent selector
    if obj_id is None:
        return [], None
    num_candidates = len(cands[int(obj_id)])
    choices = list(range(1, num_candidates + 1))
    # guard against objects without candidates instead of failing on [0]
    preselected = choices[0] if choices else None
    return choices, preselected
# ** helpers to filter DataFrame
def pre_filter_data(
    data: DataFrame,
    idx: int,
    obj_id: ObjectID,
) -> DataFrame:
    """Select the rows of one timeline candidate and add a ``delta`` column.

    Parameters
    ----------
    data : DataFrame
        timeline data, indexed by the labels stored in the candidates
    idx : int
        1-based number of the candidate for the given object
    obj_id : ObjectID
        object whose candidates are looked up in ``cands``

    Returns
    -------
    DataFrame
        the candidate's rows sorted by index, extended by the column
        ``delta``: days between 'ErstellungsDatum' and 'ErledigungsDatum'
    """
    # ``idx`` counts from 1, the stored candidates are addressed from 0
    position = int(idx) - 1
    selected_entries = cands[int(obj_id)][position]
    subset = data.loc[list(selected_entries)].sort_index()  # type: ignore
    duration = subset['ErledigungsDatum'] - subset['ErstellungsDatum']
    subset['delta'] = duration.dt.days
    return subset
# ** figure generation
# TODO check possible storage of pre-filtered result
# TODO change input of ``update_table_candidates`` and ``display_candidates_as_graph``
# TODO to storage component
@callback(
    [
        Output('figure-occurrences', 'figure'),
        Output('figure-delta', 'figure'),
    ],
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_timeline(index, obj_id):
    """Create both timeline figures for the selected candidate.

    Parameters
    ----------
    index
        value of the ``selector-candidates`` dropdown (candidate number),
        ``None`` if nothing is selected
    obj_id
        value of the ``selector-obj_id`` dropdown, ``None`` if nothing is
        selected

    Returns
    -------
    tuple[Figure, Figure]
        occurrences figure and delta figure; two empty figures if either
        selection is missing
    """
    # either selector may deliver ``None`` (cleared or reset selection);
    # show empty figures instead of failing on ``int(None)``
    if index is None or obj_id is None:
        return Figure(), Figure()
    obj_id = int(obj_id)
    obj_text = texts[obj_id]
    title_occurrences = f'HObjektText: {obj_text}'
    title_delta = f'HObjektText: {obj_text}, Differenz Erstellung und Erledigung'
    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    # figure
    fig_occurrences = fig_timeline_occurrences(df, title_occurrences)
    fig_delta = fig_timeline_delta(df, title_delta)
    return fig_occurrences, fig_delta
def fig_timeline_occurrences(
    df: DataFrame,
    title: str,
) -> Figure:
    """Create the line figure of occurrences over 'ErstellungsDatum'.

    Parameters
    ----------
    df : DataFrame
        data to plot, must provide 'ErstellungsDatum', 'ObjektID' and the
        columns named in ``HOVER_DATA``
    title : str
        figure title

    Returns
    -------
    Figure
        line figure with diamond markers, a range slider on the x axis and
        a categorical y axis
    """
    trace_style = {
        'mode': 'markers+lines',
        'marker': MARKERS_OCCURRENCES,
        'marker_symbol': 'diamond',
    }
    x_axis_style = {'tickformat': '%B\n%Y', 'rangeslider_visible': True}

    figure = px.line(
        data_frame=df,
        x='ErstellungsDatum',
        y='ObjektID',
        title=title,
        hover_data=HOVER_DATA,
    )
    figure.update_traces(**trace_style)
    figure.update_xaxes(**x_axis_style)
    figure.update_yaxes(type='category')
    figure.update_layout(hovermode='x unified')

    return figure
def fig_timeline_delta(
    df: DataFrame,
    title: str,
) -> Figure:
    """Create the scatter figure of ``delta`` over 'ErstellungsDatum'.

    Parameters
    ----------
    df : DataFrame
        data to plot, must provide 'ErstellungsDatum', 'delta' and the
        columns named in ``HOVER_DATA_DELTA``
    title : str
        figure title

    Returns
    -------
    Figure
        scatter figure styled with ``MARKERS_DELTA`` and a y axis tick
        distance of 1
    """
    figure = px.scatter(
        data_frame=df,
        x='ErstellungsDatum',
        y='delta',
        title=title,
        hover_data=HOVER_DATA_DELTA,
    )
    # styling is applied step by step: traces, both axes, then the layout
    figure.update_traces(**{'marker': MARKERS_DELTA})
    figure.update_xaxes(**{'tickformat': '%B\n%Y'})
    figure.update_yaxes(**{'dtick': 1})
    figure.update_layout(**{'hovermode': 'x unified'})

    return figure
# ** HTML table
@callback(
    [Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def update_table_candidates(index, obj_id):
    """Fill the candidates table with the rows of the selected candidate.

    Parameters
    ----------
    index
        value of the ``selector-candidates`` dropdown (candidate number),
        ``None`` if nothing is selected
    obj_id
        value of the ``selector-obj_id`` dropdown, ``None`` if nothing is
        selected

    Returns
    -------
    tuple[list[dict], list[dict]]
        table records and column definitions; two empty lists if either
        selection is missing
    """
    # either selector may deliver ``None``; clear the table in that case
    # instead of failing inside ``pre_filter_data``
    if index is None or obj_id is None:
        return [], []
    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    df = df.filter(items=TABLE_FEATS, axis=1).sort_values(
        by='ErstellungsDatum', ascending=True
    )
    cols = [{'name': name, 'id': name} for name in df.columns]
    # convert dates to strings
    for col in TABLE_FEATS_DATES:
        df[col] = df[col].dt.strftime(r'%Y-%m-%d')
    table_data = df.to_dict('records')
    return table_data, cols
# ** graph callbacks
@app.callback(
    [
        Output('graph-store', 'data'),
        Output('static-graph-img', 'src'),
        Output('info-graph-errors', 'children'),
    ],
    # Input('graph-build-btn', 'n_clicks'),
    Input('selector-candidates', 'value'),
    State('selector-obj_id', 'value'),
    prevent_initial_call=True,
)
def display_candidates_as_graph(index, obj_id):
    """Build, rescale and render the token graph of the selected candidate.

    The candidate's rows are turned into a token graph, which is passed
    through the rescaling and the rendering pipeline. The rendered file at
    ``PTH_RENDERED_GRAPH`` is then embedded as a base64 data URI.

    Parameters
    ----------
    index
        value of the ``selector-candidates`` dropdown (candidate number),
        ``None`` if nothing is selected
    obj_id
        value of the ``selector-obj_id`` dropdown, ``None`` if nothing is
        selected

    Returns
    -------
    tuple[str, str, str]
        base64 encoded token graph, image data URI and error message; the
        first two are empty strings if the graph is empty or nothing is
        selected, the message is empty on success
    """
    # nothing selected: clear all outputs instead of failing on ``int(None)``
    if index is None or obj_id is None:
        return '', '', ''

    t1 = time.perf_counter()
    df = pre_filter_data(data, idx=index, obj_id=obj_id)
    t2 = time.perf_counter()
    print(f'Time for filtering: {t2 - t1} s')

    t1 = time.perf_counter()
    tk_graph_cands, _ = tokens.build_token_graph(
        data=df,
        model=SPCY_MODEL,
        target_feature='VorgangsBeschreibung',
        build_map=False,
        logging_graph=False,
    )
    t2 = time.perf_counter()
    print(f'Time for graph building: {t2 - t1} s')

    # ** now start rendering pipeline in Cytoscape
    # Only the empty-graph errors are handled here. The result is returned
    # after the ``try`` statement and not from a ``finally`` clause, so any
    # other exception propagates unchanged instead of being discarded or
    # replaced by an error about unassigned result variables.
    try:
        t1 = time.perf_counter()
        # rescale graph
        _, tk_graph_rescaled_undirected = cast(
            tuple[graphs.TokenGraph, graphs.Graph],
            rescaling_pipe.run(starting_values=(tk_graph_cands,)),
        )
        # render graph in Cytoscape and export image
        _ = graph_render_pipe.run(starting_values=(tk_graph_rescaled_undirected,))
        # load image as b64 encoded string
        b64_img = lang_main.io.encode_file_to_base64_str(PTH_RENDERED_GRAPH)
        static_img = f'data:image/svg+xml;base64,{b64_img}'
        graph_to_store = lang_main.io.encode_to_base64_str(tk_graph_cands)
        t2 = time.perf_counter()
        print(f'Time for graph rescaling and rendering: {t2 - t1} s')
    except (EmptyGraphError, EmptyEdgesError):
        return '', '', 'Graph ist leer und konnte nicht generiert werden!'

    return graph_to_store, static_img, ''
@callback(
    Output('static-graph-download', 'data'),
    Input('bt-reset', 'n_clicks'),
    prevent_initial_call=True,
)
def func(n_clicks):
    """Offer the rendered graph image as a download.

    ``n_clicks`` (click counter of the 'bt-reset' button) only triggers the
    callback; its value is not evaluated. The file sent is the one located
    at ``PTH_RENDERED_GRAPH``.
    """
    download_payload = dcc.send_file(path=PTH_RENDERED_GRAPH)
    return download_payload
def _start_webbrowser(
    host: str = '127.0.0.1',
    port: int = 8050,
    delay: float = 2.0,
) -> None:
    """Open the application's URL in a new browser window after a delay.

    Parameters
    ----------
    host : str, optional
        host part of the URL, by default '127.0.0.1'
    port : int, optional
        port part of the URL, by default 8050
    delay : float, optional
        seconds to wait before the browser is opened, by default 2.0
    """
    address = f'http://{host}:{port}/'
    # wait before opening the page; presumably to give the server time to
    # start -- the function is run in a separate thread by ``main``
    time.sleep(delay)
    webbrowser.open_new(address)
def main():
    """Start the Dash app in debug mode and open it in the web browser."""
    import os

    # With ``debug=True`` the development server's reloader executes this
    # module again in a child process, which is marked by the environment
    # variable WERKZEUG_RUN_MAIN='true'. Open the browser only from the
    # initial process, otherwise a new window would appear for the child
    # process and after every reload.
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        webbrowser_thread = Thread(target=_start_webbrowser, daemon=True)
        webbrowser_thread.start()
    app.run(debug=True)


if __name__ == '__main__':
    main()

Binary file not shown.

View File

@ -1,56 +0,0 @@
# lang_main: Config file
[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
[control]
preprocessing = true
preprocessing_skip = false
token_analysis = false
token_analysis_skip = false
graph_postprocessing = false
graph_postprocessing_skip = false
time_analysis = false
time_analysis_skip = false
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 150
[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.model_input]
input_features = [
'VorgangsTypName',
'VorgangsArtText',
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_similarity = 0.8

View File

@ -1,18 +0,0 @@
from pathlib import Path
from typing import cast
import statistics
import lang_main.io
from lang_main.analysis import graphs
# target = '../results/test_20240529/Pipe-Token_Analysis_Step-1_build_token_graph.pkl'
# p = Path(target).resolve()
# ret = lang_main.io.load_pickle(p)
# tk_graph = cast(graphs.TokenGraph, ret[0])
# tk_graph_filtered = tk_graph.filter_by_edge_weight(150, None)
# tk_graph_filtered = tk_graph_filtered.filter_by_node_degree(1, None)
# cyto_data_base, weight_data, all_weights = graphs.convert_graph_to_cytoscape(tk_graph_filtered)
test = [1, 1, 1, 2, 2, 3, 3, 4, 4, 1000]
print(statistics.mean(test))

View File

@ -17,7 +17,7 @@ from lang_main.constants import (
EDGE_WEIGHT_DECIMALS, EDGE_WEIGHT_DECIMALS,
PROPERTY_NAME_DEGREE_WEIGHTED, PROPERTY_NAME_DEGREE_WEIGHTED,
) )
from lang_main.errors import EdgePropertyNotContainedError from lang_main.errors import EdgePropertyNotContainedError, EmptyEdgesError, EmptyGraphError
from lang_main.io import load_pickle, save_pickle from lang_main.io import load_pickle, save_pickle
from lang_main.loggers import logger_graphs as logger from lang_main.loggers import logger_graphs as logger
from lang_main.types import ( from lang_main.types import (
@ -381,9 +381,12 @@ def normalise_array_linear(
npt.NDArray[np.float32] npt.NDArray[np.float32]
min/max normalised array min/max normalised array
""" """
arr_norm = (array - array.min()) / (array.max() - array.min()) div = array.max() - array.min()
if div != 0:
arr_norm = (array - array.min()) / div
return arr_norm.astype(np.float32) return arr_norm.astype(np.float32)
else:
return np.zeros(shape=array.shape, dtype=np.float32)
def weight_scaling( def weight_scaling(
@ -459,6 +462,8 @@ def rescale_edge_weights(
weight_property: str = 'weight', weight_property: str = 'weight',
) -> Graph | DiGraph | TokenGraph: ) -> Graph | DiGraph | TokenGraph:
graph = graph.copy() graph = graph.copy()
# check non-emptiness
verify_non_empty_graph(graph, including_edges=True)
# check if all edges contain weight property # check if all edges contain weight property
verify_property(graph, property=weight_property) verify_property(graph, property=weight_property)
@ -473,6 +478,33 @@ def rescale_edge_weights(
return graph return graph
def verify_non_empty_graph(
    graph: DiGraph | Graph,
    including_edges: bool = True,
) -> None:
    """Check that the given graph is not empty.

    The presence of nodes is checked first, then (optionally) the presence
    of edges.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph to check for emptiness
    including_edges : bool, optional
        whether to also check for the existence of edges, by default True

    Raises
    ------
    EmptyGraphError
        if graph does not contain any nodes and therefore no edges
    EmptyEdgesError
        if graph does not contain any edges
    """
    # ask the views for their size instead of copying every node or edge
    # into a tuple just to test for emptiness
    if len(graph.nodes) == 0:
        raise EmptyGraphError(f'Graph object >>{graph}<< does not contain any nodes.')
    if including_edges and len(graph.edges) == 0:
        raise EmptyEdgesError(f'Graph object >>{graph}<< does not contain any edges.')
# ** --------------------------------------- # ** ---------------------------------------
class TokenGraph(DiGraph): class TokenGraph(DiGraph):
def __init__( def __init__(

View File

@ -1,128 +1,128 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<vizmap id="VizMap-2024_07_12-08_08" documentVersion="3.1"> <vizmap id="VizMap-2024_07_24-08_54" documentVersion="3.1">
<visualStyle name="lang_main"> <visualStyle name="lang_main">
<network> <network>
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/> <visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
<visualProperty default="" name="NETWORK_TITLE"/>
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/> <visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
<visualProperty default="" name="NETWORK_TITLE"/>
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
</network> </network>
<node> <node>
<dependency value="true" name="nodeCustomGraphicsSizeSync"/> <dependency value="true" name="nodeCustomGraphicsSizeSync"/>
<dependency value="true" name="nodeSizeLocked"/> <dependency value="true" name="nodeSizeLocked"/>
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
<visualProperty default="40.0" name="NODE_HEIGHT"/>
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
<visualProperty default="false" name="NODE_SELECTED"/>
<visualProperty default="0.0" name="NODE_DEPTH"/>
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
<visualProperty default="" name="NODE_TOOLTIP"/>
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
<visualProperty default="18.0" name="NODE_SIZE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/> <visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/> <visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/> <visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
<visualProperty default="60.0" name="NODE_WIDTH"/> <visualProperty default="18.0" name="NODE_SIZE"/>
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
<visualProperty default="40.0" name="NODE_HEIGHT"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
<visualProperty default="" name="NODE_TOOLTIP"/>
<visualProperty default="false" name="NODE_SELECTED"/>
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
<visualProperty default="" name="NODE_LABEL"> <visualProperty default="" name="NODE_LABEL">
<passthroughMapping attributeName="name" attributeType="string"/> <passthroughMapping attributeName="name" attributeType="string"/>
</visualProperty> </visualProperty>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/> <visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/> <visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/> <visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/> <visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
<visualProperty default="true" name="NODE_VISIBLE"/>
<visualProperty default="255" name="NODE_TRANSPARENCY"/> <visualProperty default="255" name="NODE_TRANSPARENCY"/>
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/> <visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
<visualProperty default="60.0" name="NODE_WIDTH"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
<visualProperty default="true" name="NODE_VISIBLE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
<visualProperty default="5.0" name="NODE_BORDER_WIDTH"/>
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
<visualProperty default="0.0" name="NODE_DEPTH"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
</node> </node>
<edge> <edge>
<dependency value="true" name="arrowColorMatchesEdge"/> <dependency value="true" name="arrowColorMatchesEdge"/>
<visualProperty default="false" name="EDGE_SELECTED"/>
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
<visualProperty default="" name="EDGE_LABEL"/>
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
<visualProperty default="true" name="EDGE_VISIBLE"/>
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
<visualProperty default="" name="EDGE_TOOLTIP"/>
<visualProperty default="0.0" name="EDGE_Z_ORDER"/> <visualProperty default="0.0" name="EDGE_Z_ORDER"/>
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/> <visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/> <visualProperty default="" name="EDGE_TOOLTIP"/>
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/> <visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/> <visualProperty default="255" name="EDGE_TRANSPARENCY"/>
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/> <visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
<visualProperty default="true" name="EDGE_CURVED"/> <visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/> <visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/> <visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/> <visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/> <visualProperty default="false" name="EDGE_SELECTED"/>
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
<visualProperty default="3.0" name="EDGE_WIDTH"> <visualProperty default="3.0" name="EDGE_WIDTH">
<continuousMapping attributeName="weight" attributeType="float"> <continuousMapping attributeName="weight" attributeType="float">
<continuousMappingPoint attrValue="0.09520000219345093" equalValue="2.0" greaterValue="2.0" lesserValue="1.0"/> <continuousMappingPoint attrValue="0.09520000219345093" equalValue="2.0" greaterValue="2.0" lesserValue="1.0"/>
<continuousMappingPoint attrValue="1.0" equalValue="10.0" greaterValue="1.0" lesserValue="10.0"/> <continuousMappingPoint attrValue="1.0" equalValue="10.0" greaterValue="1.0" lesserValue="10.0"/>
</continuousMapping> </continuousMapping>
</visualProperty> </visualProperty>
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
<visualProperty default="" name="EDGE_LABEL"/>
<visualProperty default="true" name="EDGE_VISIBLE"/>
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
<visualProperty default="true" name="EDGE_CURVED"/>
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
</edge> </edge>
</visualStyle> </visualStyle>
</vizmap> </vizmap>

View File

@ -1,2 +1,12 @@
class EdgePropertyNotContainedError(Exception):
    """Error raised if a needed edge property is not contained in graph edges"""
class EmptyGraphError(Exception):
    """Error raised if an operation should be performed on the graph,
    but it does not contain any nodes or edges"""
class EmptyEdgesError(EmptyGraphError):
    """Error raised if action should be performed on a graph's edges, but
    it does not contain any"""

View File

@ -71,6 +71,15 @@ def encode_to_base64_str(
return b64_bytes.decode(encoding=encoding) return b64_bytes.decode(encoding=encoding)
def encode_file_to_base64_str(
    path: Path,
    encoding: str = 'utf-8',
) -> str:
    """read a file in binary mode and return its content as Base64 string

    Parameters
    ----------
    path : Path
        path to the file whose raw bytes are encoded
    encoding : str, optional
        codec used to turn the Base64 bytes into a ``str``, by default 'utf-8'

    Returns
    -------
    str
        Base64 representation of the complete file content
    """
    # binary mode: the bytes are encoded as-is, no text decoding takes place
    with open(path, 'rb') as file:
        b64_bytes = base64.b64encode(file.read())

    return b64_bytes.decode(encoding=encoding)
def decode_from_base64_str( def decode_from_base64_str(
b64_str: str, b64_str: str,
encoding: str = 'utf-8', encoding: str = 'utf-8',
@ -83,8 +92,9 @@ def decode_from_base64_str(
def get_entry_point( def get_entry_point(
saving_path: Path, saving_path: Path,
filename: str, filename: str,
file_ext: str = '.pkl',
) -> Path: ) -> Path:
entry_point_path = (saving_path / filename).with_suffix('.pkl') entry_point_path = (saving_path / filename).with_suffix(file_ext)
if not entry_point_path.exists(): if not entry_point_path.exists():
raise FileNotFoundError( raise FileNotFoundError(
f'Could not find provided entry data under path: >>{entry_point_path}<<' f'Could not find provided entry data under path: >>{entry_point_path}<<'

View File

@ -1,3 +1,5 @@
from pathlib import Path
from lang_main.analysis import graphs from lang_main.analysis import graphs
from lang_main.analysis.preprocessing import ( from lang_main.analysis.preprocessing import (
analyse_feature, analyse_feature,
@ -168,37 +170,75 @@ def build_tk_graph_post_pipe() -> Pipeline:
return pipe_graph_postprocessing return pipe_graph_postprocessing
def build_tk_graph_rescaling_pipe(
    save_result: bool,
    exit_point: EntryPoints,
) -> Pipeline:
    """build the pipeline which rescales the edge weights of a token graph
    and adds graph metrics afterwards

    Parameters
    ----------
    save_result : bool
        forwarded as ``save_result`` to the final (graph metrics) step
    exit_point : EntryPoints
        forwarded as ``filename`` to the final (graph metrics) step

    Returns
    -------
    Pipeline
        pipeline named 'Graph_Rescaling' with both steps registered
    """
    pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
    pipe_graph_rescaling.add(
        graphs.pipe_rescale_graph_edge_weights,
    )
    pipe_graph_rescaling.add(
        graphs.pipe_add_graph_metrics,
        save_result=save_result,
        filename=exit_point,
    )

    return pipe_graph_rescaling
# ** token analysis: rendering # ** token analysis: rendering
def build_tk_graph_render_pipe(
    with_subgraphs: bool,
    export_folder: Path = SAVE_PATH_FOLDER,
    base_network_name: str = CYTO_BASE_NETWORK_NAME,
) -> Pipeline:
    """build the pipeline for the static rendering of a token graph
    via Cytoscape

    Registered steps: import, layout, style application and image export of
    the base network; optionally followed by node selection and generation
    of subnetworks.

    Parameters
    ----------
    with_subgraphs : bool
        if True, the node selection and subnetwork steps are appended
    export_folder : Path, optional
        passed as ``target_folder`` to the image export and subnetwork steps,
        by default SAVE_PATH_FOLDER
    base_network_name : str, optional
        passed as ``network_name`` to every step and used as filename of the
        exported base network image, by default CYTO_BASE_NETWORK_NAME

    Returns
    -------
    Pipeline
        pipeline named 'Graph_Static-Rendering'
    """
    # NOTE(review): the working directory stays SAVE_PATH_FOLDER even if a
    # different ``export_folder`` is given -- presumably intended, confirm
    pipe_graph_rendering = Pipeline(
        name='Graph_Static-Rendering',
        working_dir=SAVE_PATH_FOLDER,
    )
    # these steps only need the network name; a new dict is built per step
    # so that no keyword container is shared between pipeline entries
    for action in (
        cyto.import_to_cytoscape,
        cyto.layout_network,
        cyto.apply_style_to_network,
    ):
        pipe_graph_rendering.add(
            action,
            {
                'network_name': base_network_name,
            },
        )
    pipe_graph_rendering.add(
        cyto.export_network_to_image,
        {
            'filename': base_network_name,
            'target_folder': export_folder,
            'network_name': base_network_name,
        },
    )
    if with_subgraphs:
        pipe_graph_rendering.add(
            cyto.get_subgraph_node_selection,
            {
                'network_name': base_network_name,
            },
        )
        pipe_graph_rendering.add(
            cyto.build_subnetworks,
            {
                'export_image': True,
                'target_folder': export_folder,
                'network_name': base_network_name,
            },
        )

    return pipe_graph_rendering

View File

@ -1,7 +1,7 @@
import time import time
from collections.abc import Iterable from collections.abc import Iterable
from pathlib import Path from pathlib import Path
from typing import cast from typing import Literal, cast
import py4cytoscape as p4c import py4cytoscape as p4c
from networkx import DiGraph, Graph from networkx import DiGraph, Graph
@ -55,6 +55,7 @@ def verify_connection():
def import_to_cytoscape( def import_to_cytoscape(
graph: DiGraph | Graph, graph: DiGraph | Graph,
network_name: str = CYTO_BASE_NETWORK_NAME,
) -> None: ) -> None:
"""Cytoscape: import NetworkX graph as new network collection """Cytoscape: import NetworkX graph as new network collection
@ -65,15 +66,49 @@ def import_to_cytoscape(
""" """
logger.debug('Checking Cytoscape connection...') logger.debug('Checking Cytoscape connection...')
verify_connection() verify_connection()
logger.debug('Importing network to Cytoscape...') logger.debug('Importing to and analysing network in Cytoscape...')
p4c.delete_all_networks() p4c.delete_all_networks()
p4c.create_network_from_networkx( p4c.create_network_from_networkx(
graph, graph,
title=CYTO_BASE_NETWORK_NAME, title=network_name,
collection=CYTO_COLLECTION_NAME, collection=CYTO_COLLECTION_NAME,
) )
analyse_network(network_name=network_name)
logger.debug('Import and analysis of network to Cytoscape successful.')
def verify_table_property(
    property: str,
    table_type: Literal['node', 'edge', 'network'] = 'node',
    network_name: str = CYTO_BASE_NETWORK_NAME,
) -> bool:
    """Cytoscape: check whether a column exists in one of the network's tables

    Parameters
    ----------
    property : str
        column name to look for (the parameter name shadows the builtin
        ``property``; kept because callers may pass it by keyword)
    table_type : Literal['node', 'edge', 'network'], optional
        table which is queried, by default 'node'
    network_name : str, optional
        network whose table is queried, by default CYTO_BASE_NETWORK_NAME

    Returns
    -------
    bool
        True if the column is contained in the retrieved table
    """
    table = p4c.get_table_columns(table=table_type, network=network_name)

    return property in table.columns
def analyse_network(
    property_degree_weighted: str = PROPERTY_NAME_DEGREE_WEIGHTED,
    network_name: str = CYTO_BASE_NETWORK_NAME,
) -> None:
    """Cytoscape: run the network analysis and store the node selection
    property in the node table

    If the node table has at least four rows, the network is analysed and
    the selection property is computed as the product of the weighted degree,
    the betweenness centrality and the normalised stress. Otherwise the
    selection property is set to 1 for every node. The resulting table is
    loaded back into the network in both cases.

    Parameters
    ----------
    property_degree_weighted : str, optional
        node table column holding the weighted degree,
        by default PROPERTY_NAME_DEGREE_WEIGHTED
    network_name : str, optional
        network whose node table is read and written,
        by default CYTO_BASE_NETWORK_NAME
    """
    node_table = p4c.get_table_columns(table='node', network=network_name)

    # NOTE(review): threshold of four rows is taken over unchanged; presumably
    # the analysis is not meaningful/possible for smaller networks -- confirm
    if len(node_table) >= 4:
        # NOTE(review): no network is passed to this call, so it presumably
        # acts on Cytoscape's current network -- confirm it matches network_name
        p4c.analyze_network(directed=False)
        # reload the table to obtain the columns written by the analysis
        node_table = p4c.get_table_columns(table='node', network=network_name)
        max_stress = node_table['Stress'].max()
        if max_stress > 0:
            node_table['stress_norm'] = node_table['Stress'] / max_stress
        else:
            # avoid 0/0 (NaN scores) if no node carries any stress
            node_table['stress_norm'] = 0.0
        node_table[CYTO_SELECTION_PROPERTY] = (
            node_table[property_degree_weighted]
            * node_table['BetweennessCentrality']
            * node_table['stress_norm']
        )
    else:
        node_table[CYTO_SELECTION_PROPERTY] = 1

    p4c.load_table_data(node_table, data_key_column='name', network=network_name)
def reset_current_network_to_base() -> None: def reset_current_network_to_base() -> None:
@ -83,6 +118,7 @@ def reset_current_network_to_base() -> None:
def export_network_to_image( def export_network_to_image(
filename: str, filename: str,
target_folder: Path = SAVE_PATH_FOLDER,
filetype: CytoExportFileTypes = 'SVG', filetype: CytoExportFileTypes = 'SVG',
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
pdf_export_page_size: CytoExportPageSizes = 'A4', pdf_export_page_size: CytoExportPageSizes = 'A4',
@ -102,7 +138,6 @@ def export_network_to_image(
by default 'A4' by default 'A4'
""" """
logger.debug('Exporting image to file...') logger.debug('Exporting image to file...')
target_folder = SAVE_PATH_FOLDER
if not target_folder.exists(): if not target_folder.exists():
target_folder.mkdir(parents=True) target_folder.mkdir(parents=True)
file_pth = target_folder / filename file_pth = target_folder / filename
@ -138,7 +173,8 @@ def layout_network(
necessarily match the name in the Cytoscape UI), necessarily match the name in the Cytoscape UI),
by default CYTO_LAYOUT_NAME by default CYTO_LAYOUT_NAME
layout_properties : CytoLayoutProperties, optional layout_properties : CytoLayoutProperties, optional
configuration of parameters for the given layout algorithm, by default CYTO_LAYOUT_PROPERTIES configuration of parameters for the given layout algorithm,
by default CYTO_LAYOUT_PROPERTIES
network_name : str, optional network_name : str, optional
network to apply the layout algorithm on, by default CYTO_BASE_NETWORK_NAME network to apply the layout algorithm on, by default CYTO_BASE_NETWORK_NAME
""" """
@ -153,6 +189,9 @@ def apply_style_to_network(
style_name: str = CYTO_STYLESHEET_NAME, style_name: str = CYTO_STYLESHEET_NAME,
pth_to_stylesheet: Path = CYTO_PATH_STYLESHEET, pth_to_stylesheet: Path = CYTO_PATH_STYLESHEET,
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
node_size_property: str = 'node_selection',
min_node_size: int = 15,
max_node_size: int = 40,
) -> None: ) -> None:
"""Cytoscape: apply a chosen Cytoscape style to the defined network """Cytoscape: apply a chosen Cytoscape style to the defined network
@ -185,14 +224,36 @@ def apply_style_to_network(
p4c.import_visual_styles(str(pth_to_stylesheet)) p4c.import_visual_styles(str(pth_to_stylesheet))
p4c.set_visual_style(style_name, network=network_name) p4c.set_visual_style(style_name, network=network_name)
time.sleep(1) # if not waited image export could be without applied style # node size mapping, only if needed property is available
# TODO check removal
# size_prop_available = verify_table_property(
# property=node_size_property,
# network_name=network_name,
# )
# if size_prop_available:
scheme = p4c.scheme_c_number_continuous(
start_value=min_node_size, end_value=max_node_size
)
node_size_map = p4c.gen_node_size_map(
node_size_property,
number_scheme=scheme,
mapping_type='c',
style_name='lang_main',
default_number=min_node_size,
)
p4c.set_node_size_mapping(**node_size_map)
# else:
# node_table = p4c.get_table_columns(table='node', network=network_name)
# nodes_SUID = node_table['SUID'].to_list()
# p4c.set_node_size_bypass(nodes_SUID, new_sizes=min_node_size, network=network_name)
# p4c.set_visual_style(style_name, network=network_name)
# time.sleep(1) # if not waited image export could be without applied style
p4c.fit_content(selected_only=False, network=network_name) p4c.fit_content(selected_only=False, network=network_name)
logger.debug('Style application to network successful.') logger.debug('Style application to network successful.')
def get_subgraph_node_selection( def get_subgraph_node_selection(
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
property_degree_weighted: str = PROPERTY_NAME_DEGREE_WEIGHTED,
num_subgraphs: int = CYTO_NUMBER_SUBGRAPHS, num_subgraphs: int = CYTO_NUMBER_SUBGRAPHS,
) -> list[CytoNodeID]: ) -> list[CytoNodeID]:
"""Cytoscape: obtain the relevant nodes for iterative subgraph generation """Cytoscape: obtain the relevant nodes for iterative subgraph generation
@ -214,14 +275,9 @@ def get_subgraph_node_selection(
list containing all relevant Cytoscape nodes list containing all relevant Cytoscape nodes
""" """
logger.debug('Selecting nodes for subgraph generation...') logger.debug('Selecting nodes for subgraph generation...')
node_table = p4c.get_table_columns(network=network_name) node_table = p4c.get_table_columns(table='node', network=network_name)
node_table['stress_norm'] = node_table['Stress'] / node_table['Stress'].max()
node_table[CYTO_SELECTION_PROPERTY] = (
node_table[property_degree_weighted]
* node_table['BetweennessCentrality']
* node_table['stress_norm']
)
node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False) node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False)
p4c.load_table_data(node_table, data_key_column='name', network=network_name)
node_table_choice = node_table.iloc[:num_subgraphs, :] node_table_choice = node_table.iloc[:num_subgraphs, :]
logger.debug('Selection of nodes for subgraph generation successful.') logger.debug('Selection of nodes for subgraph generation successful.')
@ -264,6 +320,7 @@ def make_subnetwork(
index: int, index: int,
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
export_image: bool = True, export_image: bool = True,
target_folder: Path = SAVE_PATH_FOLDER,
) -> None: ) -> None:
"""Cytoscape: generate a new subnetwork based on the currently """Cytoscape: generate a new subnetwork based on the currently
selected nodes and edges selected nodes and edges
@ -289,7 +346,11 @@ def make_subnetwork(
p4c.fit_content(selected_only=False, network=subnetwork_name) p4c.fit_content(selected_only=False, network=subnetwork_name)
if export_image: if export_image:
time.sleep(1) time.sleep(1)
export_network_to_image(filename=subnetwork_name, network_name=subnetwork_name) export_network_to_image(
filename=subnetwork_name,
target_folder=target_folder,
network_name=subnetwork_name,
)
logger.debug('Generation of subnetwork with index %d successful.', index) logger.debug('Generation of subnetwork with index %d successful.', index)
@ -298,6 +359,7 @@ def build_subnetworks(
nodes_to_analyse: Iterable[CytoNodeID], nodes_to_analyse: Iterable[CytoNodeID],
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
export_image: bool = True, export_image: bool = True,
target_folder: Path = SAVE_PATH_FOLDER,
) -> None: ) -> None:
"""Cytoscape: iteratively build subnetworks from a collection of nodes """Cytoscape: iteratively build subnetworks from a collection of nodes
and their respective neighbouring nodes and their respective neighbouring nodes
@ -316,5 +378,10 @@ def build_subnetworks(
logger.debug('Generating all subnetworks for node selection...') logger.debug('Generating all subnetworks for node selection...')
for idx, node in enumerate(nodes_to_analyse): for idx, node in enumerate(nodes_to_analyse):
select_neighbours_of_node(node=node, network_name=network_name) select_neighbours_of_node(node=node, network_name=network_name)
make_subnetwork(index=idx, network_name=network_name, export_image=export_image) make_subnetwork(
index=idx,
network_name=network_name,
export_image=export_image,
target_folder=target_folder,
)
logger.debug('Generation of all subnetworks for node selection successful.') logger.debug('Generation of all subnetworks for node selection successful.')

View File

@ -33,6 +33,7 @@ ResultHandling: TypeAlias = list[tuple[bool, str | None]]
class EntryPoints(enum.StrEnum): class EntryPoints(enum.StrEnum):
TIMELINE = 'TIMELINE' TIMELINE = 'TIMELINE'
TIMELINE_POST = 'TIMELINE_POSTPROCESSING' TIMELINE_POST = 'TIMELINE_POSTPROCESSING'
TIMELINE_TK_GRAPH_RESCALED = 'TIMELINE_TK_GRAPH_RESCALED'
TK_GRAPH_POST = 'TK-GRAPH_POSTPROCESSING' TK_GRAPH_POST = 'TK-GRAPH_POSTPROCESSING'
TK_GRAPH_ANALYSIS = 'TK-GRAPH_ANALYSIS' TK_GRAPH_ANALYSIS = 'TK-GRAPH_ANALYSIS'
TK_GRAPH_ANALYSIS_RESCALED = 'TK-GRAPH_ANALYSIS_RESCALED' TK_GRAPH_ANALYSIS_RESCALED = 'TK-GRAPH_ANALYSIS_RESCALED'

View File

@ -2,7 +2,7 @@
[paths] [paths]
inputs = './inputs/' inputs = './inputs/'
results = './results/test_20240619/' results = '../scripts/results/test_20240619/'
dataset = '../data/02_202307/Export4.csv' dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/' #results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv' #dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'

File diff suppressed because one or more lines are too long