graph and subgraph generation
This commit is contained in:
@@ -3,12 +3,17 @@ from typing import cast
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main.analysis.graphs import Graph, TokenGraph, save_to_GraphML
|
||||
from lang_main.analysis.graphs import (
|
||||
Graph,
|
||||
TokenGraph,
|
||||
save_to_GraphML,
|
||||
)
|
||||
from lang_main.constants import (
|
||||
PATH_TO_DATASET,
|
||||
SAVE_PATH_FOLDER,
|
||||
SKIP_GRAPH_POSTPROCESSING,
|
||||
SKIP_GRAPH_RESCALING,
|
||||
SKIP_GRAPH_STATIC_RENDERING,
|
||||
SKIP_PREPROCESSING,
|
||||
SKIP_TIME_ANALYSIS,
|
||||
SKIP_TOKEN_ANALYSIS,
|
||||
@@ -21,7 +26,8 @@ from lang_main.pipelines.predefined import (
|
||||
build_timeline_pipe,
|
||||
build_tk_graph_pipe,
|
||||
build_tk_graph_post_pipe,
|
||||
build_tk_graph_rescaling,
|
||||
build_tk_graph_rendering_pipe,
|
||||
build_tk_graph_rescaling_pipe,
|
||||
)
|
||||
from lang_main.types import (
|
||||
EntryPoints,
|
||||
@@ -36,7 +42,8 @@ pipe_target_feat = build_base_target_feature_pipe()
|
||||
pipe_merge = build_merge_duplicates_pipe()
|
||||
pipe_token_analysis = build_tk_graph_pipe()
|
||||
pipe_graph_postprocessing = build_tk_graph_post_pipe()
|
||||
pipe_graph_rescaling = build_tk_graph_rescaling()
|
||||
pipe_graph_rescaling = build_tk_graph_rescaling_pipe()
|
||||
pipe_static_graph_rendering = build_tk_graph_rendering_pipe()
|
||||
pipe_timeline = build_timeline_pipe()
|
||||
|
||||
|
||||
@@ -61,7 +68,7 @@ def run_token_analysis() -> None:
|
||||
loaded_results = cast(tuple[DataFrame], load_pickle(entry_point_path))
|
||||
preprocessed_data = loaded_results[0]
|
||||
# build token graph
|
||||
(tk_graph, docs_mapping) = typing.cast(
|
||||
(tk_graph, _) = typing.cast(
|
||||
tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None],
|
||||
pipe_token_analysis.run(starting_values=(preprocessed_data,)),
|
||||
)
|
||||
@@ -94,14 +101,33 @@ def run_graph_edge_rescaling() -> None:
|
||||
ret = cast(
|
||||
tuple[TokenGraph, Graph], pipe_graph_rescaling.run(starting_values=(tk_graph,))
|
||||
)
|
||||
undirected_rescaled_graph = ret[1]
|
||||
tk_graph_rescaled = ret[0]
|
||||
tk_graph_rescaled_undirected = ret[1]
|
||||
tk_graph_rescaled.to_GraphML(
|
||||
SAVE_PATH_FOLDER, filename='TokenGraph-directed-rescaled', directed=False
|
||||
)
|
||||
save_to_GraphML(
|
||||
undirected_rescaled_graph,
|
||||
tk_graph_rescaled_undirected,
|
||||
saving_path=SAVE_PATH_FOLDER,
|
||||
filename='TokenGraph-undirected-rescaled',
|
||||
)
|
||||
|
||||
|
||||
def run_static_graph_rendering() -> None:
    """Run the static rendering pipeline on the rescaled, undirected graph.

    Loads the results stored under the ``TK_GRAPH_ANALYSIS_RESCALED`` entry
    point in ``SAVE_PATH_FOLDER`` and passes the second element of the loaded
    tuple (treated as the undirected ``Graph``) to
    ``pipe_static_graph_rendering`` as its only starting value. The
    pipeline's return value is discarded.
    """
    # load entry point
    entry_point_path = get_entry_point(
        SAVE_PATH_FOLDER,
        EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )
    # NOTE(review): load_pickle presumably unpickles a file written by an
    # earlier pipeline step -- confirm that only trusted files can end up here.
    loaded_results = cast(
        tuple[TokenGraph, Graph],
        load_pickle(entry_point_path),
    )
    # Only the undirected graph (index 1) is rendered; the TokenGraph stored
    # at index 0 is not needed by this step, so it is no longer bound to an
    # unused local.
    tk_graph_rescaled_undirected = loaded_results[1]

    _ = pipe_static_graph_rendering.run(starting_values=(tk_graph_rescaled_undirected,))
|
||||
|
||||
|
||||
# ** time analysis
|
||||
def run_time_analysis() -> None:
|
||||
# load entry point
|
||||
@@ -123,6 +149,7 @@ def build_pipeline_container() -> PipelineContainer:
|
||||
container.add(run_token_analysis, skip=SKIP_TOKEN_ANALYSIS)
|
||||
container.add(run_graph_postprocessing, skip=SKIP_GRAPH_POSTPROCESSING)
|
||||
container.add(run_graph_edge_rescaling, skip=SKIP_GRAPH_RESCALING)
|
||||
container.add(run_static_graph_rendering, skip=SKIP_GRAPH_STATIC_RENDERING)
|
||||
container.add(run_time_analysis, skip=SKIP_TIME_ANALYSIS)
|
||||
|
||||
return container
|
||||
|
||||
@@ -13,9 +13,10 @@ dataset = '../data/02_202307/Export4.csv'
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
graph_rescaling_skip = false
|
||||
token_analysis_skip = true
|
||||
graph_postprocessing_skip = true
|
||||
graph_rescaling_skip = true
|
||||
graph_static_rendering_skip = false
|
||||
time_analysis_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
|
||||
Reference in New Issue
Block a user