Cytoscape Docker image, profiling

This commit is contained in:
Florian Förster
2024-09-04 18:02:12 +02:00
parent 9328c0218a
commit e85334ab06
7 changed files with 170 additions and 49 deletions

View File

@@ -1,5 +1,8 @@
import cProfile
import pstats
import typing
from typing import cast
from pathlib import Path
from typing import Final, cast
from pandas import DataFrame
@@ -38,6 +41,11 @@ from lang_main.types import (
TimelineCandidates,
)
# ** profiling
ONLY_PROFILING_REPORT: Final[bool] = True
USE_PROFILING: Final[bool] = True
PROFILE_REPORT_NAME: Final[str] = 'prof_report.profdata'
# ** build pipelines
pipe_target_feat = build_base_target_feature_pipe()
pipe_merge = build_merge_duplicates_pipe()
@@ -168,4 +176,18 @@ def main() -> None:
if __name__ == '__main__':
    # Script entry point: dispatch between three modes controlled by the
    # module-level flags ONLY_PROFILING_REPORT and USE_PROFILING.
    #
    # The pipeline (`main()`) is invoked at most once, and only inside the
    # branch that needs it -- there is no unconditional call before the
    # dispatch, otherwise the pipeline would run twice when profiling and
    # would also run in report-only mode.

    # Profiling artefacts are stored in ./profiling below the current
    # working directory. `exist_ok=True` makes the call a no-op when the
    # directory is already present, so no separate existence check is needed.
    report_path = Path.cwd() / 'profiling'
    report_path.mkdir(parents=True, exist_ok=True)
    report_file = report_path / PROFILE_REPORT_NAME

    if ONLY_PROFILING_REPORT:
        # Report-only mode: do not run the pipeline, just load a previously
        # written profile and print the top 60 entries, first sorted by
        # cumulative time, then by total (own) time.
        # NOTE: pstats.Stats needs the report file to exist already.
        p_stats = pstats.Stats(str(report_file))
        p_stats.sort_stats(pstats.SortKey.CUMULATIVE).print_stats(60)
        p_stats.sort_stats('tottime').print_stats(60)
    elif USE_PROFILING:
        # Profiling mode: run the pipeline under cProfile, write the raw
        # stats to the report file, then print the top 30 entries for both
        # sort orders.
        cProfile.run('main()', str(report_file))
        p_stats = pstats.Stats(str(report_file))
        p_stats.sort_stats(pstats.SortKey.CUMULATIVE).print_stats(30)
        p_stats.sort_stats('tottime').print_stats(30)
    else:
        # Plain mode: run the pipeline without any profiling overhead.
        main()

View File

@@ -2,10 +2,10 @@
[paths]
inputs = './inputs/'
results = './results/dummy_N_1000/'
dataset = '../data/Dummy_Dataset_N_1000.csv'
# results = './results/test_20240807/'
# dataset = '../data/02_202307/Export4.csv'
# results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './results/test_20240807/'
dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
@@ -36,7 +36,7 @@ threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 1
threshold_edge_weight = 150
[time_analysis.uniqueness]
threshold_unique_texts = 4

Binary file not shown.

Binary file not shown.