more robust graph filtering

This commit is contained in:
Florian Förster 2024-09-12 15:19:24 +02:00
parent e85334ab06
commit 27d40d5c99
13 changed files with 1332 additions and 133 deletions

View File

@ -2,12 +2,10 @@
[paths] [paths]
inputs = './inputs/' inputs = './inputs/'
results = '../scripts/results/test_20240619/' # results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './results/'
dataset = '../data/02_202307/Export4.csv' dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# only debugging features, production-ready pipelines should always # only debugging features, production-ready pipelines should always
# be fully executed # be fully executed
@ -19,21 +17,18 @@ graph_rescaling_skip = false
graph_static_rendering_skip = false graph_static_rendering_skip = false
time_analysis_skip = true time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess] [preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [ date_cols = [
"VorgangsDatum", "VorgangsDatum",
"ErledigungsDatum", "ErledigungsDatum",
"Arbeitsbeginn", "Arbeitsbeginn",
"ErstellungsDatum", "ErstellungsDatum",
] ]
threshold_amount_characters = 5 threshold_amount_characters = 5
threshold_similarity = 0.8 threshold_similarity = 0.8
[graph_postprocessing] [graph_postprocessing]
threshold_edge_number = 300
threshold_edge_weight = 150 threshold_edge_weight = 150
[time_analysis.uniqueness] [time_analysis.uniqueness]
@ -41,6 +36,10 @@ threshold_unique_texts = 4
criterion_feature = 'HObjektText' criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID' feature_name_obj_id = 'ObjektID'
[time_analysis.preparation]
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input] [time_analysis.model_input]
# input_features = [ # input_features = [
# 'VorgangsTypName', # 'VorgangsTypName',

File diff suppressed because one or more lines are too long

237
pdm.lock generated
View File

@ -5,7 +5,7 @@
groups = ["default", "dev", "notebooks", "trails", "trials"] groups = ["default", "dev", "notebooks", "trails", "trials"]
strategy = ["inherit_metadata"] strategy = ["inherit_metadata"]
lock_version = "4.5.0" lock_version = "4.5.0"
content_hash = "sha256:468a23f2e765abd2cf8760a33a219a4e475f1ebc73630f792eddf6563293720a" content_hash = "sha256:09aa90447c8cad5f9c18eeedf7b383574dcf5143110ed90033a9652757538544"
[[metadata.targets]] [[metadata.targets]]
requires_python = ">=3.11" requires_python = ">=3.11"
@ -453,6 +453,68 @@ files = [
{file = "confection-0.1.4.tar.gz", hash = "sha256:e80f22fd008b5231a2e8852fac6de9e28f2276a04031d0536cff74fe4a990c8f"}, {file = "confection-0.1.4.tar.gz", hash = "sha256:e80f22fd008b5231a2e8852fac6de9e28f2276a04031d0536cff74fe4a990c8f"},
] ]
[[package]]
name = "contourpy"
version = "1.3.0"
requires_python = ">=3.9"
summary = "Python library for calculating contours of 2D quadrilateral grids"
groups = ["dev"]
dependencies = [
"numpy>=1.23",
]
files = [
{file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"},
{file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"},
{file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"},
{file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"},
{file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"},
{file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"},
{file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"},
{file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"},
{file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"},
{file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"},
{file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"},
{file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"},
{file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"},
{file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"},
{file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"},
{file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"},
{file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"},
{file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"},
{file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = "sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"},
{file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"},
{file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"},
{file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"},
{file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"},
{file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"},
{file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"},
{file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"},
{file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"},
{file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"},
{file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"},
{file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"},
{file = "contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"},
{file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"},
{file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"},
{file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"},
{file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"},
{file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"},
{file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"},
{file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"},
{file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"},
]
[[package]]
name = "cycler"
version = "0.12.1"
requires_python = ">=3.8"
summary = "Composable style cycles"
groups = ["dev"]
files = [
{file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"},
{file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"},
]
[[package]] [[package]]
name = "cymem" name = "cymem"
version = "2.0.8" version = "2.0.8"
@ -670,6 +732,33 @@ files = [
{file = "flask-3.0.3.tar.gz", hash = "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842"}, {file = "flask-3.0.3.tar.gz", hash = "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842"},
] ]
[[package]]
name = "fonttools"
version = "4.53.1"
requires_python = ">=3.8"
summary = "Tools to manipulate font files"
groups = ["dev"]
files = [
{file = "fonttools-4.53.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:da33440b1413bad53a8674393c5d29ce64d8c1a15ef8a77c642ffd900d07bfe1"},
{file = "fonttools-4.53.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ff7e5e9bad94e3a70c5cd2fa27f20b9bb9385e10cddab567b85ce5d306ea923"},
{file = "fonttools-4.53.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6e7170d675d12eac12ad1a981d90f118c06cf680b42a2d74c6c931e54b50719"},
{file = "fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee32ea8765e859670c4447b0817514ca79054463b6b79784b08a8df3a4d78e3"},
{file = "fonttools-4.53.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6e08f572625a1ee682115223eabebc4c6a2035a6917eac6f60350aba297ccadb"},
{file = "fonttools-4.53.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b21952c092ffd827504de7e66b62aba26fdb5f9d1e435c52477e6486e9d128b2"},
{file = "fonttools-4.53.1-cp311-cp311-win32.whl", hash = "sha256:9dfdae43b7996af46ff9da520998a32b105c7f098aeea06b2226b30e74fbba88"},
{file = "fonttools-4.53.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4d0096cb1ac7a77b3b41cd78c9b6bc4a400550e21dc7a92f2b5ab53ed74eb02"},
{file = "fonttools-4.53.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d92d3c2a1b39631a6131c2fa25b5406855f97969b068e7e08413325bc0afba58"},
{file = "fonttools-4.53.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3b3c8ebafbee8d9002bd8f1195d09ed2bd9ff134ddec37ee8f6a6375e6a4f0e8"},
{file = "fonttools-4.53.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32f029c095ad66c425b0ee85553d0dc326d45d7059dbc227330fc29b43e8ba60"},
{file = "fonttools-4.53.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f5e6c3510b79ea27bb1ebfcc67048cde9ec67afa87c7dd7efa5c700491ac7f"},
{file = "fonttools-4.53.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f677ce218976496a587ab17140da141557beb91d2a5c1a14212c994093f2eae2"},
{file = "fonttools-4.53.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9e6ceba2a01b448e36754983d376064730690401da1dd104ddb543519470a15f"},
{file = "fonttools-4.53.1-cp312-cp312-win32.whl", hash = "sha256:791b31ebbc05197d7aa096bbc7bd76d591f05905d2fd908bf103af4488e60670"},
{file = "fonttools-4.53.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ed170b5e17da0264b9f6fae86073be3db15fa1bd74061c8331022bca6d09bab"},
{file = "fonttools-4.53.1-py3-none-any.whl", hash = "sha256:f1f8758a2ad110bd6432203a344269f445a2907dc24ef6bccfd0ac4e14e0d71d"},
{file = "fonttools-4.53.1.tar.gz", hash = "sha256:e128778a8e9bc11159ce5447f76766cefbd876f44bd79aff030287254e4752c4"},
]
[[package]] [[package]]
name = "fqdn" name = "fqdn"
version = "1.5.1" version = "1.5.1"
@ -1260,6 +1349,64 @@ files = [
{file = "kaleido-0.2.1-py2.py3-none-win_amd64.whl", hash = "sha256:4670985f28913c2d063c5734d125ecc28e40810141bdb0a46f15b76c1d45f23c"}, {file = "kaleido-0.2.1-py2.py3-none-win_amd64.whl", hash = "sha256:4670985f28913c2d063c5734d125ecc28e40810141bdb0a46f15b76c1d45f23c"},
] ]
[[package]]
name = "kiwisolver"
version = "1.4.7"
requires_python = ">=3.8"
summary = "A fast implementation of the Cassowary constraint solver"
groups = ["dev"]
files = [
{file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"},
{file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"},
{file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"},
{file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"},
{file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"},
{file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"},
{file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"},
{file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"},
{file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"},
{file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"},
{file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"},
{file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"},
{file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"},
{file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"},
{file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"},
{file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"},
{file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"},
{file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"},
{file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"},
{file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"},
{file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"},
{file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"},
{file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"},
{file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"},
{file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"},
{file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"},
{file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"},
{file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"},
{file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"},
{file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"},
{file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"},
{file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"},
{file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"},
{file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"},
{file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"},
{file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"},
{file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"},
{file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"},
{file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"},
{file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"},
{file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"},
{file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"},
{file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"},
{file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"},
{file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"},
{file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"},
{file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"},
{file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"},
{file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"},
]
[[package]] [[package]]
name = "langcodes" name = "langcodes"
version = "3.4.0" version = "3.4.0"
@ -1375,6 +1522,51 @@ files = [
{file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"},
] ]
[[package]]
name = "matplotlib"
version = "3.9.2"
requires_python = ">=3.9"
summary = "Python plotting package"
groups = ["dev"]
dependencies = [
"contourpy>=1.0.1",
"cycler>=0.10",
"fonttools>=4.22.0",
"importlib-resources>=3.2.0; python_version < \"3.10\"",
"kiwisolver>=1.3.1",
"numpy>=1.23",
"packaging>=20.0",
"pillow>=8",
"pyparsing>=2.3.1",
"python-dateutil>=2.7",
]
files = [
{file = "matplotlib-3.9.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772"},
{file = "matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41"},
{file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f"},
{file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447"},
{file = "matplotlib-3.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e"},
{file = "matplotlib-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7"},
{file = "matplotlib-3.9.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9"},
{file = "matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d"},
{file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7"},
{file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c"},
{file = "matplotlib-3.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e"},
{file = "matplotlib-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3"},
{file = "matplotlib-3.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9"},
{file = "matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa"},
{file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b"},
{file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413"},
{file = "matplotlib-3.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b"},
{file = "matplotlib-3.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49"},
{file = "matplotlib-3.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03"},
{file = "matplotlib-3.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30"},
{file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51"},
{file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c"},
{file = "matplotlib-3.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e"},
{file = "matplotlib-3.9.2.tar.gz", hash = "sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92"},
]
[[package]] [[package]]
name = "matplotlib-inline" name = "matplotlib-inline"
version = "0.1.7" version = "0.1.7"
@ -1552,7 +1744,7 @@ name = "numpy"
version = "1.26.4" version = "1.26.4"
requires_python = ">=3.9" requires_python = ">=3.9"
summary = "Fundamental package for array computing in Python" summary = "Fundamental package for array computing in Python"
groups = ["default"] groups = ["default", "dev"]
files = [ files = [
{file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"},
{file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"},
@ -1763,7 +1955,7 @@ name = "packaging"
version = "24.0" version = "24.0"
requires_python = ">=3.7" requires_python = ">=3.7"
summary = "Core utilities for Python packages" summary = "Core utilities for Python packages"
groups = ["default", "notebooks"] groups = ["default", "dev", "notebooks"]
files = [ files = [
{file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
@ -1774,7 +1966,7 @@ name = "pandas"
version = "2.2.2" version = "2.2.2"
requires_python = ">=3.9" requires_python = ">=3.9"
summary = "Powerful data structures for data analysis, time series, and statistics" summary = "Powerful data structures for data analysis, time series, and statistics"
groups = ["default"] groups = ["default", "dev"]
dependencies = [ dependencies = [
"numpy>=1.22.4; python_version < \"3.11\"", "numpy>=1.22.4; python_version < \"3.11\"",
"numpy>=1.23.2; python_version == \"3.11\"", "numpy>=1.23.2; python_version == \"3.11\"",
@ -1842,7 +2034,7 @@ name = "pillow"
version = "10.3.0" version = "10.3.0"
requires_python = ">=3.8" requires_python = ">=3.8"
summary = "Python Imaging Library (Fork)" summary = "Python Imaging Library (Fork)"
groups = ["default"] groups = ["default", "dev"]
files = [ files = [
{file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"}, {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"},
{file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"}, {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"},
@ -2121,12 +2313,23 @@ files = [
{file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"},
] ]
[[package]]
name = "pyparsing"
version = "3.1.4"
requires_python = ">=3.6.8"
summary = "pyparsing module - Classes and methods to define and execute parsing grammars"
groups = ["dev"]
files = [
{file = "pyparsing-3.1.4-py3-none-any.whl", hash = "sha256:a6a7ee4235a3f944aa1fa2249307708f893fe5717dc603503c6c7969c070fb7c"},
{file = "pyparsing-3.1.4.tar.gz", hash = "sha256:f86ec8d1a83f11977c9a6ea7598e8c27fc5cddfa5b07ea2241edbbde1d7bc032"},
]
[[package]] [[package]]
name = "python-dateutil" name = "python-dateutil"
version = "2.9.0.post0" version = "2.9.0.post0"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
summary = "Extensions to the standard Python datetime module" summary = "Extensions to the standard Python datetime module"
groups = ["default", "notebooks"] groups = ["default", "dev", "notebooks"]
dependencies = [ dependencies = [
"six>=1.5", "six>=1.5",
] ]
@ -2150,7 +2353,7 @@ files = [
name = "pytz" name = "pytz"
version = "2024.1" version = "2024.1"
summary = "World timezone definitions, modern and historical" summary = "World timezone definitions, modern and historical"
groups = ["default"] groups = ["default", "dev"]
files = [ files = [
{file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
{file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
@ -2558,6 +2761,22 @@ files = [
{file = "scipy-1.13.0.tar.gz", hash = "sha256:58569af537ea29d3f78e5abd18398459f195546bb3be23d16677fb26616cc11e"}, {file = "scipy-1.13.0.tar.gz", hash = "sha256:58569af537ea29d3f78e5abd18398459f195546bb3be23d16677fb26616cc11e"},
] ]
[[package]]
name = "seaborn"
version = "0.13.2"
requires_python = ">=3.8"
summary = "Statistical data visualization"
groups = ["dev"]
dependencies = [
"matplotlib!=3.6.1,>=3.4",
"numpy!=1.24.0,>=1.20",
"pandas>=1.2",
]
files = [
{file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"},
{file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"},
]
[[package]] [[package]]
name = "send2trash" name = "send2trash"
version = "1.8.3" version = "1.8.3"
@ -2606,7 +2825,7 @@ name = "six"
version = "1.16.0" version = "1.16.0"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
summary = "Python 2 and 3 compatibility utilities" summary = "Python 2 and 3 compatibility utilities"
groups = ["default", "notebooks"] groups = ["default", "dev", "notebooks"]
files = [ files = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
@ -3199,7 +3418,7 @@ name = "tzdata"
version = "2024.1" version = "2024.1"
requires_python = ">=2" requires_python = ">=2"
summary = "Provider of IANA time zone data" summary = "Provider of IANA time zone data"
groups = ["default"] groups = ["default", "dev"]
files = [ files = [
{file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
{file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},

View File

@ -46,6 +46,7 @@ trials = [
dev = [ dev = [
"cython>=3.0.10", "cython>=3.0.10",
"openpyxl>=3.1.5", "openpyxl>=3.1.5",
"seaborn>=0.13.2",
] ]
[tool.ruff] [tool.ruff]

View File

@ -42,8 +42,8 @@ from lang_main.types import (
) )
# ** profiling # ** profiling
ONLY_PROFILING_REPORT: Final[bool] = True USE_PROFILING: Final[bool] = False
USE_PROFILING: Final[bool] = True ONLY_PROFILING_REPORT: Final[bool] = False
PROFILE_REPORT_NAME: Final[str] = 'prof_report.profdata' PROFILE_REPORT_NAME: Final[str] = 'prof_report.profdata'
# ** build pipelines # ** build pipelines

View File

@ -6,43 +6,40 @@ inputs = './inputs/'
# dataset = '../data/Dummy_Dataset_N_1000.csv' # dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './results/test_20240807/' results = './results/test_20240807/'
dataset = '../data/02_202307/Export4.csv' dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# only debugging features, production-ready pipelines should always # only debugging features, production-ready pipelines should always
# be fully executed # be fully executed
[control] [control]
preprocessing_skip = false preprocessing_skip = true
token_analysis_skip = false token_analysis_skip = true
graph_postprocessing_skip = false graph_postprocessing_skip = false
graph_rescaling_skip = false graph_rescaling_skip = false
graph_static_rendering_skip = false graph_static_rendering_skip = false
time_analysis_skip = false time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess] [preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [ date_cols = [
"VorgangsDatum", "VorgangsDatum",
"ErledigungsDatum", "ErledigungsDatum",
"Arbeitsbeginn", "Arbeitsbeginn",
"ErstellungsDatum", "ErstellungsDatum",
] ]
threshold_amount_characters = 5 threshold_amount_characters = 5
threshold_similarity = 0.8 threshold_similarity = 0.8
[graph_postprocessing] [graph_postprocessing]
threshold_edge_weight = 150 threshold_edge_number = 330
# threshold_edge_weight = 150
[time_analysis.uniqueness] [time_analysis.uniqueness]
threshold_unique_texts = 4 threshold_unique_texts = 4
criterion_feature = 'HObjektText' criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID' feature_name_obj_id = 'ObjektID'
[time_analysis.preparation]
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input] [time_analysis.model_input]
# input_features = [ # input_features = [
# 'VorgangsTypName', # 'VorgangsTypName',

View File

@ -5,7 +5,7 @@ import sys
import typing import typing
from collections.abc import Hashable, Iterable from collections.abc import Hashable, Iterable
from pathlib import Path from pathlib import Path
from typing import Any, Final, Literal, Self, cast, overload from typing import Any, Literal, Self, cast, overload
import networkx as nx import networkx as nx
import numpy as np import numpy as np
@ -15,9 +15,14 @@ from pandas import DataFrame
from lang_main.constants import ( from lang_main.constants import (
EDGE_WEIGHT_DECIMALS, EDGE_WEIGHT_DECIMALS,
LOGGING_DEFAULT_GRAPHS,
PROPERTY_NAME_DEGREE_WEIGHTED, PROPERTY_NAME_DEGREE_WEIGHTED,
) )
from lang_main.errors import EdgePropertyNotContainedError, EmptyEdgesError, EmptyGraphError from lang_main.errors import (
EdgePropertyNotContainedError,
EmptyEdgesError,
EmptyGraphError,
)
from lang_main.io import load_pickle, save_pickle from lang_main.io import load_pickle, save_pickle
from lang_main.loggers import logger_graphs as logger from lang_main.loggers import logger_graphs as logger
from lang_main.types import ( from lang_main.types import (
@ -27,9 +32,6 @@ from lang_main.types import (
WeightData, WeightData,
) )
# TODO change logging behaviour, add logging to file
LOGGING_DEFAULT: Final[bool] = False
def save_to_GraphML( def save_to_GraphML(
graph: DiGraph | Graph, graph: DiGraph | Graph,
@ -45,7 +47,7 @@ def save_to_GraphML(
def get_graph_metadata( def get_graph_metadata(
graph: Graph | DiGraph, graph: Graph | DiGraph,
logging: bool = LOGGING_DEFAULT, logging: bool = LOGGING_DEFAULT_GRAPHS,
) -> dict[str, int]: ) -> dict[str, int]:
# info about graph # info about graph
graph_info: dict[str, int] = {} graph_info: dict[str, int] = {}
@ -121,7 +123,7 @@ def update_graph(
# build undirected adjacency matrix # build undirected adjacency matrix
def convert_graph_to_undirected( def convert_graph_to_undirected(
graph: DiGraph, graph: DiGraph,
logging: bool = LOGGING_DEFAULT, logging: bool = LOGGING_DEFAULT_GRAPHS,
cast_int: bool = False, cast_int: bool = False,
) -> Graph: ) -> Graph:
dtype = np.float32 dtype = np.float32
@ -282,6 +284,23 @@ def filter_graph_by_node_degree(
return filtered_graph return filtered_graph
def filter_graph_by_number_edges(
    graph: TokenGraph,
    limit: int,
    property: str = 'weight',
    descending: bool = True,
) -> TokenGraph:
    """filter a graph so that only the top-ranked edges remain

    The graph is copied, its edges are ranked by the value of the given edge
    property and only the first ``limit`` edges of that ranking are kept.
    The passed graph itself is not modified.

    Edges with equal property values keep the order in which the graph
    yields them (the sort is stable), so the result is reproducible between
    runs. Routing the edges through a ``set`` beforehand made the choice
    among tied edges at the cut-off depend on set iteration order.

    Parameters
    ----------
    graph : TokenGraph
        graph to filter
    limit : int
        number of edges to keep, applied as a slice bound on the ranked edges
    property : str, optional
        name of the edge property used for ranking, by default 'weight'
    descending : bool, optional
        rank from the highest to the lowest property value if True,
        otherwise from lowest to highest, by default True

    Returns
    -------
    TokenGraph
        copy of the graph containing only the retained edges
    """
    graph = graph.copy()
    # (source, target, property value) triples; every edge is expected to
    # carry the property, otherwise the values can not be compared
    ranked_edges = sorted(
        graph.edges(data=property),  # type: ignore
        key=lambda edge: edge[2],
        reverse=descending,
    )
    # derive the cut-off from the kept slice so that every slice-compatible
    # value of ``limit`` behaves the same as ``ranked_edges[:limit]``
    num_kept = len(ranked_edges[:limit])
    graph.remove_edges_from(ranked_edges[num_kept:])

    return graph
def add_weighted_degree( def add_weighted_degree(
graph: DiGraph | Graph, graph: DiGraph | Graph,
edge_weight_property: str = 'weight', edge_weight_property: str = 'weight',

View File

@ -1,10 +1,6 @@
from pathlib import Path from pathlib import Path
from typing import Final from typing import Final
# TODO check removal
# import spacy
# from sentence_transformers import SentenceTransformer
# from spacy.language import Language as GermanSpacyModel
from lang_main import CONFIG, CYTO_PATH_STYLESHEET from lang_main import CONFIG, CYTO_PATH_STYLESHEET
from lang_main import model_loader as m_load from lang_main import model_loader as m_load
from lang_main.types import ( from lang_main.types import (
@ -20,19 +16,20 @@ __all__ = [
'CYTO_PATH_STYLESHEET', 'CYTO_PATH_STYLESHEET',
] ]
# ** logging
# graphs
LOGGING_DEFAULT_GRAPHS: Final[bool] = False
# ** paths # ** paths
input_path_conf = Path.cwd() / Path(CONFIG['paths']['inputs']) input_path_conf = Path.cwd() / Path(CONFIG['paths']['inputs'])
INPUT_PATH_FOLDER: Final[Path] = input_path_conf.resolve() INPUT_PATH_FOLDER: Final[Path] = input_path_conf.resolve()
# INPUT_PATH_FOLDER: Final[Path] = (CALLER_PATH / input_path_conf).resolve()
# TODO reactivate later # TODO reactivate later
# if not INPUT_PATH_FOLDER.exists(): # if not INPUT_PATH_FOLDER.exists():
# raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.') # raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.')
save_path_conf = Path.cwd() / Path(CONFIG['paths']['results']) save_path_conf = Path.cwd() / Path(CONFIG['paths']['results'])
SAVE_PATH_FOLDER: Final[Path] = save_path_conf.resolve() SAVE_PATH_FOLDER: Final[Path] = save_path_conf.resolve()
# SAVE_PATH_FOLDER: Final[Path] = (CALLER_PATH / save_path_conf).resolve()
path_dataset_conf = Path.cwd() / Path(CONFIG['paths']['dataset']) path_dataset_conf = Path.cwd() / Path(CONFIG['paths']['dataset'])
PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve() PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
# PATH_TO_DATASET: Final[Path] = (CALLER_PATH / path_dataset_conf).resolve()
# if not PATH_TO_DATASET.exists(): # if not PATH_TO_DATASET.exists():
# raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.') # raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
# ** control # ** control
@ -64,20 +61,9 @@ MODEL_LOADER_MAP: Final[ModelLoaderMap] = {
}, },
}, },
} }
# ** sentence_transformers
# STFR_MODEL: Final[SentenceTransformer] = SentenceTransformer(
# 'sentence-transformers/all-mpnet-base-v2', device=STFR_DEVICE
# )
# ** spacy
# SPCY_MODEL: Final[GermanSpacyModel] = spacy.load('de_dep_news_trf')
# ** export # ** export
# ** preprocessing # ** preprocessing
FILENAME_COSSIM_FILTER_CANDIDATES: Final[str] = CONFIG['preprocess'][
'filename_cossim_filter_candidates'
]
DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols'] DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols']
THRESHOLD_AMOUNT_CHARACTERS: Final[float] = CONFIG['preprocess'][ THRESHOLD_AMOUNT_CHARACTERS: Final[float] = CONFIG['preprocess'][
'threshold_amount_characters' 'threshold_amount_characters'
@ -87,10 +73,13 @@ THRESHOLD_SIMILARITY: Final[float] = CONFIG['preprocess']['threshold_similarity'
# ** graph postprocessing # ** graph postprocessing
EDGE_WEIGHT_DECIMALS: Final[int] = 4 EDGE_WEIGHT_DECIMALS: Final[int] = 4
THRESHOLD_EDGE_WEIGHT: Final[int] = CONFIG['graph_postprocessing']['threshold_edge_weight'] THRESHOLD_EDGE_NUMBER: Final[int] = CONFIG['graph_postprocessing']['threshold_edge_number']
# THRESHOLD_EDGE_WEIGHT: Final[int] = CONFIG['graph_postprocessing']['threshold_edge_weight']
PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted' PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted'
# ** graph exports (Cytoscape) # ** graph exports (Cytoscape)
CYTO_MAX_NODE_COUNT: Final[int] = 500
CYTO_MAX_EDGE_COUNT: Final[int] = 800
CYTO_COLLECTION_NAME: Final[str] = 'lang_main' CYTO_COLLECTION_NAME: Final[str] = 'lang_main'
CYTO_BASE_NETWORK_NAME: Final[str] = 'token_graph' CYTO_BASE_NETWORK_NAME: Final[str] = 'token_graph'
CYTO_LAYOUT_NAME: Final[CytoLayouts] = 'force-directed' CYTO_LAYOUT_NAME: Final[CytoLayouts] = 'force-directed'
@ -119,9 +108,14 @@ UNIQUE_CRITERION_FEATURE: Final[str] = CONFIG['time_analysis']['uniqueness'][
] ]
FEATURE_NAME_OBJ_ID: Final[str] = CONFIG['time_analysis']['uniqueness']['feature_name_obj_id'] FEATURE_NAME_OBJ_ID: Final[str] = CONFIG['time_analysis']['uniqueness']['feature_name_obj_id']
# ** time_analysis.preparation # ** time_analysis.preparation
NAME_DELTA_FEAT_TO_REPAIR: Final[str] = 'delta_to_repair' # NAME_DELTA_FEAT_TO_REPAIR: Final[str] = 'delta_to_repair'
# NAME_DELTA_FEAT_TO_REPAIR: Final[str] = 'Zeitspanne bis zur Behebung [Tage]' CONFIG['time_analysis']['preparation']['name_delta_feat_to_repair']
NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = 'Zeitspanne bis zum nächsten Ereignis [Tage]' NAME_DELTA_FEAT_TO_REPAIR: Final[str] = CONFIG['time_analysis']['preparation'][
'name_delta_feat_to_repair'
]
NAME_DELTA_FEAT_TO_NEXT_FAILURE: Final[str] = CONFIG['time_analysis']['preparation'][
'name_delta_feat_to_next_failure'
]
# ** time_analysis.model_input # ** time_analysis.model_input
MODEL_INPUT_FEATURES: Final[tuple[str, ...]] = tuple( MODEL_INPUT_FEATURES: Final[tuple[str, ...]] = tuple(
CONFIG['time_analysis']['model_input']['input_features'] CONFIG['time_analysis']['model_input']['input_features']

View File

@ -10,3 +10,7 @@ class EmptyGraphError(Exception):
class EmptyEdgesError(EmptyGraphError): class EmptyEdgesError(EmptyGraphError):
"""Error raised if action should be performed on a graph's edges, but """Error raised if action should be performed on a graph's edges, but
it does not contain any""" it does not contain any"""
class GraphRenderError(Exception):
    """Error raised if a graph object cannot be rendered, e.g. because it
    exceeds the node or edge limits allowed for rendering"""

View File

@ -2,12 +2,10 @@
[paths] [paths]
inputs = './inputs/' inputs = './inputs/'
results = './results/test_20240619/' # results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './results/test_20240807/'
dataset = '../data/02_202307/Export4.csv' dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# only debugging features, production-ready pipelines should always # only debugging features, production-ready pipelines should always
# be fully executed # be fully executed
@ -19,28 +17,29 @@ graph_rescaling_skip = false
graph_static_rendering_skip = false graph_static_rendering_skip = false
time_analysis_skip = true time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess] [preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [ date_cols = [
"VorgangsDatum", "VorgangsDatum",
"ErledigungsDatum", "ErledigungsDatum",
"Arbeitsbeginn", "Arbeitsbeginn",
"ErstellungsDatum", "ErstellungsDatum",
] ]
threshold_amount_characters = 5 threshold_amount_characters = 5
threshold_similarity = 0.8 threshold_similarity = 0.8
[graph_postprocessing] [graph_postprocessing]
threshold_edge_weight = 150 threshold_edge_number = 330
# threshold_edge_weight = 150
[time_analysis.uniqueness] [time_analysis.uniqueness]
threshold_unique_texts = 4 threshold_unique_texts = 4
criterion_feature = 'HObjektText' criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID' feature_name_obj_id = 'ObjektID'
[time_analysis.preparation]
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input] [time_analysis.model_input]
# input_features = [ # input_features = [
# 'VorgangsTypName', # 'VorgangsTypName',

View File

@ -34,7 +34,7 @@ from lang_main.constants import (
NAME_DELTA_FEAT_TO_REPAIR, NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER, SAVE_PATH_FOLDER,
THRESHOLD_AMOUNT_CHARACTERS, THRESHOLD_AMOUNT_CHARACTERS,
THRESHOLD_EDGE_WEIGHT, THRESHOLD_EDGE_NUMBER,
THRESHOLD_NUM_ACTIVITIES, THRESHOLD_NUM_ACTIVITIES,
THRESHOLD_SIMILARITY, THRESHOLD_SIMILARITY,
THRESHOLD_TIMELINE_SIMILARITY, THRESHOLD_TIMELINE_SIMILARITY,
@ -89,31 +89,6 @@ def build_base_target_feature_pipe() -> Pipeline:
return pipe_target_feat return pipe_target_feat
# output: DataFrame containing target feature with
# number of occurrences and associated ObjectIDs
# ** embedding pipe
# ?? still needed?
# using similarity between entries to catch duplicates with typo or similar content
# pipe_embds = BasePipeline(name='Embedding1', working_dir=SAVE_PATH_FOLDER)
# pipe_embds.add(build_cosSim_matrix, {'model': model_stfr}, save_result=True)
# pipe_embds.add(
# filt_thresh_cosSim_matrix, {'threshold': THRESHOLD_SIMILARITY}, save_result=True
# )
# pipe_embds.add(
# list_cosSim_dupl_candidates,
# {
# 'save_candidates': True,
# 'saving_path': SAVE_PATH_FOLDER,
# 'filename': FILENAME_COSSIM_FILTER_CANDIDATES,
# 'pipeline': pipe_embds,
# },
# save_result=True,
# )
# ** Merge duplicates # ** Merge duplicates
def build_merge_duplicates_pipe() -> Pipeline: def build_merge_duplicates_pipe() -> Pipeline:
pipe_merge = Pipeline(name='Merge_Duplicates', working_dir=SAVE_PATH_FOLDER) pipe_merge = Pipeline(name='Merge_Duplicates', working_dir=SAVE_PATH_FOLDER)
@ -162,11 +137,18 @@ def build_tk_graph_post_pipe() -> Pipeline:
pipe_graph_postprocessing = Pipeline( pipe_graph_postprocessing = Pipeline(
name='Graph_Postprocessing', working_dir=SAVE_PATH_FOLDER name='Graph_Postprocessing', working_dir=SAVE_PATH_FOLDER
) )
# pipe_graph_postprocessing.add(
# graphs.filter_graph_by_edge_weight,
# {
# 'bound_lower': THRESHOLD_EDGE_WEIGHT,
# 'bound_upper': None,
# },
# )
pipe_graph_postprocessing.add( pipe_graph_postprocessing.add(
graphs.filter_graph_by_edge_weight, graphs.filter_graph_by_number_edges,
{ {
'bound_lower': THRESHOLD_EDGE_WEIGHT, 'limit': THRESHOLD_EDGE_NUMBER,
'bound_upper': None, 'property': 'weight',
}, },
) )
pipe_graph_postprocessing.add( pipe_graph_postprocessing.add(

View File

@ -5,6 +5,7 @@ from typing import Literal, cast
import py4cytoscape as p4c import py4cytoscape as p4c
from networkx import DiGraph, Graph from networkx import DiGraph, Graph
from py4cytoscape import network_selection as p4c_network_selection
from py4cytoscape.exceptions import CyError from py4cytoscape.exceptions import CyError
from requests.exceptions import RequestException from requests.exceptions import RequestException
@ -14,6 +15,8 @@ from lang_main.constants import (
CYTO_ITER_NEIGHBOUR_DEPTH, CYTO_ITER_NEIGHBOUR_DEPTH,
CYTO_LAYOUT_NAME, CYTO_LAYOUT_NAME,
CYTO_LAYOUT_PROPERTIES, CYTO_LAYOUT_PROPERTIES,
CYTO_MAX_EDGE_COUNT,
CYTO_MAX_NODE_COUNT,
CYTO_NETWORK_ZOOM_FACTOR, CYTO_NETWORK_ZOOM_FACTOR,
CYTO_NUMBER_SUBGRAPHS, CYTO_NUMBER_SUBGRAPHS,
CYTO_PATH_STYLESHEET, CYTO_PATH_STYLESHEET,
@ -23,7 +26,9 @@ from lang_main.constants import (
PROPERTY_NAME_DEGREE_WEIGHTED, PROPERTY_NAME_DEGREE_WEIGHTED,
SAVE_PATH_FOLDER, SAVE_PATH_FOLDER,
) )
from lang_main.errors import GraphRenderError
from lang_main.loggers import logger_rendering as logger from lang_main.loggers import logger_rendering as logger
from lang_main.render import cytoscape_monkeypatch as cs_monkeypatch
from lang_main.types import ( from lang_main.types import (
CytoExportFileTypes, CytoExportFileTypes,
CytoExportPageSizes, CytoExportPageSizes,
@ -32,9 +37,17 @@ from lang_main.types import (
CytoNodeID, CytoNodeID,
) )
# monkeypatch non-stable py4cytoscape function
p4c_network_selection.select_edges_connecting_selected_nodes = (
cs_monkeypatch.select_edges_connecting_selected_nodes
)
p4c.select_edges_connecting_selected_nodes = (
cs_monkeypatch.select_edges_connecting_selected_nodes
)
# ** Cytoscape API related, using py4cytoscape # ** Cytoscape API related, using py4cytoscape
def verify_connection(): def verify_connection() -> None:
"""Cytoscape: checks if CyREST and Cytoscape versions are compatible and """Cytoscape: checks if CyREST and Cytoscape versions are compatible and
if Cytoscape API endpoint is reachable if Cytoscape API endpoint is reachable
@ -55,6 +68,60 @@ def verify_connection():
raise error raise error
def verify_graph_render_size(
    graph: Graph | DiGraph,
    max_node_count: int | None = CYTO_MAX_NODE_COUNT,
    max_edge_count: int | None = CYTO_MAX_EDGE_COUNT,
) -> None:
    """check that a graph is small enough to be rendered in Cytoscape within
    an acceptable time frame

    Parameters
    ----------
    graph : Graph | DiGraph
        graph whose size is checked
    max_node_count : int | None, optional
        upper limit for the number of nodes, ``None`` disables this check,
        by default CYTO_MAX_NODE_COUNT
    max_edge_count : int | None, optional
        upper limit for the number of edges, ``None`` disables this check,
        by default CYTO_MAX_EDGE_COUNT

    Raises
    ------
    GraphRenderError
        if the graph contains more nodes or edges than the respective limit
    """
    # (element name used in the message, counted elements, allowed maximum);
    # nodes are checked before edges
    size_checks = (
        ('nodes', len(graph.nodes), max_node_count),
        ('edges', len(graph.edges), max_edge_count),
    )
    for element, counted, allowed in size_checks:
        if allowed is None:
            continue
        if counted > allowed:
            raise GraphRenderError(
                f'Maximum number of {element} for rendering exceeded. '
                f'Limit {allowed}, Counted: {counted}'
            )
def change_default_layout() -> None:
    """Cytoscape: sets the property `layout.default` to `grid` so that newly
    imported networks are laid out with one of the fastest layouts, which
    accelerates the import process

    Raises
    ------
    RequestException
        API endpoint not reachable or CyREST operation not successful
    """
    endpoint = 'properties/cytoscape3.props/layout.default'
    payload: dict[str, str] = dict(value='grid', key='layout.default')

    try:
        p4c.cyrest_put(endpoint, body=payload)
    except RequestException as exc:
        logger.error('[CytoAPIConnection] Property change of default layout not successful.')
        raise exc
def import_to_cytoscape( def import_to_cytoscape(
graph: DiGraph | Graph, graph: DiGraph | Graph,
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
@ -70,6 +137,10 @@ def import_to_cytoscape(
""" """
logger.debug('Checking Cytoscape connection...') logger.debug('Checking Cytoscape connection...')
verify_connection() verify_connection()
logger.debug('Checking graph size for rendering...')
verify_graph_render_size(graph)
logger.debug('Setting default layout to improve import speed...')
change_default_layout()
logger.debug('Setting Cytoscape sandbox...') logger.debug('Setting Cytoscape sandbox...')
p4c.sandbox_set( p4c.sandbox_set(
sandbox_name=sandbox_name, sandbox_name=sandbox_name,

View File

@ -0,0 +1,73 @@
import re
from py4cytoscape import networks
from py4cytoscape.network_selection import get_selected_nodes, select_edges
from py4cytoscape.py4cytoscape_logger import cy_log
from py4cytoscape.py4cytoscape_utils import * # type: ignore # noqa: F403
re_parenthesis_1 = re.compile(r'[(]+')
re_parenthesis_2 = re.compile(r'[)]+')
@cy_log
def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL):  # noqa: F405
    """Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.

    Any edges selected beforehand are deselected before any new edges are selected.

    An edge is chosen if its name starts with the name of a selected node and
    ends with the name of a selected node. Node names are compared literally,
    so names containing parentheses or other characters with a special meaning
    in regular expressions are handled correctly.

    Args:
        network (SUID or str or None): Name or SUID of a network. Default is the
            "current" network active in Cytoscape.
        base_url (str): Ignore unless you need to specify a custom domain,
            port or version to connect to the CyREST API. Default is http://127.0.0.1:1234
            and the latest version of the CyREST API supported by this version of py4cytoscape.

    Returns:
        dict: {'nodes': [node list], 'edges': [edge list]} or None if no nodes are
            selected or no edge connects the selected nodes

    Raises:
        CyError: if network name or SUID doesn't exist
        requests.exceptions.RequestException: if can't connect to Cytoscape or Cytoscape returns an error

    Examples:
        >>> select_edges_connecting_selected_nodes()
        None
        >>> select_edges_connecting_selected_nodes(network='My Network')
        {'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
        >>> select_edges_connecting_selected_nodes(network=52)
        {'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}

    Note:
        In the return value node list is the list of all selected nodes, and
        edge list is the SUIDs of the selected edges.
    """
    net_suid = networks.get_network_suid(network, base_url=base_url)
    selected_nodes = get_selected_nodes(network=net_suid, base_url=base_url)
    # TODO: In R version, NA test is after len() test ... shouldn't it be before?
    if not selected_nodes:
        return None

    all_edges = networks.get_all_edges(net_suid, base_url=base_url)
    # NOTE(review): presumably the edge list can be empty or None for a
    # network without edges -- nothing to select in either case
    if not all_edges:
        return None

    # str.startswith/str.endswith accept a tuple of candidates and compare
    # literally, so no escaping of the node names is required
    node_names = tuple(selected_nodes)
    # dict.fromkeys removes duplicate edge names while keeping the edge order
    selected_edges = list(
        dict.fromkeys(
            edge
            for edge in all_edges
            if edge.startswith(node_names) and edge.endswith(node_names)
        )
    )
    if not selected_edges:
        return None

    res = select_edges(
        selected_edges,
        by_col='name',
        preserve_current_selection=False,
        network=net_suid,
        base_url=base_url,
    )
    return res
# TODO: the match on node names is loose (name prefix/suffix of the edge name only);
# consider anchoring on the edge name delimiters instead, i.e. '^' + n + ' (' and ') ' + n + '$'