added test cases

This commit is contained in:
Florian Förster 2025-01-22 16:54:15 +01:00
parent 30fe71e80a
commit fb28b8548b
28 changed files with 17721 additions and 17 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,98 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.analysis API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.analysis</code></h1>
</header>
<section id="section-intro">
</section>
<section>
<h2 class="section-title" id="header-submodules">Sub-modules</h2>
<dl>
<dt><code class="name"><a title="lang_main.analysis.graphs" href="graphs.html">lang_main.analysis.graphs</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.analysis.preprocessing" href="preprocessing.html">lang_main.analysis.preprocessing</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.analysis.shared" href="shared.html">lang_main.analysis.shared</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.analysis.timeline" href="timeline.html">lang_main.analysis.timeline</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.analysis.tokens" href="tokens.html">lang_main.analysis.tokens</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="../index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-submodules">Sub-modules</a></h3>
<ul>
<li><code><a title="lang_main.analysis.graphs" href="graphs.html">lang_main.analysis.graphs</a></code></li>
<li><code><a title="lang_main.analysis.preprocessing" href="preprocessing.html">lang_main.analysis.preprocessing</a></code></li>
<li><code><a title="lang_main.analysis.shared" href="shared.html">lang_main.analysis.shared</a></code></li>
<li><code><a title="lang_main.analysis.timeline" href="timeline.html">lang_main.analysis.timeline</a></code></li>
<li><code><a title="lang_main.analysis.tokens" href="tokens.html">lang_main.analysis.tokens</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,451 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.analysis.preprocessing API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.analysis.preprocessing</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.analysis.preprocessing.analyse_feature"><code class="name flex">
<span>def <span class="ident">analyse_feature</span></span>(<span>data: DataFrame, target_feature: str) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def analyse_feature(
data: DataFrame,
target_feature: str,
) -&gt; tuple[DataFrame]:
# feature columns
feature_entries = data[target_feature]
logger.info(
&#39;Number of entries for feature &gt;&gt;%s&lt;&lt;: %d&#39;, target_feature, len(feature_entries)
)
# obtain unique entries
unique_feature_entries = feature_entries.unique()
# prepare result DataFrame
cols = [&#39;batched_idxs&#39;, &#39;entry&#39;, &#39;len&#39;, &#39;num_occur&#39;, &#39;assoc_obj_ids&#39;, &#39;num_assoc_obj_ids&#39;]
result_df = pd.DataFrame(columns=cols)
for entry in tqdm(unique_feature_entries, mininterval=1.0):
len_entry = len(entry)
filt = data[target_feature] == entry
temp = data[filt]
batched_idxs = temp.index.to_numpy()
assoc_obj_ids = temp[&#39;ObjektID&#39;].unique()
assoc_obj_ids = np.sort(assoc_obj_ids, kind=&#39;stable&#39;)
num_assoc_obj_ids = len(assoc_obj_ids)
num_dupl = filt.sum()
conc_df = pd.DataFrame(
data=[
[batched_idxs, entry, len_entry, num_dupl, assoc_obj_ids, num_assoc_obj_ids]
],
columns=cols,
)
result_df = pd.concat([result_df, conc_df], ignore_index=True)
result_df = result_df.sort_values(
by=[&#39;num_occur&#39;, &#39;len&#39;], ascending=[False, False]
).copy()
return (result_df,)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.preprocessing.load_raw_data"><code class="name flex">
<span>def <span class="ident">load_raw_data</span></span>(<span>path: Path,<br>date_cols: Collection[str] = ('VorgangsDatum', 'ErledigungsDatum', 'Arbeitsbeginn', 'ErstellungsDatum')) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_raw_data(
path: Path,
date_cols: Collection[str] = (
&#39;VorgangsDatum&#39;,
&#39;ErledigungsDatum&#39;,
&#39;Arbeitsbeginn&#39;,
&#39;ErstellungsDatum&#39;,
),
) -&gt; tuple[DataFrame]:
&#34;&#34;&#34;load IHM dataset with standard structure
Parameters
----------
path : str
path to dataset file, usually CSV file
date_cols : Collection[str], optional
columns which contain dates and are parsed as such,
by default (
&#39;VorgangsDatum&#39;,
&#39;ErledigungsDatum&#39;,
&#39;Arbeitsbeginn&#39;,
&#39;ErstellungsDatum&#39;,
)
Returns
-------
DataFrame
raw dataset as DataFrame
&#34;&#34;&#34;
# load dataset
date_cols = list(date_cols)
data = pd.read_csv(
filepath_or_buffer=path,
sep=&#39;;&#39;,
encoding=&#39;cp1252&#39;,
parse_dates=list(date_cols),
dayfirst=True,
)
logger.info(&#39;Loaded dataset successfully.&#39;)
logger.info(
(
f&#39;Dataset properties: number of entries: {len(data)}, &#39;
f&#39;number of features {len(data.columns)}&#39;
)
)
return (data,)</code></pre>
</details>
<div class="desc"><p>load IHM dataset with standard structure</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>path</code></strong> :&ensp;<code>Path</code></dt>
<dd>path to dataset file, usually CSV file</dd>
<dt><strong><code>date_cols</code></strong> :&ensp;<code>Collection[str]</code>, optional</dt>
<dd>columns which contain dates and are parsed as such,
by default (
'VorgangsDatum',
'ErledigungsDatum',
'Arbeitsbeginn',
'ErstellungsDatum',
)</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>tuple[DataFrame]</code></dt>
<dd>one-element tuple containing the raw dataset as DataFrame</dd>
</dl></div>
</dd>
<dt id="lang_main.analysis.preprocessing.merge_similarity_duplicates"><code class="name flex">
<span>def <span class="ident">merge_similarity_duplicates</span></span>(<span>data: DataFrame, model: SentenceTransformer, cos_sim_threshold: float) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def merge_similarity_duplicates(
data: DataFrame,
model: SentenceTransformer,
cos_sim_threshold: float,
) -&gt; tuple[DataFrame]:
logger.info(&#39;Start merging of similarity candidates...&#39;)
# data
merged_data = data.copy()
model_input = merged_data[&#39;entry&#39;]
candidates_idx = candidates_by_index(
data_model_input=model_input,
model=model,
cos_sim_threshold=cos_sim_threshold,
)
# graph of similar ids
similar_id_graph, _ = similar_index_connection_graph(candidates_idx)
for similar_id_group in similar_index_groups(similar_id_graph):
similar_id_group = list(similar_id_group)
similar_data = merged_data.loc[similar_id_group, :]
# keep first entry with max number occurrences, then number of
# associated objects, then length of entry
similar_data = similar_data.sort_values(
by=[&#39;num_occur&#39;, &#39;num_assoc_obj_ids&#39;, &#39;len&#39;],
ascending=[False, False, False],
)
# merge information to first entry
data_idx = cast(PandasIndex, similar_data.index[0])
similar_data.at[data_idx, &#39;num_occur&#39;] = similar_data[&#39;num_occur&#39;].sum()
assoc_obj_ids = similar_data[&#39;assoc_obj_ids&#39;].to_numpy()
assoc_obj_ids = np.concatenate(assoc_obj_ids)
assoc_obj_ids = np.unique(assoc_obj_ids)
similar_data.at[data_idx, &#39;assoc_obj_ids&#39;] = assoc_obj_ids
similar_data.at[data_idx, &#39;num_assoc_obj_ids&#39;] = len(assoc_obj_ids)
# remaining indices, should be removed
similar_id_group.remove(data_idx)
merged_similar_data = similar_data.drop(index=similar_id_group)
# update entry in main dataset, drop remaining entries
merged_data.update(merged_similar_data)
merged_data = merged_data.drop(index=similar_id_group)
logger.info(&#39;Similarity candidates merged successfully.&#39;)
return (merged_data,)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.preprocessing.numeric_pre_filter_feature"><code class="name flex">
<span>def <span class="ident">numeric_pre_filter_feature</span></span>(<span>data: DataFrame, feature: str, bound_lower: int | None, bound_upper: int | None) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def numeric_pre_filter_feature(
data: DataFrame,
feature: str,
bound_lower: int | None,
bound_upper: int | None,
) -&gt; tuple[DataFrame]:
&#34;&#34;&#34;filter DataFrame for a given numerical feature regarding their bounds
bounds are inclusive: entries (bound_lower &lt;= entry &lt;= bound_upper) are retained
Parameters
----------
data : DataFrame
DataFrame to filter
feature : str
feature name to filter
bound_lower : int | None
lower bound of values to retain
bound_upper : int | None
upper bound of values to retain
Returns
-------
tuple[DataFrame]
filtered DataFrame
Raises
------
ValueError
if no bounds are provided, at least one bound must be set
&#34;&#34;&#34;
if not any([bound_lower, bound_upper]):
raise ValueError(&#39;No bounds for filtering provided&#39;)
data = data.copy()
if bound_lower is None:
bound_lower = cast(int, data[feature].min())
if bound_upper is None:
bound_upper = cast(int, data[feature].max())
filter_lower = data[feature] &gt;= bound_lower
filter_upper = data[feature] &lt;= bound_upper
filter = filter_lower &amp; filter_upper
data = data.loc[filter]
return (data,)</code></pre>
</details>
<div class="desc"><p>Filter a DataFrame on a numerical feature using a lower and an upper bound.
Bounds are inclusive: entries with bound_lower &lt;= entry &lt;= bound_upper are retained.</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>data</code></strong> :&ensp;<code>DataFrame</code></dt>
<dd>DataFrame to filter</dd>
<dt><strong><code>feature</code></strong> :&ensp;<code>str</code></dt>
<dd>feature name to filter</dd>
<dt><strong><code>bound_lower</code></strong> :&ensp;<code>int | None</code></dt>
<dd>lower bound of values to retain</dd>
<dt><strong><code>bound_upper</code></strong> :&ensp;<code>int | None</code></dt>
<dd>upper bound of values to retain</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>tuple[DataFrame]</code></dt>
<dd>filtered DataFrame</dd>
</dl>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>ValueError</code></dt>
<dd>if no bounds are provided; at least one bound must be set (the check is truthiness-based, so a bound of <code>0</code> also counts as not provided)</dd>
</dl></div>
</dd>
<dt id="lang_main.analysis.preprocessing.remove_NA"><code class="name flex">
<span>def <span class="ident">remove_NA</span></span>(<span>data: DataFrame, target_features: Collection[str] = ('VorgangsBeschreibung',)) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def remove_NA(
data: DataFrame,
target_features: Collection[str] = (&#39;VorgangsBeschreibung&#39;,),
) -&gt; tuple[DataFrame]:
&#34;&#34;&#34;function to drop NA entries based on a subset of features to be analysed
Parameters
----------
data : DataFrame
standard IHM dataset, perhaps pre-cleaned
target_features : Collection[str], optional
subset to analyse to define an NA entry, by default (&#39;VorgangsBeschreibung&#39;,)
Returns
-------
DataFrame
dataset with removed NA entries for given subset of features
&#34;&#34;&#34;
target_features = list(target_features)
wo_NA = data.dropna(axis=0, subset=target_features, ignore_index=True).copy() # type: ignore
logger.info(
f&#39;Removed NA entries for features &gt;&gt;{target_features}&lt;&lt; from dataset successfully.&#39;
)
return (wo_NA,)</code></pre>
</details>
<div class="desc"><p>function to drop NA entries based on a subset of features to be analysed</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>data</code></strong> :&ensp;<code>DataFrame</code></dt>
<dd>standard IHM dataset, perhaps pre-cleaned</dd>
<dt><strong><code>target_features</code></strong> :&ensp;<code>Collection[str]</code>, optional</dt>
<dd>subset to analyse to define an NA entry, by default ('VorgangsBeschreibung',)</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>tuple[DataFrame]</code></dt>
<dd>one-element tuple containing the dataset with removed NA entries for given subset of features</dd>
</dl></div>
</dd>
<dt id="lang_main.analysis.preprocessing.remove_duplicates"><code class="name flex">
<span>def <span class="ident">remove_duplicates</span></span>(<span>data: DataFrame) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def remove_duplicates(
data: DataFrame,
) -&gt; tuple[DataFrame]:
&#34;&#34;&#34;removes duplicated entries over all features in the given dataset
Parameters
----------
data : DataFrame
read data with standard structure
Returns
-------
DataFrame
dataset with removed duplicates over all features
&#34;&#34;&#34;
# obtain info about duplicates over all features
duplicates_filt = data.duplicated()
logger.info(f&#39;Number of duplicates over all features: {duplicates_filt.sum()}&#39;)
# drop duplicates
wo_duplicates = data.drop_duplicates(ignore_index=True)
duplicates_subset: list[str] = [
&#39;VorgangsID&#39;,
&#39;ObjektID&#39;,
]
duplicates_subset_filt = wo_duplicates.duplicated(subset=duplicates_subset)
logger.info(
(
&#39;Number of duplicates over subset &#39;
f&#39;&gt;&gt;{duplicates_subset}&lt;&lt;: {duplicates_subset_filt.sum()}&#39;
)
)
wo_duplicates = wo_duplicates.drop_duplicates(
subset=duplicates_subset, ignore_index=True
).copy()
logger.info(&#39;Removed all duplicates from dataset successfully.&#39;)
logger.info(
&#39;New Dataset properties: number of entries: %d, number of features %d&#39;,
len(wo_duplicates),
len(wo_duplicates.columns),
)
return (wo_duplicates,)</code></pre>
</details>
<div class="desc"><p>removes duplicated entries over all features in the given dataset</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>data</code></strong> :&ensp;<code>DataFrame</code></dt>
<dd>read data with standard structure</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>tuple[DataFrame]</code></dt>
<dd>one-element tuple containing the dataset with removed duplicates over all features</dd>
</dl></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.analysis" href="index.html">lang_main.analysis</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.analysis.preprocessing.analyse_feature" href="#lang_main.analysis.preprocessing.analyse_feature">analyse_feature</a></code></li>
<li><code><a title="lang_main.analysis.preprocessing.load_raw_data" href="#lang_main.analysis.preprocessing.load_raw_data">load_raw_data</a></code></li>
<li><code><a title="lang_main.analysis.preprocessing.merge_similarity_duplicates" href="#lang_main.analysis.preprocessing.merge_similarity_duplicates">merge_similarity_duplicates</a></code></li>
<li><code><a title="lang_main.analysis.preprocessing.numeric_pre_filter_feature" href="#lang_main.analysis.preprocessing.numeric_pre_filter_feature">numeric_pre_filter_feature</a></code></li>
<li><code><a title="lang_main.analysis.preprocessing.remove_NA" href="#lang_main.analysis.preprocessing.remove_NA">remove_NA</a></code></li>
<li><code><a title="lang_main.analysis.preprocessing.remove_duplicates" href="#lang_main.analysis.preprocessing.remove_duplicates">remove_duplicates</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,273 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.analysis.shared API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.analysis.shared</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.analysis.shared.candidates_by_index"><code class="name flex">
<span>def <span class="ident">candidates_by_index</span></span>(<span>data_model_input: pandas.core.series.Series,<br>model: sentence_transformers.SentenceTransformer.SentenceTransformer,<br>cos_sim_threshold: float = 0.5) -&gt; Iterator[tuple[int | numpy.int64, int | numpy.int64]]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def candidates_by_index(
data_model_input: Series,
model: SentenceTransformer,
cos_sim_threshold: float = 0.5,
) -&gt; Iterator[tuple[PandasIndex, PandasIndex]]:
&#34;&#34;&#34;function to filter candidate indices based on cosine similarity
using SentenceTransformer model in batch mode,
feed data as Series to retain information about indices of entries and
access them later in the original dataset
Parameters
----------
obj_id : ObjectID
_description_
data_model_input : Series
containing indices and text entries to process
model : SentenceTransformer
necessary SentenceTransformer model to encode text entries
cos_sim_threshold : float, optional
threshold for cosine similarity to filter candidates, by default 0.5
Yields
------
Iterator[tuple[PandasIndex, PandasIndex]]
tuple of index pairs which meet the cosine similarity threshold
&#34;&#34;&#34;
# embeddings
batch = cast(list[str], data_model_input.to_list())
embds = cast(
Tensor,
model.encode(
batch,
convert_to_numpy=False,
convert_to_tensor=True,
show_progress_bar=False,
),
)
# cosine similarity
cos_sim = cast(npt.NDArray, model.similarity(embds, embds).numpy())
np.fill_diagonal(cos_sim, 0.0)
cos_sim = np.triu(cos_sim)
cos_sim_idx = np.argwhere(cos_sim &gt;= cos_sim_threshold)
for idx_array in cos_sim_idx:
idx_pair = cast(
tuple[np.int64, np.int64], tuple(data_model_input.index[idx] for idx in idx_array)
)
yield idx_pair</code></pre>
</details>
<div class="desc"><p>Find candidate index pairs of similar text entries based on cosine similarity,
using a SentenceTransformer model in batch mode.
The data is fed as a Series to retain the indices of the entries, so that
they can be accessed later in the original dataset.</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>data_model_input</code></strong> :&ensp;<code>Series</code></dt>
<dd>containing indices and text entries to process</dd>
<dt><strong><code>model</code></strong> :&ensp;<code>SentenceTransformer</code></dt>
<dd>necessary SentenceTransformer model to encode text entries</dd>
<dt><strong><code>cos_sim_threshold</code></strong> :&ensp;<code>float</code>, optional</dt>
<dd>threshold for cosine similarity to filter candidates, by default 0.5</dd>
</dl>
<h2 id="yields">Yields</h2>
<dl>
<dt><code>tuple[PandasIndex, PandasIndex]</code></dt>
<dd>pair of indices whose entries meet the cosine similarity threshold</dd>
</dl></div>
</dd>
<dt id="lang_main.analysis.shared.clean_string_slim"><code class="name flex">
<span>def <span class="ident">clean_string_slim</span></span>(<span>string: str) -&gt; str</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def clean_string_slim(string: str) -&gt; str:
&#34;&#34;&#34;mapping function to clean single string entries in a series (feature-wise)
of the dataset, used to be applied element-wise for string features
Parameters
----------
string : str
dataset entry feature
Returns
-------
str
cleaned entry
&#34;&#34;&#34;
# remove special chars
# string = pattern_escape_newline.sub(&#39; &#39;, string)
string = pattern_escape_seq.sub(&#39; &#39;, string)
string = pattern_repeated_chars.sub(&#39;&#39;, string)
# string = pattern_dates.sub(&#39;&#39;, string)
# dates are used for context, should not be removed at this stage
string = pattern_whitespace.sub(&#39; &#39;, string)
# remove whitespaces at the beginning and the end
string = string.strip()
return string</code></pre>
</details>
<div class="desc"><p>Mapping function to clean a single string entry of a series (feature)
of the dataset; intended to be applied element-wise to string features.</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>string</code></strong> :&ensp;<code>str</code></dt>
<dd>dataset entry feature</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>str</code></dt>
<dd>cleaned entry</dd>
</dl></div>
</dd>
<dt id="lang_main.analysis.shared.entry_wise_cleansing"><code class="name flex">
<span>def <span class="ident">entry_wise_cleansing</span></span>(<span>data: pandas.core.frame.DataFrame,<br>target_features: Collection[str],<br>cleansing_func: Callable[[str], str] = &lt;function clean_string_slim&gt;) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def entry_wise_cleansing(
data: DataFrame,
target_features: Collection[str],
cleansing_func: Callable[[str], str] = clean_string_slim,
) -&gt; tuple[DataFrame]:
# apply given cleansing function to target feature
target_features = list(target_features)
data[target_features] = data[target_features].map(cleansing_func)
logger.info(
(&#39;Successfully applied entry-wise cleansing procedure &gt;&gt;%s&lt;&lt; for features &gt;&gt;%s&lt;&lt;&#39;),
cleansing_func.__name__,
target_features,
)
return (data,)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.shared.similar_index_connection_graph"><code class="name flex">
<span>def <span class="ident">similar_index_connection_graph</span></span>(<span>similar_idx_pairs: Iterable[tuple[int | numpy.int64, int | numpy.int64]]) -&gt; tuple[networkx.classes.graph.Graph, dict[str, float]]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def similar_index_connection_graph(
similar_idx_pairs: Iterable[tuple[PandasIndex, PandasIndex]],
) -&gt; tuple[Graph, dict[str, float]]:
# build index graph to obtain graph of connected (similar) indices
# use this graph to get connected components (indices which belong together)
# retain semantic connection on whole dataset
similar_id_graph = nx.Graph()
# for idx1, idx2 in similar_idx_pairs:
# # inplace operation, parent/child do not really exist in undirected graph
# update_graph(graph=similar_id_graph, parent=idx1, child=idx2)
update_graph(graph=similar_id_graph, batch=similar_idx_pairs)
graph_info = get_graph_metadata(graph=similar_id_graph, logging=False)
return similar_id_graph, graph_info</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.shared.similar_index_groups"><code class="name flex">
<span>def <span class="ident">similar_index_groups</span></span>(<span>similar_id_graph: networkx.classes.graph.Graph) -&gt; Iterator[tuple[int | numpy.int64, ...]]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def similar_index_groups(
similar_id_graph: Graph,
) -&gt; Iterator[tuple[PandasIndex, ...]]:
# groups of connected indices
ids_groups = cast(Iterator[set[PandasIndex]], nx.connected_components(G=similar_id_graph))
for id_group in ids_groups:
yield tuple(id_group)</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.analysis" href="index.html">lang_main.analysis</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.analysis.shared.candidates_by_index" href="#lang_main.analysis.shared.candidates_by_index">candidates_by_index</a></code></li>
<li><code><a title="lang_main.analysis.shared.clean_string_slim" href="#lang_main.analysis.shared.clean_string_slim">clean_string_slim</a></code></li>
<li><code><a title="lang_main.analysis.shared.entry_wise_cleansing" href="#lang_main.analysis.shared.entry_wise_cleansing">entry_wise_cleansing</a></code></li>
<li><code><a title="lang_main.analysis.shared.similar_index_connection_graph" href="#lang_main.analysis.shared.similar_index_connection_graph">similar_index_connection_graph</a></code></li>
<li><code><a title="lang_main.analysis.shared.similar_index_groups" href="#lang_main.analysis.shared.similar_index_groups">similar_index_groups</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,333 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.analysis.timeline API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.analysis.timeline</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.analysis.timeline.calc_delta_to_next_failure"><code class="name flex">
<span>def <span class="ident">calc_delta_to_next_failure</span></span>(<span>data: pandas.core.frame.DataFrame,<br>date_feature: str = 'ErstellungsDatum',<br>name_delta_feature: str = 'Zeitspanne bis zum nächsten Ereignis [Tage]',<br>convert_to_days: bool = True) -&gt; pandas.core.frame.DataFrame</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def calc_delta_to_next_failure(
data: DataFrameTLFiltered,
date_feature: str = &#39;ErstellungsDatum&#39;,
name_delta_feature: str = NAME_DELTA_FEAT_TO_NEXT_FAILURE,
convert_to_days: bool = True,
) -&gt; DataFrameTLFiltered:
data = data.copy()
last_val = data[date_feature].iat[-1]
shifted = data[date_feature].shift(-1, fill_value=last_val)
data[name_delta_feature] = shifted - data[date_feature]
data = data.sort_values(by=name_delta_feature, ascending=False)
if convert_to_days:
data[name_delta_feature] = data[name_delta_feature].dt.days
return data</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.calc_delta_to_repair"><code class="name flex">
<span>def <span class="ident">calc_delta_to_repair</span></span>(<span>data: pandas.core.frame.DataFrame,<br>date_feature_start: str = 'ErstellungsDatum',<br>date_feature_end: str = 'ErledigungsDatum',<br>name_delta_feature: str = 'Zeitspanne bis zur Behebung [Tage]',<br>convert_to_days: bool = True) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def calc_delta_to_repair(
data: DataFrame,
date_feature_start: str = &#39;ErstellungsDatum&#39;,
date_feature_end: str = &#39;ErledigungsDatum&#39;,
name_delta_feature: str = NAME_DELTA_FEAT_TO_REPAIR,
convert_to_days: bool = True,
) -&gt; tuple[DataFrame]:
logger.info(&#39;Calculating time differences between start and end of operations...&#39;)
data = data.copy()
data[name_delta_feature] = data[date_feature_end] - data[date_feature_start]
if convert_to_days:
data[name_delta_feature] = data[name_delta_feature].dt.days
logger.info(&#39;Calculation successful.&#39;)
return (data,)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.cleanup_descriptions"><code class="name flex">
<span>def <span class="ident">cleanup_descriptions</span></span>(<span>data: pandas.core.frame.DataFrame,<br>properties: Collection[str] = ('VorgangsBeschreibung', 'ErledigungsBeschreibung')) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def cleanup_descriptions(
data: DataFrame,
properties: Collection[str] = (
&#39;VorgangsBeschreibung&#39;,
&#39;ErledigungsBeschreibung&#39;,
),
) -&gt; tuple[DataFrame]:
logger.info(&#39;Cleaning necessary descriptions...&#39;)
data = data.copy()
features = list(properties)
data[features] = data[features].fillna(&#39;N.V.&#39;)
(data,) = entry_wise_cleansing(data, target_features=features)
logger.info(&#39;Cleansing successful.&#39;)
return (data.copy(),)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.filter_activities_per_obj_id"><code class="name flex">
<span>def <span class="ident">filter_activities_per_obj_id</span></span>(<span>data: pandas.core.frame.DataFrame,<br>activity_feature: str = 'VorgangsTypName',<br>relevant_activity_types: Iterable[str] = ('Reparaturauftrag (Portal)',),<br>feature_obj_id: str = 'ObjektID',<br>threshold_num_activities: int = 1) -&gt; tuple[pandas.core.frame.DataFrame, pandas.core.series.Series]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def filter_activities_per_obj_id(
data: DataFrame,
activity_feature: str = &#39;VorgangsTypName&#39;,
relevant_activity_types: Iterable[str] = (&#39;Reparaturauftrag (Portal)&#39;,),
feature_obj_id: str = &#39;ObjektID&#39;,
threshold_num_activities: int = 1,
) -&gt; tuple[DataFrame, Series]:
data = data.copy()
# filter only relevant activities, count occurrences for each ObjectID
logger.info(&#39;Filtering activities per ObjectID...&#39;)
filt_rel_activities = data[activity_feature].isin(relevant_activity_types)
data_filter_activities = data.loc[filt_rel_activities].copy()
num_activities_per_obj_id = cast(
Series, data_filter_activities[feature_obj_id].value_counts(sort=True)
)
# filter for ObjectIDs with more than given number of activities
filt_below_thresh = num_activities_per_obj_id &lt;= threshold_num_activities
# index of series contains ObjectIDs
obj_ids_below_thresh = num_activities_per_obj_id[filt_below_thresh].index
filt_entries_below_thresh = data_filter_activities[feature_obj_id].isin(
obj_ids_below_thresh
)
num_activities_per_obj_id = num_activities_per_obj_id.loc[~filt_below_thresh]
data_filter_activities = data_filter_activities.loc[~filt_entries_below_thresh]
logger.info(&#39;Activities per ObjectID filtered successfully.&#39;)
return data_filter_activities, num_activities_per_obj_id</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.filter_timeline_cands"><code class="name flex">
<span>def <span class="ident">filter_timeline_cands</span></span>(<span>data: pandas.core.frame.DataFrame,<br>cands: dict[int, tuple[tuple[int | numpy.int64, ...], ...]],<br>obj_id: int,<br>entry_idx: int,<br>sort_feature: str = 'ErstellungsDatum') -&gt; pandas.core.frame.DataFrame</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def filter_timeline_cands(
data: DataFrame,
cands: TimelineCandidates,
obj_id: ObjectID,
entry_idx: int,
sort_feature: str = &#39;ErstellungsDatum&#39;,
) -&gt; DataFrameTLFiltered:
data = data.copy()
cands_for_obj_id = cands[obj_id]
cands_choice = cands_for_obj_id[entry_idx]
data = data.loc[list(cands_choice)].sort_values(
by=sort_feature,
ascending=True,
)
return data</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.generate_model_input"><code class="name flex">
<span>def <span class="ident">generate_model_input</span></span>(<span>data: pandas.core.frame.DataFrame,<br>target_feature_name: str = 'nlp_model_input',<br>model_input_features: Iterable[str] = ('VorgangsTypName', 'VorgangsArtText', 'VorgangsBeschreibung')) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def generate_model_input(
data: DataFrame,
target_feature_name: str = &#39;nlp_model_input&#39;,
model_input_features: Iterable[str] = (
&#39;VorgangsTypName&#39;,
&#39;VorgangsArtText&#39;,
&#39;VorgangsBeschreibung&#39;,
),
) -&gt; tuple[DataFrame]:
logger.info(&#39;Generating concatenation of model input features...&#39;)
data = data.copy()
model_input_features = list(model_input_features)
input_features = data[model_input_features].fillna(&#39;&#39;).astype(str)
data[target_feature_name] = input_features.apply(
lambda x: &#39; - &#39;.join(x),
axis=1,
)
logger.info(&#39;Model input generated successfully.&#39;)
return (data,)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.get_timeline_candidates"><code class="name flex">
<span>def <span class="ident">get_timeline_candidates</span></span>(<span>data: pandas.core.frame.DataFrame,<br>num_activities_per_obj_id: pandas.core.series.Series,<br>*,<br>model: sentence_transformers.SentenceTransformer.SentenceTransformer,<br>cos_sim_threshold: float,<br>feature_obj_id: str = 'ObjektID',<br>feature_obj_text: str = 'HObjektText',<br>model_input_feature: str = 'nlp_model_input') -&gt; tuple[dict[int, tuple[tuple[int | numpy.int64, ...], ...]], dict[int, str]]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_timeline_candidates(
data: DataFrame,
num_activities_per_obj_id: Series,
*,
model: SentenceTransformer,
cos_sim_threshold: float,
feature_obj_id: str = &#39;ObjektID&#39;,
feature_obj_text: str = &#39;HObjektText&#39;,
model_input_feature: str = &#39;nlp_model_input&#39;,
) -&gt; tuple[TimelineCandidates, dict[ObjectID, str]]:
logger.info(&#39;Obtaining timeline candidates...&#39;)
candidates = _get_timeline_candidates_index(
data=data,
num_activities_per_obj_id=num_activities_per_obj_id,
model=model,
cos_sim_threshold=cos_sim_threshold,
feature_obj_id=feature_obj_id,
model_input_feature=model_input_feature,
)
tl_candidates = _transform_timeline_candidates(candidates)
logger.info(&#39;Timeline candidates obtained successfully.&#39;)
# text mapping to obtain object descriptors
logger.info(&#39;Mapping ObjectIDs to their respective text descriptor...&#39;)
map_obj_text = _map_obj_id_to_texts(
data=data,
feature_obj_id=feature_obj_id,
feature_obj_text=feature_obj_text,
)
logger.info(&#39;ObjectIDs successfully mapped to text descriptors.&#39;)
return tl_candidates, map_obj_text</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.timeline.remove_non_relevant_obj_ids"><code class="name flex">
<span>def <span class="ident">remove_non_relevant_obj_ids</span></span>(<span>data: pandas.core.frame.DataFrame,<br>thresh_unique_feat_per_id: int,<br>*,<br>feature_uniqueness: str = 'HObjektText',<br>feature_obj_id: str = 'ObjektID') -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def remove_non_relevant_obj_ids(
data: DataFrame,
thresh_unique_feat_per_id: int,
*,
feature_uniqueness: str = &#39;HObjektText&#39;,
feature_obj_id: str = &#39;ObjektID&#39;,
) -&gt; tuple[DataFrame]:
logger.info(&#39;Removing non-relevant ObjectIDs from dataset...&#39;)
data = data.copy()
ids_to_ignore = _non_relevant_obj_ids(
data=data,
thresh_unique_feat_per_id=thresh_unique_feat_per_id,
feature_uniqueness=feature_uniqueness,
feature_obj_id=feature_obj_id,
)
# only retain entries with ObjectIDs not in IDs to ignore
data = data.loc[~(data[feature_obj_id].isin(ids_to_ignore))]
logger.debug(&#39;Ignored ObjectIDs: %s&#39;, ids_to_ignore)
logger.info(&#39;Non-relevant ObjectIDs removed successfully.&#39;)
return (data,)</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.analysis" href="index.html">lang_main.analysis</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.analysis.timeline.calc_delta_to_next_failure" href="#lang_main.analysis.timeline.calc_delta_to_next_failure">calc_delta_to_next_failure</a></code></li>
<li><code><a title="lang_main.analysis.timeline.calc_delta_to_repair" href="#lang_main.analysis.timeline.calc_delta_to_repair">calc_delta_to_repair</a></code></li>
<li><code><a title="lang_main.analysis.timeline.cleanup_descriptions" href="#lang_main.analysis.timeline.cleanup_descriptions">cleanup_descriptions</a></code></li>
<li><code><a title="lang_main.analysis.timeline.filter_activities_per_obj_id" href="#lang_main.analysis.timeline.filter_activities_per_obj_id">filter_activities_per_obj_id</a></code></li>
<li><code><a title="lang_main.analysis.timeline.filter_timeline_cands" href="#lang_main.analysis.timeline.filter_timeline_cands">filter_timeline_cands</a></code></li>
<li><code><a title="lang_main.analysis.timeline.generate_model_input" href="#lang_main.analysis.timeline.generate_model_input">generate_model_input</a></code></li>
<li><code><a title="lang_main.analysis.timeline.get_timeline_candidates" href="#lang_main.analysis.timeline.get_timeline_candidates">get_timeline_candidates</a></code></li>
<li><code><a title="lang_main.analysis.timeline.remove_non_relevant_obj_ids" href="#lang_main.analysis.timeline.remove_non_relevant_obj_ids">remove_non_relevant_obj_ids</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,320 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.analysis.tokens API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.analysis.tokens</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.analysis.tokens.add_doc_info_to_graph"><code class="name flex">
<span>def <span class="ident">add_doc_info_to_graph</span></span>(<span>graph: <a title="lang_main.analysis.graphs.TokenGraph" href="graphs.html#lang_main.analysis.graphs.TokenGraph">TokenGraph</a>,<br>doc: spacy.tokens.doc.Doc,<br>weight: int | None) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def add_doc_info_to_graph(
graph: TokenGraph,
doc: SpacyDoc,
weight: int | None,
) -&gt; None:
# iterate over sentences
for sent in doc.sents:
# iterate over tokens in sentence
for token in sent:
# skip tokens which are not relevant
if not (token.pos_ in POS_OF_INTEREST or token.tag_ in TAG_OF_INTEREST):
continue
# skip tokens which are dates or times
if token.pos_ == &#39;NUM&#39; and is_str_date(string=token.text):
continue
relevant_descendants = obtain_relevant_descendants(token=token)
# for non-AUX: add parent &lt;--&gt; descendant pair to graph
if token.pos_ not in POS_INDIRECT:
for descendant in relevant_descendants:
# add descendant and parent to graph
update_graph(
graph=graph,
parent=token.lemma_,
child=descendant.lemma_,
weight_connection=weight,
)
else:
# if indirect POS, make connection between all associated words
combs = combinations(relevant_descendants, r=2)
for comb in combs:
# !! parents and children do not really exist in this case,
# !! but only one connection is made
update_graph(
graph=graph,
parent=comb[0].lemma_,
child=comb[1].lemma_,
weight_connection=weight,
)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.tokens.build_token_graph"><code class="name flex">
<span>def <span class="ident">build_token_graph</span></span>(<span>data: pandas.core.frame.DataFrame,<br>model: spacy.language.Language,<br>*,<br>target_feature: str = 'entry',<br>weights_feature: str | None = None,<br>batch_idx_feature: str | None = 'batched_idxs',<br>build_map: bool = True,<br>batch_size_model: int = 50,<br>logging_graph: bool = True) -&gt; tuple[<a title="lang_main.analysis.graphs.TokenGraph" href="graphs.html#lang_main.analysis.graphs.TokenGraph">TokenGraph</a>, dict[int | numpy.int64, spacy.tokens.doc.Doc] | None]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_token_graph(
data: DataFrame,
model: SpacyModel,
*,
target_feature: str = &#39;entry&#39;,
weights_feature: str | None = None,
batch_idx_feature: str | None = &#39;batched_idxs&#39;,
build_map: bool = True,
batch_size_model: int = 50,
logging_graph: bool = True,
) -&gt; tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None]:
graph = TokenGraph(enable_logging=logging_graph)
model_input = cast(tuple[str], tuple(data[target_feature].to_list()))
if weights_feature is not None:
weights = cast(tuple[int], tuple(data[weights_feature].to_list()))
else:
weights = None
docs_mapping: dict[PandasIndex, SpacyDoc] | None
if build_map and batch_idx_feature is None:
raise ValueError(&#39;Can not build mapping if batched indices are unknown.&#39;)
elif build_map:
indices = cast(tuple[list[PandasIndex]], tuple(data[batch_idx_feature].to_list()))
docs_mapping = {}
else:
indices = None
docs_mapping = None
index: int = 0
for doc in tqdm(
model.pipe(model_input, batch_size=batch_size_model), total=len(model_input)
):
weight: int | None = None
if weights is not None:
weight = weights[index]
add_doc_info_to_graph(
graph=graph,
doc=doc,
weight=weight,
)
# build map if option chosen
if indices is not None and docs_mapping is not None:
corresponding_indices = indices[index]
for idx in corresponding_indices:
docs_mapping[idx] = doc
index += 1
# metadata
graph.update_metadata()
# convert to undirected
graph.to_undirected(logging=False)
graph.perform_static_analysis()
return graph, docs_mapping</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.tokens.is_str_date"><code class="name flex">
<span>def <span class="ident">is_str_date</span></span>(<span>string: str, fuzzy: bool = False) -&gt; bool</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def is_str_date(
string: str,
fuzzy: bool = False,
) -&gt; bool:
&#34;&#34;&#34;Heuristic check whether a string contains a date; not 100 percent reliable
Parameters
----------
string : str
string to check for dates
fuzzy : bool, optional
whether to use dateutil.parser.parse fuzzy capability, by default False
Returns
-------
bool
indicates whether date was found or not
&#34;&#34;&#34;
try:
# check if string is a plain integer
# if so, only lengths of 2 or 4 characters are considered possible dates
int(string)
if len(string) not in {2, 4}:
return False
except ValueError:
# not a number
pass
try:
parse(string, fuzzy=fuzzy, dayfirst=True, yearfirst=False)
return True
except ValueError:
date_found: bool = False
match = pattern_dates.search(string)
if match is None:
return date_found
date_found = any(match.groups())
return date_found</code></pre>
</details>
<div class="desc"><p>Heuristic check whether a string contains a date; not 100 percent reliable</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>string</code></strong> :&ensp;<code>str</code></dt>
<dd>string to check for dates</dd>
<dt><strong><code>fuzzy</code></strong> :&ensp;<code>bool</code>, optional</dt>
<dd>whether to use dateutil.parser.parse fuzzy capability, by default False</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>bool</code></dt>
<dd>indicates whether date was found or not</dd>
</dl></div>
</dd>
<dt id="lang_main.analysis.tokens.obtain_relevant_descendants"><code class="name flex">
<span>def <span class="ident">obtain_relevant_descendants</span></span>(<span>token: spacy.tokens.token.Token) -&gt; Iterator[spacy.tokens.token.Token]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def obtain_relevant_descendants(
token: SpacyToken,
) -&gt; Iterator[SpacyToken]:
for descendant in token.subtree:
# subtrees contain the token itself
# if current element is token skip this element
if descendant == token:
continue
# if descendant is a date, skip it
if is_str_date(string=descendant.text):
continue
logger.debug(
&#39;Token &gt;&gt;%s&lt;&lt;, POS &gt;&gt;%s&lt;&lt; | descendant &gt;&gt;%s&lt;&lt;, POS &gt;&gt;%s&lt;&lt;&#39;,
token,
token.pos_,
descendant,
descendant.pos_,
)
# eliminate cases of cross-references with verbs
if (token.pos_ == &#39;AUX&#39; or token.pos_ == &#39;VERB&#39;) and (
descendant.pos_ == &#39;AUX&#39; or descendant.pos_ == &#39;VERB&#39;
):
continue
# skip cases in which descendant is indirect POS with others than verbs
elif descendant.pos_ in POS_INDIRECT:
continue
# skip cases in which child has no relevant POS or TAG
elif not (descendant.pos_ in POS_OF_INTEREST or descendant.tag_ in TAG_OF_INTEREST):
continue
yield descendant
# TODO look at results and fine-tune function accordingly</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.analysis.tokens.pre_clean_word"><code class="name flex">
<span>def <span class="ident">pre_clean_word</span></span>(<span>string: str) -&gt; str</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def pre_clean_word(string: str) -&gt; str:
pattern = r&#39;[^A-Za-zäöüÄÖÜ]+&#39;
string = re.sub(pattern, &#39;&#39;, string)
return string</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.analysis" href="index.html">lang_main.analysis</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.analysis.tokens.add_doc_info_to_graph" href="#lang_main.analysis.tokens.add_doc_info_to_graph">add_doc_info_to_graph</a></code></li>
<li><code><a title="lang_main.analysis.tokens.build_token_graph" href="#lang_main.analysis.tokens.build_token_graph">build_token_graph</a></code></li>
<li><code><a title="lang_main.analysis.tokens.is_str_date" href="#lang_main.analysis.tokens.is_str_date">is_str_date</a></code></li>
<li><code><a title="lang_main.analysis.tokens.obtain_relevant_descendants" href="#lang_main.analysis.tokens.obtain_relevant_descendants">obtain_relevant_descendants</a></code></li>
<li><code><a title="lang_main.analysis.tokens.pre_clean_word" href="#lang_main.analysis.tokens.pre_clean_word">pre_clean_word</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

206
docs/lang_main/config.html Normal file
View File

@ -0,0 +1,206 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.config API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.config</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.config.get_config_paths"><code class="name flex">
<span>def <span class="ident">get_config_paths</span></span>(<span>root_folder: Path, cfg_name: str, cyto_stylesheet_name: str) -&gt; tuple[pathlib.Path, pathlib.Path]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_config_paths(
root_folder: Path,
cfg_name: str,
cyto_stylesheet_name: str,
) -&gt; tuple[Path, Path]:
cfg_path_internal = (root_folder / cfg_name).resolve()
cyto_stylesheet_path = (root_folder / cyto_stylesheet_name).resolve()
return cfg_path_internal, cyto_stylesheet_path</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.config.load_cfg"><code class="name flex">
<span>def <span class="ident">load_cfg</span></span>(<span>starting_path: Path,<br>glob_pattern: str,<br>stop_folder_name: str | None,<br>lookup_cwd: bool = False) -&gt; dict[str, typing.Any]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_cfg(
starting_path: Path,
glob_pattern: str,
stop_folder_name: str | None,
lookup_cwd: bool = False,
) -&gt; dict[str, Any]:
&#34;&#34;&#34;Look for configuration file. Internal configs are not used any more because
the library behaviour is only guaranteed by external configurations.
Parameters
----------
starting_path : Path
path to start for the lookup
glob_pattern : str
pattern of the config file naming scheme
stop_folder_name : str | None
folder name at which the lookup should stop, the parent folder
is also searched, e.g.
if starting_path is path/to/start/folder and stop_folder_name is &#39;to&#39;,
then path/ is also searched
Returns
-------
dict[str, Any]
loaded config file
Raises
------
LangMainConfigNotFoundError
if no config file was found
&#34;&#34;&#34;
cfg_path: Path | None = None
if lookup_cwd:
print(&#39;Looking for cfg file in CWD.&#39;, flush=True)
cfg_path = search_cwd(glob_pattern)
if cfg_path is None:
print(
(
f&#39;Looking iteratively for config file. Start: {starting_path}, &#39;
f&#39;stop folder: {stop_folder_name}&#39;
),
flush=True,
)
cfg_path = search_iterative(
starting_path=starting_path,
glob_pattern=glob_pattern,
stop_folder_name=stop_folder_name,
)
if cfg_path is None:
raise LangMainConfigNotFoundError(&#39;Config file was not found.&#39;)
config = load_toml_config(path_to_toml=cfg_path)
print(f&#39;Loaded config from: &gt;&gt;{cfg_path}&lt;&lt;&#39;)
return config.copy()</code></pre>
</details>
<div class="desc"><p>Look for configuration file. Internal configs are not used any more because
the library behaviour is only guaranteed by external configurations.</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>starting_path</code></strong> :&ensp;<code>Path</code></dt>
<dd>path to start for the lookup</dd>
<dt><strong><code>glob_pattern</code></strong> :&ensp;<code>str</code></dt>
<dd>pattern of the config file naming scheme</dd>
<dt><strong><code>stop_folder_name</code></strong> :&ensp;<code>str | None</code></dt>
<dd>folder name at which the lookup should stop, the parent folder
is also searched, e.g.
if starting_path is path/to/start/folder and stop_folder_name is 'to',
then path/ is also searched</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>dict[str, Any]</code></dt>
<dd>loaded config file</dd>
</dl>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>LangMainConfigNotFoundError</code></dt>
<dd>if no config file was found</dd>
</dl></div>
</dd>
<dt id="lang_main.config.load_toml_config"><code class="name flex">
<span>def <span class="ident">load_toml_config</span></span>(<span>path_to_toml: str | Path) -&gt; dict[str, typing.Any]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_toml_config(
path_to_toml: str | Path,
) -&gt; dict[str, Any]:
with open(path_to_toml, &#39;rb&#39;) as f:
data = tomllib.load(f)
print(&#39;Loaded TOML config file successfully.&#39;, flush=True)
return data</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.config.get_config_paths" href="#lang_main.config.get_config_paths">get_config_paths</a></code></li>
<li><code><a title="lang_main.config.load_cfg" href="#lang_main.config.load_cfg">load_cfg</a></code></li>
<li><code><a title="lang_main.config.load_toml_config" href="#lang_main.config.load_toml_config">load_toml_config</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,66 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.constants API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.constants</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

330
docs/lang_main/errors.html Normal file
View File

@ -0,0 +1,330 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.errors API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.errors</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-classes">Classes</h2>
<dl>
<dt id="lang_main.errors.DependencyMissingError"><code class="flex name class">
<span>class <span class="ident">DependencyMissingError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class DependencyMissingError(Exception):
&#34;&#34;&#34;Error raised if needed dependency could not be found&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if needed dependency could not be found</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.EdgePropertyNotContainedError"><code class="flex name class">
<span>class <span class="ident">EdgePropertyNotContainedError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class EdgePropertyNotContainedError(Exception):
&#34;&#34;&#34;Error raised if a needed edge property is not contained in graph edges&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if a needed edge property is not contained in graph edges</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.EmptyEdgesError"><code class="flex name class">
<span>class <span class="ident">EmptyEdgesError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class EmptyEdgesError(EmptyGraphError):
&#34;&#34;&#34;Error raised if action should be performed on a graph&#39;s edges, but
it does not contain any&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if action should be performed on a graph's edges, but
it does not contain any</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li><a title="lang_main.errors.EmptyGraphError" href="#lang_main.errors.EmptyGraphError">EmptyGraphError</a></li>
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.EmptyGraphError"><code class="flex name class">
<span>class <span class="ident">EmptyGraphError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class EmptyGraphError(Exception):
&#34;&#34;&#34;Error raised if an operation should be performed on the graph,
but it does not contain any nodes or edges&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if an operation should be performed on the graph,
but it does not contain any nodes or edges</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
<h3>Subclasses</h3>
<ul class="hlist">
<li><a title="lang_main.errors.EmptyEdgesError" href="#lang_main.errors.EmptyEdgesError">EmptyEdgesError</a></li>
</ul>
</dd>
<dt id="lang_main.errors.GraphRenderError"><code class="flex name class">
<span>class <span class="ident">GraphRenderError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class GraphRenderError(Exception):
&#34;&#34;&#34;Error raised if a graph object can not be rendered&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if a graph object can not be rendered</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.LangMainConfigNotFoundError"><code class="flex name class">
<span>class <span class="ident">LangMainConfigNotFoundError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class LangMainConfigNotFoundError(Exception):
&#34;&#34;&#34;Error raised if a config file could not be found successfully&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if a config file could not be found successfully</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.LanguageModelNotFoundError"><code class="flex name class">
<span>class <span class="ident">LanguageModelNotFoundError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class LanguageModelNotFoundError(Exception):
&#34;&#34;&#34;Error raised if a given language model could not be loaded successfully&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if a given language model could not be loaded successfully</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.NoPerformableActionError"><code class="flex name class">
<span>class <span class="ident">NoPerformableActionError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class NoPerformableActionError(Exception):
&#34;&#34;&#34;Error describing that no action is available in the current pipeline&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error describing that no action is available in the current pipeline</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.NodePropertyNotContainedError"><code class="flex name class">
<span>class <span class="ident">NodePropertyNotContainedError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class NodePropertyNotContainedError(Exception):
&#34;&#34;&#34;Error raised if a needed node property is not contained in graph nodes&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if a needed node property is not contained in graph nodes</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.OutputInPipelineContainerError"><code class="flex name class">
<span>class <span class="ident">OutputInPipelineContainerError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class OutputInPipelineContainerError(Exception):
&#34;&#34;&#34;Error raised if an output was detected by one of the performed
actions in a PipelineContainer. Each action in a PipelineContainer is itself a
procedure which does not have any parameters or return values and should therefore not
return any values.&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if an output was detected by one of the performed
actions in a PipelineContainer. Each action in a PipelineContainer is itself a
procedure which does not have any parameters or return values and should therefore not
return any values.</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
<dt id="lang_main.errors.WrongActionTypeError"><code class="flex name class">
<span>class <span class="ident">WrongActionTypeError</span></span>
<span>(</span><span>*args, **kwargs)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class WrongActionTypeError(Exception):
&#34;&#34;&#34;Error raised if added action type is not supported by corresponding pipeline&#34;&#34;&#34;</code></pre>
</details>
<div class="desc"><p>Error raised if added action type is not supported by corresponding pipeline</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>builtins.Exception</li>
<li>builtins.BaseException</li>
</ul>
</dd>
</dl>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-classes">Classes</a></h3>
<ul>
<li>
<h4><code><a title="lang_main.errors.DependencyMissingError" href="#lang_main.errors.DependencyMissingError">DependencyMissingError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.EdgePropertyNotContainedError" href="#lang_main.errors.EdgePropertyNotContainedError">EdgePropertyNotContainedError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.EmptyEdgesError" href="#lang_main.errors.EmptyEdgesError">EmptyEdgesError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.EmptyGraphError" href="#lang_main.errors.EmptyGraphError">EmptyGraphError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.GraphRenderError" href="#lang_main.errors.GraphRenderError">GraphRenderError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.LangMainConfigNotFoundError" href="#lang_main.errors.LangMainConfigNotFoundError">LangMainConfigNotFoundError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.LanguageModelNotFoundError" href="#lang_main.errors.LanguageModelNotFoundError">LanguageModelNotFoundError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.NoPerformableActionError" href="#lang_main.errors.NoPerformableActionError">NoPerformableActionError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.NodePropertyNotContainedError" href="#lang_main.errors.NodePropertyNotContainedError">NodePropertyNotContainedError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.OutputInPipelineContainerError" href="#lang_main.errors.OutputInPipelineContainerError">OutputInPipelineContainerError</a></code></h4>
</li>
<li>
<h4><code><a title="lang_main.errors.WrongActionTypeError" href="#lang_main.errors.WrongActionTypeError">WrongActionTypeError</a></code></h4>
</li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

123
docs/lang_main/index.html Normal file
View File

@ -0,0 +1,123 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Package <code>lang_main</code></h1>
</header>
<section id="section-intro">
</section>
<section>
<h2 class="section-title" id="header-submodules">Sub-modules</h2>
<dl>
<dt><code class="name"><a title="lang_main.analysis" href="analysis/index.html">lang_main.analysis</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.config" href="config.html">lang_main.config</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.constants" href="constants.html">lang_main.constants</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.errors" href="errors.html">lang_main.errors</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.io" href="io.html">lang_main.io</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.loggers" href="loggers.html">lang_main.loggers</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.model_loader" href="model_loader.html">lang_main.model_loader</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.pipelines" href="pipelines/index.html">lang_main.pipelines</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.render" href="render/index.html">lang_main.render</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.search" href="search.html">lang_main.search</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.types" href="types.html">lang_main.types</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3><a href="#header-submodules">Sub-modules</a></h3>
<ul>
<li><code><a title="lang_main.analysis" href="analysis/index.html">lang_main.analysis</a></code></li>
<li><code><a title="lang_main.config" href="config.html">lang_main.config</a></code></li>
<li><code><a title="lang_main.constants" href="constants.html">lang_main.constants</a></code></li>
<li><code><a title="lang_main.errors" href="errors.html">lang_main.errors</a></code></li>
<li><code><a title="lang_main.io" href="io.html">lang_main.io</a></code></li>
<li><code><a title="lang_main.loggers" href="loggers.html">lang_main.loggers</a></code></li>
<li><code><a title="lang_main.model_loader" href="model_loader.html">lang_main.model_loader</a></code></li>
<li><code><a title="lang_main.pipelines" href="pipelines/index.html">lang_main.pipelines</a></code></li>
<li><code><a title="lang_main.render" href="render/index.html">lang_main.render</a></code></li>
<li><code><a title="lang_main.search" href="search.html">lang_main.search</a></code></li>
<li><code><a title="lang_main.types" href="types.html">lang_main.types</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

227
docs/lang_main/io.html Normal file
View File

@ -0,0 +1,227 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.io API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.io</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.io.create_saving_folder"><code class="name flex">
<span>def <span class="ident">create_saving_folder</span></span>(<span>saving_path_folder: str | pathlib.Path, overwrite_existing: bool = False) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def create_saving_folder(
saving_path_folder: str | Path,
overwrite_existing: bool = False,
) -&gt; None:
# check for existence of given path
if isinstance(saving_path_folder, str):
saving_path_folder = Path(saving_path_folder)
if not saving_path_folder.exists():
saving_path_folder.mkdir(parents=True)
else:
if overwrite_existing:
# overwrite if desired (deletes whole path and re-creates it)
shutil.rmtree(saving_path_folder)
saving_path_folder.mkdir(parents=True)
else:
logger.info(
(
&#39;Path &gt;&gt;%s&lt;&lt; already exists and remained unchanged. If you want to &#39;
&#39;overwrite this path, use parameter &gt;&gt;overwrite_existing&lt;&lt;.&#39;,
),
saving_path_folder,
)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.io.decode_from_base64_str"><code class="name flex">
<span>def <span class="ident">decode_from_base64_str</span></span>(<span>b64_str: str, encoding: str = 'utf-8') -&gt; Any</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def decode_from_base64_str(
b64_str: str,
encoding: str = &#39;utf-8&#39;,
) -&gt; Any:
b64_bytes = b64_str.encode(encoding=encoding)
decoded = base64.b64decode(b64_bytes)
return pickle.loads(decoded)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.io.encode_file_to_base64_str"><code class="name flex">
<span>def <span class="ident">encode_file_to_base64_str</span></span>(<span>path: pathlib.Path, encoding: str = 'utf-8') -&gt; str</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def encode_file_to_base64_str(
path: Path,
encoding: str = &#39;utf-8&#39;,
) -&gt; str:
with open(path, &#39;rb&#39;) as file:
b64_bytes = base64.b64encode(file.read())
return b64_bytes.decode(encoding=encoding)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.io.encode_to_base64_str"><code class="name flex">
<span>def <span class="ident">encode_to_base64_str</span></span>(<span>obj: Any, encoding: str = 'utf-8') -&gt; str</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def encode_to_base64_str(
obj: Any,
encoding: str = &#39;utf-8&#39;,
) -&gt; str:
serialised = pickle.dumps(obj, protocol=PICKLE_PROTOCOL_VERSION)
b64_bytes = base64.b64encode(serialised)
return b64_bytes.decode(encoding=encoding)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.io.get_entry_point"><code class="name flex">
<span>def <span class="ident">get_entry_point</span></span>(<span>saving_path: pathlib.Path,<br>filename: str,<br>file_ext: str = '.pkl',<br>check_existence: bool = True) -&gt; pathlib.Path</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_entry_point(
saving_path: Path,
filename: str,
file_ext: str = &#39;.pkl&#39;,
check_existence: bool = True,
) -&gt; Path:
entry_point_path = (saving_path / filename).with_suffix(file_ext)
if check_existence and not entry_point_path.exists():
raise FileNotFoundError(
f&#39;Could not find provided entry data under path: &gt;&gt;{entry_point_path}&lt;&lt;&#39;
)
return entry_point_path</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.io.load_pickle"><code class="name flex">
<span>def <span class="ident">load_pickle</span></span>(<span>path: str | pathlib.Path) -&gt; Any</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_pickle(
path: str | Path,
) -&gt; Any:
with open(path, &#39;rb&#39;) as file:
obj = pickle.load(file)
logger.info(&#39;Loaded file successfully.&#39;)
return obj</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.io.save_pickle"><code class="name flex">
<span>def <span class="ident">save_pickle</span></span>(<span>obj: Any, path: str | pathlib.Path) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def save_pickle(
obj: Any,
path: str | Path,
) -&gt; None:
with open(path, &#39;wb&#39;) as file:
pickle.dump(obj, file, protocol=PICKLE_PROTOCOL_VERSION)
logger.info(&#39;Saved file successfully under %s&#39;, path)</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.io.create_saving_folder" href="#lang_main.io.create_saving_folder">create_saving_folder</a></code></li>
<li><code><a title="lang_main.io.decode_from_base64_str" href="#lang_main.io.decode_from_base64_str">decode_from_base64_str</a></code></li>
<li><code><a title="lang_main.io.encode_file_to_base64_str" href="#lang_main.io.encode_file_to_base64_str">encode_file_to_base64_str</a></code></li>
<li><code><a title="lang_main.io.encode_to_base64_str" href="#lang_main.io.encode_to_base64_str">encode_to_base64_str</a></code></li>
<li><code><a title="lang_main.io.get_entry_point" href="#lang_main.io.get_entry_point">get_entry_point</a></code></li>
<li><code><a title="lang_main.io.load_pickle" href="#lang_main.io.load_pickle">load_pickle</a></code></li>
<li><code><a title="lang_main.io.save_pickle" href="#lang_main.io.save_pickle">save_pickle</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@@ -0,0 +1,66 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.loggers API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.loggers</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@@ -0,0 +1,162 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.model_loader API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.model_loader</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.model_loader.instantiate_model"><code class="name flex">
<span>def <span class="ident">instantiate_model</span></span>(<span>model_load_map: ModelLoaderMap, model: LanguageModels) -&gt; sentence_transformers.SentenceTransformer.SentenceTransformer | spacy.language.Language</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def instantiate_model(
model_load_map: ModelLoaderMap,
model: LanguageModels,
) -&gt; Model:
if model not in model_load_map:
raise KeyError(f&#39;Model &gt;&gt;{model}&lt;&lt; not known. Choose from: {model_load_map.keys()}&#39;)
builder_func = model_load_map[model][&#39;func&#39;]
func_kwargs = model_load_map[model][&#39;kwargs&#39;]
return builder_func(**func_kwargs)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.model_loader.load_sentence_transformer"><code class="name flex">
<span>def <span class="ident">load_sentence_transformer</span></span>(<span>model_name: STFRModelTypes | str,<br>similarity_func: SimilarityFunction = SimilarityFunction.COSINE,<br>backend: STFRBackends = torch,<br>device: STFRDeviceTypes = cpu,<br>local_files_only: bool = True,<br>trust_remote_code: bool = False,<br>model_save_folder: str | None = None,<br>model_kwargs: STFRModelArgs | dict[str, Any] | None = None,<br>force_download: bool = False) -&gt; sentence_transformers.SentenceTransformer.SentenceTransformer</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_sentence_transformer(
model_name: STFRModelTypes | str,
similarity_func: SimilarityFunction = SimilarityFunction.COSINE,
backend: STFRBackends = STFRBackends.TORCH,
device: STFRDeviceTypes = STFRDeviceTypes.CPU,
local_files_only: bool = True,
trust_remote_code: bool = False,
model_save_folder: str | None = None,
model_kwargs: STFRModelArgs | dict[str, Any] | None = None,
force_download: bool = False,
) -&gt; SentenceTransformer:
model_name_or_path = _preprocess_STFR_model_name(
model_name=model_name, backend=backend, force_download=force_download
)
model = SentenceTransformer(
model_name_or_path=model_name_or_path,
similarity_fn_name=similarity_func,
backend=backend, # type: ignore Literal matches Enum
device=device,
cache_folder=model_save_folder,
local_files_only=local_files_only,
trust_remote_code=trust_remote_code,
model_kwargs=model_kwargs, # type: ignore
)
logger.info(&#39;[MODEL LOADING] Loaded model &gt;&gt;%s&lt;&lt; successfully&#39;, model_name)
return model</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.model_loader.load_spacy"><code class="name flex">
<span>def <span class="ident">load_spacy</span></span>(<span>model_name: str) -&gt; spacy.language.Language</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_spacy(
model_name: str,
) -&gt; SpacyModel:
try:
spacy_model_obj = importlib.import_module(model_name)
except ModuleNotFoundError:
raise LanguageModelNotFoundError(
(
f&#39;Could not find spaCy model &gt;&gt;{model_name}&lt;&lt;. &#39;
f&#39;Check if it is installed correctly.&#39;
)
)
pretrained_model = cast(SpacyModel, spacy_model_obj.load())
logger.info(&#39;[MODEL LOADING] Loaded model &gt;&gt;%s&lt;&lt; successfully&#39;, model_name)
return pretrained_model</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.model_loader.instantiate_model" href="#lang_main.model_loader.instantiate_model">instantiate_model</a></code></li>
<li><code><a title="lang_main.model_loader.load_sentence_transformer" href="#lang_main.model_loader.load_sentence_transformer">load_sentence_transformer</a></code></li>
<li><code><a title="lang_main.model_loader.load_spacy" href="#lang_main.model_loader.load_spacy">load_spacy</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@@ -0,0 +1,755 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.pipelines.base API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.pipelines.base</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-classes">Classes</h2>
<dl>
<dt id="lang_main.pipelines.base.BasePipeline"><code class="flex name class">
<span>class <span class="ident">BasePipeline</span></span>
<span>(</span><span>name: str, working_dir: Path)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class BasePipeline(ABC):
def __init__(
self,
name: str,
working_dir: Path,
) -&gt; None:
# init base class
super().__init__()
# name of pipeline
self.name = name
# working directory for pipeline == output path
self.working_dir = working_dir
# container for actions to perform during pass
self.actions: list[Callable] = []
self.action_names: list[str] = []
self.action_skip: list[bool] = []
# progress tracking, start at 1
self.curr_proc_idx: int = 1
def __repr__(self) -&gt; str:
return (
f&#39;{self.__class__.__name__}(name: {self.name}, &#39;
f&#39;working dir: {self.working_dir}, contents: {self.action_names})&#39;
)
def panic_wrong_action_type(
self,
action: Any,
compatible_type: str,
) -&gt; Never:
raise WrongActionTypeError(
(
f&#39;Action must be of type {compatible_type}, &#39;
f&#39;but is of type &gt;&gt;{type(action)}&lt;&lt;.&#39;
)
)
def prep_run(self) -&gt; None:
logger.info(&#39;Starting pipeline &gt;&gt;%s&lt;&lt;...&#39;, self.name)
# progress tracking
self.curr_proc_idx = 1
# check if performable actions available
if len(self.actions) == 0:
raise NoPerformableActionError(
&#39;The pipeline does not contain any performable actions.&#39;
)
def post_run(self) -&gt; None:
logger.info(
&#39;Processing pipeline &gt;&gt;%s&lt;&lt; successfully ended after %d steps.&#39;,
self.name,
(self.curr_proc_idx - 1),
)
@abstractmethod
def add(self) -&gt; None: ...
@abstractmethod
def logic(self) -&gt; None: ...
def run(self, *args, **kwargs) -&gt; Any:
self.prep_run()
ret = self.logic(*args, **kwargs)
self.post_run()
return ret</code></pre>
</details>
<div class="desc"><p>Helper class that provides a standard way to create an ABC using
inheritance.</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>abc.ABC</li>
</ul>
<h3>Subclasses</h3>
<ul class="hlist">
<li><a title="lang_main.pipelines.base.Pipeline" href="#lang_main.pipelines.base.Pipeline">Pipeline</a></li>
<li><a title="lang_main.pipelines.base.PipelineContainer" href="#lang_main.pipelines.base.PipelineContainer">PipelineContainer</a></li>
</ul>
<h3>Methods</h3>
<dl>
<dt id="lang_main.pipelines.base.BasePipeline.add"><code class="name flex">
<span>def <span class="ident">add</span></span>(<span>self) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@abstractmethod
def add(self) -&gt; None: ...</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.BasePipeline.logic"><code class="name flex">
<span>def <span class="ident">logic</span></span>(<span>self) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@abstractmethod
def logic(self) -&gt; None: ...</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.BasePipeline.panic_wrong_action_type"><code class="name flex">
<span>def <span class="ident">panic_wrong_action_type</span></span>(<span>self, action: Any, compatible_type: str) -&gt; Never</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def panic_wrong_action_type(
self,
action: Any,
compatible_type: str,
) -&gt; Never:
raise WrongActionTypeError(
(
f&#39;Action must be of type {compatible_type}, &#39;
f&#39;but is of type &gt;&gt;{type(action)}&lt;&lt;.&#39;
)
)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.BasePipeline.post_run"><code class="name flex">
<span>def <span class="ident">post_run</span></span>(<span>self) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def post_run(self) -&gt; None:
logger.info(
&#39;Processing pipeline &gt;&gt;%s&lt;&lt; successfully ended after %d steps.&#39;,
self.name,
(self.curr_proc_idx - 1),
)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.BasePipeline.prep_run"><code class="name flex">
<span>def <span class="ident">prep_run</span></span>(<span>self) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def prep_run(self) -&gt; None:
logger.info(&#39;Starting pipeline &gt;&gt;%s&lt;&lt;...&#39;, self.name)
# progress tracking
self.curr_proc_idx = 1
# check if performable actions available
if len(self.actions) == 0:
raise NoPerformableActionError(
&#39;The pipeline does not contain any performable actions.&#39;
)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.BasePipeline.run"><code class="name flex">
<span>def <span class="ident">run</span></span>(<span>self, *args, **kwargs) -&gt; Any</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def run(self, *args, **kwargs) -&gt; Any:
self.prep_run()
ret = self.logic(*args, **kwargs)
self.post_run()
return ret</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</dd>
<dt id="lang_main.pipelines.base.Pipeline"><code class="flex name class">
<span>class <span class="ident">Pipeline</span></span>
<span>(</span><span>name: str, working_dir: Path)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class Pipeline(BasePipeline):
def __init__(
self,
name: str,
working_dir: Path,
) -&gt; None:
# init base class
super().__init__(name=name, working_dir=working_dir)
# name of pipeline
self.name = name
# working directory for pipeline == output path
self.working_dir = working_dir
# container for actions to perform during pass
self.actions_kwargs: list[dict[str, Any]] = []
self.save_results: ResultHandling = []
self.load_results: ResultHandling = []
# intermediate result
self._intermediate_result: tuple[Any, ...] | None = None
def __repr__(self) -&gt; str:
return (
f&#39;{self.__class__.__name__}(name: {self.name}, &#39;
f&#39;working dir: {self.working_dir}, contents: {self.action_names})&#39;
)
@override
def add(
self,
action: Callable,
action_kwargs: dict[str, Any] | None = None,
skip: bool = False,
save_result: bool = False,
load_result: bool = False,
filename: str | None = None,
) -&gt; None:
# check explicitly for function type
# if isinstance(action, FunctionType):
if action_kwargs is None:
action_kwargs = {}
if isinstance(action, Callable):
self.actions.append(action)
self.action_names.append(action.__name__)
self.actions_kwargs.append(action_kwargs.copy())
self.action_skip.append(skip)
self.save_results.append((save_result, filename))
self.load_results.append((load_result, filename))
else:
self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
def get_result_path(
self,
action_idx: int,
filename: str | None,
) -&gt; tuple[Path, str]:
action_name = self.action_names[action_idx]
if filename is None:
target_filename = f&#39;Pipe-{self.name}_Step-{self.curr_proc_idx}_{action_name}&#39;
else:
target_filename = filename
target_path = self.working_dir.joinpath(target_filename).with_suffix(&#39;.pkl&#39;)
return target_path, action_name
def load_step(
self,
action_idx: int,
filename: str | None,
) -&gt; tuple[Any, ...]:
target_path, action_name = self.get_result_path(action_idx, filename)
if not target_path.exists():
raise FileNotFoundError(
(
f&#39;No intermediate results for action &gt;&gt;{action_name}&lt;&lt; &#39;
f&#39;under &gt;&gt;{target_path}&lt;&lt; found&#39;
)
)
# results should be tuple, but that is not guaranteed
result_loaded = cast(tuple[Any, ...], load_pickle(target_path))
if not isinstance(result_loaded, tuple):
raise TypeError(f&#39;Loaded results must be tuple, not {type(result_loaded)}&#39;)
return result_loaded
def save_step(
self,
action_idx: int,
filename: str | None,
) -&gt; None:
target_path, _ = self.get_result_path(action_idx, filename)
save_pickle(obj=self._intermediate_result, path=target_path)
@override
def logic(
self,
starting_values: tuple[Any, ...] | None = None,
) -&gt; tuple[Any, ...]:
first_performed: bool = False
for idx, (action, action_kwargs) in enumerate(zip(self.actions, self.actions_kwargs)):
if self.action_skip[idx]:
self.curr_proc_idx += 1
continue
# loading
if self.load_results[idx][0]:
filename = self.load_results[idx][1]
ret = self.load_step(action_idx=idx, filename=filename)
self._intermediate_result = ret
logger.info(
&#39;[No Calculation] Loaded result for action &gt;&gt;%s&lt;&lt; successfully&#39;,
self.action_names[idx],
)
self.curr_proc_idx += 1
continue
# calculation
if not first_performed:
args = starting_values
first_performed = True
else:
args = ret
if args is not None:
ret = action(*args, **action_kwargs)
else:
ret = action(**action_kwargs)
if ret is not None and not isinstance(ret, tuple):
ret = (ret,)
ret = cast(tuple[Any, ...], ret)
# save intermediate result
self._intermediate_result = ret
# saving result locally, always save last action
if self.save_results[idx][0] or idx == (len(self.actions) - 1):
filename = self.save_results[idx][1]
self.save_step(action_idx=idx, filename=filename)
# processing tracking
self.curr_proc_idx += 1
return ret</code></pre>
</details>
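<div class="desc"><p>Pipeline that runs its added actions in the order in which they were added.
The first calculated action is called with <code>starting_values</code> (if given), each following
action with the result of the previous one. Results that are not tuples are wrapped in a
one-element tuple. Actions can be skipped, and their results can be loaded from or saved to
pickle files in the working directory; the result of the last action is saved whenever it is calculated.</p></div>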
<h3>Ancestors</h3>
<ul class="hlist">
<li><a title="lang_main.pipelines.base.BasePipeline" href="#lang_main.pipelines.base.BasePipeline">BasePipeline</a></li>
<li>abc.ABC</li>
</ul>
<h3>Methods</h3>
<dl>
<dt id="lang_main.pipelines.base.Pipeline.add"><code class="name flex">
<span>def <span class="ident">add</span></span>(<span>self,<br>action: Callable,<br>action_kwargs: dict[str, Any] | None = None,<br>skip: bool = False,<br>save_result: bool = False,<br>load_result: bool = False,<br>filename: str | None = None) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@override
def add(
self,
action: Callable,
action_kwargs: dict[str, Any] | None = None,
skip: bool = False,
save_result: bool = False,
load_result: bool = False,
filename: str | None = None,
) -&gt; None:
# check explicitly for function type
# if isinstance(action, FunctionType):
if action_kwargs is None:
action_kwargs = {}
if isinstance(action, Callable):
self.actions.append(action)
self.action_names.append(action.__name__)
self.actions_kwargs.append(action_kwargs.copy())
self.action_skip.append(skip)
self.save_results.append((save_result, filename))
self.load_results.append((load_result, filename))
else:
self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.Pipeline.get_result_path"><code class="name flex">
<span>def <span class="ident">get_result_path</span></span>(<span>self, action_idx: int, filename: str | None) -&gt; tuple[pathlib.Path, str]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_result_path(
self,
action_idx: int,
filename: str | None,
) -&gt; tuple[Path, str]:
action_name = self.action_names[action_idx]
if filename is None:
target_filename = f&#39;Pipe-{self.name}_Step-{self.curr_proc_idx}_{action_name}&#39;
else:
target_filename = filename
target_path = self.working_dir.joinpath(target_filename).with_suffix(&#39;.pkl&#39;)
return target_path, action_name</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.Pipeline.load_step"><code class="name flex">
<span>def <span class="ident">load_step</span></span>(<span>self, action_idx: int, filename: str | None) -&gt; tuple[typing.Any, ...]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def load_step(
self,
action_idx: int,
filename: str | None,
) -&gt; tuple[Any, ...]:
target_path, action_name = self.get_result_path(action_idx, filename)
if not target_path.exists():
raise FileNotFoundError(
(
f&#39;No intermediate results for action &gt;&gt;{action_name}&lt;&lt; &#39;
f&#39;under &gt;&gt;{target_path}&lt;&lt; found&#39;
)
)
# results should be tuple, but that is not guaranteed
result_loaded = cast(tuple[Any, ...], load_pickle(target_path))
if not isinstance(result_loaded, tuple):
raise TypeError(f&#39;Loaded results must be tuple, not {type(result_loaded)}&#39;)
return result_loaded</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.Pipeline.logic"><code class="name flex">
<span>def <span class="ident">logic</span></span>(<span>self, starting_values: tuple[Any, ...] | None = None) -&gt; tuple[typing.Any, ...]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@override
def logic(
self,
starting_values: tuple[Any, ...] | None = None,
) -&gt; tuple[Any, ...]:
first_performed: bool = False
for idx, (action, action_kwargs) in enumerate(zip(self.actions, self.actions_kwargs)):
if self.action_skip[idx]:
self.curr_proc_idx += 1
continue
# loading
if self.load_results[idx][0]:
filename = self.load_results[idx][1]
ret = self.load_step(action_idx=idx, filename=filename)
self._intermediate_result = ret
logger.info(
&#39;[No Calculation] Loaded result for action &gt;&gt;%s&lt;&lt; successfully&#39;,
self.action_names[idx],
)
self.curr_proc_idx += 1
continue
# calculation
if not first_performed:
args = starting_values
first_performed = True
else:
args = ret
if args is not None:
ret = action(*args, **action_kwargs)
else:
ret = action(**action_kwargs)
if ret is not None and not isinstance(ret, tuple):
ret = (ret,)
ret = cast(tuple[Any, ...], ret)
# save intermediate result
self._intermediate_result = ret
# saving result locally, always save last action
if self.save_results[idx][0] or idx == (len(self.actions) - 1):
filename = self.save_results[idx][1]
self.save_step(action_idx=idx, filename=filename)
# processing tracking
self.curr_proc_idx += 1
return ret</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.Pipeline.save_step"><code class="name flex">
<span>def <span class="ident">save_step</span></span>(<span>self, action_idx: int, filename: str | None) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def save_step(
self,
action_idx: int,
filename: str | None,
) -&gt; None:
target_path, _ = self.get_result_path(action_idx, filename)
save_pickle(obj=self._intermediate_result, path=target_path)</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</dd>
<dt id="lang_main.pipelines.base.PipelineContainer"><code class="flex name class">
<span>class <span class="ident">PipelineContainer</span></span>
<span>(</span><span>name: str, working_dir: Path)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class PipelineContainer(BasePipeline):
&#34;&#34;&#34;Container class for basic actions.
Basic actions are usually functions, which do not take any parameters
and return nothing. Indeed, if an action returns any values after its
procedure is finished, an error is raised. Therefore, PipelineContainers
can be seen as a concatenation of many (independent) simple procedures
which are executed in the order in which they were added to the pipe.
With a simple call of the ``run`` method the actions are performed.
Additionally, there is an option to skip actions which can be set in
the ``add`` method. This allows for easily configurable pipelines,
e.g., via a user configuration.
&#34;&#34;&#34;
def __init__(
self,
name: str,
working_dir: Path,
) -&gt; None:
super().__init__(name=name, working_dir=working_dir)
@override
def add(
self,
action: Callable,
skip: bool = False,
) -&gt; None:
if isinstance(action, Callable):
self.actions.append(action)
self.action_names.append(action.__name__)
self.action_skip.append(skip)
else:
self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
@override
def logic(self) -&gt; None:
for idx, (action, action_name) in enumerate(zip(self.actions, self.action_names)):
# loading
if self.action_skip[idx]:
logger.info(&#39;[No Calculation] Skipping &gt;&gt;%s&lt;&lt;...&#39;, action_name)
self.curr_proc_idx += 1
continue
# calculation
ret = action()
if ret is not None:
raise OutputInPipelineContainerError(
(
f&#39;Output in PipelineContainers not allowed. Action {action_name} &#39;
f&#39;returned values in Container {self.name}.&#39;
)
)
# processing tracking
self.curr_proc_idx += 1</code></pre>
</details>
<div class="desc"><p>Container class for basic actions.
Basic actions are usually functions, which do not take any parameters
and return nothing. Indeed, if an action returns any values after its
procedure is finished, an error is raised. Therefore, PipelineContainers
can be seen as a concatenation of many (independent) simple procedures
which are executed in the order in which they were added to the pipe.
With a simple call of the <code>run</code> method the actions are performed.
Additionally, there is an option to skip actions which can be set in
the <code>add</code> method. This allows for easily configurable pipelines,
e.g., via a user configuration.</p></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li><a title="lang_main.pipelines.base.BasePipeline" href="#lang_main.pipelines.base.BasePipeline">BasePipeline</a></li>
<li>abc.ABC</li>
</ul>
<h3>Methods</h3>
<dl>
<dt id="lang_main.pipelines.base.PipelineContainer.add"><code class="name flex">
<span>def <span class="ident">add</span></span>(<span>self, action: Callable, skip: bool = False) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@override
def add(
self,
action: Callable,
skip: bool = False,
) -&gt; None:
if isinstance(action, Callable):
self.actions.append(action)
self.action_names.append(action.__name__)
self.action_skip.append(skip)
else:
self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.base.PipelineContainer.logic"><code class="name flex">
<span>def <span class="ident">logic</span></span>(<span>self) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@override
def logic(self) -&gt; None:
for idx, (action, action_name) in enumerate(zip(self.actions, self.action_names)):
# loading
if self.action_skip[idx]:
logger.info(&#39;[No Calculation] Skipping &gt;&gt;%s&lt;&lt;...&#39;, action_name)
self.curr_proc_idx += 1
continue
# calculation
ret = action()
if ret is not None:
raise OutputInPipelineContainerError(
(
f&#39;Output in PipelineContainers not allowed. Action {action_name} &#39;
f&#39;returned values in Container {self.name}.&#39;
)
)
# processing tracking
self.curr_proc_idx += 1</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</dd>
</dl>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.pipelines" href="index.html">lang_main.pipelines</a></code></li>
</ul>
</li>
<li><h3><a href="#header-classes">Classes</a></h3>
<ul>
<li>
<h4><code><a title="lang_main.pipelines.base.BasePipeline" href="#lang_main.pipelines.base.BasePipeline">BasePipeline</a></code></h4>
<ul class="">
<li><code><a title="lang_main.pipelines.base.BasePipeline.add" href="#lang_main.pipelines.base.BasePipeline.add">add</a></code></li>
<li><code><a title="lang_main.pipelines.base.BasePipeline.logic" href="#lang_main.pipelines.base.BasePipeline.logic">logic</a></code></li>
<li><code><a title="lang_main.pipelines.base.BasePipeline.panic_wrong_action_type" href="#lang_main.pipelines.base.BasePipeline.panic_wrong_action_type">panic_wrong_action_type</a></code></li>
<li><code><a title="lang_main.pipelines.base.BasePipeline.post_run" href="#lang_main.pipelines.base.BasePipeline.post_run">post_run</a></code></li>
<li><code><a title="lang_main.pipelines.base.BasePipeline.prep_run" href="#lang_main.pipelines.base.BasePipeline.prep_run">prep_run</a></code></li>
<li><code><a title="lang_main.pipelines.base.BasePipeline.run" href="#lang_main.pipelines.base.BasePipeline.run">run</a></code></li>
</ul>
</li>
<li>
<h4><code><a title="lang_main.pipelines.base.Pipeline" href="#lang_main.pipelines.base.Pipeline">Pipeline</a></code></h4>
<ul class="">
<li><code><a title="lang_main.pipelines.base.Pipeline.add" href="#lang_main.pipelines.base.Pipeline.add">add</a></code></li>
<li><code><a title="lang_main.pipelines.base.Pipeline.get_result_path" href="#lang_main.pipelines.base.Pipeline.get_result_path">get_result_path</a></code></li>
<li><code><a title="lang_main.pipelines.base.Pipeline.load_step" href="#lang_main.pipelines.base.Pipeline.load_step">load_step</a></code></li>
<li><code><a title="lang_main.pipelines.base.Pipeline.logic" href="#lang_main.pipelines.base.Pipeline.logic">logic</a></code></li>
<li><code><a title="lang_main.pipelines.base.Pipeline.save_step" href="#lang_main.pipelines.base.Pipeline.save_step">save_step</a></code></li>
</ul>
</li>
<li>
<h4><code><a title="lang_main.pipelines.base.PipelineContainer" href="#lang_main.pipelines.base.PipelineContainer">PipelineContainer</a></code></h4>
<ul class="">
<li><code><a title="lang_main.pipelines.base.PipelineContainer.add" href="#lang_main.pipelines.base.PipelineContainer.add">add</a></code></li>
<li><code><a title="lang_main.pipelines.base.PipelineContainer.logic" href="#lang_main.pipelines.base.PipelineContainer.logic">logic</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,83 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.pipelines API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.pipelines</code></h1>
</header>
<section id="section-intro">
</section>
<section>
<h2 class="section-title" id="header-submodules">Sub-modules</h2>
<dl>
<dt><code class="name"><a title="lang_main.pipelines.base" href="base.html">lang_main.pipelines.base</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.pipelines.predefined" href="predefined.html">lang_main.pipelines.predefined</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="../index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-submodules">Sub-modules</a></h3>
<ul>
<li><code><a title="lang_main.pipelines.base" href="base.html">lang_main.pipelines.base</a></code></li>
<li><code><a title="lang_main.pipelines.predefined" href="predefined.html">lang_main.pipelines.predefined</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,386 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.pipelines.predefined API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.pipelines.predefined</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.pipelines.predefined.build_base_target_feature_pipe"><code class="name flex">
<span>def <span class="ident">build_base_target_feature_pipe</span></span>(<span>) -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_base_target_feature_pipe() -&gt; Pipeline:
pipe_target_feat = Pipeline(name=&#39;Target_Feature&#39;, working_dir=SAVE_PATH_FOLDER)
pipe_target_feat.add(
load_raw_data,
{
&#39;date_cols&#39;: DATE_COLS,
},
)
pipe_target_feat.add(remove_duplicates)
pipe_target_feat.add(remove_NA, save_result=True)
pipe_target_feat.add(
entry_wise_cleansing,
{
&#39;target_features&#39;: (TARGET_FEATURE,),
&#39;cleansing_func&#39;: clean_string_slim,
},
save_result=True,
filename=EntryPoints.TIMELINE,
)
pipe_target_feat.add(
analyse_feature,
{
&#39;target_feature&#39;: TARGET_FEATURE,
},
save_result=True,
)
return pipe_target_feat</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.predefined.build_merge_duplicates_pipe"><code class="name flex">
<span>def <span class="ident">build_merge_duplicates_pipe</span></span>(<span>) -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_merge_duplicates_pipe() -&gt; Pipeline:
pipe_merge = Pipeline(name=&#39;Merge_Duplicates&#39;, working_dir=SAVE_PATH_FOLDER)
pipe_merge.add(
numeric_pre_filter_feature,
{
&#39;feature&#39;: &#39;len&#39;,
&#39;bound_lower&#39;: THRESHOLD_AMOUNT_CHARACTERS,
&#39;bound_upper&#39;: None,
},
)
pipe_merge.add(
merge_similarity_duplicates,
{
&#39;model&#39;: STFR_MODEL,
&#39;cos_sim_threshold&#39;: THRESHOLD_SIMILARITY,
},
save_result=True,
filename=EntryPoints.TOKEN_ANALYSIS,
)
return pipe_merge</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.predefined.build_timeline_pipe"><code class="name flex">
<span>def <span class="ident">build_timeline_pipe</span></span>(<span>) -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_timeline_pipe() -&gt; Pipeline:
pipe_timeline = Pipeline(name=&#39;Timeline_Analysis&#39;, working_dir=SAVE_PATH_FOLDER)
pipe_timeline.add(
cleanup_descriptions,
{
&#39;properties&#39;: [&#39;ErledigungsBeschreibung&#39;],
},
)
pipe_timeline.add(
calc_delta_to_repair,
{
&#39;date_feature_start&#39;: &#39;ErstellungsDatum&#39;,
&#39;date_feature_end&#39;: &#39;ErledigungsDatum&#39;,
&#39;name_delta_feature&#39;: NAME_DELTA_FEAT_TO_REPAIR,
&#39;convert_to_days&#39;: True,
},
save_result=True,
filename=EntryPoints.TIMELINE_POST,
)
pipe_timeline.add(
remove_non_relevant_obj_ids,
{
&#39;thresh_unique_feat_per_id&#39;: THRESHOLD_UNIQUE_TEXTS,
&#39;feature_uniqueness&#39;: UNIQUE_CRITERION_FEATURE,
&#39;feature_obj_id&#39;: FEATURE_NAME_OBJ_ID,
},
save_result=True,
)
pipe_timeline.add(
generate_model_input,
{
&#39;target_feature_name&#39;: &#39;nlp_model_input&#39;,
&#39;model_input_features&#39;: MODEL_INPUT_FEATURES,
},
)
pipe_timeline.add(
filter_activities_per_obj_id,
{
&#39;activity_feature&#39;: ACTIVITY_FEATURE,
&#39;relevant_activity_types&#39;: ACTIVITY_TYPES,
&#39;feature_obj_id&#39;: FEATURE_NAME_OBJ_ID,
&#39;threshold_num_activities&#39;: THRESHOLD_NUM_ACTIVITIES,
},
)
pipe_timeline.add(
get_timeline_candidates,
{
&#39;model&#39;: STFR_MODEL,
&#39;cos_sim_threshold&#39;: THRESHOLD_TIMELINE_SIMILARITY,
&#39;feature_obj_id&#39;: FEATURE_NAME_OBJ_ID,
&#39;feature_obj_text&#39;: FEATURE_NAME_OBJ_TEXT,
&#39;model_input_feature&#39;: &#39;nlp_model_input&#39;,
},
save_result=True,
filename=EntryPoints.TIMELINE_CANDS,
)
return pipe_timeline</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.predefined.build_tk_graph_pipe"><code class="name flex">
<span>def <span class="ident">build_tk_graph_pipe</span></span>(<span>) -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_tk_graph_pipe() -&gt; Pipeline:
pipe_token_analysis = Pipeline(name=&#39;Token_Analysis&#39;, working_dir=SAVE_PATH_FOLDER)
pipe_token_analysis.add(
build_token_graph,
{
&#39;model&#39;: SPACY_MODEL,
&#39;target_feature&#39;: &#39;entry&#39;,
&#39;weights_feature&#39;: &#39;num_occur&#39;,
&#39;batch_idx_feature&#39;: &#39;batched_idxs&#39;,
&#39;build_map&#39;: False,
&#39;batch_size_model&#39;: 50,
},
save_result=True,
filename=EntryPoints.TK_GRAPH_POST,
)
return pipe_token_analysis</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.predefined.build_tk_graph_post_pipe"><code class="name flex">
<span>def <span class="ident">build_tk_graph_post_pipe</span></span>(<span>) -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_tk_graph_post_pipe() -&gt; Pipeline:
pipe_graph_postprocessing = Pipeline(
name=&#39;Graph_Postprocessing&#39;, working_dir=SAVE_PATH_FOLDER
)
pipe_graph_postprocessing.add(
graphs.filter_graph_by_number_edges,
{
&#39;limit&#39;: MAX_EDGE_NUMBER,
&#39;property&#39;: &#39;weight&#39;,
},
)
pipe_graph_postprocessing.add(
graphs.filter_graph_by_node_degree,
{
&#39;bound_lower&#39;: 1,
&#39;bound_upper&#39;: None,
},
)
pipe_graph_postprocessing.add(
graphs.static_graph_analysis,
save_result=True,
filename=EntryPoints.TK_GRAPH_ANALYSIS,
)
return pipe_graph_postprocessing</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.predefined.build_tk_graph_render_pipe"><code class="name flex">
<span>def <span class="ident">build_tk_graph_render_pipe</span></span>(<span>with_subgraphs: bool,<br>export_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out'),<br>base_network_name: str = 'token_graph') -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_tk_graph_render_pipe(
with_subgraphs: bool,
export_folder: Path = SAVE_PATH_FOLDER,
base_network_name: str = CYTO_BASE_NETWORK_NAME,
) -&gt; Pipeline:
# optional dependency: late import
# raises exception if necessary modules are not found
try:
from lang_main.render import cytoscape as cyto
except ImportError:
raise ImportError(
(
&#39;Dependencies for Cytoscape interaction not found.&#39;
&#39;Install package with optional dependencies.&#39;
)
)
pipe_graph_rendering = Pipeline(
name=&#39;Graph_Static-Rendering&#39;,
working_dir=SAVE_PATH_FOLDER,
)
pipe_graph_rendering.add(
cyto.import_to_cytoscape,
{
&#39;network_name&#39;: base_network_name,
},
)
pipe_graph_rendering.add(
cyto.layout_network,
{
&#39;network_name&#39;: base_network_name,
},
)
pipe_graph_rendering.add(
cyto.apply_style_to_network,
{
&#39;network_name&#39;: base_network_name,
},
)
pipe_graph_rendering.add(
cyto.export_network_to_image,
{
&#39;filename&#39;: base_network_name,
&#39;target_folder&#39;: export_folder,
&#39;network_name&#39;: base_network_name,
},
)
if with_subgraphs:
pipe_graph_rendering.add(
cyto.get_subgraph_node_selection,
{
&#39;network_name&#39;: base_network_name,
},
)
pipe_graph_rendering.add(
cyto.build_subnetworks,
{
&#39;export_image&#39;: True,
&#39;target_folder&#39;: export_folder,
&#39;network_name&#39;: base_network_name,
},
)
return pipe_graph_rendering</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.pipelines.predefined.build_tk_graph_rescaling_pipe"><code class="name flex">
<span>def <span class="ident">build_tk_graph_rescaling_pipe</span></span>(<span>save_result: bool, exit_point: lang_main.types.EntryPoints) -&gt; <a title="lang_main.pipelines.base.Pipeline" href="base.html#lang_main.pipelines.base.Pipeline">Pipeline</a></span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_tk_graph_rescaling_pipe(
save_result: bool,
exit_point: EntryPoints,
) -&gt; Pipeline:
pipe_graph_rescaling = Pipeline(name=&#39;Graph_Rescaling&#39;, working_dir=SAVE_PATH_FOLDER)
pipe_graph_rescaling.add(
graphs.pipe_rescale_graph_edge_weights,
)
pipe_graph_rescaling.add(
graphs.pipe_add_graph_metrics,
save_result=save_result,
filename=exit_point,
)
return pipe_graph_rescaling</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.pipelines" href="index.html">lang_main.pipelines</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.pipelines.predefined.build_base_target_feature_pipe" href="#lang_main.pipelines.predefined.build_base_target_feature_pipe">build_base_target_feature_pipe</a></code></li>
<li><code><a title="lang_main.pipelines.predefined.build_merge_duplicates_pipe" href="#lang_main.pipelines.predefined.build_merge_duplicates_pipe">build_merge_duplicates_pipe</a></code></li>
<li><code><a title="lang_main.pipelines.predefined.build_timeline_pipe" href="#lang_main.pipelines.predefined.build_timeline_pipe">build_timeline_pipe</a></code></li>
<li><code><a title="lang_main.pipelines.predefined.build_tk_graph_pipe" href="#lang_main.pipelines.predefined.build_tk_graph_pipe">build_tk_graph_pipe</a></code></li>
<li><code><a title="lang_main.pipelines.predefined.build_tk_graph_post_pipe" href="#lang_main.pipelines.predefined.build_tk_graph_post_pipe">build_tk_graph_post_pipe</a></code></li>
<li><code><a title="lang_main.pipelines.predefined.build_tk_graph_render_pipe" href="#lang_main.pipelines.predefined.build_tk_graph_render_pipe">build_tk_graph_render_pipe</a></code></li>
<li><code><a title="lang_main.pipelines.predefined.build_tk_graph_rescaling_pipe" href="#lang_main.pipelines.predefined.build_tk_graph_rescaling_pipe">build_tk_graph_rescaling_pipe</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,797 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.render.cytoscape API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.render.cytoscape</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.render.cytoscape.analyse_network"><code class="name flex">
<span>def <span class="ident">analyse_network</span></span>(<span>property_degree_weighted: str = 'degree_weighted',<br>network_name: str = 'token_graph') -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def analyse_network(
property_degree_weighted: str = PROPERTY_NAME_DEGREE_WEIGHTED,
network_name: str = CYTO_BASE_NETWORK_NAME,
) -&gt; None:
node_table = p4c.get_table_columns(table=&#39;node&#39;, network=network_name)
net_analyse_possible: bool = True
if len(node_table) &lt; 4: # pragma: no cover
net_analyse_possible = False
if net_analyse_possible:
p4c.analyze_network(directed=False)
node_table = p4c.get_table_columns(table=&#39;node&#39;, network=network_name)
node_table[&#39;stress_norm&#39;] = node_table[&#39;Stress&#39;] / node_table[&#39;Stress&#39;].max()
node_table[CYTO_SELECTION_PROPERTY] = (
node_table[property_degree_weighted]
* node_table[&#39;BetweennessCentrality&#39;]
* node_table[&#39;stress_norm&#39;]
)
else: # pragma: no cover
node_table[CYTO_SELECTION_PROPERTY] = 1
p4c.load_table_data(node_table, data_key_column=&#39;name&#39;, network=network_name)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.render.cytoscape.apply_style_to_network"><code class="name flex">
<span>def <span class="ident">apply_style_to_network</span></span>(<span>style_name: str = 'lang_main',<br>pth_to_stylesheet: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-main/src/lang_main/cytoscape_config/lang_main.xml'),<br>network_name: str = 'token_graph',<br>node_size_property: str = 'node_selection',<br>min_node_size: int = 15,<br>max_node_size: int = 40,<br>sandbox_name: str = 'lang_main') -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def apply_style_to_network(
style_name: str = CYTO_STYLESHEET_NAME,
pth_to_stylesheet: Path = CYTO_PATH_STYLESHEET,
network_name: str = CYTO_BASE_NETWORK_NAME,
node_size_property: str = CYTO_SELECTION_PROPERTY,
min_node_size: int = 15,
max_node_size: int = 40,
sandbox_name: str = CYTO_SANDBOX_NAME,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: apply a chosen Cytoscape style to the defined network
Parameters
----------
style_name : str, optional
Cytoscape name of the style which should be applied,
by default CYTO_STYLESHEET_NAME
pth_to_stylesheet : Path, optional
path where the stylesheet definition in Cytoscape&#39;s XML format can
be found,
by default CYTO_PATH_STYLESHEET
network_name : str, optional
network to apply the style on, by default CYTO_BASE_NETWORK_NAME
Raises
------
FileNotFoundError
if provided stylesheet can not be found under the provided path
&#34;&#34;&#34;
logger.debug(&#39;Applying style to network...&#39;)
styles_avail = cast(list[str], p4c.get_visual_style_names())
logger.debug(&#39;Available styles: %s&#39;, styles_avail)
if style_name not in styles_avail:
if not pth_to_stylesheet.exists():
# existence for standard path verified at import, but not for other
# provided paths
raise FileNotFoundError(
f&#39;Visual stylesheet for Cytoscape not found under: &gt;&gt;{pth_to_stylesheet}&lt;&lt;&#39;
)
# send to sandbox
sandbox_filename = pth_to_stylesheet.name
p4c.sandbox_send_to(
source_file=pth_to_stylesheet,
dest_file=sandbox_filename,
overwrite=True,
sandbox_name=sandbox_name,
)
# load stylesheet
p4c.import_visual_styles(sandbox_filename)
p4c.set_visual_style(style_name, network=network_name)
# node size mapping, only if needed property is available
scheme = p4c.scheme_c_number_continuous(
start_value=min_node_size, end_value=max_node_size
)
node_size_map = p4c.gen_node_size_map(
node_size_property,
number_scheme=scheme,
mapping_type=&#39;c&#39;,
style_name=style_name,
default_number=min_node_size,
)
p4c.set_node_size_mapping(**node_size_map)
fit_content(network_name=network_name)
logger.debug(&#39;Style application to network successful.&#39;)</code></pre>
</details>
<div class="desc"><p>Cytoscape: apply a chosen Cytoscape style to the defined network</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>style_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>Cytoscape name of the style which should be applied,
by default CYTO_STYLESHEET_NAME</dd>
<dt><strong><code>pth_to_stylesheet</code></strong> :&ensp;<code>Path</code>, optional</dt>
<dd>path where the stylesheet definition in Cytoscape's XML format can
be found,
by default CYTO_PATH_STYLESHEET</dd>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network to apply the style on, by default CYTO_BASE_NETWORK_NAME</dd>
</dl>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>FileNotFoundError</code></dt>
<dd>if provided stylesheet can not be found under the provided path</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.build_subnetworks"><code class="name flex">
<span>def <span class="ident">build_subnetworks</span></span>(<span>nodes_to_analyse: Iterable[int],<br>network_name: str = 'token_graph',<br>export_image: bool = True,<br>target_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out')) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def build_subnetworks(
nodes_to_analyse: Iterable[CytoNodeID],
network_name: str = CYTO_BASE_NETWORK_NAME,
export_image: bool = True,
target_folder: Path = SAVE_PATH_FOLDER,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: iteratively build subnetworks from a collection of nodes
and their respective neighbouring nodes
Parameters
----------
nodes_to_analyse : Iterable[CytoNodeID]
collection of nodes to make subnetworks from, for each node a dedicated
subnetwork will be generated
network_name : str, optional
network which contains the provided nodes,
by default CYTO_BASE_NETWORK_NAME
export_image : bool, optional
trigger image export of newly generated subnetworks, by default True
&#34;&#34;&#34;
logger.debug(&#39;Generating all subnetworks for node selection...&#39;)
for idx, node in enumerate(nodes_to_analyse):
select_neighbours_of_node(node=node, network_name=network_name)
make_subnetwork(
index=idx,
network_name=network_name,
export_image=export_image,
target_folder=target_folder,
)
logger.debug(&#39;Generation of all subnetworks for node selection successful.&#39;)</code></pre>
</details>
<div class="desc"><p>Cytoscape: iteratively build subnetworks from a collection of nodes
and their respective neighbouring nodes</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>nodes_to_analyse</code></strong> :&ensp;<code>Iterable[CytoNodeID]</code></dt>
<dd>collection of nodes to make subnetworks from, for each node a dedicated
subnetwork will be generated</dd>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network which contains the provided nodes,
by default CYTO_BASE_NETWORK_NAME</dd>
<dt><strong><code>export_image</code></strong> :&ensp;<code>bool</code>, optional</dt>
<dd>trigger image export of newly generated subnetworks, by default True</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.change_default_layout"><code class="name flex">
<span>def <span class="ident">change_default_layout</span></span>(<span>) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def change_default_layout() -&gt; None:
&#34;&#34;&#34;Cytoscape: resets the default layout to `grid` to accelerate the import process
(grid layout is one of the fastest)
Raises
------
RequestException
API endpoint not reachable or CyREST operation not successful
&#34;&#34;&#34;
body: dict[str, str] = {&#39;value&#39;: &#39;grid&#39;, &#39;key&#39;: &#39;layout.default&#39;}
try:
p4c.cyrest_put(&#39;properties/cytoscape3.props/layout.default&#39;, body=body)
except RequestException as error:
logger.error(&#39;[CytoAPIConnection] Property change of default layout not successful.&#39;)
raise error</code></pre>
</details>
<div class="desc"><p>Cytoscape: resets the default layout to <code>grid</code> to accelerate the import process
(grid layout is one of the fastest)</p>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>RequestException</code></dt>
<dd>API endpoint not reachable or CyREST operation not successful</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.export_network_to_image"><code class="name flex">
<span>def <span class="ident">export_network_to_image</span></span>(<span>filename: str,<br>target_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out'),<br>filetype: Literal['JPEG', 'PDF', 'PNG', 'PS', 'SVG'] = 'SVG',<br>network_name: str = 'token_graph',<br>pdf_export_page_size: Literal['A0', 'A1', 'A2', 'A3', 'A4', 'A5', 'Auto', 'Legal', 'Letter', 'Tabloid'] = 'A4',<br>sandbox_name: str = 'lang_main') -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def export_network_to_image(
filename: str,
target_folder: Path = SAVE_PATH_FOLDER,
filetype: CytoExportFileTypes = &#39;SVG&#39;,
network_name: str = CYTO_BASE_NETWORK_NAME,
pdf_export_page_size: CytoExportPageSizes = &#39;A4&#39;,
sandbox_name: str = CYTO_SANDBOX_NAME,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: export current selected view as image
Parameters
----------
filename : str
export filename
filetype : CytoExportFileTypes, optional
export filetype supported by Cytoscape, by default &#39;SVG&#39;
network_name : str, optional
network to export, by default CYTO_BASE_NETWORK_NAME
pdf_export_page_size : CytoExportPageSizes, optional
page size which should be used for PDF exports supported by Cytoscape,
by default &#39;A4&#39;
&#34;&#34;&#34;
logger.debug(&#39;Exporting image to file...&#39;)
if not target_folder.exists(): # pragma: no cover
target_folder.mkdir(parents=True)
dst_file_pth = (target_folder / filename).with_suffix(f&#39;.{filetype.lower()}&#39;)
text_as_font = True
if filetype == &#39;SVG&#39;:
text_as_font = False
# close non-necessary windows and fit graph in frame before image display
fit_content(network_name=network_name)
# image is generated in sandbox directory and transferred to target destination
# (preparation for remote instances of Cytoscape)
p4c.export_image(
filename=filename,
type=filetype,
network=network_name,
overwrite_file=True,
all_graphics_details=True,
export_text_as_font=text_as_font,
page_size=pdf_export_page_size,
)
logger.debug(&#39;Exported image to sandbox.&#39;)
logger.debug(&#39;Transferring image from sandbox to target destination...&#39;)
sandbox_filename = f&#39;{filename}.{filetype.lower()}&#39;
p4c.sandbox_get_from(
source_file=sandbox_filename,
dest_file=str(dst_file_pth),
overwrite=True,
sandbox_name=sandbox_name,
)
logger.debug(&#39;Transfer of image from sandbox to target destination successful.&#39;)</code></pre>
</details>
<div class="desc"><p>Cytoscape: export current selected view as image</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>filename</code></strong> :&ensp;<code>str</code></dt>
<dd>export filename</dd>
<dt><strong><code>filetype</code></strong> :&ensp;<code>CytoExportFileTypes</code>, optional</dt>
<dd>export filetype supported by Cytoscape, by default 'SVG'</dd>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network to export, by default CYTO_BASE_NETWORK_NAME</dd>
<dt><strong><code>pdf_export_page_size</code></strong> :&ensp;<code>CytoExportPageSizes</code>, optional</dt>
<dd>page size which should be used for PDF exports supported by Cytoscape,
by default 'A4'</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.fit_content"><code class="name flex">
<span>def <span class="ident">fit_content</span></span>(<span>zoom_factor: float = 0.96, network_name: str = 'token_graph') -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def fit_content(
zoom_factor: float = CYTO_NETWORK_ZOOM_FACTOR,
network_name: str = CYTO_BASE_NETWORK_NAME,
) -&gt; None:
p4c.hide_all_panels()
p4c.fit_content(selected_only=False, network=network_name)
zoom_current = p4c.get_network_zoom(network=network_name)
zoom_new = zoom_current * zoom_factor
p4c.set_network_zoom_bypass(zoom_new, bypass=False, network=network_name)</code></pre>
</details>
<div class="desc"></div>
</dd>
<dt id="lang_main.render.cytoscape.get_subgraph_node_selection"><code class="name flex">
<span>def <span class="ident">get_subgraph_node_selection</span></span>(<span>network_name: str = 'token_graph', num_subgraphs: int = 5) -&gt; list[int]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_subgraph_node_selection(
network_name: str = CYTO_BASE_NETWORK_NAME,
num_subgraphs: int = CYTO_NUMBER_SUBGRAPHS,
) -&gt; list[CytoNodeID]:
&#34;&#34;&#34;Cytoscape: obtain the relevant nodes for iterative subgraph generation
Parameters
----------
network_name : str, optional
network to retrieve the nodes from, by default CYTO_BASE_NETWORK_NAME
property_degree_weighted : str, optional
property name which contains the weighted degree,
by default PROPERTY_NAME_DEGREE_WEIGHTED
num_subgraphs : int, optional
number of relevant nodes which form the basis to generate subgraphs from,
by default CYTO_NUMBER_SUBGRAPHS
Returns
-------
list[CytoNodeID]
list containing all relevant Cytoscape nodes
&#34;&#34;&#34;
logger.debug(&#39;Selecting nodes for subgraph generation...&#39;)
node_table = p4c.get_table_columns(table=&#39;node&#39;, network=network_name)
node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False)
p4c.load_table_data(node_table, data_key_column=&#39;name&#39;, network=network_name)
node_table_choice = node_table.iloc[:num_subgraphs]
logger.debug(&#39;Selection of nodes for subgraph generation successful.&#39;)
return node_table_choice[&#39;SUID&#39;].to_list()</code></pre>
</details>
<div class="desc"><p>Cytoscape: obtain the relevant nodes for iterative subgraph generation</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network to retrieve the nodes from, by default CYTO_BASE_NETWORK_NAME</dd>
<dt><strong><code>property_degree_weighted</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>property name which contains the weighted degree,
by default PROPERTY_NAME_DEGREE_WEIGHTED</dd>
<dt><strong><code>num_subgraphs</code></strong> :&ensp;<code>int</code>, optional</dt>
<dd>number of relevant nodes which form the basis to generate subgraphs from,
by default CYTO_NUMBER_SUBGRAPHS</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>list[CytoNodeID]</code></dt>
<dd>list containing all relevant Cytoscape nodes</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.import_to_cytoscape"><code class="name flex">
<span>def <span class="ident">import_to_cytoscape</span></span>(<span>graph: networkx.classes.digraph.DiGraph | networkx.classes.graph.Graph,<br>network_name: str = 'token_graph',<br>sandbox_name: str = 'lang_main',<br>reinitialise_sandbox: bool = True) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def import_to_cytoscape(
graph: DiGraph | Graph,
network_name: str = CYTO_BASE_NETWORK_NAME,
sandbox_name: str = CYTO_SANDBOX_NAME,
reinitialise_sandbox: bool = True,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: import NetworkX graph as new network collection
Parameters
----------
graph : DiGraph | Graph
NetworkX graph object
&#34;&#34;&#34;
logger.debug(&#39;Checking Cytoscape connection...&#39;)
verify_connection()
logger.debug(&#39;Checking graph size for rendering...&#39;)
verify_graph_render_size(graph)
logger.debug(&#39;Setting default layout to improve import speed...&#39;)
change_default_layout()
logger.debug(&#39;Setting Cytoscape sandbox...&#39;)
p4c.sandbox_set(
sandbox_name=sandbox_name,
reinitialize=reinitialise_sandbox,
copy_samples=False,
)
logger.debug(&#39;Importing to and analysing network in Cytoscape...&#39;)
p4c.delete_all_networks()
p4c.create_network_from_networkx(
graph,
title=network_name,
collection=CYTO_COLLECTION_NAME,
)
analyse_network(network_name=network_name)
logger.debug(&#39;Import and analysis of network to Cytoscape successful.&#39;)</code></pre>
</details>
<div class="desc"><p>Cytoscape: import NetworkX graph as new network collection</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>graph</code></strong> :&ensp;<code>DiGraph | Graph</code></dt>
<dd>NetworkX graph object</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.layout_network"><code class="name flex">
<span>def <span class="ident">layout_network</span></span>(<span>layout_name: Literal['attribute-circle', 'attribute-grid', 'attributes-layout', 'circular', 'cose', 'degree-circle', 'force-directed', 'force-directed-cl', 'fruchterman-rheingold', 'grid', 'hierarchical', 'isom', 'kamada-kawai', 'stacked-node-layout'] = 'force-directed',<br>layout_properties: dict[str, float | bool] = {'numIterations': 1000, 'defaultSpringCoefficient': 0.0001, 'defaultSpringLength': 45, 'defaultNodeMass': 11, 'isDeterministic': True, 'singlePartition': False},<br>network_name: str = 'token_graph') -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def layout_network(
layout_name: CytoLayouts = CYTO_LAYOUT_NAME,
layout_properties: CytoLayoutProperties = CYTO_LAYOUT_PROPERTIES,
network_name: str = CYTO_BASE_NETWORK_NAME,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: apply a supported layout algorithm to currently selected
network
Parameters
----------
layout_name : CytoLayouts, optional
layout algorithm supported by Cytoscape (name of the CyREST API, does not
necessarily match the name in the Cytoscape UI),
by default CYTO_LAYOUT_NAME
layout_properties : CytoLayoutProperties, optional
configuration of parameters for the given layout algorithm,
by default CYTO_LAYOUT_PROPERTIES
network_name : str, optional
network to apply the layout algorithm on, by default CYTO_BASE_NETWORK_NAME
&#34;&#34;&#34;
logger.debug(&#39;Applying layout to network...&#39;)
p4c.set_layout_properties(layout_name, layout_properties)
p4c.layout_network(layout_name=layout_name, network=network_name)
fit_content(network_name=network_name)
logger.debug(&#39;Layout application to network successful.&#39;)</code></pre>
</details>
<div class="desc"><p>Cytoscape: apply a supported layout algorithm to currently selected
network</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>layout_name</code></strong> :&ensp;<code>CytoLayouts</code>, optional</dt>
<dd>layout algorithm supported by Cytoscape (name of the CyREST API, does not
necessarily match the name in the Cytoscape UI),
by default CYTO_LAYOUT_NAME</dd>
<dt><strong><code>layout_properties</code></strong> :&ensp;<code>CytoLayoutProperties</code>, optional</dt>
<dd>configuration of parameters for the given layout algorithm,
by default CYTO_LAYOUT_PROPERTIES</dd>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network to apply the layout algorithm on, by default CYTO_BASE_NETWORK_NAME</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.make_subnetwork"><code class="name flex">
<span>def <span class="ident">make_subnetwork</span></span>(<span>index: int,<br>network_name: str = 'token_graph',<br>export_image: bool = True,<br>target_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out')) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def make_subnetwork(
index: int,
network_name: str = CYTO_BASE_NETWORK_NAME,
export_image: bool = True,
target_folder: Path = SAVE_PATH_FOLDER,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: generate a new subnetwork based on the currently
selected nodes and edges
Parameters
----------
index : int
id-like property to identify the subnetwork relative to its parent
network_name : str, optional
network to generate subnetwork from, by default CYTO_BASE_NETWORK_NAME
export_image : bool, optional
trigger image export of newly generated subnetwork, by default True
&#34;&#34;&#34;
logger.debug(&#39;Generating subnetwork with index %d...&#39;, index)
subnetwork_name = network_name + f&#39;_sub_{index+1}&#39;
p4c.create_subnetwork(
nodes=&#39;selected&#39;,
edges=&#39;selected&#39;,
subnetwork_name=subnetwork_name,
network=network_name,
)
p4c.set_current_network(subnetwork_name)
if export_image:
time.sleep(1)
export_network_to_image(
filename=subnetwork_name,
target_folder=target_folder,
network_name=subnetwork_name,
)
logger.debug(&#39;Generation of subnetwork with index %d successful.&#39;, index)</code></pre>
</details>
<div class="desc"><p>Cytoscape: generate a new subnetwork based on the currently
selected nodes and edges</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>index</code></strong> :&ensp;<code>int</code></dt>
<dd>id-like property to identify the subnetwork relative to its parent</dd>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network to generate subnetwork from, by default CYTO_BASE_NETWORK_NAME</dd>
<dt><strong><code>export_image</code></strong> :&ensp;<code>bool</code>, optional</dt>
<dd>trigger image export of newly generated subnetwork, by default True</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.reset_current_network_to_base"><code class="name flex">
<span>def <span class="ident">reset_current_network_to_base</span></span>(<span>) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def reset_current_network_to_base() -&gt; None:
&#34;&#34;&#34;resets the currently selected network in Cytoscape back to the base one&#34;&#34;&#34;
p4c.set_current_network(CYTO_BASE_NETWORK_NAME)</code></pre>
</details>
<div class="desc"><p>resets the currently selected network in Cytoscape back to the base one</p></div>
</dd>
<dt id="lang_main.render.cytoscape.select_neighbours_of_node"><code class="name flex">
<span>def <span class="ident">select_neighbours_of_node</span></span>(<span>node: int, neighbour_iter_depth: int = 2, network_name: str = 'token_graph') -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def select_neighbours_of_node(
node: CytoNodeID,
neighbour_iter_depth: int = CYTO_ITER_NEIGHBOUR_DEPTH,
network_name: str = CYTO_BASE_NETWORK_NAME,
) -&gt; None:
&#34;&#34;&#34;Cytoscape: iterative selection of a node&#39;s neighbouring nodes and
their connecting edges
Parameters
----------
node : CytoNodeID
node whose neighbours should be selected
neighbour_iter_depth : int, optional
indicates how many levels of neighbours should be chosen, e.g. 1 --&gt; only
first-level neighbours are considered which are directly connected to the node,
2 --&gt; all nodes with iteration depth of 1 are chosen and additionally their
direct neighbours,
by default CYTO_ITER_NEIGHBOUR_DEPTH
network_name : str, optional
network to perform action on, by default CYTO_BASE_NETWORK_NAME
&#34;&#34;&#34;
logger.debug(&#39;Selecting node neighbours for %s...&#39;, node)
p4c.clear_selection(network=network_name)
p4c.select_nodes(node, network=network_name)
for _ in range(neighbour_iter_depth):
_ = p4c.select_first_neighbors(network=network_name)
_ = p4c.select_edges_connecting_selected_nodes()
logger.debug(&#39;Selection of node neighbours for %s successful.&#39;, node)</code></pre>
</details>
<div class="desc"><p>Cytoscape: iterative selection of a node's neighbouring nodes and
their connecting edges</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>node</code></strong> :&ensp;<code>CytoNodeID</code></dt>
<dd>node whose neighbours should be selected</dd>
<dt><strong><code>neighbour_iter_depth</code></strong> :&ensp;<code>int</code>, optional</dt>
<dd>indicates how many levels of neighbours should be chosen, e.g. 1 &ndash;&gt; only
first-level neighbours are considered which are directly connected to the node,
2 &ndash;&gt; all nodes with iteration depth of 1 are chosen and additionally their
direct neighbours,
by default CYTO_ITER_NEIGHBOUR_DEPTH</dd>
<dt><strong><code>network_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>network to perform action on, by default CYTO_BASE_NETWORK_NAME</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.verify_connection"><code class="name flex">
<span>def <span class="ident">verify_connection</span></span>(<span>) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def verify_connection() -&gt; None:
&#34;&#34;&#34;Cytoscape: checks if CyREST and Cytoscape versions are compatible and
if Cytoscape API endpoint is reachable
Raises
------
CyError
incompatible CyREST or Cytoscape versions
RequestException
API endpoint not reachable
&#34;&#34;&#34;
try:
p4c.cytoscape_ping()
except CyError as error: # pragma: no cover
logger.error(&#39;[CyError] CyREST or Cytoscape version not supported.&#39;)
raise error
except RequestException as error:
logger.error(&#39;[CytoAPIConnection] Connection to CyREST API failed.&#39;)
raise error</code></pre>
</details>
<div class="desc"><p>Cytoscape: checks if CyREST and Cytoscape versions are compatible and
if Cytoscape API endpoint is reachable</p>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>CyError</code></dt>
<dd>incompatible CyREST or Cytoscape versions</dd>
<dt><code>RequestException</code></dt>
<dd>API endpoint not reachable</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.verify_graph_render_size"><code class="name flex">
<span>def <span class="ident">verify_graph_render_size</span></span>(<span>graph: networkx.classes.digraph.DiGraph | networkx.classes.graph.Graph,<br>max_node_count: int | None = 500,<br>max_edge_count: int | None = 800) -&gt; None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def verify_graph_render_size(
graph: Graph | DiGraph,
max_node_count: int | None = CYTO_MAX_NODE_COUNT,
max_edge_count: int | None = CYTO_MAX_EDGE_COUNT,
) -&gt; None:
&#34;&#34;&#34;verify that the graph size can still be handled within an acceptable time
frame for rendering in Cytoscape
Parameters
----------
graph : Graph | DiGraph
graph to verify
max_node_count : int | None, optional
maximum allowed number of nodes, by default CYTO_MAX_NODE_COUNT
max_edge_count : int | None, optional
maximum allowed number of edges, by default CYTO_MAX_EDGE_COUNT
Raises
------
GraphRenderError
if any of the provided limits is exceeded
&#34;&#34;&#34;
num_nodes = len(graph.nodes)
num_edges = len(graph.edges)
if max_node_count is not None and num_nodes &gt; max_node_count:
raise GraphRenderError(
f&#39;Maximum number of nodes for rendering exceeded. &#39;
f&#39;Limit {max_node_count}, Counted: {num_nodes}&#39;
)
if max_edge_count is not None and num_edges &gt; max_edge_count:
raise GraphRenderError(
f&#39;Maximum number of edges for rendering exceeded. &#39;
f&#39;Limit {max_edge_count}, Counted: {num_edges}&#39;
)</code></pre>
</details>
<div class="desc"><p>verify that the graph size can still be handled within an acceptable time
frame for rendering in Cytoscape</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>graph</code></strong> :&ensp;<code>Graph | DiGraph</code></dt>
<dd>graph to verify</dd>
<dt><strong><code>max_node_count</code></strong> :&ensp;<code>int | None</code>, optional</dt>
<dd>maximum allowed number of nodes, by default CYTO_MAX_NODE_COUNT</dd>
<dt><strong><code>max_edge_count</code></strong> :&ensp;<code>int | None</code>, optional</dt>
<dd>maximum allowed number of edges, by default CYTO_MAX_EDGE_COUNT</dd>
</dl>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>GraphRenderError</code></dt>
<dd>if any of the provided limits is exceeded</dd>
</dl></div>
</dd>
<dt id="lang_main.render.cytoscape.verify_table_property"><code class="name flex">
<span>def <span class="ident">verify_table_property</span></span>(<span>property: str,<br>table_type: Literal['node', 'edge', 'network'] = 'node',<br>network_name: str = 'token_graph') -&gt; bool</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def verify_table_property(
property: str,
table_type: Literal[&#39;node&#39;, &#39;edge&#39;, &#39;network&#39;] = &#39;node&#39;,
network_name: str = CYTO_BASE_NETWORK_NAME,
) -&gt; bool:
table = p4c.get_table_columns(table=table_type, network=network_name)
logger.debug(&#39;Table &gt;&gt;%s&lt;&lt; wiht columns: %s&#39;, table, table.columns)
return property in table.columns</code></pre>
</details>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.render" href="index.html">lang_main.render</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.render.cytoscape.analyse_network" href="#lang_main.render.cytoscape.analyse_network">analyse_network</a></code></li>
<li><code><a title="lang_main.render.cytoscape.apply_style_to_network" href="#lang_main.render.cytoscape.apply_style_to_network">apply_style_to_network</a></code></li>
<li><code><a title="lang_main.render.cytoscape.build_subnetworks" href="#lang_main.render.cytoscape.build_subnetworks">build_subnetworks</a></code></li>
<li><code><a title="lang_main.render.cytoscape.change_default_layout" href="#lang_main.render.cytoscape.change_default_layout">change_default_layout</a></code></li>
<li><code><a title="lang_main.render.cytoscape.export_network_to_image" href="#lang_main.render.cytoscape.export_network_to_image">export_network_to_image</a></code></li>
<li><code><a title="lang_main.render.cytoscape.fit_content" href="#lang_main.render.cytoscape.fit_content">fit_content</a></code></li>
<li><code><a title="lang_main.render.cytoscape.get_subgraph_node_selection" href="#lang_main.render.cytoscape.get_subgraph_node_selection">get_subgraph_node_selection</a></code></li>
<li><code><a title="lang_main.render.cytoscape.import_to_cytoscape" href="#lang_main.render.cytoscape.import_to_cytoscape">import_to_cytoscape</a></code></li>
<li><code><a title="lang_main.render.cytoscape.layout_network" href="#lang_main.render.cytoscape.layout_network">layout_network</a></code></li>
<li><code><a title="lang_main.render.cytoscape.make_subnetwork" href="#lang_main.render.cytoscape.make_subnetwork">make_subnetwork</a></code></li>
<li><code><a title="lang_main.render.cytoscape.reset_current_network_to_base" href="#lang_main.render.cytoscape.reset_current_network_to_base">reset_current_network_to_base</a></code></li>
<li><code><a title="lang_main.render.cytoscape.select_neighbours_of_node" href="#lang_main.render.cytoscape.select_neighbours_of_node">select_neighbours_of_node</a></code></li>
<li><code><a title="lang_main.render.cytoscape.verify_connection" href="#lang_main.render.cytoscape.verify_connection">verify_connection</a></code></li>
<li><code><a title="lang_main.render.cytoscape.verify_graph_render_size" href="#lang_main.render.cytoscape.verify_graph_render_size">verify_graph_render_size</a></code></li>
<li><code><a title="lang_main.render.cytoscape.verify_table_property" href="#lang_main.render.cytoscape.verify_table_property">verify_table_property</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,182 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.render.cytoscape_monkeypatch API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.render.cytoscape_monkeypatch</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.render.cytoscape_monkeypatch.select_edges_connecting_selected_nodes"><code class="name flex">
<span>def <span class="ident">select_edges_connecting_selected_nodes</span></span>(<span>network=None, base_url='http://127.0.0.1:1234/v1')</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">@cy_log # pragma: no cover
def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405 # pragma: no cover
&#34;&#34;&#34;Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.
Any edges selected beforehand are deselected before any new edges are selected
Args:
network (SUID or str or None): Name or SUID of a network. Default is the
&#34;current&#34; network active in Cytoscape.
base_url (str): Ignore unless you need to specify a custom domain,
port or version to connect to the CyREST API. Default is http://127.0.0.1:1234
and the latest version of the CyREST API supported by this version of py4cytoscape.
Returns:
dict: {&#39;nodes&#39;: [node list], &#39;edges&#39;: [edge list]} or None if no selected nodes
Raises:
CyError: if network name or SUID doesn&#39;t exist
requests.exceptions.RequestException: if can&#39;t connect to Cytoscape or Cytoscape returns an error
Examples:
&gt;&gt;&gt; select_edges_connecting_selected_nodes()
None
&gt;&gt;&gt; select_edges_connecting_selected_nodes(network=&#39;My Network&#39;)
{&#39;nodes&#39;: [103990, 103991, ...], &#39;edges&#39;: [104432, 104431, ...]}
&gt;&gt;&gt; select_edges_connecting_selected_nodes(network=52)
{&#39;nodes&#39;: [103990, 103991, ...], &#39;edges&#39;: [104432, 104431, ...]}
Note:
In the return value node list is list of all selected nodes, and
edge list is the SUIDs of selected edges -- dict is None if no nodes were selected or there were no newly
created edges
&#34;&#34;&#34;
net_suid = networks.get_network_suid(network, base_url=base_url)
selected_nodes = get_selected_nodes(network=net_suid, base_url=base_url)
# TODO: In R version, NA test is after len() test ... shouldn&#39;t it be before?
if not selected_nodes:
return None
all_edges = networks.get_all_edges(net_suid, base_url=base_url)
selected_sources = set()
selected_targets = set()
for n in selected_nodes:
n = re_parenthesis_1.sub(&#39;\(&#39;, n) # type: ignore
n = re_parenthesis_2.sub(&#39;\)&#39;, n) # type: ignore
selected_sources |= set(filter(re.compile(&#39;^&#39; + n).search, all_edges)) # type: ignore
selected_targets |= set(filter(re.compile(n + &#39;$&#39;).search, all_edges)) # type: ignore
selected_edges = list(selected_sources.intersection(selected_targets))
if len(selected_edges) == 0:
return None
res = select_edges(
selected_edges,
by_col=&#39;name&#39;,
preserve_current_selection=False,
network=net_suid,
base_url=base_url,
)
return res
# TODO: isn&#39;t the pattern match a bit cheesy ... shouldn&#39;t it be ^+n+&#39; (&#39; and &#39;) &#39;+n+$ ???</code></pre>
</details>
<div class="desc"><p>Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.</p>
<p>Any edges selected beforehand are deselected before any new edges are selected</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>network</code></strong> :&ensp;<code>SUID</code> or <code>str</code> or <code>None</code></dt>
<dd>Name or SUID of a network. Default is the
"current" network active in Cytoscape.</dd>
<dt><strong><code>base_url</code></strong> :&ensp;<code>str</code></dt>
<dd>Ignore unless you need to specify a custom domain,
port or version to connect to the CyREST API. Default is <a href="http://127.0.0.1:1234">http://127.0.0.1:1234</a>
and the latest version of the CyREST API supported by this version of py4cytoscape.</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>dict</code></dt>
<dd>{'nodes': [node list], 'edges': [edge list]} or None if no selected nodes</dd>
</dl>
<h2 id="raises">Raises</h2>
<dl>
<dt><code>CyError</code></dt>
<dd>if network name or SUID doesn't exist</dd>
<dt><code>requests.exceptions.RequestException</code></dt>
<dd>if can't connect to Cytoscape or Cytoscape returns an error</dd>
</dl>
<h2 id="examples">Examples</h2>
<pre><code class="language-python-repl">&gt;&gt;&gt; select_edges_connecting_selected_nodes()
None
&gt;&gt;&gt; select_edges_connecting_selected_nodes(network='My Network')
{'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
&gt;&gt;&gt; select_edges_connecting_selected_nodes(network=52)
{'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
</code></pre>
<h2 id="note">Note</h2>
<p>In the return value node list is list of all selected nodes, and
edge list is the SUIDs of selected edges &ndash; dict is None if no nodes were selected or there were no newly
created edges</p></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.render" href="index.html">lang_main.render</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.render.cytoscape_monkeypatch.select_edges_connecting_selected_nodes" href="#lang_main.render.cytoscape_monkeypatch.select_edges_connecting_selected_nodes">select_edges_connecting_selected_nodes</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

View File

@ -0,0 +1,83 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.render API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.render</code></h1>
</header>
<section id="section-intro">
</section>
<section>
<h2 class="section-title" id="header-submodules">Sub-modules</h2>
<dl>
<dt><code class="name"><a title="lang_main.render.cytoscape" href="cytoscape.html">lang_main.render.cytoscape</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
<dt><code class="name"><a title="lang_main.render.cytoscape_monkeypatch" href="cytoscape_monkeypatch.html">lang_main.render.cytoscape_monkeypatch</a></code></dt>
<dd>
<div class="desc"></div>
</dd>
</dl>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="../index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-submodules">Sub-modules</a></h3>
<ul>
<li><code><a title="lang_main.render.cytoscape" href="cytoscape.html">lang_main.render.cytoscape</a></code></li>
<li><code><a title="lang_main.render.cytoscape_monkeypatch" href="cytoscape_monkeypatch.html">lang_main.render.cytoscape_monkeypatch</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

261
docs/lang_main/search.html Normal file
View File

@ -0,0 +1,261 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.search API documentation</title>
<meta name="description" content="">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
// Runs once the document has been parsed: highlight all code listings, then
// collapse long Python docstrings so the source views stay compact.
// Limit highlight.js language detection to this fixed list.
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse long docstrings in highlighted Python source into details elements */
// The work is deferred by 100 ms; presumably so the highlighted markup exists
// before it is queried -- TODO confirm.
setTimeout(() => {
// Candidates: string tokens that are direct children of a highlighted Python block.
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
// Keep only tokens whose innerHTML is longer than 200 characters and starts
// with a triple quote (double or single).
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
// Build a details element that keeps the hljs-string class of the token.
let d = document.createElement('details');
d.classList.add('hljs-string');
// The summary shows the opening quotes; the remainder of the token follows it.
// NOTE(review): the summary is hard-coded to triple double-quotes, even when
// the matched token opened with triple single-quotes.
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
// Swap the original token for the collapsible element.
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.search</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.search.search_base_path"><code class="name flex">
<span>def <span class="ident">search_base_path</span></span>(<span>starting_path: pathlib.Path, stop_folder_name: str | None = None) -&gt; pathlib.Path | None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def search_base_path(
starting_path: Path,
stop_folder_name: str | None = None,
) -&gt; Path | None:
&#34;&#34;&#34;Iteratively searches the parent directories of the starting path
and look for folders matching the given name. If a match is encountered,
the parent path will be returned.
Example:
starting_path = path/to/start/folder
stop_folder_name = &#39;to&#39;
returned path = &#39;path/&#39;
Parameters
----------
starting_path : Path
non-inclusive starting path
stop_folder_name : str, optional
name of the last folder in the directory tree to search, by default None
Returns
-------
Path | None
Path if corresponding base path was found, None otherwise
&#34;&#34;&#34;
stop_folder_path: Path | None = None
base_path: Path | None = None
for search_path in starting_path.parents:
if stop_folder_name is not None and search_path.name == stop_folder_name:
# library is placed inside a whole python installation for deployment
# only look up to this folder
stop_folder_path = search_path
break
if stop_folder_path is not None:
base_path = stop_folder_path.parent
return base_path</code></pre>
</details>
<div class="desc"><p>Iteratively searches the parent directories of the starting path
and looks for folders matching the given name. If a match is encountered,
the parent of the matching folder will be returned.</p>
<p>Example:
starting_path = path/to/start/folder
stop_folder_name = 'to'
returned path = 'path/'</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>starting_path</code></strong> :&ensp;<code>Path</code></dt>
<dd>non-inclusive starting path</dd>
<dt><strong><code>stop_folder_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>name of the last folder in the directory tree to search, by default None</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>Path | None</code></dt>
<dd>Path if corresponding base path was found, None otherwise</dd>
</dl></div>
</dd>
<dt id="lang_main.search.search_cwd"><code class="name flex">
<span>def <span class="ident">search_cwd</span></span>(<span>glob_pattern: str) -&gt; pathlib.Path | None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def search_cwd(
glob_pattern: str,
) -&gt; Path | None:
&#34;&#34;&#34;Searches the current working directory and looks for files
matching the glob pattern.
Returns the first match encountered.
Parameters
----------
glob_pattern : str, optional
pattern to look for, first match will be returned
Returns
-------
Path | None
Path if corresponding object was found, None otherwise
&#34;&#34;&#34;
path_found: Path | None = None
res = tuple(Path.cwd().glob(glob_pattern))
if res:
path_found = res[0]
return path_found</code></pre>
</details>
<div class="desc"><p>Searches the current working directory and looks for files
matching the glob pattern.
Returns the first match encountered.</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>glob_pattern</code></strong> :&ensp;<code>str</code></dt>
<dd>pattern to look for, first match will be returned</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>Path | None</code></dt>
<dd>Path if corresponding object was found, None otherwise</dd>
</dl></div>
</dd>
<dt id="lang_main.search.search_iterative"><code class="name flex">
<span>def <span class="ident">search_iterative</span></span>(<span>starting_path: pathlib.Path,<br>glob_pattern: str,<br>stop_folder_name: str | None = None) -&gt; pathlib.Path | None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def search_iterative(
starting_path: Path,
glob_pattern: str,
stop_folder_name: str | None = None,
) -&gt; Path | None:
&#34;&#34;&#34;Iteratively searches the parent directories of the starting path
and look for files matching the glob pattern. The starting path is not
searched, only its parents. Therefore the starting path can also point
to a file. The folder in which it is placed in will be searched.
Returns the first match encountered.
The parent of the stop folder will be searched if it exists.
Parameters
----------
starting_path : Path
non-inclusive starting path
glob_pattern : str, optional
pattern to look for, first match will be returned
stop_folder_name : str, optional
name of the last folder in the directory tree to search, by default None
Returns
-------
Path | None
Path if corresponding object was found, None otherwise
&#34;&#34;&#34;
file_path: Path | None = None
stop_folder_reached: bool = False
for search_path in starting_path.parents:
res = tuple(search_path.glob(glob_pattern))
if res:
file_path = res[0]
break
elif stop_folder_reached:
break
if stop_folder_name is not None and search_path.name == stop_folder_name:
# library is placed inside a whole python installation for deployment
# if this folder is reached, only look up one parent above
stop_folder_reached = True
return file_path</code></pre>
</details>
<div class="desc"><p>Iteratively searches the parent directories of the starting path
and looks for files matching the glob pattern. The starting path is not
searched, only its parents. Therefore the starting path can also point
to a file. The folder in which it is placed will be searched.
Returns the first match encountered.
The parent of the stop folder will be searched if it exists.</p>
<h2 id="parameters">Parameters</h2>
<dl>
<dt><strong><code>starting_path</code></strong> :&ensp;<code>Path</code></dt>
<dd>non-inclusive starting path</dd>
<dt><strong><code>glob_pattern</code></strong> :&ensp;<code>str</code></dt>
<dd>pattern to look for, first match will be returned</dd>
<dt><strong><code>stop_folder_name</code></strong> :&ensp;<code>str</code>, optional</dt>
<dd>name of the last folder in the directory tree to search, by default None</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
<dt><code>Path | None</code></dt>
<dd>Path if corresponding object was found, None otherwise</dd>
</dl></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main" href="index.html">lang_main</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.search.search_base_path" href="#lang_main.search.search_base_path">search_base_path</a></code></li>
<li><code><a title="lang_main.search.search_cwd" href="#lang_main.search.search_cwd">search_cwd</a></code></li>
<li><code><a title="lang_main.search.search_iterative" href="#lang_main.search.search_iterative">search_iterative</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>

10637
docs/lang_main/types.html Normal file

File diff suppressed because it is too large Load Diff

View File

@ -5,9 +5,6 @@ from typing import cast
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import numpy.typing as npt import numpy.typing as npt
# import sentence_transformers # TODO check removal
# import sentence_transformers.util # TODO check removal
from networkx import Graph from networkx import Graph
from pandas import DataFrame, Series from pandas import DataFrame, Series
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer

View File

@ -47,7 +47,7 @@ def _non_relevant_obj_ids(
feats_per_obj_id = feats_per_obj_id.dropna() feats_per_obj_id = feats_per_obj_id.dropna()
unique_feats_per_obj_id = len(feats_per_obj_id.unique()) unique_feats_per_obj_id = len(feats_per_obj_id.unique())
if unique_feats_per_obj_id > thresh_unique_feat_per_id: if unique_feats_per_obj_id >= thresh_unique_feat_per_id:
ids_to_ignore.add(obj_id) ids_to_ignore.add(obj_id)
return tuple(ids_to_ignore) return tuple(ids_to_ignore)

View File

@ -119,7 +119,7 @@ def _preprocess_STFR_model_name(
raise FileNotFoundError( raise FileNotFoundError(
f'Target model >{model_name}< not found under {model_path}' f'Target model >{model_name}< not found under {model_path}'
) )
model_name_or_path = str(model_path) model_name_or_path = str(model_path) # pragma: no cover
else: else:
model_name_or_path = model_name model_name_or_path = model_name

View File

@ -30,11 +30,12 @@ from lang_main.constants import (
DATE_COLS, DATE_COLS,
FEATURE_NAME_OBJ_ID, FEATURE_NAME_OBJ_ID,
FEATURE_NAME_OBJ_TEXT, FEATURE_NAME_OBJ_TEXT,
MAX_EDGE_NUMBER,
MODEL_INPUT_FEATURES, MODEL_INPUT_FEATURES,
NAME_DELTA_FEAT_TO_REPAIR, NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER, SAVE_PATH_FOLDER,
TARGET_FEATURE,
THRESHOLD_AMOUNT_CHARACTERS, THRESHOLD_AMOUNT_CHARACTERS,
THRESHOLD_EDGE_NUMBER,
THRESHOLD_NUM_ACTIVITIES, THRESHOLD_NUM_ACTIVITIES,
THRESHOLD_SIMILARITY, THRESHOLD_SIMILARITY,
THRESHOLD_TIMELINE_SIMILARITY, THRESHOLD_TIMELINE_SIMILARITY,
@ -72,7 +73,7 @@ def build_base_target_feature_pipe() -> Pipeline:
pipe_target_feat.add( pipe_target_feat.add(
entry_wise_cleansing, entry_wise_cleansing,
{ {
'target_features': ('VorgangsBeschreibung',), 'target_features': (TARGET_FEATURE,),
'cleansing_func': clean_string_slim, 'cleansing_func': clean_string_slim,
}, },
save_result=True, save_result=True,
@ -81,7 +82,7 @@ def build_base_target_feature_pipe() -> Pipeline:
pipe_target_feat.add( pipe_target_feat.add(
analyse_feature, analyse_feature,
{ {
'target_feature': 'VorgangsBeschreibung', 'target_feature': TARGET_FEATURE,
}, },
save_result=True, save_result=True,
) )
@ -140,7 +141,7 @@ def build_tk_graph_post_pipe() -> Pipeline:
pipe_graph_postprocessing.add( pipe_graph_postprocessing.add(
graphs.filter_graph_by_number_edges, graphs.filter_graph_by_number_edges,
{ {
'limit': THRESHOLD_EDGE_NUMBER, 'limit': MAX_EDGE_NUMBER,
'property': 'weight', 'property': 'weight',
}, },
) )

View File

@ -321,7 +321,7 @@ def test_pipe_add_graph_metrics():
def test_pipe_rescale_graph_edge_weights(tk_graph): def test_pipe_rescale_graph_edge_weights(tk_graph):
rescaled_tkg, rescaled_undir = graphs.pipe_rescale_graph_edge_weights(tk_graph) rescaled_tkg, rescaled_undir = graphs.pipe_rescale_graph_edge_weights(tk_graph)
assert rescaled_tkg[2][1]['weight'] == pytest.approx(1.0) assert rescaled_tkg[2][1]['weight'] == pytest.approx(1.0)
assert rescaled_tkg[1][2]['weight'] == pytest.approx(0.0952) assert rescaled_tkg[1][2]['weight'] == pytest.approx(0.095238)
assert rescaled_undir[2][1]['weight'] == pytest.approx(1.0) assert rescaled_undir[2][1]['weight'] == pytest.approx(1.0)
assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0) assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0)
@ -331,7 +331,7 @@ def test_rescale_edge_weights(import_graph, request):
test_graph = request.getfixturevalue(import_graph) test_graph = request.getfixturevalue(import_graph)
rescaled_graph = graphs.rescale_edge_weights(test_graph) rescaled_graph = graphs.rescale_edge_weights(test_graph)
assert rescaled_graph[2][1]['weight'] == pytest.approx(1.0) assert rescaled_graph[2][1]['weight'] == pytest.approx(1.0)
assert rescaled_graph[1][2]['weight'] == pytest.approx(0.0952) assert rescaled_graph[1][2]['weight'] == pytest.approx(0.095238)
@pytest.mark.parametrize('import_graph', ['graph', 'tk_graph']) @pytest.mark.parametrize('import_graph', ['graph', 'tk_graph'])

View File

@ -72,7 +72,7 @@ def test_calc_delta_to_repair(data_pre_cleaned, convert_to_days):
def test_non_relevant_obj_ids(data_pre_cleaned): def test_non_relevant_obj_ids(data_pre_cleaned):
feature_uniqueness = 'HObjektText' feature_uniqueness = 'HObjektText'
feature_obj_id = 'ObjektID' feature_obj_id = 'ObjektID'
threshold = 1 threshold = 2
data = data_pre_cleaned.copy() data = data_pre_cleaned.copy()
data.at[0, feature_obj_id] = 1 data.at[0, feature_obj_id] = 1
ids_to_ignore = tl._non_relevant_obj_ids( ids_to_ignore = tl._non_relevant_obj_ids(
@ -88,7 +88,7 @@ def test_non_relevant_obj_ids(data_pre_cleaned):
def test_remove_non_relevant_obj_ids(data_pre_cleaned): def test_remove_non_relevant_obj_ids(data_pre_cleaned):
feature_uniqueness = 'HObjektText' feature_uniqueness = 'HObjektText'
feature_obj_id = 'ObjektID' feature_obj_id = 'ObjektID'
threshold = 1 threshold = 2
data = data_pre_cleaned.copy() data = data_pre_cleaned.copy()
data.at[0, feature_obj_id] = 1 data.at[0, feature_obj_id] = 1

View File

@ -25,8 +25,6 @@ from lang_main.types import LanguageModels
@pytest.mark.parametrize( @pytest.mark.parametrize(
'model_name', 'model_name',
[ [
STFRModelTypes.ALL_DISTILROBERTA_V1,
STFRModelTypes.ALL_MINI_LM_L12_V2,
STFRModelTypes.ALL_MINI_LM_L6_V2, STFRModelTypes.ALL_MINI_LM_L6_V2,
STFRModelTypes.ALL_MPNET_BASE_V2, STFRModelTypes.ALL_MPNET_BASE_V2,
], ],
@ -47,6 +45,25 @@ def test_load_sentence_transformer(
assert isinstance(model, SentenceTransformer) assert isinstance(model, SentenceTransformer)
def test_preprocess_STFR_model_name() -> None:
model_name_not_exist = 'TestModel'
ret_model_name = model_loader._preprocess_STFR_model_name(
model_name=model_name_not_exist, backend=STFRBackends.TORCH, force_download=True
)
assert ret_model_name == model_name_not_exist
ret_model_name = model_loader._preprocess_STFR_model_name(
model_name=model_name_not_exist, backend=STFRBackends.TORCH, force_download=False
)
assert ret_model_name == model_name_not_exist
model_name_exist = STFRModelTypes.E5_BASE_STS_EN_DE
backend_exist = STFRBackends.ONNX
with pytest.raises(FileNotFoundError):
_ = model_loader._preprocess_STFR_model_name(
model_name=model_name_exist, backend=backend_exist, force_download=False
)
@pytest.mark.parametrize( @pytest.mark.parametrize(
'similarity_func', 'similarity_func',
[ [
@ -57,8 +74,6 @@ def test_load_sentence_transformer(
@pytest.mark.parametrize( @pytest.mark.parametrize(
'model_name', 'model_name',
[ [
STFRModelTypes.ALL_DISTILROBERTA_V1,
STFRModelTypes.ALL_MINI_LM_L12_V2,
STFRModelTypes.ALL_MINI_LM_L6_V2, STFRModelTypes.ALL_MINI_LM_L6_V2,
STFRModelTypes.ALL_MPNET_BASE_V2, STFRModelTypes.ALL_MPNET_BASE_V2,
], ],
@ -108,6 +123,14 @@ def test_instantiate_spacy_model():
assert isinstance(model, Language) assert isinstance(model, Language)
def test_fail_instantiate_spacy_model():
with pytest.raises(KeyError):
_ = model_loader.instantiate_model(
model_load_map=model_loader.MODEL_LOADER_MAP,
model='test', # type: ignore
) # type: ignore
@pytest.mark.mload @pytest.mark.mload
def test_instantiate_stfr_model(): def test_instantiate_stfr_model():
model = model_loader.instantiate_model( model = model_loader.instantiate_model(