From fb28b8548be00ff1b0e5802fec09b5494ff7e656 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20F=C3=B6rster?=
Date: Wed, 22 Jan 2025 16:54:15 +0100
Subject: [PATCH] added test cases

---
 docs/lang_main/analysis/graphs.html        |  1844 +++
 docs/lang_main/analysis/index.html         |    98 +
 docs/lang_main/analysis/preprocessing.html |   451 +
 docs/lang_main/analysis/shared.html        |   273 +
 docs/lang_main/analysis/timeline.html      |   333 +
 docs/lang_main/analysis/tokens.html        |   320 +
 docs/lang_main/config.html                 |   206 +
 docs/lang_main/constants.html              |    66 +
 docs/lang_main/errors.html                 |   330 +
 docs/lang_main/index.html                  |   123 +
 docs/lang_main/io.html                     |   227 +
 docs/lang_main/loggers.html                |    66 +
 docs/lang_main/model_loader.html           |   162 +
 docs/lang_main/pipelines/base.html         |   755 ++
 docs/lang_main/pipelines/index.html        |    83 +
 docs/lang_main/pipelines/predefined.html   |   386 +
 docs/lang_main/render/cytoscape.html       |   797 ++
 .../render/cytoscape_monkeypatch.html      |   182 +
 docs/lang_main/render/index.html           |    83 +
 docs/lang_main/search.html                 |   261 +
 docs/lang_main/types.html                  | 10637 ++++++++++++++++
 src/lang_main/analysis/shared.py           |     3 -
 src/lang_main/analysis/timeline.py         |     2 +-
 src/lang_main/model_loader.py              |     2 +-
 src/lang_main/pipelines/predefined.py      |     9 +-
 tests/analysis/test_graphs.py              |     4 +-
 tests/analysis/test_timeline.py            |     4 +-
 tests/test_model_loader.py                 |    31 +-
 28 files changed, 17721 insertions(+), 17 deletions(-)
 create mode 100644 docs/lang_main/analysis/graphs.html
 create mode 100644 docs/lang_main/analysis/index.html
 create mode 100644 docs/lang_main/analysis/preprocessing.html
 create mode 100644 docs/lang_main/analysis/shared.html
 create mode 100644 docs/lang_main/analysis/timeline.html
 create mode 100644 docs/lang_main/analysis/tokens.html
 create mode 100644 docs/lang_main/config.html
 create mode 100644 docs/lang_main/constants.html
 create mode 100644 docs/lang_main/errors.html
 create mode 100644 docs/lang_main/index.html
 create mode 100644 docs/lang_main/io.html
 create mode 100644 docs/lang_main/loggers.html
 create mode 100644 docs/lang_main/model_loader.html
 create mode 100644 docs/lang_main/pipelines/base.html
 create mode 100644 docs/lang_main/pipelines/index.html
 create mode 100644 docs/lang_main/pipelines/predefined.html
 create mode 100644 docs/lang_main/render/cytoscape.html
 create mode 100644 docs/lang_main/render/cytoscape_monkeypatch.html
 create mode 100644 docs/lang_main/render/index.html
 create mode 100644 docs/lang_main/search.html
 create mode 100644 docs/lang_main/types.html

diff --git a/docs/lang_main/analysis/graphs.html b/docs/lang_main/analysis/graphs.html
new file mode 100644
index 0000000..6c54b7a
--- /dev/null
+++ b/docs/lang_main/analysis/graphs.html
@@ -0,0 +1,1844 @@

lang_main.analysis.graphs API documentation
+
+
+

Module lang_main.analysis.graphs

+
+
+
+
+
+
+
+
+

Functions

+
+
+def add_betweenness_centrality(graph: DiGraph | Graph,
edge_weight_property: str | None = None,
property_name: str = 'betweenness_centrality') ‑> None
+
+
+
+ +Expand source code + +
def add_betweenness_centrality(
+    graph: DiGraph | Graph,
+    edge_weight_property: str | None = None,
+    property_name: str = PROPERTY_NAME_BETWEENNESS_CENTRALITY,
+) -> None:
+    """adds the betweenness centrality as property to each node of the given graph
+    Operation is performed inplace.
+
+    Parameters
+    ----------
+    graph : DiGraph | Graph
+        Graph with betweenness centrality as node property added inplace
+    edge_weight_property : str | None, optional
+        property of the edges which contains the weight information,
+        not necessarily needed, by default None
+    property_name : str, optional
+        target name for the property containing the betweenness centrality in nodes,
+        by default PROPERTY_NAME_BETWEENNESS_CENTRALITY
+    """
+
+    node_property_mapping = cast(
+        dict[str, float],
+        nx.betweenness_centrality(graph, normalized=True, weight=edge_weight_property),  # type: ignore
+    )
+    nx.set_node_attributes(
+        graph,
+        node_property_mapping,
+        name=property_name,
+    )
+
+

Adds the betweenness centrality as a property to each node of the given graph. +The operation is performed inplace.

+

Parameters

+
+
graph : DiGraph | Graph
+
Graph with betweenness centrality as node property added inplace
+
edge_weight_property : str | None, optional
+
property of the edges which contains the weight information, +not necessarily needed, by default None
+
property_name : str, optional
+
target name for the property containing the betweenness centrality in nodes, +by default PROPERTY_NAME_BETWEENNESS_CENTRALITY
+
+
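Usage sketch (illustrative only, not part of the generated documentation), assuming the function is imported from lang_main.analysis.graphs and networkx is available; the node names are made up:

>>> import networkx as nx
>>> G = nx.DiGraph()
>>> G.add_edge('a', 'b', weight=2)
>>> G.add_edge('b', 'c', weight=1)
>>> add_betweenness_centrality(G, edge_weight_property='weight')
>>> 'betweenness_centrality' in G.nodes['b']
True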
+
+def add_importance_metric(graph: DiGraph | Graph,
property_name: str = 'importance',
property_name_weighted_degree: str = 'degree_weighted',
property_name_betweenness: str = 'betweenness_centrality') ‑> None
+
+
+
+ +Expand source code + +
def add_importance_metric(
+    graph: DiGraph | Graph,
+    property_name: str = PROPERTY_NAME_IMPORTANCE,
+    property_name_weighted_degree: str = PROPERTY_NAME_DEGREE_WEIGHTED,
+    property_name_betweenness: str = PROPERTY_NAME_BETWEENNESS_CENTRALITY,
+) -> None:
+    """Adds a custom importance metric as property to each node of the given graph.
+    Can be used to decide which nodes are of high importance and also to build node size
+    mappings.
+    Operation is performed inplace.
+
+    Parameters
+    ----------
+    graph : DiGraph | Graph
+        Graph with importance metric as node property added inplace
+    property_name : str, optional
+        target name for the property containing the importance metric in nodes,
+        by default PROPERTY_NAME_IMPORTANCE
+    property_name_weighted_degree : str, optional
+        name of the node property containing the weighted degree,
+        by default PROPERTY_NAME_DEGREE_WEIGHTED
+    property_name_betweenness : str, optional
+        name of the node property containing the betweenness centrality,
+        by default PROPERTY_NAME_BETWEENNESS_CENTRALITY
+    """
+    # build mapping for importance metric
+    node_property_mapping: dict[str, float] = {}
+    for node in cast(Iterable[str], graph.nodes):
+        node_data = cast(dict[str, float], graph.nodes[node])
+
+        if property_name_weighted_degree not in node_data:
+            raise NodePropertyNotContainedError(
+                (
+                    f'Node data does not contain weighted degree '
+                    f'with name {property_name_weighted_degree}.'
+                )
+            )
+        elif property_name_betweenness not in node_data:
+            raise NodePropertyNotContainedError(
+                (
+                    f'Node data does not contain betweenness centrality '
+                    f'with name {property_name_betweenness}.'
+                )
+            )
+
+        prio = node_data[property_name_weighted_degree] * node_data[property_name_betweenness]
+        node_property_mapping[node] = prio
+
+    nx.set_node_attributes(
+        graph,
+        node_property_mapping,
+        name=property_name,
+    )
+
+

Adds a custom importance metric as property to each node of the given graph. +Can be used to decide which nodes are of high importance and also to build node size +mappings. +Operation is performed inplace.

+

Parameters

+
+
graph : DiGraph | Graph
+
Graph with importance metric as node property added inplace
+
property_name : str, optional
+
target name for the property containing the importance metric in nodes, +by default PROPERTY_NAME_IMPORTANCE
+
property_name_weighted_degree : str, optional
+
name of the node property containing the weighted degree, +by default PROPERTY_NAME_DEGREE_WEIGHTED
+
property_name_betweenness : str, optional
+
name of the node property containing the betweenness centrality, +by default PROPERTY_NAME_BETWEENNESS_CENTRALITY
+
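Illustrative sketch of the intended call order (not part of the generated documentation): the weighted degree and the betweenness centrality must already be present as node properties, otherwise NodePropertyNotContainedError is raised. Assumes networkx and the other helpers from this module:

>>> import networkx as nx
>>> G = nx.DiGraph([('a', 'b'), ('b', 'c')])
>>> add_weighted_degree(G)
>>> add_betweenness_centrality(G)
>>> add_importance_metric(G)
>>> sorted(G.nodes['b'])
['betweenness_centrality', 'degree_weighted', 'importance']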
+
+
+def add_weighted_degree(graph: DiGraph | Graph,
edge_weight_property: str = 'weight',
property_name: str = 'degree_weighted') ‑> None
+
+
+
+ +Expand source code + +
def add_weighted_degree(
+    graph: DiGraph | Graph,
+    edge_weight_property: str = 'weight',
+    property_name: str = PROPERTY_NAME_DEGREE_WEIGHTED,
+) -> None:
+    """adds the weighted degree as property to each node of the given graph
+    Operation is performed inplace.
+
+    Parameters
+    ----------
+    graph : DiGraph | Graph
+        Graph with weighted degree as node property added inplace
+    edge_weight_property : str, optional
+        property of the edges which contains the weight information, by default 'weight'
+    property_name : str, optional
+        target name for the property containing the weighted degree in nodes,
+        by default PROPERTY_NAME_DEGREE_WEIGHTED
+    """
+    node_property_mapping = cast(
+        dict[str, float],
+        dict(graph.degree(weight=edge_weight_property)),  # type: ignore
+    )
+    nx.set_node_attributes(
+        graph,
+        node_property_mapping,
+        name=property_name,
+    )
+
+

Adds the weighted degree as a property to each node of the given graph. +The operation is performed inplace.

+

Parameters

+
+
graph : DiGraph | Graph
+
Graph with weighted degree as node property added inplace
+
edge_weight_property : str, optional
+
property of the edges which contains the weight information, by default 'weight'
+
property_name : str, optional
+
target name for the property containing the weighted degree in nodes, +by default PROPERTY_NAME_DEGREE_WEIGHTED
+
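Illustrative sketch (not from the source), assuming networkx is available; the weighted degree of a node is the sum of the weights of its incoming and outgoing edges:

>>> import networkx as nx
>>> G = nx.DiGraph()
>>> G.add_edge('pump', 'seal', weight=3)
>>> G.add_edge('seal', 'leak', weight=2)
>>> add_weighted_degree(G)
>>> G.nodes['seal']['degree_weighted']
5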
+
+
+def convert_graph_to_cytoscape(graph: Graph | DiGraph) ‑> tuple[list[lang_main.types.CytoscapeData], lang_main.types.WeightData] +
+
+
+ +Expand source code + +
def convert_graph_to_cytoscape(
+    graph: Graph | DiGraph,
+) -> tuple[list[CytoscapeData], WeightData]:
+    cyto_data: list[CytoscapeData] = []
+    # iterate over nodes
+    nodes = cast(Iterable[NodeTitle], graph.nodes)
+    for node in nodes:
+        node_data: CytoscapeData = {
+            'data': {
+                'id': node,
+                'label': node,
+            }
+        }
+        cyto_data.append(node_data)
+    # iterate over edges
+    weights: set[int] = set()
+
+    edges = cast(
+        Iterable[
+            tuple[
+                NodeTitle,
+                NodeTitle,
+                EdgeWeight,
+            ]
+        ],
+        graph.edges.data('weight', default=1),  # type: ignore
+    )
+    for source, target, weight in edges:
+        weights.add(weight)
+        edge_data: CytoscapeData = {
+            'data': {
+                'source': source,
+                'target': target,
+                'weight': weight,
+            }
+        }
+        cyto_data.append(edge_data)
+
+    # TODO: add internal behaviour (if edge added check for new min/max)
+    min_weight: int = 0
+    max_weight: int = 0
+    if weights:
+        min_weight = min(weights)
+        max_weight = max(weights)
+    weight_metadata: WeightData = {'min': min_weight, 'max': max_weight}
+
+    return cyto_data, weight_metadata
+
+
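A small illustrative example (node names are made up): nodes are emitted first as {'data': {'id': ..., 'label': ...}} entries, followed by edges carrying 'source', 'target' and 'weight'; the second return value summarises the observed weight range:

>>> import networkx as nx
>>> G = nx.DiGraph()
>>> G.add_edge('a', 'b', weight=4)
>>> elements, weight_meta = convert_graph_to_cytoscape(G)
>>> elements[0]
{'data': {'id': 'a', 'label': 'a'}}
>>> weight_meta
{'min': 4, 'max': 4}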
+
+
+def convert_graph_to_undirected(graph: DiGraph, logging: bool = False, cast_int: bool = False) ‑> networkx.classes.graph.Graph +
+
+
+ +Expand source code + +
def convert_graph_to_undirected(
+    graph: DiGraph,
+    logging: bool = LOGGING_DEFAULT_GRAPHS,
+    cast_int: bool = False,
+) -> Graph:
+    dtype = np.float32
+    if cast_int:
+        dtype = np.uint32
+    # get adjacency matrix
+    adj_mat = typing.cast(DataFrame, nx.to_pandas_adjacency(G=graph, dtype=dtype))
+    arr = typing.cast(npt.NDArray[np.float32 | np.uint32], adj_mat.to_numpy())
+    if not cast_int:
+        arr = arr * (10**EDGE_WEIGHT_DECIMALS)
+        arr = np.round(arr, decimals=0)
+        arr = arr.astype(np.uint32)
+    # build undirected array: adding edges of lower triangular matrix to upper one
+    arr_upper = np.triu(arr)
+    arr_lower = np.tril(arr)
+    arr_lower = np.rot90(np.fliplr(arr_lower))
+    arr_new = arr_upper + arr_lower
+    if not cast_int:
+        arr_new = (arr_new / 10**EDGE_WEIGHT_DECIMALS).astype(np.float32)
+        arr_new = np.round(arr_new, decimals=EDGE_WEIGHT_DECIMALS)
+    # assign new data and create graph
+    adj_mat.loc[:] = arr_new  # type: ignore
+    graph_undir = typing.cast(Graph, nx.from_pandas_adjacency(df=adj_mat))
+
+    # info about graph
+    if logging:
+        logger.info('Successfully converted graph to one with undirected edges.')
+    _ = get_graph_metadata(graph=graph_undir, logging=logging)
+
+    return graph_undir
+
+
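The conversion works on the adjacency matrix: the lower triangle is mirrored onto the upper one, so reciprocal directed edges are folded into a single undirected edge whose weight is the sum of both directions. An illustrative sketch (assuming cast_int=True so the weights stay integers):

>>> import networkx as nx
>>> G = nx.DiGraph()
>>> G.add_edge('a', 'b', weight=2)
>>> G.add_edge('b', 'a', weight=3)
>>> H = convert_graph_to_undirected(G, cast_int=True)
>>> int(H['a']['b']['weight'])
5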
+
+
+def filter_graph_by_edge_weight(graph: TokenGraph,
bound_lower: int | None,
bound_upper: int | None,
property: str = 'weight') ‑> TokenGraph
+
+
+
+ +Expand source code + +
def filter_graph_by_edge_weight(
+    graph: TokenGraph,
+    bound_lower: int | None,
+    bound_upper: int | None,
+    property: str = 'weight',
+) -> TokenGraph:
+    """filters all edges which are within the provided bounds
+    inclusive limits: bound_lower <= edge_weight <= bound_upper are retained
+
+    Parameters
+    ----------
+    bound_lower : int | None
+        lower bound for edge weights, edges with weight equal to this value are retained
+    bound_upper : int | None
+        upper bound for edge weights, edges with weight equal to this value are retained
+
+    Returns
+    -------
+    TokenGraph
+        a copy of the graph with filtered edges
+    """
+    original_graph_edges = copy.deepcopy(graph.edges)
+    filtered_graph = graph.copy()
+
+    if not any((bound_lower, bound_upper)):
+        logger.warning('No bounds provided, returning original graph.')
+        return filtered_graph
+
+    for edge in original_graph_edges:
+        weight = typing.cast(int, filtered_graph[edge[0]][edge[1]][property])
+        if bound_lower is not None and weight < bound_lower:
+            filtered_graph.remove_edge(edge[0], edge[1])
+        if bound_upper is not None and weight > bound_upper:
+            filtered_graph.remove_edge(edge[0], edge[1])
+
+    filtered_graph.to_undirected(inplace=True, logging=False)
+    filtered_graph.update_metadata(logging=False)
+
+    return filtered_graph
+
+

filters all edges which are within the provided bounds +inclusive limits: bound_lower <= edge_weight <= bound_upper are retained

+

Parameters

+
+
bound_lower : int | None
+
lower bound for edge weights, edges with weight equal to this value are retained
+
bound_upper : int | None
+
upper bound for edge weights, edges with weight equal to this value are retained
+
+

Returns

+
+
TokenGraph
+
a copy of the graph with filtered edges
+
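An illustrative sketch (assuming a TokenGraph built via update_graph from this module; names are made up); edges outside the inclusive bounds are removed from the returned copy:

>>> tg = TokenGraph(enable_logging=False)
>>> update_graph(tg, parent='pump', child='seal', weight_connection=5)
>>> update_graph(tg, parent='seal', child='leak', weight_connection=1)
>>> filtered = filter_graph_by_edge_weight(tg, bound_lower=2, bound_upper=None)
>>> list(filtered.edges)
[('pump', 'seal')]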
+
+
+def filter_graph_by_node_degree(graph: TokenGraph,
bound_lower: int | None,
bound_upper: int | None) ‑> TokenGraph
+
+
+
+ +Expand source code + +
def filter_graph_by_node_degree(
+    graph: TokenGraph,
+    bound_lower: int | None,
+    bound_upper: int | None,
+) -> TokenGraph:
+    """filters all nodes which are within the provided bounds by their degree,
+    inclusive limits: bound_lower <= node_degree <= bound_upper are retained
+
+    Parameters
+    ----------
+    bound_lower : int | None
+        lower bound for node degree, nodes with degree equal to this value are retained
+    bound_upper : int | None
+        upper bound for node degree, nodes with degree equal to this value are retained
+
+    Returns
+    -------
+    TokenGraph
+        a copy of the graph with filtered nodes
+    """
+    # filter nodes by degree
+    original_graph_nodes = copy.deepcopy(graph.nodes)
+    filtered_graph = graph.copy()
+    filtered_graph_degree = copy.deepcopy(filtered_graph.degree)
+
+    if not any([bound_lower, bound_upper]):
+        logger.warning('No bounds provided, returning original graph.')
+        return filtered_graph
+
+    for node in original_graph_nodes:
+        degree = cast(int, filtered_graph_degree[node])  # type: ignore
+        if bound_lower is not None and degree < bound_lower:
+            filtered_graph.remove_node(node)
+        if bound_upper is not None and degree > bound_upper:
+            filtered_graph.remove_node(node)
+
+    filtered_graph.to_undirected(inplace=True, logging=False)
+    filtered_graph.update_metadata(logging=False)
+
+    return filtered_graph
+
+

filters all nodes which are within the provided bounds by their degree, +inclusive limits: bound_lower <= node_degree <= bound_upper are retained

+

Parameters

+
+
bound_lower : int | None
+
lower bound for node degree, nodes with degree equal to this value are retained
+
bound_upper : int | None
+
upper bound for node degree, nodes with degree equal to this value are retained
+
+

Returns

+
+
TokenGraph
+
a copy of the graph with filtered nodes
+
+
+
+def filter_graph_by_number_edges(graph: TokenGraph,
limit: int | None,
property: str = 'weight',
descending: bool = True) ‑> TokenGraph
+
+
+
+ +Expand source code + +
def filter_graph_by_number_edges(
+    graph: TokenGraph,
+    limit: int | None,
+    property: str = 'weight',
+    descending: bool = True,
+) -> TokenGraph:
+    graph = graph.copy()
+    # edges
+    original = set(graph.edges(data=property))  # type: ignore
+    original_sorted = sorted(original, key=lambda tup: tup[2], reverse=descending)
+    if limit is not None:
+        chosen = set(original_sorted[:limit])
+    else:
+        chosen = set(original_sorted)
+    edges_to_drop = original.difference(chosen)
+    graph.remove_edges_from(edges_to_drop)
+
+    return graph
+
+
+
+
+def get_graph_metadata(graph: Graph | DiGraph, logging: bool = False) ‑> dict[str, float] +
+
+
+ +Expand source code + +
def get_graph_metadata(
+    graph: Graph | DiGraph,
+    logging: bool = LOGGING_DEFAULT_GRAPHS,
+) -> dict[str, float]:
+    # info about graph
+    graph_info: dict[str, float] = {}
+    # nodes and edges
+    num_nodes = len(graph.nodes)
+    num_edges = len(graph.edges)
+    # edge weights
+    min_edge_weight: int = 1_000_000
+    max_edge_weight: int = 0
+    for edge in graph.edges:
+        weight = typing.cast(int, graph[edge[0]][edge[1]]['weight'])
+        if weight < min_edge_weight:
+            min_edge_weight = weight
+        if weight > max_edge_weight:
+            max_edge_weight = weight
+
+    # memory
+    edge_mem = sum([sys.getsizeof(e) for e in graph.edges])
+    node_mem = sum([sys.getsizeof(n) for n in graph.nodes])
+    total_mem = edge_mem + node_mem
+
+    graph_info.update(
+        num_nodes=num_nodes,
+        num_edges=num_edges,
+        min_edge_weight=min_edge_weight,
+        max_edge_weight=max_edge_weight,
+        node_memory=node_mem,
+        edge_memory=edge_mem,
+        total_memory=total_mem,
+    )
+
+    if logging:
+        logger.info('Graph properties: %d Nodes, %d Edges', num_nodes, num_edges)
+        logger.info('Node memory: %.2f KB', (node_mem / 1024))
+        logger.info('Edge memory: %.2f KB', (edge_mem / 1024))
+        logger.info('Total memory: %.2f KB', (total_mem / 1024))
+
+    return graph_info
+
+
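A short illustrative call (assuming networkx is available); the returned dictionary bundles node/edge counts, the observed edge-weight range and a rough memory estimate:

>>> import networkx as nx
>>> G = nx.DiGraph()
>>> G.add_edge('a', 'b', weight=2)
>>> info = get_graph_metadata(G)
>>> (info['num_nodes'], info['num_edges'], info['max_edge_weight'])
(2, 1, 2)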
+
+
+def normalise_array_linear(array: npt.NDArray[np.float32]) ‑> numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]] +
+
+
+ +Expand source code + +
def normalise_array_linear(
+    array: npt.NDArray[np.float32],
+) -> npt.NDArray[np.float32]:
+    """apply standard linear normalisation
+
+    Parameters
+    ----------
+    array : npt.NDArray[np.float32]
+        array which shall be normalised
+
+    Returns
+    -------
+    npt.NDArray[np.float32]
+        min/max normalised array
+    """
+    div = array.max() - array.min()
+    if div != 0:
+        arr_norm = (array - array.min()) / div
+        return arr_norm.astype(np.float32)
+    else:
+        return np.zeros(shape=array.shape, dtype=np.float32)
+
+

apply standard linear normalisation

+

Parameters

+
+
array : npt.NDArray[np.float32]
+
array which shall be normalised
+
+

Returns

+
+
npt.NDArray[np.float32]
+
min/max normalised array
+
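Worked example (illustrative): the minimum maps to 0, the maximum to 1, values in between linearly; a constant array yields all zeros because the divisor would otherwise be 0:

>>> import numpy as np
>>> normalise_array_linear(np.array([2.0, 4.0, 6.0], dtype=np.float32))
array([0. , 0.5, 1. ], dtype=float32)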
+
+
+def pipe_add_graph_metrics(*graphs: DiGraph | Graph) ‑> tuple[networkx.classes.digraph.DiGraph | networkx.classes.graph.Graph, ...] +
+
+
+ +Expand source code + +
def pipe_add_graph_metrics(
+    *graphs: DiGraph | Graph,
+) -> tuple[DiGraph | Graph, ...]:
+    collection: list[DiGraph | Graph] = []
+    for graph in graphs:
+        graph_copy = copy.deepcopy(graph)
+        add_weighted_degree(graph_copy)
+        add_betweenness_centrality(graph_copy)
+        add_importance_metric(graph_copy)
+        collection.append(graph_copy)
+
+    return tuple(collection)
+
+
+
+
+def pipe_rescale_graph_edge_weights(graph: TokenGraph) ‑> tuple[TokenGraph, networkx.classes.graph.Graph] +
+
+
+ +Expand source code + +
def pipe_rescale_graph_edge_weights(
+    graph: TokenGraph,
+) -> tuple[TokenGraph, Graph]:
+    """helper function to allow calls in pipelines
+
+    Parameters
+    ----------
+    graph : TokenGraph
+        token graph pushed through pipeline
+
+    Returns
+    -------
+    tuple[TokenGraph, Graph]
+        token graph (directed) and undirected version with rescaled edge weights
+    """
+    graph = graph.copy()
+
+    return graph.rescale_edge_weights()
+
+

helper function to allow calls in pipelines

+

Parameters

+
+
graph : TokenGraph
+
token graph pushed through pipeline
+
+

Returns

+
+
tuple[TokenGraph, Graph]
+
token graph (directed) and undirected version with rescaled edge weights
+
+
+
+def rescale_edge_weights(graph: Graph | DiGraph | TokenGraph,
weight_property: str = 'weight') ‑> networkx.classes.graph.Graph | networkx.classes.digraph.DiGraph | TokenGraph
+
+
+
+ +Expand source code + +
def rescale_edge_weights(
+    graph: Graph | DiGraph | TokenGraph,
+    weight_property: str = 'weight',
+) -> Graph | DiGraph | TokenGraph:
+    graph = graph.copy()
+    # check non-emptiness
+    verify_non_empty_graph(graph, including_edges=True)
+    # check if all edges contain weight property
+    verify_property(graph, property=weight_property)
+
+    weights = cast(list[int], [data['weight'] for data in graph.edges.values()])
+    w_log = cast(npt.NDArray[np.float32], np.log(weights, dtype=np.float32))
+    weights_norm = normalise_array_linear(w_log)
+    weights_adjusted = weight_scaling(weights_norm)
+    # assign new weight values
+    for idx, (node_1, node_2) in enumerate(graph.edges):
+        graph[node_1][node_2]['weight'] = weights_adjusted[idx]
+
+    return graph
+
+
+
+
+def save_to_GraphML(graph: DiGraph | Graph, saving_path: Path, filename: str | None = None) ‑> None +
+
+
+ +Expand source code + +
def save_to_GraphML(
+    graph: DiGraph | Graph,
+    saving_path: Path,
+    filename: str | None = None,
+) -> None:
+    if filename is not None:
+        saving_path = saving_path.joinpath(filename)
+    saving_path = saving_path.with_suffix('.graphml')
+    nx.write_graphml(G=graph, path=saving_path)
+    logger.info('Successfully saved graph as GraphML file under %s.', saving_path)
+
+
+
+
+def static_graph_analysis(graph: TokenGraph) ‑> tuple[TokenGraph] +
+
+
+ +Expand source code + +
def static_graph_analysis(
+    graph: TokenGraph,
+) -> tuple[TokenGraph]:
+    """helper function to allow the calculation of static metrics in pipelines
+
+    Parameters
+    ----------
+    graph : TokenGraph
+        token graph (directed)
+
+    Returns
+    -------
+    tuple[TokenGraph]
+        token graph (directed) with included undirected version and calculated KPIs
+    """
+    graph = graph.copy()
+    graph.perform_static_analysis()
+
+    return (graph,)
+
+

helper function to allow the calculation of static metrics in pipelines

+

Parameters

+
+
graph : TokenGraph
+
token graph (directed)
+
+

Returns

+
+
tuple[TokenGraph]
+
token graph (directed) with included undirected version and calculated KPIs
+
+
+
+def update_graph(graph: Graph | DiGraph,
*,
batch: Iterable[tuple[Hashable, Hashable]] | None = None,
parent: Hashable | None = None,
child: Hashable | None = None,
weight_connection: int | None = None) ‑> None
+
+
+
+ +Expand source code + +
def update_graph(
+    graph: Graph | DiGraph,
+    *,
+    batch: Iterable[tuple[Hashable, Hashable]] | None = None,
+    parent: Hashable | None = None,
+    child: Hashable | None = None,
+    weight_connection: int | None = None,
+) -> None:
+    if weight_connection is None:
+        weight_connection = 1
+    # check if edge not in Graph
+    if batch is not None:
+        graph.add_edges_from(batch, weight=weight_connection)
+    elif not graph.has_edge(parent, child):
+        # create new edge, nodes will be created if not already present
+        graph.add_edge(parent, child, weight=weight_connection)
+    else:
+        # update edge
+        graph[parent][child]['weight'] += weight_connection
+
+
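Illustrative sketch (assuming networkx is available): a missing edge is created with the given weight (default 1), an existing edge has its weight incremented, and a batch of edge tuples can be added in one call via the `batch` keyword:

>>> import networkx as nx
>>> G = nx.DiGraph()
>>> update_graph(G, parent='pump', child='seal')
>>> update_graph(G, parent='pump', child='seal', weight_connection=2)
>>> G['pump']['seal']['weight']
3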
+
+
+def verify_non_empty_graph(graph: DiGraph | Graph, including_edges: bool = True) ‑> None +
+
+
+ +Expand source code + +
def verify_non_empty_graph(
+    graph: DiGraph | Graph,
+    including_edges: bool = True,
+) -> None:
+    """check if the given graph is empty, presence of nodes is checked first,
+    then of edges
+
+    Parameters
+    ----------
+    graph : DiGraph | Graph
+        graph to check for emptiness
+    including_edges : bool, optional
+        whether to check for non-existence of edges, by default True
+
+    Raises
+    ------
+    EmptyGraphError
+        if graph does not contain any nodes and therefore edges
+    EmptyEdgesError
+        if graph does not contain any edges
+    """
+    if not tuple(graph.nodes):
+        raise EmptyGraphError(f'Graph object >>{graph}<< does not contain any nodes.')
+    elif including_edges and not tuple(graph.edges):
+        raise EmptyEdgesError(f'Graph object >>{graph}<< does not contain any edges.')
+
+

check if the given graph is empty, presence of nodes is checked first, +then of edges

+

Parameters

+
+
graph : DiGraph | Graph
+
graph to check for emptiness
+
including_edges : bool, optional
+
whether to check for non-existence of edges, by default True
+
+

Raises

+
+
EmptyGraphError
+
if graph does not contain any nodes and therefore edges
+
EmptyEdgesError
+
if graph does not contain any edges
+
+
+
+def verify_property(graph: Graph | DiGraph, property: str) ‑> None +
+
+
+ +Expand source code + +
def verify_property(
+    graph: Graph | DiGraph,
+    property: str,
+) -> None:
+    for node_1, node_2 in graph.edges:
+        if property not in graph[node_1][node_2]:
+            raise EdgePropertyNotContainedError(
+                (
+                    f'Edge property >>{property}<< not '
+                    f'available for edge >>({node_1}, {node_2})<<'
+                )
+            )
+
+
+
+
+def weight_scaling(weights: npt.NDArray[np.float32], a: float = 1.1, b: float = 0.05) ‑> numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]] +
+
+
+ +Expand source code + +
def weight_scaling(
+    weights: npt.NDArray[np.float32],
+    a: float = 1.1,
+    b: float = 0.05,
+) -> npt.NDArray[np.float32]:
+    """non-linear scaling of already normalised edge weights [0;1]: bigger weights
+    have a smaller weight delta than smaller weights. Bigger values for parameter
+    `b` reinforce this effect.
+    Based on:
+    https://math.stackexchange.com/questions/4297805/exponential-function-that-passes-through-0-0-and-1-1-with-variable-slope
+
+    With default values the range of edge weights lies approximately between [0.1; 1]
+
+    Parameters
+    ----------
+    weights : npt.NDArray[np.float32]
+        pre-normalised edge weights as 1D array
+    a : float, optional
+        factor determining the mapped value of edge weights equal to 0
+        (approx. 0.1 with the defaults), by default 1.1
+    b : float, optional
+        adjust the curvature, smaller values increase it, by default 0.05
+
+    Returns
+    -------
+    npt.NDArray[np.float32]
+        non-linear adjusted edge weights as 1D array
+    """
+    adjusted_weights = (b**weights - a) / (b - a)
+
+    return np.round(adjusted_weights, decimals=EDGE_WEIGHT_DECIMALS)
+
+

non-linear scaling of already normalised edge weights [0;1]: bigger weights +have a smaller weight delta than smaller weights. Bigger values for parameter +b reinforce this effect. +Based on: +https://math.stackexchange.com/questions/4297805/exponential-function-that-passes-through-0-0-and-1-1-with-variable-slope

+

With default values the range of edge weights lies approximately between [0.1; 1]

+

Parameters

+
+
weights : npt.NDArray[np.float32]
+
pre-normalised edge weights as 1D array
+
a : float, optional
+
factor determining the mapped value of edge weights equal to 0 +(approx. 0.1 with the defaults), by default 1.1
+
b : float, optional
+
adjust the curvature, smaller values increase it, by default 0.05
+
+

Returns

+
+
npt.NDArray[np.float32]
+
non-linear adjusted edge weights as 1D array
+
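A worked illustration of the formula (b**w - a) / (b - a) with the defaults a = 1.1 and b = 0.05 (values approximate; the function additionally rounds to EDGE_WEIGHT_DECIMALS): w = 0 gives (1 - 1.1) / (0.05 - 1.1) ≈ 0.095, w = 0.5 gives (0.224 - 1.1) / (-1.05) ≈ 0.835, and w = 1 gives exactly 1.0.

>>> import numpy as np
>>> weight_scaling(np.array([0.0, 0.5, 1.0], dtype=np.float32))  # roughly [0.1, 0.83, 1.0], exact rounding depends on EDGE_WEIGHT_DECIMALS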
+
+
+
+
+

Classes

+
+
+class TokenGraph +(name: str = 'TokenGraph',
enable_logging: bool = True,
incoming_graph_data: Any | None = None,
**attr)
+
+
+
+ +Expand source code + +
class TokenGraph(DiGraph):
+    def __init__(
+        self,
+        name: str = 'TokenGraph',
+        enable_logging: bool = True,
+        incoming_graph_data: Any | None = None,
+        **attr,
+    ) -> None:
+        super().__init__(incoming_graph_data, **attr)
+        # logging of different actions
+        self.logging = enable_logging
+        # properties
+        self._name = name
+        # directed and undirected graph data
+        self._directed = self
+        self._metadata_directed: dict[str, float] = {}
+        self._undirected: Graph | None = None
+        self._metadata_undirected: dict[str, float] = {}
+        # indicate rescaled weights
+        self.rescaled_weights: bool = False
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
+    def __str__(self) -> str:
+        return (
+            f'TokenGraph(name: {self.name}, number of nodes: '
+            f'{len(self.nodes)}, number of edges: '
+            f'{len(self.edges)})'
+        )
+
+    def disable_logging(self) -> None:
+        self.logging = False
+
+    # !! only used to verify that saving was done correctly
+    """
+    def __key(self) -> tuple[Hashable, ...]:
+        return (self.name, tuple(self.nodes), tuple(self.edges))
+    
+    def __hash__(self) -> int:
+        return hash(self.__key())
+    """
+
+    def copy(self) -> Self:
+        """returns a (deep) copy of the graph
+
+        Returns
+        -------
+        Self
+            deep copy of the graph
+        """
+        return copy.deepcopy(self)
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def directed(self) -> Self:
+        return self._directed
+
+    @property
+    def undirected(self) -> Graph:
+        if self._undirected is None:
+            self._undirected = self.to_undirected(inplace=False, logging=False)
+
+        return self._undirected
+
+    @property
+    def metadata_directed(self) -> dict[str, float]:
+        return self._metadata_directed
+
+    @property
+    def metadata_undirected(self) -> dict[str, float]:
+        return self._metadata_undirected
+
+    @overload
+    def to_undirected(
+        self,
+        inplace: Literal[True] = ...,
+        logging: bool | None = ...,
+    ) -> None: ...
+
+    @overload
+    def to_undirected(
+        self,
+        inplace: Literal[False],
+        logging: bool | None = ...,
+    ) -> Graph: ...
+
+    def to_undirected(
+        self,
+        inplace: bool = True,
+        logging: bool | None = None,
+    ) -> Graph | None:
+        if logging is None:
+            logging = self.logging
+        # cast to integer edge weights only if edges were not rescaled previously
+        cast_int: bool = True
+        if self.rescaled_weights:
+            cast_int = False
+
+        self._undirected = convert_graph_to_undirected(
+            graph=self,
+            logging=logging,
+            cast_int=cast_int,
+        )
+        self._metadata_undirected = get_graph_metadata(graph=self._undirected, logging=False)
+        if not inplace:
+            return self._undirected
+
+    def update_metadata(
+        self,
+        logging: bool | None = None,
+    ) -> None:
+        if logging is None:
+            logging = self.logging
+
+        self._metadata_directed = get_graph_metadata(graph=self, logging=logging)
+        if self._undirected is not None:
+            self._metadata_undirected = get_graph_metadata(
+                graph=self._undirected, logging=logging
+            )
+
+    def rescale_edge_weights(
+        self,
+    ) -> tuple[TokenGraph, Graph]:
+        """generate new instances of the directed and undirected TokenGraph with
+        rescaled edge weights
+        Only this method ensures that undirected graphs are scaled properly. If
+        the underlying `to_undirected` method of the directed and rescaled
+        TokenGraph instance is called the weights are not rescaled again. Thus,
+        the maximum edge weight can exceed the theoretical maximum value of 1. To
+        ensure consistent behaviour across different applications of the conversion to
+        undirected graphs new instances are returned, especially for the undirected
+        graph.
+        In contrast, the new directed TokenGraph contains an undirected version without
+        rescaling of the weights. Therefore, this undirected version differs from the version
+        returned by this method.
+
+        Returns
+        -------
+        tuple[TokenGraph, Graph]
+            directed and undirected instances
+        """
+        self.to_undirected(inplace=True, logging=False)
+        token_graph = rescale_edge_weights(self.directed)
+        token_graph.rescaled_weights = True
+        token_graph.update_metadata(logging=False)
+        undirected = rescale_edge_weights(self.undirected)
+
+        return token_graph, undirected
+
+    def perform_static_analysis(self) -> None:
+        """calculate different metrics directly on the data of the underlying graphs
+        (directed and undirected)
+
+        Current operations:
+            - adding weighted degree
+        """
+        add_weighted_degree(self)
+        add_weighted_degree(self.undirected)
+
+    def _save_prepare(
+        self,
+        path: Path,
+        filename: str | None = None,
+    ) -> Path:
+        if filename is not None:
+            saving_path = path.joinpath(f'{filename}')
+        else:
+            saving_path = path.joinpath(f'{self.name}')
+
+        return saving_path
+
+    def to_GraphML(
+        self,
+        path: Path,
+        filename: str | None = None,
+        directed: bool = False,
+    ) -> None:
+        """save one of the stored graphs to GraphML format on disk,
+
+        Parameters
+        ----------
+        path : Path
+            target path for saving the file
+        filename : str | None, optional
+            filename to be given, by default None
+        directed : bool, optional
+            indicator whether directed or undirected graph
+            should be exported, by default False (undirected)
+
+        Raises
+        ------
+        ValueError
+            undirected graph should be exported but is not available
+        """
+        saving_path = self._save_prepare(path=path, filename=filename)
+
+        if directed:
+            target_graph = self.directed
+        else:
+            target_graph = self.undirected
+
+        save_to_GraphML(graph=target_graph, saving_path=saving_path)
+
+    def to_pickle(
+        self,
+        path: Path,
+        filename: str | None = None,
+    ) -> None:
+        """save whole TokenGraph object as pickle file
+
+        Parameters
+        ----------
+        path : Path
+            target path for saving the file
+        filename : str | None, optional
+            filename to be given, by default None
+        """
+        saving_path = self._save_prepare(path=path, filename=filename)
+        saving_path = saving_path.with_suffix('.pkl')
+        save_pickle(obj=self, path=saving_path)
+
+    @classmethod
+    def from_file(
+        cls,
+        path: Path,
+        node_type_graphml: type = str,
+    ) -> Self:
+        # !! no validity checks for pickle files
+        # !! GraphML files not correct because not all properties
+        # !! are parsed correctly
+        # TODO REWORK
+        match path.suffix:
+            case '.graphml':
+                graph = typing.cast(Self, nx.read_graphml(path, node_type=node_type_graphml))
+                logger.info('Successfully loaded graph from GraphML file %s.', path)
+            case '.pkl' | '.pickle':
+                graph = typing.cast(Self, load_pickle(path))
+                logger.info('Successfully loaded graph from pickle file %s.', path)
+            case _:
+                raise ValueError('File format not supported.')
+
+        return graph
+
+

Base class for directed graphs.

+

A DiGraph stores nodes and edges with optional data, or attributes.

+

DiGraphs hold directed edges. +Self loops are allowed but multiple +(parallel) edges are not.

+

Nodes can be arbitrary (hashable) Python objects with optional +key/value attributes. By convention None is not used as a node.

+

Edges are represented as links between nodes with optional +key/value attributes.

+

Parameters

+
+
incoming_graph_data : input graph (optional, default: None)
+
Data to initialize graph. If None (default) an empty +graph is created. +The data can be any format that is supported +by the to_networkx_graph() function, currently including edge list, +dict of dicts, dict of lists, NetworkX graph, 2D NumPy array, SciPy +sparse matrix, or PyGraphviz graph.
+
attr : keyword arguments, optional (default= no attributes)
+
Attributes to add to graph as key=value pairs.
+
+

See Also

+

Graph +MultiGraph +MultiDiGraph

+

Examples

+

Create an empty graph structure (a "null graph") with no nodes and +no edges.

+
>>> G = nx.DiGraph()
+
+

G can be grown in several ways.

+

Nodes:

+

Add one node at a time:

+
>>> G.add_node(1)
+
+

Add the nodes from any container (a list, dict, set or +even the lines from a file or the nodes from another graph).

+
>>> G.add_nodes_from([2, 3])
+>>> G.add_nodes_from(range(100, 110))
+>>> H = nx.path_graph(10)
+>>> G.add_nodes_from(H)
+
+

In addition to strings and integers any hashable Python object +(except None) can represent a node, e.g. a customized node object, +or even another Graph.

+
>>> G.add_node(H)
+
+

Edges:

+

G can also be grown by adding edges.

+

Add one edge,

+
>>> G.add_edge(1, 2)
+
+

a list of edges,

+
>>> G.add_edges_from([(1, 2), (1, 3)])
+
+

or a collection of edges,

+
>>> G.add_edges_from(H.edges)
+
+

If some edges connect nodes not yet in the graph, the nodes +are added automatically. +There are no errors when adding +nodes or edges that already exist.

+

Attributes:

+

Each graph, node, and edge can hold key/value attribute pairs +in an associated attribute dictionary (the keys must be hashable). +By default these are empty, but can be added or changed using +add_edge, add_node or direct manipulation of the attribute +dictionaries named graph, node and edge respectively.

+
>>> G = nx.DiGraph(day="Friday")
+>>> G.graph
+{'day': 'Friday'}
+
+

Add node attributes using add_node(), add_nodes_from() or G.nodes

+
>>> G.add_node(1, time="5pm")
+>>> G.add_nodes_from([3], time="2pm")
+>>> G.nodes[1]
+{'time': '5pm'}
+>>> G.nodes[1]["room"] = 714
+>>> del G.nodes[1]["room"]  # remove attribute
+>>> list(G.nodes(data=True))
+[(1, {'time': '5pm'}), (3, {'time': '2pm'})]
+
+

Add edge attributes using add_edge(), add_edges_from(), subscript +notation, or G.edges.

+
>>> G.add_edge(1, 2, weight=4.7)
+>>> G.add_edges_from([(3, 4), (4, 5)], color="red")
+>>> G.add_edges_from([(1, 2, {"color": "blue"}), (2, 3, {"weight": 8})])
+>>> G[1][2]["weight"] = 4.7
+>>> G.edges[1, 2]["weight"] = 4
+
+

Warning: we protect the graph data structure by making G.edges[1, 2] a +read-only dict-like structure. However, you can assign to attributes +in e.g. G.edges[1, 2]. Thus, use 2 sets of brackets to add/change +data attributes: G.edges[1, 2]['weight'] = 4 +(For multigraphs: MG.edges[u, v, key][name] = value).

+

Shortcuts:

+

Many common graph features allow python syntax to speed reporting.

+
>>> 1 in G  # check if node in graph
+True
+>>> [n for n in G if n < 3]  # iterate through nodes
+[1, 2]
+>>> len(G)  # number of nodes in graph
+5
+
+

Often the best way to traverse all edges of a graph is via the neighbors. +The neighbors are reported as an adjacency-dict G.adj or G.adjacency()

+
>>> for n, nbrsdict in G.adjacency():
+...     for nbr, eattr in nbrsdict.items():
+...         if "weight" in eattr:
+...             # Do something useful with the edges
+...             pass
+
+

But the edges reporting object is often more convenient:

+
>>> for u, v, weight in G.edges(data="weight"):
+...     if weight is not None:
+...         # Do something useful with the edges
+...         pass
+
+

Reporting:

+

Simple graph information is obtained using object-attributes and methods. +Reporting usually provides views instead of containers to reduce memory +usage. The views update as the graph is updated similarly to dict-views. +The objects nodes, edges and adj provide access to data attributes +via lookup (e.g. nodes[n], edges[u, v], adj[u][v]) and iteration +(e.g. nodes.items(), nodes.data('color'), +nodes.data('color', default='blue') and similarly for edges) +Views exist for nodes, edges, neighbors()/adj and degree.

+

For details on these and other miscellaneous methods, see below.

+

Subclasses (Advanced):

+

The Graph class uses a dict-of-dict-of-dict data structure. +The outer dict (node_dict) holds adjacency information keyed by node. +The next dict (adjlist_dict) represents the adjacency information and holds +edge data keyed by neighbor. +The inner dict (edge_attr_dict) represents +the edge data and holds edge attribute values keyed by attribute names.

+

Each of these three dicts can be replaced in a subclass by a user defined +dict-like object. In general, the dict-like features should be +maintained but extra features can be added. To replace one of the +dicts create a new graph class by changing the class(!) variable +holding the factory for that dict-like structure. The variable names are +node_dict_factory, node_attr_dict_factory, adjlist_inner_dict_factory, +adjlist_outer_dict_factory, edge_attr_dict_factory and graph_attr_dict_factory.

+

node_dict_factory : function, (default: dict) +Factory function to be used to create the dict containing node +attributes, keyed by node id. +It should require no arguments and return a dict-like object

+

node_attr_dict_factory: function, (default: dict) +Factory function to be used to create the node attribute +dict which holds attribute values keyed by attribute name. +It should require no arguments and return a dict-like object

+

adjlist_outer_dict_factory : function, (default: dict) +Factory function to be used to create the outer-most dict +in the data structure that holds adjacency info keyed by node. +It should require no arguments and return a dict-like object.

+

adjlist_inner_dict_factory : function, optional (default: dict) +Factory function to be used to create the adjacency list +dict which holds edge data keyed by neighbor. +It should require no arguments and return a dict-like object

+

edge_attr_dict_factory : function, optional (default: dict) +Factory function to be used to create the edge attribute +dict which holds attribute values keyed by attribute name. +It should require no arguments and return a dict-like object.

+

graph_attr_dict_factory : function, (default: dict) +Factory function to be used to create the graph attribute +dict which holds attribute values keyed by attribute name. +It should require no arguments and return a dict-like object.

+

Typically, if your extension doesn't impact the data structure all +methods will inherited without issue except: to_directed/to_undirected. +By default these methods create a DiGraph/Graph class and you probably +want them to create your extension of a DiGraph/Graph. To facilitate +this we define two class variables that you can set in your subclass.

+

to_directed_class : callable, (default: DiGraph or MultiDiGraph) +Class to create a new graph structure in the to_directed method. +If None, a NetworkX class (DiGraph or MultiDiGraph) is used.

+

to_undirected_class : callable, (default: Graph or MultiGraph) +Class to create a new graph structure in the to_undirected method. +If None, a NetworkX class (Graph or MultiGraph) is used.

+

Subclassing Example

+

Create a low memory graph class that effectively disallows edge +attributes by using a single attribute dict for all edges. +This reduces the memory used, but you lose edge attributes.

+
>>> class ThinGraph(nx.Graph):
+...     all_edge_dict = {"weight": 1}
+...
+...     def single_edge_dict(self):
+...         return self.all_edge_dict
+...
+...     edge_attr_dict_factory = single_edge_dict
+>>> G = ThinGraph()
+>>> G.add_edge(2, 1)
+>>> G[2][1]
+{'weight': 1}
+>>> G.add_edge(2, 2)
+>>> G[2][1] is G[2][2]
+True
+
+

Initialize a graph with edges, name, or graph attributes.

+

Parameters

+
+
incoming_graph_data : input graph (optional, default: None)
+
Data to initialize graph. +If None (default) an empty +graph is created. +The data can be an edge list, or any +NetworkX graph object. +If the corresponding optional Python +packages are installed the data can also be a 2D NumPy array, a +SciPy sparse array, or a PyGraphviz graph.
+
attr : keyword arguments, optional (default= no attributes)
+
Attributes to add to graph as key=value pairs.
+
+

See Also

+

convert

+

Examples

+
>>> G = nx.Graph()  # or DiGraph, MultiGraph, MultiDiGraph, etc
+>>> G = nx.Graph(name="my graph")
+>>> e = [(1, 2), (2, 3), (3, 4)]  # list of edges
+>>> G = nx.Graph(e)
+
+

Arbitrary graph attribute pairs (key=value) may be assigned

+
>>> G = nx.Graph(e, day="Friday")
+>>> G.graph
+{'day': 'Friday'}
+
+

Ancestors

+
    +
  • networkx.classes.digraph.DiGraph
  • +
  • networkx.classes.graph.Graph
  • +
+

Static methods

+
+
+def from_file(path: Path, node_type_graphml: type = builtins.str) ‑> Self +
+
+
+
+
+

Instance variables

+
+
prop directed : Self
+
+
+ +Expand source code + +
@property
+def directed(self) -> Self:
+    return self._directed
+
+
+
+
prop metadata_directed : dict[str, float]
+
+
+ +Expand source code + +
@property
+def metadata_directed(self) -> dict[str, float]:
+    return self._metadata_directed
+
+
+
+
prop metadata_undirected : dict[str, float]
+
+
+ +Expand source code + +
@property
+def metadata_undirected(self) -> dict[str, float]:
+    return self._metadata_undirected
+
+
+
+
prop name : str
+
+
+ +Expand source code + +
@property
+def name(self) -> str:
+    return self._name
+
+

String identifier of the graph.

+

This graph attribute appears in the attribute dict G.graph +keyed by the string "name". as well as an attribute (technically +a property) G.name. This is entirely user controlled.

+
+
prop undirected : Graph
+
+
+ +Expand source code + +
@property
+def undirected(self) -> Graph:
+    if self._undirected is None:
+        self._undirected = self.to_undirected(inplace=False, logging=False)
+
+    return self._undirected
+
+
+
+
+

Methods

+
+
+def copy(self) ‑> Self +
+
+
+ +Expand source code + +
def copy(self) -> Self:
+    """returns a (deep) copy of the graph
+
+    Returns
+    -------
+    Self
+        deep copy of the graph
+    """
+    return copy.deepcopy(self)
+
+

returns a (deep) copy of the graph

+

Returns

+
+
Self
+
deep copy of the graph
+
+
+
+def disable_logging(self) ‑> None +
+
+
+ +Expand source code + +
def disable_logging(self) -> None:
+    self.logging = False
+
+
+
+
+def perform_static_analysis(self) ‑> None +
+
+
+ +Expand source code + +
def perform_static_analysis(self) -> None:
+    """calculate different metrics directly on the data of the underlying graphs
+    (directed and undirected)
+
+    Current operations:
+        - adding weighted degree
+    """
+    add_weighted_degree(self)
+    add_weighted_degree(self.undirected)
+
+

calculate different metrics directly on the data of the underlying graphs +(directed and undirected)

+

Current operations: +- adding weighted degree

+
+
+def rescale_edge_weights(self) ‑> tuple[TokenGraph, networkx.classes.graph.Graph] +
+
+
+ +Expand source code + +
def rescale_edge_weights(
+    self,
+) -> tuple[TokenGraph, Graph]:
+    """generate new instances of the directed and undirected TokenGraph with
+    rescaled edge weights
+    Only this method ensures that undirected graphs are scaled properly. If
+    the underlying `to_undirected` method of the directed and rescaled
+    TokenGraph instance is called the weights are not rescaled again. Thus,
+    the maximum edge weight can exceed the theoretical maximum value of 1. To
+    ensure consistent behaviour across different applications of the conversion to
+    undirected graphs new instances are returned, especially for the undirected
+    graph.
+    In contrast, the new directed TokenGraph contains an undirected version without
+    rescaling of the weights. Therefore, this undirected version differs from the version
+    returned by this method.
+
+    Returns
+    -------
+    tuple[TokenGraph, Graph]
+        directed and undirected instances
+    """
+    self.to_undirected(inplace=True, logging=False)
+    token_graph = rescale_edge_weights(self.directed)
+    token_graph.rescaled_weights = True
+    token_graph.update_metadata(logging=False)
+    undirected = rescale_edge_weights(self.undirected)
+
+    return token_graph, undirected
+
+

generate new instances of the directed and undirected TokenGraph with +rescaled edge weights +Only this method ensures that undirected graphs are scaled properly. If +the underlying to_undirected method of the directed and rescaled +TokenGraph instance is called the weights are not rescaled again. Thus, +the maximum edge weight can exceed the theoretical maximum value of 1. To +ensure consistent behaviour across different applications of the conversion to +undirected graphs new instances are returned, especially for the undirected +graph. +In contrast, the new directed TokenGraph contains an undirected version without +rescaling of the weights. Therefore, this undirected version differs from the version +returned by this method.

+

Returns

+
+
tuple[TokenGraph, Graph]
+
directed and undirected instances
+
+
+
+def to_GraphML(self, path: Path, filename: str | None = None, directed: bool = False) ‑> None +
+
+
+ +Expand source code + +
def to_GraphML(
+    self,
+    path: Path,
+    filename: str | None = None,
+    directed: bool = False,
+) -> None:
+    """save one of the stored graphs to GraphML format on disk,
+
+    Parameters
+    ----------
+    path : Path
+        target path for saving the file
+    filename : str | None, optional
+        filename to be given, by default None
+    directed : bool, optional
+        indicator whether directed or undirected graph
+        should be exported, by default False (undirected)
+
+    Raises
+    ------
+    ValueError
+        undirected graph should be exported but is not available
+    """
+    saving_path = self._save_prepare(path=path, filename=filename)
+
+    if directed:
+        target_graph = self.directed
+    else:
+        target_graph = self.undirected
+
+    save_to_GraphML(graph=target_graph, saving_path=saving_path)
+
+

Save one of the stored graphs in GraphML format on disk.

+

Parameters

+
+
path : Path
+
target path for saving the file
+
filename : str | None, optional
+
filename to be given, by default None
+
directed : bool, optional
+
indicator whether directed or undirected graph +should be exported, by default False (undirected)
+
+

Raises

+
+
ValueError
+
undirected graph should be exported but is not available
+
+
+
+def to_pickle(self, path: Path, filename: str | None = None) ‑> None +
+
+
+ +Expand source code + +
def to_pickle(
+    self,
+    path: Path,
+    filename: str | None = None,
+) -> None:
+    """save whole TokenGraph object as pickle file
+
+    Parameters
+    ----------
+    path : Path
+        target path for saving the file
+    filename : str | None, optional
+        filename to be given, by default None
+    """
+    saving_path = self._save_prepare(path=path, filename=filename)
+    saving_path = saving_path.with_suffix('.pkl')
+    save_pickle(obj=self, path=saving_path)
+
+

save whole TokenGraph object as pickle file

+

Parameters

+
+
path : Path
+
target path for saving the file
+
filename : str | None, optional
+
filename to be given, by default None
+
+
+
+def to_undirected(self, inplace: bool = True, logging: bool | None = None) ‑> networkx.classes.graph.Graph | None +
+
+
+ +Expand source code + +
def to_undirected(
+    self,
+    inplace: bool = True,
+    logging: bool | None = None,
+) -> Graph | None:
+    if logging is None:
+        logging = self.logging
+    # cast to integer edge weights only if edges were not rescaled previously
+    cast_int: bool = True
+    if self.rescaled_weights:
+        cast_int = False
+
+    self._undirected = convert_graph_to_undirected(
+        graph=self,
+        logging=logging,
+        cast_int=cast_int,
+    )
+    self._metadata_undirected = get_graph_metadata(graph=self._undirected, logging=False)
+    if not inplace:
+        return self._undirected
+
+

Returns an undirected representation of the digraph.

+

Parameters

+
+
reciprocal : bool (optional)
+
 
+
If True only keep edges that appear in both directions
+
in the original digraph.
+
as_view : bool (optional, default=False)
+
 
+
+

If True return an undirected view of the original directed graph.

+

Returns

+
+
G : Graph
+
An undirected graph with the same name and nodes and +with edge (u, v, data) if either (u, v, data) or (v, u, data) +is in the digraph. +If both edges exist in digraph and +their edge data is different, only one edge is created +with an arbitrary choice of which edge data to use. +You must check and correct for this manually if desired.
+
+

See Also

+

Graph, copy, add_edge, add_edges_from

+

Notes

+

If edges in both directions (u, v) and (v, u) exist in the +graph, attributes for the new undirected edge will be a combination of +the attributes of the directed edges. +The edge data is updated +in the (arbitrary) order that the edges are encountered. +For +more customized control of the edge attributes use add_edge().

+

This returns a "deepcopy" of the edge, node, and +graph attributes which attempts to completely copy +all of the data and references.

+

This is in contrast to the similar G=DiGraph(D) which returns a +shallow copy of the data.

+

See the Python copy module for more information on shallow +and deep copies, https://docs.python.org/3/library/copy.html.

+

Warning: If you have subclassed DiGraph to use dict-like objects +in the data structure, those changes do not transfer to the +Graph created by this method.

+

Examples

+
>>> G = nx.path_graph(2)  # or MultiGraph, etc
+>>> H = G.to_directed()
+>>> list(H.edges)
+[(0, 1), (1, 0)]
+>>> G2 = H.to_undirected()
+>>> list(G2.edges)
+[(0, 1)]
+
+
+
+def update_metadata(self, logging: bool | None = None) ‑> None +
+
+
+ +Expand source code + +
def update_metadata(
+    self,
+    logging: bool | None = None,
+) -> None:
+    if logging is None:
+        logging = self.logging
+
+    self._metadata_directed = get_graph_metadata(graph=self, logging=logging)
+    if self._undirected is not None:
+        self._metadata_undirected = get_graph_metadata(
+            graph=self._undirected, logging=logging
+        )
+
+
+
+
+
+
+
+
+ +
diff --git a/docs/lang_main/analysis/index.html b/docs/lang_main/analysis/index.html
new file mode 100644
index 0000000..c916f52
--- /dev/null
+++ b/docs/lang_main/analysis/index.html
@@ -0,0 +1,98 @@

lang_main.analysis API documentation
+ + +
diff --git a/docs/lang_main/analysis/preprocessing.html b/docs/lang_main/analysis/preprocessing.html
new file mode 100644
index 0000000..ebd5e22
--- /dev/null
+++ b/docs/lang_main/analysis/preprocessing.html
@@ -0,0 +1,451 @@

lang_main.analysis.preprocessing API documentation
+
+
+

Module lang_main.analysis.preprocessing

+
+
+
+
+
+
+
+
+

Functions

+
+
+def analyse_feature(data: DataFrame, target_feature: str) ‑> tuple[pandas.core.frame.DataFrame] +
+
+
+ +Expand source code + +
def analyse_feature(
+    data: DataFrame,
+    target_feature: str,
+) -> tuple[DataFrame]:
+    # feature columns
+    feature_entries = data[target_feature]
+    logger.info(
+        'Number of entries for feature >>%s<<: %d', target_feature, len(feature_entries)
+    )
+    # obtain unique entries
+    unique_feature_entries = feature_entries.unique()
+
+    # prepare result DataFrame
+    cols = ['batched_idxs', 'entry', 'len', 'num_occur', 'assoc_obj_ids', 'num_assoc_obj_ids']
+    result_df = pd.DataFrame(columns=cols)
+
+    for entry in tqdm(unique_feature_entries, mininterval=1.0):
+        len_entry = len(entry)
+        filt = data[target_feature] == entry
+        temp = data[filt]
+        batched_idxs = temp.index.to_numpy()
+        assoc_obj_ids = temp['ObjektID'].unique()
+        assoc_obj_ids = np.sort(assoc_obj_ids, kind='stable')
+        num_assoc_obj_ids = len(assoc_obj_ids)
+        num_dupl = filt.sum()
+
+        conc_df = pd.DataFrame(
+            data=[
+                [batched_idxs, entry, len_entry, num_dupl, assoc_obj_ids, num_assoc_obj_ids]
+            ],
+            columns=cols,
+        )
+
+        result_df = pd.concat([result_df, conc_df], ignore_index=True)
+
+    result_df = result_df.sort_values(
+        by=['num_occur', 'len'], ascending=[False, False]
+    ).copy()
+
+    return (result_df,)
+
+
+
+
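A minimal usage sketch (not part of the generated source above): it assumes a DataFrame that contains the target feature column plus an 'ObjektID' column, since the function groups duplicate entries and collects the associated object IDs; the sample data is purely illustrative.

>>> import pandas as pd
>>> from lang_main.analysis.preprocessing import analyse_feature
>>> df = pd.DataFrame(
...     {
...         'VorgangsBeschreibung': ['Pumpe defekt', 'Pumpe defekt', 'Filter getauscht'],
...         'ObjektID': [11, 12, 11],
...     }
... )
>>> (result,) = analyse_feature(df, target_feature='VorgangsBeschreibung')
>>> list(result.columns)
['batched_idxs', 'entry', 'len', 'num_occur', 'assoc_obj_ids', 'num_assoc_obj_ids']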
+def load_raw_data(path: Path,
date_cols: Collection[str] = ('VorgangsDatum', 'ErledigungsDatum', 'Arbeitsbeginn', 'ErstellungsDatum')) ‑> tuple[pandas.core.frame.DataFrame]
+
+
+
+ +Expand source code + +
def load_raw_data(
+    path: Path,
+    date_cols: Collection[str] = (
+        'VorgangsDatum',
+        'ErledigungsDatum',
+        'Arbeitsbeginn',
+        'ErstellungsDatum',
+    ),
+) -> tuple[DataFrame]:
+    """load IHM dataset with standard structure
+
+    Parameters
+    ----------
+    path : Path
+        path to dataset file, usually a CSV file
+    date_cols : Collection[str], optional
+        columns which contain dates and are parsed as such,
+        by default (
+            'VorgangsDatum',
+            'ErledigungsDatum',
+            'Arbeitsbeginn',
+            'ErstellungsDatum',
+        )
+
+    Returns
+    -------
+    DataFrame
+        raw dataset as DataFrame
+    """
+    # load dataset
+    date_cols = list(date_cols)
+    data = pd.read_csv(
+        filepath_or_buffer=path,
+        sep=';',
+        encoding='cp1252',
+        parse_dates=list(date_cols),
+        dayfirst=True,
+    )
+    logger.info('Loaded dataset successfully.')
+    logger.info(
+        (
+            f'Dataset properties: number of entries: {len(data)}, '
+            f'number of features {len(data.columns)}'
+        )
+    )
+    return (data,)
+
+

load IHM dataset with standard structure

+

Parameters

+
+
path : Path
+
path to dataset file, usually a CSV file
+
date_cols : Collection[str], optional
+
columns which contain dates and are parsed as such, +by default ( +'VorgangsDatum', +'ErledigungsDatum', +'Arbeitsbeginn', +'ErstellungsDatum', +)
+
+

Returns

+
+
DataFrame
+
raw dataset as DataFrame
+
+
+
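A hedged usage sketch: the path below is a placeholder, and the file is expected to be a semicolon-separated CSV in cp1252 encoding containing the default date columns listed above.

>>> from pathlib import Path
>>> from lang_main.analysis.preprocessing import load_raw_data
>>> (raw,) = load_raw_data(Path('data/ihm_export.csv'))  # placeholder path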
+def merge_similarity_duplicates(data: DataFrame, model: SentenceTransformer, cos_sim_threshold: float) ‑> tuple[pandas.core.frame.DataFrame] +
+
+
+ +Expand source code + +
def merge_similarity_duplicates(
+    data: DataFrame,
+    model: SentenceTransformer,
+    cos_sim_threshold: float,
+) -> tuple[DataFrame]:
+    logger.info('Start merging of similarity candidates...')
+
+    # data
+    merged_data = data.copy()
+    model_input = merged_data['entry']
+    candidates_idx = candidates_by_index(
+        data_model_input=model_input,
+        model=model,
+        cos_sim_threshold=cos_sim_threshold,
+    )
+    # graph of similar ids
+    similar_id_graph, _ = similar_index_connection_graph(candidates_idx)
+
+    for similar_id_group in similar_index_groups(similar_id_graph):
+        similar_id_group = list(similar_id_group)
+        similar_data = merged_data.loc[similar_id_group, :]
+        # keep first entry with max number occurrences, then number of
+        # associated objects, then length of entry
+        similar_data = similar_data.sort_values(
+            by=['num_occur', 'num_assoc_obj_ids', 'len'],
+            ascending=[False, False, False],
+        )
+        # merge information to first entry
+        data_idx = cast(PandasIndex, similar_data.index[0])
+        similar_data.at[data_idx, 'num_occur'] = similar_data['num_occur'].sum()
+        assoc_obj_ids = similar_data['assoc_obj_ids'].to_numpy()
+        assoc_obj_ids = np.concatenate(assoc_obj_ids)
+        assoc_obj_ids = np.unique(assoc_obj_ids)
+        similar_data.at[data_idx, 'assoc_obj_ids'] = assoc_obj_ids
+        similar_data.at[data_idx, 'num_assoc_obj_ids'] = len(assoc_obj_ids)
+        # remaining indices, should be removed
+        similar_id_group.remove(data_idx)
+        merged_similar_data = similar_data.drop(index=similar_id_group)
+        # update entry in main dataset, drop remaining entries
+        merged_data.update(merged_similar_data)
+        merged_data = merged_data.drop(index=similar_id_group)
+
+    logger.info('Similarity candidates merged successfully.')
+
+    return (merged_data,)
+
+
+
+
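A sketch of how this step might be chained after analyse_feature, assuming raw is the DataFrame loaded above and sentence_model is an already loaded SentenceTransformer (e.g. via lang_main.model_loader); the threshold value is illustrative.

>>> from lang_main.analysis.preprocessing import analyse_feature, merge_similarity_duplicates
>>> (feature_df,) = analyse_feature(raw, target_feature='VorgangsBeschreibung')
>>> (merged,) = merge_similarity_duplicates(
...     feature_df, model=sentence_model, cos_sim_threshold=0.8
... )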
+def numeric_pre_filter_feature(data: DataFrame, feature: str, bound_lower: int | None, bound_upper: int | None) ‑> tuple[pandas.core.frame.DataFrame] +
+
+
+ +Expand source code + +
def numeric_pre_filter_feature(
+    data: DataFrame,
+    feature: str,
+    bound_lower: int | None,
+    bound_upper: int | None,
+) -> tuple[DataFrame]:
+    """filter DataFrame for a given numerical feature regarding their bounds
+    bounds are inclusive: entries (bound_lower <= entry <= bound_upper) are retained
+
+    Parameters
+    ----------
+    data : DataFrame
+        DataFrame to filter
+    feature : str
+        feature name to filter
+    bound_lower : int | None
+        lower bound of values to retain
+    bound_upper : int | None
+        upper bound of values to retain
+
+    Returns
+    -------
+    tuple[DataFrame]
+        filtered DataFrame
+
+    Raises
+    ------
+    ValueError
+        if no bounds are provided, at least one bound must be set
+    """
+    if bound_lower is None and bound_upper is None:
+        raise ValueError('No bounds for filtering provided')
+
+    data = data.copy()
+    if bound_lower is None:
+        bound_lower = cast(int, data[feature].min())
+    if bound_upper is None:
+        bound_upper = cast(int, data[feature].max())
+
+    filter_lower = data[feature] >= bound_lower
+    filter_upper = data[feature] <= bound_upper
+    filter = filter_lower & filter_upper
+
+    data = data.loc[filter]
+
+    return (data,)
+
+

filter a DataFrame on a given numerical feature by lower and upper bounds; bounds are inclusive: entries with (bound_lower <= entry <= bound_upper) are retained

+

Parameters

+
+
data : DataFrame
+
DataFrame to filter
+
feature : str
+
feature name to filter
+
bound_lower : int | None
+
lower bound of values to retain
+
bound_upper : int | None
+
upper bound of values to retain
+
+

Returns

+
+
tuple[DataFrame]
+
filtered DataFrame
+
+

Raises

+
+
ValueError
+
if no bounds are provided, at least one bound must be set
+
+
+
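A hedged sketch filtering on the 'num_occur' column produced by analyse_feature (feature_df as in the sketch above); the bound values are illustrative.

>>> from lang_main.analysis.preprocessing import numeric_pre_filter_feature
>>> (frequent,) = numeric_pre_filter_feature(
...     feature_df, feature='num_occur', bound_lower=2, bound_upper=None
... )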
+def remove_NA(data: DataFrame, target_features: Collection[str] = ('VorgangsBeschreibung',)) ‑> tuple[pandas.core.frame.DataFrame] +
+
+
+ +Expand source code + +
def remove_NA(
+    data: DataFrame,
+    target_features: Collection[str] = ('VorgangsBeschreibung',),
+) -> tuple[DataFrame]:
+    """function to drop NA entries based on a subset of features to be analysed
+
+    Parameters
+    ----------
+    data : DataFrame
+        standard IHM dataset, perhaps pre-cleaned
+    target_features : Collection[str], optional
+        subset to analyse to define an NA entry, by default ('VorgangsBeschreibung',)
+
+    Returns
+    -------
+    DataFrame
+        dataset with removed NA entries for given subset of features
+    """
+    target_features = list(target_features)
+    wo_NA = data.dropna(axis=0, subset=target_features, ignore_index=True).copy()  # type: ignore
+    logger.info(
+        f'Removed NA entries for features >>{target_features}<< from dataset successfully.'
+    )
+
+    return (wo_NA,)
+
+

function to drop NA entries based on a subset of features to be analysed

+

Parameters

+
+
data : DataFrame
+
standard IHM dataset, perhaps pre-cleaned
+
target_features : Collection[str], optional
+
subset to analyse to define an NA entry, by default ('VorgangsBeschreibung',)
+
+

Returns

+
+
DataFrame
+
dataset with removed NA entries for given subset of features
+
+
+
+def remove_duplicates(data: DataFrame) ‑> tuple[pandas.core.frame.DataFrame] +
+
+
+ +Expand source code + +
def remove_duplicates(
+    data: DataFrame,
+) -> tuple[DataFrame]:
+    """removes duplicated entries over all features in the given dataset
+
+    Parameters
+    ----------
+    data : DataFrame
+        read data with standard structure
+
+    Returns
+    -------
+    DataFrame
+        dataset with removed duplicates over all features
+    """
+    # obtain info about duplicates over all features
+    duplicates_filt = data.duplicated()
+    logger.info(f'Number of duplicates over all features: {duplicates_filt.sum()}')
+    # drop duplicates
+    wo_duplicates = data.drop_duplicates(ignore_index=True)
+    duplicates_subset: list[str] = [
+        'VorgangsID',
+        'ObjektID',
+    ]
+    duplicates_subset_filt = wo_duplicates.duplicated(subset=duplicates_subset)
+    logger.info(
+        (
+            'Number of duplicates over subset '
+            f'>>{duplicates_subset}<<: {duplicates_subset_filt.sum()}'
+        )
+    )
+    wo_duplicates = wo_duplicates.drop_duplicates(
+        subset=duplicates_subset, ignore_index=True
+    ).copy()
+    logger.info('Removed all duplicates from dataset successfully.')
+    logger.info(
+        'New Dataset properties: number of entries: %d, number of features %d',
+        len(wo_duplicates),
+        len(wo_duplicates.columns),
+    )
+
+    return (wo_duplicates,)
+
+

removes duplicated entries over all features in the given dataset

+

Parameters

+
+
data : DataFrame
+
read data with standard structure
+
+

Returns

+
+
DataFrame
+
dataset with removed duplicates over all features
+
+
+
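Taken together, a typical cleaning pass over the raw export might look like the following sketch, with raw as loaded earlier and the default feature names used throughout this module:

>>> from lang_main.analysis.preprocessing import remove_NA, remove_duplicates
>>> (without_na,) = remove_NA(raw, target_features=('VorgangsBeschreibung',))
>>> (cleaned,) = remove_duplicates(without_na)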
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/analysis/shared.html b/docs/lang_main/analysis/shared.html new file mode 100644 index 0000000..6a1f95e --- /dev/null +++ b/docs/lang_main/analysis/shared.html @@ -0,0 +1,273 @@ + + + + + + +lang_main.analysis.shared API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.analysis.shared

+
+
+
+
+
+
+
+
+

Functions

+
+
+def candidates_by_index(data_model_input: pandas.core.series.Series,
model: sentence_transformers.SentenceTransformer.SentenceTransformer,
cos_sim_threshold: float = 0.5) ‑> Iterator[tuple[int | numpy.int64, int | numpy.int64]]
+
+
+
+ +Expand source code + +
def candidates_by_index(
+    data_model_input: Series,
+    model: SentenceTransformer,
+    cos_sim_threshold: float = 0.5,
+) -> Iterator[tuple[PandasIndex, PandasIndex]]:
+    """function to filter candidate indices based on cosine similarity
+    using SentenceTransformer model in batch mode,
+    feed data as Series to retain information about indices of entries and
+    access them later in the original dataset
+
+    Parameters
+    ----------
+    data_model_input : Series
+        containing indices and text entries to process
+    model : SentenceTransformer
+        necessary SentenceTransformer model to encode text entries
+    cos_sim_threshold : float, optional
+        threshold for cosine similarity to filter candidates, by default 0.5
+
+    Yields
+    ------
+    Iterator[tuple[PandasIndex, PandasIndex]]
+        tuple of index pairs which meet the cosine similarity threshold
+    """
+    # embeddings
+    batch = cast(list[str], data_model_input.to_list())
+    embds = cast(
+        Tensor,
+        model.encode(
+            batch,
+            convert_to_numpy=False,
+            convert_to_tensor=True,
+            show_progress_bar=False,
+        ),
+    )
+    # cosine similarity
+    cos_sim = cast(npt.NDArray, model.similarity(embds, embds).numpy())
+    np.fill_diagonal(cos_sim, 0.0)
+    cos_sim = np.triu(cos_sim)
+    cos_sim_idx = np.argwhere(cos_sim >= cos_sim_threshold)
+
+    for idx_array in cos_sim_idx:
+        idx_pair = cast(
+            tuple[np.int64, np.int64], tuple(data_model_input.index[idx] for idx in idx_array)
+        )
+        yield idx_pair
+
+

yields pairs of candidate indices whose entries are similar, based on cosine similarity computed with a SentenceTransformer model in batch mode; the data is fed as a Series so that the original dataset indices are retained and can be accessed later

+

Parameters

+
+
+
data_model_input : Series
+
containing indices and text entries to process
+
model : SentenceTransformer
+
necessary SentenceTransformer model to encode text entries
+
cos_sim_threshold : float, optional
+
threshold for cosine similarity to filter candidates, by default 0.5
+
+

Yields

+
+
Iterator[tuple[PandasIndex, PandasIndex]]
+
tuple of index pairs which meet the cosine similarity threshold
+
+
+
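A hedged sketch: a small Series of texts is compared with an already loaded SentenceTransformer (sentence_model is assumed to exist) and the similar index pairs are collected; texts and threshold are illustrative.

>>> import pandas as pd
>>> from lang_main.analysis.shared import candidates_by_index
>>> texts = pd.Series(
...     ['Pumpe defekt', 'Pumpe kaputt', 'Filter getauscht'], index=[10, 20, 30]
... )
>>> pairs = list(
...     candidates_by_index(texts, model=sentence_model, cos_sim_threshold=0.8)
... )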
+def clean_string_slim(string: str) ‑> str +
+
+
+ +Expand source code + +
def clean_string_slim(string: str) -> str:
+    """mapping function to clean single string entries in a series (feature-wise)
+    of the dataset, used to be applied element-wise for string features
+
+    Parameters
+    ----------
+    string : str
+        dataset entry feature
+
+    Returns
+    -------
+    str
+        cleaned entry
+    """
+    # remove special chars
+    # string = pattern_escape_newline.sub(' ', string)
+    string = pattern_escape_seq.sub(' ', string)
+    string = pattern_repeated_chars.sub('', string)
+    # string = pattern_dates.sub('', string)
+    # dates are used for context, should not be removed at this stage
+    string = pattern_whitespace.sub(' ', string)
+    # remove whitespaces at the beginning and the end
+    string = string.strip()
+
+    return string
+
+

mapping function to clean a single string entry of the dataset (feature-wise); intended to be applied element-wise to string features

+

Parameters

+
+
string : str
+
dataset entry feature
+
+

Returns

+
+
str
+
cleaned entry
+
+
+
+def entry_wise_cleansing(data: pandas.core.frame.DataFrame,
target_features: Collection[str],
cleansing_func: Callable[[str], str] = <function clean_string_slim>) ‑> tuple[pandas.core.frame.DataFrame]
+
+
+
+ +Expand source code + +
def entry_wise_cleansing(
+    data: DataFrame,
+    target_features: Collection[str],
+    cleansing_func: Callable[[str], str] = clean_string_slim,
+) -> tuple[DataFrame]:
+    # apply given cleansing function to target feature
+    target_features = list(target_features)
+    data[target_features] = data[target_features].map(cleansing_func)
+    logger.info(
+        ('Successfully applied entry-wise cleansing procedure >>%s<< for features >>%s<<'),
+        cleansing_func.__name__,
+        target_features,
+    )
+    return (data,)
+
+
+
+
+def similar_index_connection_graph(similar_idx_pairs: Iterable[tuple[int | numpy.int64, int | numpy.int64]]) ‑> tuple[networkx.classes.graph.Graph, dict[str, float]] +
+
+
+ +Expand source code + +
def similar_index_connection_graph(
+    similar_idx_pairs: Iterable[tuple[PandasIndex, PandasIndex]],
+) -> tuple[Graph, dict[str, float]]:
+    # build index graph to obtain graph of connected (similar) indices
+    # use this graph to get connected components (indices which belong together)
+    # retain semantic connection on whole dataset
+    similar_id_graph = nx.Graph()
+    # for idx1, idx2 in similar_idx_pairs:
+    #     # inplace operation, parent/child do not really exist in undirected graph
+    #     update_graph(graph=similar_id_graph, parent=idx1, child=idx2)
+    update_graph(graph=similar_id_graph, batch=similar_idx_pairs)
+
+    graph_info = get_graph_metadata(graph=similar_id_graph, logging=False)
+
+    return similar_id_graph, graph_info
+
+
+
+
+def similar_index_groups(similar_id_graph: networkx.classes.graph.Graph) ‑> Iterator[tuple[int | numpy.int64, ...]] +
+
+
+ +Expand source code + +
def similar_index_groups(
+    similar_id_graph: Graph,
+) -> Iterator[tuple[PandasIndex, ...]]:
+    # groups of connected indices
+    ids_groups = cast(Iterator[set[PandasIndex]], nx.connected_components(G=similar_id_graph))
+
+    for id_group in ids_groups:
+        yield tuple(id_group)
+
+
+
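The two helpers above are typically chained: the index pairs from candidates_by_index become an undirected graph whose connected components are the groups of mutually similar entries. A minimal sketch, continuing from the candidates_by_index example above:

>>> from lang_main.analysis.shared import (
...     similar_index_connection_graph,
...     similar_index_groups,
... )
>>> graph, info = similar_index_connection_graph(pairs)
>>> groups = list(similar_index_groups(graph))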
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/analysis/timeline.html b/docs/lang_main/analysis/timeline.html new file mode 100644 index 0000000..13e2c56 --- /dev/null +++ b/docs/lang_main/analysis/timeline.html @@ -0,0 +1,333 @@ + + + + + + +lang_main.analysis.timeline API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.analysis.timeline

+
+
+
+
+
+
+
+
+

Functions

+
+
+def calc_delta_to_next_failure(data: pandas.core.frame.DataFrame,
date_feature: str = 'ErstellungsDatum',
name_delta_feature: str = 'Zeitspanne bis zum nächsten Ereignis [Tage]',
convert_to_days: bool = True) ‑> pandas.core.frame.DataFrame
+
+
+
+ +Expand source code + +
def calc_delta_to_next_failure(
+    data: DataFrameTLFiltered,
+    date_feature: str = 'ErstellungsDatum',
+    name_delta_feature: str = NAME_DELTA_FEAT_TO_NEXT_FAILURE,
+    convert_to_days: bool = True,
+) -> DataFrameTLFiltered:
+    data = data.copy()
+    last_val = data[date_feature].iat[-1]
+    shifted = data[date_feature].shift(-1, fill_value=last_val)
+    data[name_delta_feature] = shifted - data[date_feature]
+    data = data.sort_values(by=name_delta_feature, ascending=False)
+
+    if convert_to_days:
+        data[name_delta_feature] = data[name_delta_feature].dt.days
+
+    return data
+
+
+
+
+def calc_delta_to_repair(data: pandas.core.frame.DataFrame,
date_feature_start: str = 'ErstellungsDatum',
date_feature_end: str = 'ErledigungsDatum',
name_delta_feature: str = 'Zeitspanne bis zur Behebung [Tage]',
convert_to_days: bool = True) ‑> tuple[pandas.core.frame.DataFrame]
+
+
+
+ +Expand source code + +
def calc_delta_to_repair(
+    data: DataFrame,
+    date_feature_start: str = 'ErstellungsDatum',
+    date_feature_end: str = 'ErledigungsDatum',
+    name_delta_feature: str = NAME_DELTA_FEAT_TO_REPAIR,
+    convert_to_days: bool = True,
+) -> tuple[DataFrame]:
+    logger.info('Calculating time differences between start and end of operations...')
+    data = data.copy()
+    data[name_delta_feature] = data[date_feature_end] - data[date_feature_start]
+
+    if convert_to_days:
+        data[name_delta_feature] = data[name_delta_feature].dt.days
+
+    logger.info('Calculation successful.')
+
+    return (data,)
+
+
+
+
+def cleanup_descriptions(data: pandas.core.frame.DataFrame,
properties: Collection[str] = ('VorgangsBeschreibung', 'ErledigungsBeschreibung')) ‑> tuple[pandas.core.frame.DataFrame]
+
+
+
+ +Expand source code + +
def cleanup_descriptions(
+    data: DataFrame,
+    properties: Collection[str] = (
+        'VorgangsBeschreibung',
+        'ErledigungsBeschreibung',
+    ),
+) -> tuple[DataFrame]:
+    logger.info('Cleaning necessary descriptions...')
+    data = data.copy()
+    features = list(properties)
+    data[features] = data[features].fillna('N.V.')
+    (data,) = entry_wise_cleansing(data, target_features=features)
+    logger.info('Cleansing successful.')
+
+    return (data.copy(),)
+
+
+
+
+def filter_activities_per_obj_id(data: pandas.core.frame.DataFrame,
activity_feature: str = 'VorgangsTypName',
relevant_activity_types: Iterable[str] = ('Reparaturauftrag (Portal)',),
feature_obj_id: str = 'ObjektID',
threshold_num_activities: int = 1) ‑> tuple[pandas.core.frame.DataFrame, pandas.core.series.Series]
+
+
+
+ +Expand source code + +
def filter_activities_per_obj_id(
+    data: DataFrame,
+    activity_feature: str = 'VorgangsTypName',
+    relevant_activity_types: Iterable[str] = ('Reparaturauftrag (Portal)',),
+    feature_obj_id: str = 'ObjektID',
+    threshold_num_activities: int = 1,
+) -> tuple[DataFrame, Series]:
+    data = data.copy()
+    # filter only relevant activities, count occurrences for each ObjectID
+    logger.info('Filtering activities per ObjectID...')
+    filt_rel_activities = data[activity_feature].isin(relevant_activity_types)
+    data_filter_activities = data.loc[filt_rel_activities].copy()
+    num_activities_per_obj_id = cast(
+        Series, data_filter_activities[feature_obj_id].value_counts(sort=True)
+    )
+    # filter for ObjectIDs with more than given number of activities
+    filt_below_thresh = num_activities_per_obj_id <= threshold_num_activities
+    # index of series contains ObjectIDs
+    obj_ids_below_thresh = num_activities_per_obj_id[filt_below_thresh].index
+    filt_entries_below_thresh = data_filter_activities[feature_obj_id].isin(
+        obj_ids_below_thresh
+    )
+
+    num_activities_per_obj_id = num_activities_per_obj_id.loc[~filt_below_thresh]
+    data_filter_activities = data_filter_activities.loc[~filt_entries_below_thresh]
+    logger.info('Activities per ObjectID filtered successfully.')
+
+    return data_filter_activities, num_activities_per_obj_id
+
+
+
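A hedged sketch of this filter on a cleaned IHM DataFrame (cleaned is assumed to come from the preprocessing steps); with the defaults shown above it keeps only repair orders and drops ObjectIDs with at most one such activity.

>>> from lang_main.analysis.timeline import filter_activities_per_obj_id
>>> filtered, counts_per_obj = filter_activities_per_obj_id(
...     cleaned, threshold_num_activities=1
... )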
+
+def filter_timeline_cands(data: pandas.core.frame.DataFrame,
cands: dict[int, tuple[tuple[int | numpy.int64, ...], ...]],
obj_id: int,
entry_idx: int,
sort_feature: str = 'ErstellungsDatum') ‑> pandas.core.frame.DataFrame
+
+
+
+ +Expand source code + +
def filter_timeline_cands(
+    data: DataFrame,
+    cands: TimelineCandidates,
+    obj_id: ObjectID,
+    entry_idx: int,
+    sort_feature: str = 'ErstellungsDatum',
+) -> DataFrameTLFiltered:
+    data = data.copy()
+    cands_for_obj_id = cands[obj_id]
+    cands_choice = cands_for_obj_id[entry_idx]
+    data = data.loc[list(cands_choice)].sort_values(
+        by=sort_feature,
+        ascending=True,
+    )
+
+    return data
+
+
+
+
+def generate_model_input(data: pandas.core.frame.DataFrame,
target_feature_name: str = 'nlp_model_input',
model_input_features: Iterable[str] = ('VorgangsTypName', 'VorgangsArtText', 'VorgangsBeschreibung')) ‑> tuple[pandas.core.frame.DataFrame]
+
+
+
+ +Expand source code + +
def generate_model_input(
+    data: DataFrame,
+    target_feature_name: str = 'nlp_model_input',
+    model_input_features: Iterable[str] = (
+        'VorgangsTypName',
+        'VorgangsArtText',
+        'VorgangsBeschreibung',
+    ),
+) -> tuple[DataFrame]:
+    logger.info('Generating concatenation of model input features...')
+    data = data.copy()
+    model_input_features = list(model_input_features)
+    input_features = data[model_input_features].fillna('').astype(str)
+    data[target_feature_name] = input_features.apply(
+        lambda x: ' - '.join(x),
+        axis=1,
+    )
+    logger.info('Model input generated successfully.')
+
+    return (data,)
+
+
+
+
+def get_timeline_candidates(data: pandas.core.frame.DataFrame,
num_activities_per_obj_id: pandas.core.series.Series,
*,
model: sentence_transformers.SentenceTransformer.SentenceTransformer,
cos_sim_threshold: float,
feature_obj_id: str = 'ObjektID',
feature_obj_text: str = 'HObjektText',
model_input_feature: str = 'nlp_model_input') ‑> tuple[dict[int, tuple[tuple[int | numpy.int64, ...], ...]], dict[int, str]]
+
+
+
+ +Expand source code + +
def get_timeline_candidates(
+    data: DataFrame,
+    num_activities_per_obj_id: Series,
+    *,
+    model: SentenceTransformer,
+    cos_sim_threshold: float,
+    feature_obj_id: str = 'ObjektID',
+    feature_obj_text: str = 'HObjektText',
+    model_input_feature: str = 'nlp_model_input',
+) -> tuple[TimelineCandidates, dict[ObjectID, str]]:
+    logger.info('Obtaining timeline candidates...')
+    candidates = _get_timeline_candidates_index(
+        data=data,
+        num_activities_per_obj_id=num_activities_per_obj_id,
+        model=model,
+        cos_sim_threshold=cos_sim_threshold,
+        feature_obj_id=feature_obj_id,
+        model_input_feature=model_input_feature,
+    )
+    tl_candidates = _transform_timeline_candidates(candidates)
+    logger.info('Timeline candidates obtained successfully.')
+    # text mapping to obtain object descriptors
+    logger.info('Mapping ObjectIDs to their respective text descriptor...')
+    map_obj_text = _map_obj_id_to_texts(
+        data=data,
+        feature_obj_id=feature_obj_id,
+        feature_obj_text=feature_obj_text,
+    )
+    logger.info('ObjectIDs successfully mapped to text descriptors.')
+
+    return tl_candidates, map_obj_text
+
+
+
+
+def remove_non_relevant_obj_ids(data: pandas.core.frame.DataFrame,
thresh_unique_feat_per_id: int,
*,
feature_uniqueness: str = 'HObjektText',
feature_obj_id: str = 'ObjektID') ‑> tuple[pandas.core.frame.DataFrame]
+
+
+
+ +Expand source code + +
def remove_non_relevant_obj_ids(
+    data: DataFrame,
+    thresh_unique_feat_per_id: int,
+    *,
+    feature_uniqueness: str = 'HObjektText',
+    feature_obj_id: str = 'ObjektID',
+) -> tuple[DataFrame]:
+    logger.info('Removing non-relevant ObjectIDs from dataset...')
+    data = data.copy()
+    ids_to_ignore = _non_relevant_obj_ids(
+        data=data,
+        thresh_unique_feat_per_id=thresh_unique_feat_per_id,
+        feature_uniqueness=feature_uniqueness,
+        feature_obj_id=feature_obj_id,
+    )
+    # only retain entries with ObjectIDs not in IDs to ignore
+    data = data.loc[~(data[feature_obj_id].isin(ids_to_ignore))]
+    logger.debug('Ignored ObjectIDs: %s', ids_to_ignore)
+    logger.info('Non-relevant ObjectIDs removed successfully.')
+
+    return (data,)
+
+
+
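Putting the pieces together, a hedged end-to-end sketch for one object's timeline, continuing from the filter sketch above; sentence_model and the similarity threshold are assumed to come from the caller's configuration:

>>> from lang_main.analysis.timeline import (
...     filter_timeline_cands,
...     generate_model_input,
...     get_timeline_candidates,
... )
>>> (with_input,) = generate_model_input(filtered)
>>> cands, obj_texts = get_timeline_candidates(
...     with_input,
...     counts_per_obj,
...     model=sentence_model,
...     cos_sim_threshold=0.75,
... )
>>> some_obj_id = next(iter(cands))
>>> timeline = filter_timeline_cands(with_input, cands, some_obj_id, entry_idx=0)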
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/analysis/tokens.html b/docs/lang_main/analysis/tokens.html new file mode 100644 index 0000000..4746df0 --- /dev/null +++ b/docs/lang_main/analysis/tokens.html @@ -0,0 +1,320 @@ + + + + + + +lang_main.analysis.tokens API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.analysis.tokens

+
+
+
+
+
+
+
+
+

Functions

+
+
+def add_doc_info_to_graph(graph: TokenGraph,
doc: spacy.tokens.doc.Doc,
weight: int | None) ‑> None
+
+
+
+ +Expand source code + +
def add_doc_info_to_graph(
+    graph: TokenGraph,
+    doc: SpacyDoc,
+    weight: int | None,
+) -> None:
+    # iterate over sentences
+    for sent in doc.sents:
+        # iterate over tokens in sentence
+        for token in sent:
+            # skip tokens which are not relevant
+            if not (token.pos_ in POS_OF_INTEREST or token.tag_ in TAG_OF_INTEREST):
+                continue
+            # skip tokens which are dates or times
+            if token.pos_ == 'NUM' and is_str_date(string=token.text):
+                continue
+
+            relevant_descendants = obtain_relevant_descendants(token=token)
+            # for non-AUX: add parent <--> descendant pair to graph
+            if token.pos_ not in POS_INDIRECT:
+                for descendant in relevant_descendants:
+                    # add descendant and parent to graph
+                    update_graph(
+                        graph=graph,
+                        parent=token.lemma_,
+                        child=descendant.lemma_,
+                        weight_connection=weight,
+                    )
+            else:
+                # if indirect POS, make connection between all associated words
+                combs = combinations(relevant_descendants, r=2)
+                for comb in combs:
+                    # !! parents and children do not really exist in this case,
+                    # !! but only one connection is made
+                    update_graph(
+                        graph=graph,
+                        parent=comb[0].lemma_,
+                        child=comb[1].lemma_,
+                        weight_connection=weight,
+                    )
+
+
+
+
+def build_token_graph(data: pandas.core.frame.DataFrame,
model: spacy.language.Language,
*,
target_feature: str = 'entry',
weights_feature: str | None = None,
batch_idx_feature: str | None = 'batched_idxs',
build_map: bool = True,
batch_size_model: int = 50,
logging_graph: bool = True) ‑> tuple[TokenGraph, dict[int | numpy.int64, spacy.tokens.doc.Doc] | None]
+
+
+
+ +Expand source code + +
def build_token_graph(
+    data: DataFrame,
+    model: SpacyModel,
+    *,
+    target_feature: str = 'entry',
+    weights_feature: str | None = None,
+    batch_idx_feature: str | None = 'batched_idxs',
+    build_map: bool = True,
+    batch_size_model: int = 50,
+    logging_graph: bool = True,
+) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc] | None]:
+    graph = TokenGraph(enable_logging=logging_graph)
+    model_input = cast(tuple[str], tuple(data[target_feature].to_list()))
+    if weights_feature is not None:
+        weights = cast(tuple[int], tuple(data[weights_feature].to_list()))
+    else:
+        weights = None
+
+    docs_mapping: dict[PandasIndex, SpacyDoc] | None
+    if build_map and batch_idx_feature is None:
+        raise ValueError('Can not build mapping if batched indices are unknown.')
+    elif build_map:
+        indices = cast(tuple[list[PandasIndex]], tuple(data[batch_idx_feature].to_list()))
+        docs_mapping = {}
+    else:
+        indices = None
+        docs_mapping = None
+
+    index: int = 0
+
+    for doc in tqdm(
+        model.pipe(model_input, batch_size=batch_size_model), total=len(model_input)
+    ):
+        weight: int | None = None
+        if weights is not None:
+            weight = weights[index]
+
+        add_doc_info_to_graph(
+            graph=graph,
+            doc=doc,
+            weight=weight,
+        )
+        # build map if option chosen
+        if indices is not None and docs_mapping is not None:
+            corresponding_indices = indices[index]
+            for idx in corresponding_indices:
+                docs_mapping[idx] = doc
+
+        index += 1
+
+    # metadata
+    graph.update_metadata()
+    # convert to undirected
+    graph.to_undirected(logging=False)
+    graph.perform_static_analysis()
+
+    return graph, docs_mapping
+
+
+
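A hedged sketch of building a token graph from the merged feature table produced by the preprocessing sketches above, with a spaCy pipeline loaded via lang_main.model_loader; the variable names are illustrative:

>>> from lang_main.analysis.tokens import build_token_graph
>>> token_graph, doc_map = build_token_graph(
...     merged,
...     spacy_model,
...     target_feature='entry',
...     weights_feature='num_occur',
...     batch_idx_feature='batched_idxs',
... )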
+
+def is_str_date(string: str, fuzzy: bool = False) ‑> bool +
+
+
+ +Expand source code + +
def is_str_date(
+    string: str,
+    fuzzy: bool = False,
+) -> bool:
+    """not stable function to test strings for dates, not 100 percent reliable
+
+    Parameters
+    ----------
+    string : str
+        string to check for dates
+    fuzzy : bool, optional
+        whether to use the fuzzy capability of dateutil.parser.parse, by default False
+
+    Returns
+    -------
+    bool
+        indicates whether date was found or not
+    """
+    try:
+        # check if string is a number
+    # bare numbers only qualify as date components if they have 2 or 4 digits
+        int(string)
+        if len(string) not in {2, 4}:
+            return False
+    except ValueError:
+        # not a number
+        pass
+
+    try:
+        parse(string, fuzzy=fuzzy, dayfirst=True, yearfirst=False)
+        return True
+    except ValueError:
+        date_found: bool = False
+        match = pattern_dates.search(string)
+        if match is None:
+            return date_found
+        date_found = any(match.groups())
+        return date_found
+
+

heuristic check whether a string contains a date; not 100 percent reliable

+

Parameters

+
+
string : str
+
string to check for dates
+
fuzzy : bool, optional
+
whether to use the fuzzy capability of dateutil.parser.parse, by default False
+
+

Returns

+
+
bool
+
indicates whether date was found or not
+
+
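Small illustrative checks; the exact outcome depends on the installed dateutil version and the date patterns compiled in this module, so treat the results as indicative rather than guaranteed:

>>> from lang_main.analysis.tokens import is_str_date
>>> is_str_date('12.03.2021')
True
>>> is_str_date('Pumpe')
False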
+
+def obtain_relevant_descendants(token: spacy.tokens.token.Token) ‑> Iterator[spacy.tokens.token.Token] +
+
+
+ +Expand source code + +
def obtain_relevant_descendants(
+    token: SpacyToken,
+) -> Iterator[SpacyToken]:
+    for descendant in token.subtree:
+        # subtrees contain the token itself
+        # if current element is token skip this element
+        if descendant == token:
+            continue
+
+        # if descendant is a date, skip it
+        if is_str_date(string=descendant.text):
+            continue
+
+        logger.debug(
+            'Token >>%s<<, POS >>%s<< | descendant >>%s<<, POS >>%s<<',
+            token,
+            token.pos_,
+            descendant,
+            descendant.pos_,
+        )
+
+        # eliminate cases of cross-references with verbs
+        if (token.pos_ == 'AUX' or token.pos_ == 'VERB') and (
+            descendant.pos_ == 'AUX' or descendant.pos_ == 'VERB'
+        ):
+            continue
+        # skip cases in which descendant is indirect POS with others than verbs
+        elif descendant.pos_ in POS_INDIRECT:
+            continue
+        # skip cases in which child has no relevant POS or TAG
+        elif not (descendant.pos_ in POS_OF_INTEREST or descendant.tag_ in TAG_OF_INTEREST):
+            continue
+
+        yield descendant
+
+        # TODO look at results and fine-tune function accordingly
+
+
+
+
+def pre_clean_word(string: str) ‑> str +
+
+
+ +Expand source code + +
def pre_clean_word(string: str) -> str:
+    pattern = r'[^A-Za-zäöüÄÖÜ]+'
+    string = re.sub(pattern, '', string)
+
+    return string
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/config.html b/docs/lang_main/config.html new file mode 100644 index 0000000..230f43f --- /dev/null +++ b/docs/lang_main/config.html @@ -0,0 +1,206 @@ + + + + + + +lang_main.config API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.config

+
+
+
+
+
+
+
+
+

Functions

+
+
+def get_config_paths(root_folder: Path, cfg_name: str, cyto_stylesheet_name: str) ‑> tuple[pathlib.Path, pathlib.Path] +
+
+
+ +Expand source code + +
def get_config_paths(
+    root_folder: Path,
+    cfg_name: str,
+    cyto_stylesheet_name: str,
+) -> tuple[Path, Path]:
+    cfg_path_internal = (root_folder / cfg_name).resolve()
+    cyto_stylesheet_path = (root_folder / cyto_stylesheet_name).resolve()
+
+    return cfg_path_internal, cyto_stylesheet_path
+
+
+
+
+def load_cfg(starting_path: Path,
glob_pattern: str,
stop_folder_name: str | None,
lookup_cwd: bool = False) ‑> dict[str, typing.Any]
+
+
+
+ +Expand source code + +
def load_cfg(
+    starting_path: Path,
+    glob_pattern: str,
+    stop_folder_name: str | None,
+    lookup_cwd: bool = False,
+) -> dict[str, Any]:
+    """Look for configuration file. Internal configs are not used any more because
+    the library behaviour is only guaranteed by external configurations.
+
+    Parameters
+    ----------
+    starting_path : Path
+        path to start for the lookup
+    glob_pattern : str
+        pattern of the config file naming scheme
+    stop_folder_name : str | None
+        folder name at which the lookup should stop, the parent folder
+        is also searched, e.g.
+        if starting_path is path/to/start/folder and stop_folder_name is 'to',
+        then path/ is also searched
+
+    Returns
+    -------
+    dict[str, Any]
+        loaded config file
+
+    Raises
+    ------
+    LangMainConfigNotFoundError
+        if no config file was found
+    """
+    cfg_path: Path | None = None
+    if lookup_cwd:
+        print('Looking for cfg file in CWD.', flush=True)
+        cfg_path = search_cwd(glob_pattern)
+
+    if cfg_path is None:
+        print(
+            (
+                f'Looking iteratively for config file. Start: {starting_path}, '
+                f'stop folder: {stop_folder_name}'
+            ),
+            flush=True,
+        )
+        cfg_path = search_iterative(
+            starting_path=starting_path,
+            glob_pattern=glob_pattern,
+            stop_folder_name=stop_folder_name,
+        )
+
+    if cfg_path is None:
+        raise LangMainConfigNotFoundError('Config file was not found.')
+
+    config = load_toml_config(path_to_toml=cfg_path)
+    print(f'Loaded config from: >>{cfg_path}<<')
+
+    return config.copy()
+
+

Look for configuration file. Internal configs are not used any more because +the library behaviour is only guaranteed by external configurations.

+

Parameters

+
+
starting_path : Path
+
path to start for the lookup
+
glob_pattern : str
+
pattern of the config file naming scheme
+
stop_folder_name : str | None
+
folder name at which the lookup should stop, the parent folder +is also searched, e.g. +if starting_path is path/to/start/folder and stop_folder_name is 'to', +then path/ is also searched
+
+

Returns

+
+
dict[str, Any]
+
loaded config file
+
+

Raises

+
+
LangMainConfigNotFoundError
+
if no config file was found
+
+
+
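A hedged usage sketch; the glob pattern and folder names below are placeholders chosen for illustration, not values prescribed by the library:

>>> from pathlib import Path
>>> from lang_main.config import load_cfg
>>> cfg = load_cfg(
...     starting_path=Path.cwd(),
...     glob_pattern='lang_main_config*.toml',
...     stop_folder_name=None,
...     lookup_cwd=True,
... )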
+def load_toml_config(path_to_toml: str | Path) ‑> dict[str, typing.Any] +
+
+
+ +Expand source code + +
def load_toml_config(
+    path_to_toml: str | Path,
+) -> dict[str, Any]:
+    with open(path_to_toml, 'rb') as f:
+        data = tomllib.load(f)
+    print('Loaded TOML config file successfully.', flush=True)
+
+    return data
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/constants.html b/docs/lang_main/constants.html new file mode 100644 index 0000000..79ede97 --- /dev/null +++ b/docs/lang_main/constants.html @@ -0,0 +1,66 @@ + + + + + + +lang_main.constants API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.constants

+
+
+
+
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/errors.html b/docs/lang_main/errors.html new file mode 100644 index 0000000..a713ec5 --- /dev/null +++ b/docs/lang_main/errors.html @@ -0,0 +1,330 @@ + + + + + + +lang_main.errors API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.errors

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class DependencyMissingError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class DependencyMissingError(Exception):
+    """Error raised if needed dependency could not be found"""
+
+

Error raised if needed dependency could not be found

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class EdgePropertyNotContainedError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class EdgePropertyNotContainedError(Exception):
+    """Error raised if a needed edge property is not contained in graph edges"""
+
+

Error raised if a needed edge property is not contained in graph edges

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class EmptyEdgesError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class EmptyEdgesError(EmptyGraphError):
+    """Error raised if action should be performed on a graph's edges, but
+    it does not contain any"""
+
+

Error raised if action should be performed on a graph's edges, but +it does not contain any

+

Ancestors

+ +
+
+class EmptyGraphError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class EmptyGraphError(Exception):
+    """Error raised if an operation should be performed on the graph,
+    but it does not contain any nodes or edges"""
+
+

Error raised if an operation should be performed on the graph, +but it does not contain any nodes or edges

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+

Subclasses

+ +
+
+class GraphRenderError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class GraphRenderError(Exception):
+    """Error raised if a graph object can not be rendered"""
+
+

Error raised if a graph object can not be rendered

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class LangMainConfigNotFoundError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class LangMainConfigNotFoundError(Exception):
+    """Error raised if a config file could not be found successfully"""
+
+

Error raised if a config file could not be found successfully

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class LanguageModelNotFoundError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class LanguageModelNotFoundError(Exception):
+    """Error raised if a given language model could not be loaded successfully"""
+
+

Error raised if a given language model could not be loaded successfully

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class NoPerformableActionError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class NoPerformableActionError(Exception):
+    """Error describing that no action is available in the current pipeline"""
+
+

Error describing that no action is available in the current pipeline

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class NodePropertyNotContainedError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class NodePropertyNotContainedError(Exception):
+    """Error raised if a needed node property is not contained in graph edges"""
+
+

Error raised if a needed node property is not contained in graph nodes

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class OutputInPipelineContainerError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class OutputInPipelineContainerError(Exception):
+    """Error raised if an output was detected by one of the performed
+    actions in a PipelineContainer. Each action in a PipelineContainer is itself a
+    procedure which does not have any parameters or return values and should therefore not
+    return any values."""
+
+

Error raised if an output was detected by one of the performed +actions in a PipelineContainer. Each action in a PipelineContainer is itself a +procedure which does not have any parameters or return values and should therefore not +return any values.

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+class WrongActionTypeError +(*args, **kwargs) +
+
+
+ +Expand source code + +
class WrongActionTypeError(Exception):
+    """Error raised if added action type is not supported by corresponding pipeline"""
+
+

Error raised if added action type is not supported by corresponding pipeline

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/index.html b/docs/lang_main/index.html new file mode 100644 index 0000000..1688465 --- /dev/null +++ b/docs/lang_main/index.html @@ -0,0 +1,123 @@ + + + + + + +lang_main API documentation + + + + + + + + + + + +
+ + +
+ + + diff --git a/docs/lang_main/io.html b/docs/lang_main/io.html new file mode 100644 index 0000000..2a5c223 --- /dev/null +++ b/docs/lang_main/io.html @@ -0,0 +1,227 @@ + + + + + + +lang_main.io API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.io

+
+
+
+
+
+
+
+
+

Functions

+
+
+def create_saving_folder(saving_path_folder: str | pathlib.Path, overwrite_existing: bool = False) ‑> None +
+
+
+ +Expand source code + +
def create_saving_folder(
+    saving_path_folder: str | Path,
+    overwrite_existing: bool = False,
+) -> None:
+    # check for existence of given path
+    if isinstance(saving_path_folder, str):
+        saving_path_folder = Path(saving_path_folder)
+    if not saving_path_folder.exists():
+        saving_path_folder.mkdir(parents=True)
+    else:
+        if overwrite_existing:
+            # overwrite if desired (deletes whole path and re-creates it)
+            shutil.rmtree(saving_path_folder)
+            saving_path_folder.mkdir(parents=True)
+        else:
+            logger.info(
+                (
+                    'Path >>%s<< already exists and remained unchanged. If you want to '
+                    'overwrite this path, use parameter >>overwrite_existing<<.'
+                ),
+                saving_path_folder,
+            )
+
+
+
+
+def decode_from_base64_str(b64_str: str, encoding: str = 'utf-8') ‑> Any +
+
+
+ +Expand source code + +
def decode_from_base64_str(
+    b64_str: str,
+    encoding: str = 'utf-8',
+) -> Any:
+    b64_bytes = b64_str.encode(encoding=encoding)
+    decoded = base64.b64decode(b64_bytes)
+    return pickle.loads(decoded)
+
+
+
+
+def encode_file_to_base64_str(path: pathlib.Path, encoding: str = 'utf-8') ‑> str +
+
+
+ +Expand source code + +
def encode_file_to_base64_str(
+    path: Path,
+    encoding: str = 'utf-8',
+) -> str:
+    with open(path, 'rb') as file:
+        b64_bytes = base64.b64encode(file.read())
+    return b64_bytes.decode(encoding=encoding)
+
+
+
+
+def encode_to_base64_str(obj: Any, encoding: str = 'utf-8') ‑> str +
+
+
+ +Expand source code + +
def encode_to_base64_str(
+    obj: Any,
+    encoding: str = 'utf-8',
+) -> str:
+    serialised = pickle.dumps(obj, protocol=PICKLE_PROTOCOL_VERSION)
+    b64_bytes = base64.b64encode(serialised)
+    return b64_bytes.decode(encoding=encoding)
+
+
+
+
+def get_entry_point(saving_path: pathlib.Path,
filename: str,
file_ext: str = '.pkl',
check_existence: bool = True) ‑> pathlib.Path
+
+
+
+ +Expand source code + +
def get_entry_point(
+    saving_path: Path,
+    filename: str,
+    file_ext: str = '.pkl',
+    check_existence: bool = True,
+) -> Path:
+    entry_point_path = (saving_path / filename).with_suffix(file_ext)
+    if check_existence and not entry_point_path.exists():
+        raise FileNotFoundError(
+            f'Could not find provided entry data under path: >>{entry_point_path}<<'
+        )
+
+    return entry_point_path
+
+
+
+
+def load_pickle(path: str | pathlib.Path) ‑> Any +
+
+
+ +Expand source code + +
def load_pickle(
+    path: str | Path,
+) -> Any:
+    with open(path, 'rb') as file:
+        obj = pickle.load(file)
+    logger.info('Loaded file successfully.')
+    return obj
+
+
+
+
+def save_pickle(obj: Any, path: str | pathlib.Path) ‑> None +
+
+
+ +Expand source code + +
def save_pickle(
+    obj: Any,
+    path: str | Path,
+) -> None:
+    with open(path, 'wb') as file:
+        pickle.dump(obj, file, protocol=PICKLE_PROTOCOL_VERSION)
+    logger.info('Saved file successfully under %s', path)
+
+
+
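A hedged round-trip sketch combining the helpers above; the folder and file names are placeholders:

>>> from pathlib import Path
>>> from lang_main.io import create_saving_folder, get_entry_point, load_pickle, save_pickle
>>> out_dir = Path('results')
>>> create_saving_folder(out_dir)
>>> target = get_entry_point(out_dir, 'preprocessed', check_existence=False)
>>> save_pickle({'answer': 42}, target)
>>> restored = load_pickle(target)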
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/loggers.html b/docs/lang_main/loggers.html new file mode 100644 index 0000000..0152ee3 --- /dev/null +++ b/docs/lang_main/loggers.html @@ -0,0 +1,66 @@ + + + + + + +lang_main.loggers API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.loggers

+
+
+
+
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/model_loader.html b/docs/lang_main/model_loader.html new file mode 100644 index 0000000..c354e5e --- /dev/null +++ b/docs/lang_main/model_loader.html @@ -0,0 +1,162 @@ + + + + + + +lang_main.model_loader API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.model_loader

+
+
+
+
+
+
+
+
+

Functions

+
+
+def instantiate_model(model_load_map: ModelLoaderMap, model: LanguageModels) ‑> sentence_transformers.SentenceTransformer.SentenceTransformer | spacy.language.Language +
+
+
+ +Expand source code + +
def instantiate_model(
+    model_load_map: ModelLoaderMap,
+    model: LanguageModels,
+) -> Model:
+    if model not in model_load_map:
+        raise KeyError(f'Model >>{model}<< not known. Choose from: {model_load_map.keys()}')
+    builder_func = model_load_map[model]['func']
+    func_kwargs = model_load_map[model]['kwargs']
+
+    return builder_func(**func_kwargs)
+
+
+
+
+def load_sentence_transformer(model_name: STFRModelTypes | str,
similarity_func: SimilarityFunction = SimilarityFunction.COSINE,
backend: STFRBackends = torch,
device: STFRDeviceTypes = cpu,
local_files_only: bool = True,
trust_remote_code: bool = False,
model_save_folder: str | None = None,
model_kwargs: STFRModelArgs | dict[str, Any] | None = None,
force_download: bool = False) ‑> sentence_transformers.SentenceTransformer.SentenceTransformer
+
+
+
+ +Expand source code + +
def load_sentence_transformer(
+    model_name: STFRModelTypes | str,
+    similarity_func: SimilarityFunction = SimilarityFunction.COSINE,
+    backend: STFRBackends = STFRBackends.TORCH,
+    device: STFRDeviceTypes = STFRDeviceTypes.CPU,
+    local_files_only: bool = True,
+    trust_remote_code: bool = False,
+    model_save_folder: str | None = None,
+    model_kwargs: STFRModelArgs | dict[str, Any] | None = None,
+    force_download: bool = False,
+) -> SentenceTransformer:
+    model_name_or_path = _preprocess_STFR_model_name(
+        model_name=model_name, backend=backend, force_download=force_download
+    )
+    model = SentenceTransformer(
+        model_name_or_path=model_name_or_path,
+        similarity_fn_name=similarity_func,
+        backend=backend,  # type: ignore Literal matches Enum
+        device=device,
+        cache_folder=model_save_folder,
+        local_files_only=local_files_only,
+        trust_remote_code=trust_remote_code,
+        model_kwargs=model_kwargs,  # type: ignore
+    )
+    logger.info('[MODEL LOADING] Loaded model >>%s<< successfully', model_name)
+
+    return model
+
+
+
+
+def load_spacy(model_name: str) ‑> spacy.language.Language +
+
+
+ +Expand source code + +
def load_spacy(
+    model_name: str,
+) -> SpacyModel:
+    try:
+        spacy_model_obj = importlib.import_module(model_name)
+    except ModuleNotFoundError:
+        raise LanguageModelNotFoundError(
+            (
+                f'Could not find spaCy model >>{model_name}<<. '
+                f'Check if it is installed correctly.'
+            )
+        )
+    pretrained_model = cast(SpacyModel, spacy_model_obj.load())
+    logger.info('[MODEL LOADING] Loaded model >>%s<< successfully', model_name)
+
+    return pretrained_model
+
+
+
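A hedged sketch of loading the two model types directly; the model names are examples of publicly available models, not defaults shipped with the library, and the spaCy model must be installed as a package for load_spacy to import it:

>>> from lang_main.model_loader import load_sentence_transformer, load_spacy
>>> sentence_model = load_sentence_transformer('all-MiniLM-L6-v2', local_files_only=False)
>>> spacy_model = load_spacy('de_core_news_sm')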
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/pipelines/base.html b/docs/lang_main/pipelines/base.html new file mode 100644 index 0000000..a95bb8f --- /dev/null +++ b/docs/lang_main/pipelines/base.html @@ -0,0 +1,755 @@ + + + + + + +lang_main.pipelines.base API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.pipelines.base

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class BasePipeline +(name: str, working_dir: Path) +
+
+
+ +Expand source code + +
class BasePipeline(ABC):
+    def __init__(
+        self,
+        name: str,
+        working_dir: Path,
+    ) -> None:
+        # init base class
+        super().__init__()
+
+        # name of pipeline
+        self.name = name
+        # working directory for pipeline == output path
+        self.working_dir = working_dir
+
+        # container for actions to perform during pass
+        self.actions: list[Callable] = []
+        self.action_names: list[str] = []
+        self.action_skip: list[bool] = []
+        # progress tracking, start at 1
+        self.curr_proc_idx: int = 1
+
+    def __repr__(self) -> str:
+        return (
+            f'{self.__class__.__name__}(name: {self.name}, '
+            f'working dir: {self.working_dir}, contents: {self.action_names})'
+        )
+
+    def panic_wrong_action_type(
+        self,
+        action: Any,
+        compatible_type: str,
+    ) -> Never:
+        raise WrongActionTypeError(
+            (
+                f'Action must be of type {compatible_type}, '
+                f'but is of type >>{type(action)}<<.'
+            )
+        )
+
+    def prep_run(self) -> None:
+        logger.info('Starting pipeline >>%s<<...', self.name)
+        # progress tracking
+        self.curr_proc_idx = 1
+        # check if performable actions available
+        if len(self.actions) == 0:
+            raise NoPerformableActionError(
+                'The pipeline does not contain any performable actions.'
+            )
+
+    def post_run(self) -> None:
+        logger.info(
+            'Processing pipeline >>%s<< successfully ended after %d steps.',
+            self.name,
+            (self.curr_proc_idx - 1),
+        )
+
+    @abstractmethod
+    def add(self) -> None: ...
+
+    @abstractmethod
+    def logic(self) -> None: ...
+
+    def run(self, *args, **kwargs) -> Any:
+        self.prep_run()
+        ret = self.logic(*args, **kwargs)
+        self.post_run()
+        return ret
+
+

Helper class that provides a standard way to create an ABC using +inheritance.

+

Ancestors

+
    +
  • abc.ABC
  • +
+

Subclasses

+ +

Methods

+
+
+def add(self) ‑> None +
+
+
+ +Expand source code + +
@abstractmethod
+def add(self) -> None: ...
+
+
+
+
+def logic(self) ‑> None +
+
+
+ +Expand source code + +
@abstractmethod
+def logic(self) -> None: ...
+
+
+
+
+def panic_wrong_action_type(self, action: Any, compatible_type: str) ‑> Never +
+
+
+ +Expand source code + +
def panic_wrong_action_type(
+    self,
+    action: Any,
+    compatible_type: str,
+) -> Never:
+    raise WrongActionTypeError(
+        (
+            f'Action must be of type {compatible_type}, '
+            f'but is of type >>{type(action)}<<.'
+        )
+    )
+
+
+
+
+def post_run(self) ‑> None +
+
+
+ +Expand source code + +
def post_run(self) -> None:
+    logger.info(
+        'Processing pipeline >>%s<< successfully ended after %d steps.',
+        self.name,
+        (self.curr_proc_idx - 1),
+    )
+
+
+
+
+def prep_run(self) ‑> None +
+
+
+ +Expand source code + +
def prep_run(self) -> None:
+    logger.info('Starting pipeline >>%s<<...', self.name)
+    # progress tracking
+    self.curr_proc_idx = 1
+    # check if performable actions available
+    if len(self.actions) == 0:
+        raise NoPerformableActionError(
+            'The pipeline does not contain any performable actions.'
+        )
+
+
+
+
+def run(self, *args, **kwargs) ‑> Any +
+
+
+ +Expand source code + +
def run(self, *args, **kwargs) -> Any:
+    self.prep_run()
+    ret = self.logic(*args, **kwargs)
+    self.post_run()
+    return ret
+
+
+
+
+
+
+class Pipeline +(name: str, working_dir: Path) +
+
+
+ +Expand source code + +
class Pipeline(BasePipeline):
+    def __init__(
+        self,
+        name: str,
+        working_dir: Path,
+    ) -> None:
+        # init base class
+        super().__init__(name=name, working_dir=working_dir)
+        # name of pipeline
+        self.name = name
+        # working directory for pipeline == output path
+        self.working_dir = working_dir
+        # container for actions to perform during pass
+        self.actions_kwargs: list[dict[str, Any]] = []
+        self.save_results: ResultHandling = []
+        self.load_results: ResultHandling = []
+        # intermediate result
+        self._intermediate_result: tuple[Any, ...] | None = None
+
+    def __repr__(self) -> str:
+        return (
+            f'{self.__class__.__name__}(name: {self.name}, '
+            f'working dir: {self.working_dir}, contents: {self.action_names})'
+        )
+
+    @override
+    def add(
+        self,
+        action: Callable,
+        action_kwargs: dict[str, Any] | None = None,
+        skip: bool = False,
+        save_result: bool = False,
+        load_result: bool = False,
+        filename: str | None = None,
+    ) -> None:
+        # check explicitly for function type
+        # if isinstance(action, FunctionType):
+        if action_kwargs is None:
+            action_kwargs = {}
+
+        if isinstance(action, Callable):
+            self.actions.append(action)
+            self.action_names.append(action.__name__)
+            self.actions_kwargs.append(action_kwargs.copy())
+            self.action_skip.append(skip)
+            self.save_results.append((save_result, filename))
+            self.load_results.append((load_result, filename))
+        else:
+            self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
+
+    def get_result_path(
+        self,
+        action_idx: int,
+        filename: str | None,
+    ) -> tuple[Path, str]:
+        action_name = self.action_names[action_idx]
+        if filename is None:
+            target_filename = f'Pipe-{self.name}_Step-{self.curr_proc_idx}_{action_name}'
+        else:
+            target_filename = filename
+        target_path = self.working_dir.joinpath(target_filename).with_suffix('.pkl')
+        return target_path, action_name
+
+    def load_step(
+        self,
+        action_idx: int,
+        filename: str | None,
+    ) -> tuple[Any, ...]:
+        target_path, action_name = self.get_result_path(action_idx, filename)
+
+        if not target_path.exists():
+            raise FileNotFoundError(
+                (
+                    f'No intermediate results for action >>{action_name}<< '
+                    f'under >>{target_path}<< found'
+                )
+            )
+        # results should be tuple, but that is not guaranteed
+        result_loaded = cast(tuple[Any, ...], load_pickle(target_path))
+        if not isinstance(result_loaded, tuple):
+            raise TypeError(f'Loaded results must be tuple, not {type(result_loaded)}')
+
+        return result_loaded
+
+    def save_step(
+        self,
+        action_idx: int,
+        filename: str | None,
+    ) -> None:
+        target_path, _ = self.get_result_path(action_idx, filename)
+        save_pickle(obj=self._intermediate_result, path=target_path)
+
+    @override
+    def logic(
+        self,
+        starting_values: tuple[Any, ...] | None = None,
+    ) -> tuple[Any, ...]:
+        first_performed: bool = False
+
+        for idx, (action, action_kwargs) in enumerate(zip(self.actions, self.actions_kwargs)):
+            if self.action_skip[idx]:
+                self.curr_proc_idx += 1
+                continue
+
+            # loading
+            if self.load_results[idx][0]:
+                filename = self.load_results[idx][1]
+                ret = self.load_step(action_idx=idx, filename=filename)
+                self._intermediate_result = ret
+                logger.info(
+                    '[No Calculation] Loaded result for action >>%s<< successfully',
+                    self.action_names[idx],
+                )
+                self.curr_proc_idx += 1
+                continue
+            # calculation
+            if not first_performed:
+                args = starting_values
+                first_performed = True
+            else:
+                args = ret
+
+            if args is not None:
+                ret = action(*args, **action_kwargs)
+            else:
+                ret = action(**action_kwargs)
+
+            if ret is not None and not isinstance(ret, tuple):
+                ret = (ret,)
+            ret = cast(tuple[Any, ...], ret)
+            # save intermediate result
+            self._intermediate_result = ret
+            # saving result locally, always save last action
+            if self.save_results[idx][0] or idx == (len(self.actions) - 1):
+                filename = self.save_results[idx][1]
+                self.save_step(action_idx=idx, filename=filename)
+            # processing tracking
+            self.curr_proc_idx += 1
+
+        return ret
+
+

Helper class that provides a standard way to create an ABC using +inheritance.

+

Ancestors

+ +

Methods

+
+
+def add(self,
action: Callable,
action_kwargs: dict[str, Any] | None = None,
skip: bool = False,
save_result: bool = False,
load_result: bool = False,
filename: str | None = None) ‑> None
+
+
+
+ +Expand source code + +
@override
+def add(
+    self,
+    action: Callable,
+    action_kwargs: dict[str, Any] | None = None,
+    skip: bool = False,
+    save_result: bool = False,
+    load_result: bool = False,
+    filename: str | None = None,
+) -> None:
+    # check explicitly for function type
+    # if isinstance(action, FunctionType):
+    if action_kwargs is None:
+        action_kwargs = {}
+
+    if isinstance(action, Callable):
+        self.actions.append(action)
+        self.action_names.append(action.__name__)
+        self.actions_kwargs.append(action_kwargs.copy())
+        self.action_skip.append(skip)
+        self.save_results.append((save_result, filename))
+        self.load_results.append((load_result, filename))
+    else:
+        self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
+
+
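+A minimal sketch of the save/load options (the import path, pipeline name, output
+directory and expensive_step are placeholder assumptions; with filename=None the
+result of the first action would be stored as Pipe-Demo_Step-0_expensive_step.pkl):
+
+from pathlib import Path
+
+from lang_main.pipelines.base import Pipeline  # import path assumed
+
+def expensive_step() -> dict:
+    # placeholder for a long-running computation
+    return {'answer': 42}
+
+pipe = Pipeline(name='Demo', working_dir=Path('out'))
+# first run: compute the step and cache its result as out/expensive.pkl
+pipe.add(expensive_step, save_result=True, filename='expensive')
+# a later run can pass load_result=True with the same filename to reuse the
+# pickled result instead of recomputing the step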
+
+
+def get_result_path(self, action_idx: int, filename: str | None) ‑> tuple[pathlib.Path, str] +
+
+
+ +Expand source code + +
def get_result_path(
+    self,
+    action_idx: int,
+    filename: str | None,
+) -> tuple[Path, str]:
+    action_name = self.action_names[action_idx]
+    if filename is None:
+        target_filename = f'Pipe-{self.name}_Step-{self.curr_proc_idx}_{action_name}'
+    else:
+        target_filename = filename
+    target_path = self.working_dir.joinpath(target_filename).with_suffix('.pkl')
+    return target_path, action_name
+
+
+
+
+def load_step(self, action_idx: int, filename: str | None) ‑> tuple[typing.Any, ...] +
+
+
+ +Expand source code + +
def load_step(
+    self,
+    action_idx: int,
+    filename: str | None,
+) -> tuple[Any, ...]:
+    target_path, action_name = self.get_result_path(action_idx, filename)
+
+    if not target_path.exists():
+        raise FileNotFoundError(
+            (
+                f'No intermediate results for action >>{action_name}<< '
+                f'under >>{target_path}<< found'
+            )
+        )
+    # results should be tuple, but that is not guaranteed
+    result_loaded = cast(tuple[Any, ...], load_pickle(target_path))
+    if not isinstance(result_loaded, tuple):
+        raise TypeError(f'Loaded results must be tuple, not {type(result_loaded)}')
+
+    return result_loaded
+
+
+
+
+def logic(self, starting_values: tuple[Any, ...] | None = None) ‑> tuple[typing.Any, ...] +
+
+
+ +Expand source code + +
@override
+def logic(
+    self,
+    starting_values: tuple[Any, ...] | None = None,
+) -> tuple[Any, ...]:
+    first_performed: bool = False
+
+    for idx, (action, action_kwargs) in enumerate(zip(self.actions, self.actions_kwargs)):
+        if self.action_skip[idx]:
+            self.curr_proc_idx += 1
+            continue
+
+        # loading
+        if self.load_results[idx][0]:
+            filename = self.load_results[idx][1]
+            ret = self.load_step(action_idx=idx, filename=filename)
+            self._intermediate_result = ret
+            logger.info(
+                '[No Calculation] Loaded result for action >>%s<< successfully',
+                self.action_names[idx],
+            )
+            self.curr_proc_idx += 1
+            continue
+        # calculation
+        if not first_performed:
+            args = starting_values
+            first_performed = True
+        else:
+            args = ret
+
+        if args is not None:
+            ret = action(*args, **action_kwargs)
+        else:
+            ret = action(**action_kwargs)
+
+        if ret is not None and not isinstance(ret, tuple):
+            ret = (ret,)
+        ret = cast(tuple[Any, ...], ret)
+        # save intermediate result
+        self._intermediate_result = ret
+        # saving result locally, always save last action
+        if self.save_results[idx][0] or idx == (len(self.actions) - 1):
+            filename = self.save_results[idx][1]
+            self.save_step(action_idx=idx, filename=filename)
+        # processing tracking
+        self.curr_proc_idx += 1
+
+    return ret
+
+
+
+
+def save_step(self, action_idx: int, filename: str | None) ‑> None +
+
+
+ +Expand source code + +
def save_step(
+    self,
+    action_idx: int,
+    filename: str | None,
+) -> None:
+    target_path, _ = self.get_result_path(action_idx, filename)
+    save_pickle(obj=self._intermediate_result, path=target_path)
+
+
+
+
+
+
+class PipelineContainer +(name: str, working_dir: Path) +
+
+
+ +Expand source code + +
class PipelineContainer(BasePipeline):
+    """Container class for basic actions.
+    Basic actions are usually functions, which do not take any parameters
+    and return nothing. Indeed, if an action returns any values after its
+    procedure is finished, an error is raised. Therefore, PipelineContainers
+    can be seen as a concatenation of many (independent) simple procedures
+    which are executed in the order in which they were added to the pipe.
+    With a simple call of the ``run`` method the actions are performed.
+    Additionally, there is an option to skip actions which can be set in
+    the ``add`` method. This allows for easily configurable pipelines,
+    e.g., via a user configuration.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        working_dir: Path,
+    ) -> None:
+        super().__init__(name=name, working_dir=working_dir)
+
+    @override
+    def add(
+        self,
+        action: Callable,
+        skip: bool = False,
+    ) -> None:
+        if isinstance(action, Callable):
+            self.actions.append(action)
+            self.action_names.append(action.__name__)
+            self.action_skip.append(skip)
+        else:
+            self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
+
+    @override
+    def logic(self) -> None:
+        for idx, (action, action_name) in enumerate(zip(self.actions, self.action_names)):
+            # loading
+            if self.action_skip[idx]:
+                logger.info('[No Calculation] Skipping >>%s<<...', action_name)
+                self.curr_proc_idx += 1
+                continue
+            # calculation
+            ret = action()
+            if ret is not None:
+                raise OutputInPipelineContainerError(
+                    (
+                        f'Output in PipelineContainers not allowed. Action {action_name} '
+                        f'returned values in Container {self.name}.'
+                    )
+                )
+            # processing tracking
+            self.curr_proc_idx += 1
+
+

Container class for basic actions. +Basic actions are usually functions, which do not take any parameters +and return nothing. Indeed, if an action returns any values after its +procedure is finished, an error is raised. Therefore, PipelineContainers +can be seen as a concatenation of many (independent) simple procedures +which are executed in the order in which they were added to the pipe. +With a simple call of the run method the actions are performed. +Additionally, there is an option to skip actions which can be set in +the add method. This allows for easily configurable pipelines, +e.g., via a user configuration.
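+A minimal usage sketch, assuming prepare_folders and cleanup_temp_files are
+parameterless placeholder functions that return nothing and that the container
+is imported from lang_main.pipelines.base:
+
+from pathlib import Path
+
+from lang_main.pipelines.base import PipelineContainer  # import path assumed
+
+def prepare_folders() -> None:
+    Path('out').mkdir(exist_ok=True)
+
+def cleanup_temp_files() -> None:
+    ...
+
+container = PipelineContainer(name='Housekeeping', working_dir=Path('out'))
+container.add(prepare_folders)
+container.add(cleanup_temp_files, skip=True)  # deactivated, e.g. via user config
+container.run()  # executes the remaining actions in insertion order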

+

Ancestors

+ +

Methods

+
+
+def add(self, action: Callable, skip: bool = False) ‑> None +
+
+
+ +Expand source code + +
@override
+def add(
+    self,
+    action: Callable,
+    skip: bool = False,
+) -> None:
+    if isinstance(action, Callable):
+        self.actions.append(action)
+        self.action_names.append(action.__name__)
+        self.action_skip.append(skip)
+    else:
+        self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
+
+
+
+
+def logic(self) ‑> None +
+
+
+ +Expand source code + +
@override
+def logic(self) -> None:
+    for idx, (action, action_name) in enumerate(zip(self.actions, self.action_names)):
+        # loading
+        if self.action_skip[idx]:
+            logger.info('[No Calculation] Skipping >>%s<<...', action_name)
+            self.curr_proc_idx += 1
+            continue
+        # calculation
+        ret = action()
+        if ret is not None:
+            raise OutputInPipelineContainerError(
+                (
+                    f'Output in PipelineContainers not allowed. Action {action_name} '
+                    f'returned values in Container {self.name}.'
+                )
+            )
+        # processing tracking
+        self.curr_proc_idx += 1
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/pipelines/index.html b/docs/lang_main/pipelines/index.html new file mode 100644 index 0000000..810682b --- /dev/null +++ b/docs/lang_main/pipelines/index.html @@ -0,0 +1,83 @@ + + + + + + +lang_main.pipelines API documentation + + + + + + + + + + + +
+ + +
+ + + diff --git a/docs/lang_main/pipelines/predefined.html b/docs/lang_main/pipelines/predefined.html new file mode 100644 index 0000000..e66d89a --- /dev/null +++ b/docs/lang_main/pipelines/predefined.html @@ -0,0 +1,386 @@ + + + + + + +lang_main.pipelines.predefined API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.pipelines.predefined

+
+
+
+
+
+
+
+
+

Functions

+
+
+def build_base_target_feature_pipe() ‑> Pipeline +
+
+
+ +Expand source code + +
def build_base_target_feature_pipe() -> Pipeline:
+    pipe_target_feat = Pipeline(name='Target_Feature', working_dir=SAVE_PATH_FOLDER)
+    pipe_target_feat.add(
+        load_raw_data,
+        {
+            'date_cols': DATE_COLS,
+        },
+    )
+    pipe_target_feat.add(remove_duplicates)
+    pipe_target_feat.add(remove_NA, save_result=True)
+    pipe_target_feat.add(
+        entry_wise_cleansing,
+        {
+            'target_features': (TARGET_FEATURE,),
+            'cleansing_func': clean_string_slim,
+        },
+        save_result=True,
+        filename=EntryPoints.TIMELINE,
+    )
+    pipe_target_feat.add(
+        analyse_feature,
+        {
+            'target_feature': TARGET_FEATURE,
+        },
+        save_result=True,
+    )
+
+    return pipe_target_feat
+
+
+
+
+def build_merge_duplicates_pipe() ‑> Pipeline +
+
+
+ +Expand source code + +
def build_merge_duplicates_pipe() -> Pipeline:
+    pipe_merge = Pipeline(name='Merge_Duplicates', working_dir=SAVE_PATH_FOLDER)
+    pipe_merge.add(
+        numeric_pre_filter_feature,
+        {
+            'feature': 'len',
+            'bound_lower': THRESHOLD_AMOUNT_CHARACTERS,
+            'bound_upper': None,
+        },
+    )
+    pipe_merge.add(
+        merge_similarity_duplicates,
+        {
+            'model': STFR_MODEL,
+            'cos_sim_threshold': THRESHOLD_SIMILARITY,
+        },
+        save_result=True,
+        filename=EntryPoints.TOKEN_ANALYSIS,
+    )
+
+    return pipe_merge
+
+
+
+
+def build_timeline_pipe() ‑> Pipeline +
+
+
+ +Expand source code + +
def build_timeline_pipe() -> Pipeline:
+    pipe_timeline = Pipeline(name='Timeline_Analysis', working_dir=SAVE_PATH_FOLDER)
+    pipe_timeline.add(
+        cleanup_descriptions,
+        {
+            'properties': ['ErledigungsBeschreibung'],
+        },
+    )
+    pipe_timeline.add(
+        calc_delta_to_repair,
+        {
+            'date_feature_start': 'ErstellungsDatum',
+            'date_feature_end': 'ErledigungsDatum',
+            'name_delta_feature': NAME_DELTA_FEAT_TO_REPAIR,
+            'convert_to_days': True,
+        },
+        save_result=True,
+        filename=EntryPoints.TIMELINE_POST,
+    )
+    pipe_timeline.add(
+        remove_non_relevant_obj_ids,
+        {
+            'thresh_unique_feat_per_id': THRESHOLD_UNIQUE_TEXTS,
+            'feature_uniqueness': UNIQUE_CRITERION_FEATURE,
+            'feature_obj_id': FEATURE_NAME_OBJ_ID,
+        },
+        save_result=True,
+    )
+    pipe_timeline.add(
+        generate_model_input,
+        {
+            'target_feature_name': 'nlp_model_input',
+            'model_input_features': MODEL_INPUT_FEATURES,
+        },
+    )
+    pipe_timeline.add(
+        filter_activities_per_obj_id,
+        {
+            'activity_feature': ACTIVITY_FEATURE,
+            'relevant_activity_types': ACTIVITY_TYPES,
+            'feature_obj_id': FEATURE_NAME_OBJ_ID,
+            'threshold_num_activities': THRESHOLD_NUM_ACTIVITIES,
+        },
+    )
+    pipe_timeline.add(
+        get_timeline_candidates,
+        {
+            'model': STFR_MODEL,
+            'cos_sim_threshold': THRESHOLD_TIMELINE_SIMILARITY,
+            'feature_obj_id': FEATURE_NAME_OBJ_ID,
+            'feature_obj_text': FEATURE_NAME_OBJ_TEXT,
+            'model_input_feature': 'nlp_model_input',
+        },
+        save_result=True,
+        filename=EntryPoints.TIMELINE_CANDS,
+    )
+
+    return pipe_timeline
+
+
+
+
+def build_tk_graph_pipe() ‑> Pipeline +
+
+
+ +Expand source code + +
def build_tk_graph_pipe() -> Pipeline:
+    pipe_token_analysis = Pipeline(name='Token_Analysis', working_dir=SAVE_PATH_FOLDER)
+    pipe_token_analysis.add(
+        build_token_graph,
+        {
+            'model': SPACY_MODEL,
+            'target_feature': 'entry',
+            'weights_feature': 'num_occur',
+            'batch_idx_feature': 'batched_idxs',
+            'build_map': False,
+            'batch_size_model': 50,
+        },
+        save_result=True,
+        filename=EntryPoints.TK_GRAPH_POST,
+    )
+
+    return pipe_token_analysis
+
+
+
+
+def build_tk_graph_post_pipe() ‑> Pipeline +
+
+
+ +Expand source code + +
def build_tk_graph_post_pipe() -> Pipeline:
+    pipe_graph_postprocessing = Pipeline(
+        name='Graph_Postprocessing', working_dir=SAVE_PATH_FOLDER
+    )
+    pipe_graph_postprocessing.add(
+        graphs.filter_graph_by_number_edges,
+        {
+            'limit': MAX_EDGE_NUMBER,
+            'property': 'weight',
+        },
+    )
+    pipe_graph_postprocessing.add(
+        graphs.filter_graph_by_node_degree,
+        {
+            'bound_lower': 1,
+            'bound_upper': None,
+        },
+    )
+    pipe_graph_postprocessing.add(
+        graphs.static_graph_analysis,
+        save_result=True,
+        filename=EntryPoints.TK_GRAPH_ANALYSIS,
+    )
+
+    return pipe_graph_postprocessing
+
+
+
+
+def build_tk_graph_render_pipe(with_subgraphs: bool,
export_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out'),
base_network_name: str = 'token_graph') ‑> Pipeline
+
+
+
+ +Expand source code + +
def build_tk_graph_render_pipe(
+    with_subgraphs: bool,
+    export_folder: Path = SAVE_PATH_FOLDER,
+    base_network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> Pipeline:
+    # optional dependency: late import
+    # raises exception if necessary modules are not found
+    try:
+        from lang_main.render import cytoscape as cyto
+    except ImportError:
+        raise ImportError(
+            (
+                'Dependencies for Cytoscape interaction not found. '
+                'Install the package with optional dependencies.'
+            )
+        )
+
+    pipe_graph_rendering = Pipeline(
+        name='Graph_Static-Rendering',
+        working_dir=SAVE_PATH_FOLDER,
+    )
+    pipe_graph_rendering.add(
+        cyto.import_to_cytoscape,
+        {
+            'network_name': base_network_name,
+        },
+    )
+    pipe_graph_rendering.add(
+        cyto.layout_network,
+        {
+            'network_name': base_network_name,
+        },
+    )
+    pipe_graph_rendering.add(
+        cyto.apply_style_to_network,
+        {
+            'network_name': base_network_name,
+        },
+    )
+    pipe_graph_rendering.add(
+        cyto.export_network_to_image,
+        {
+            'filename': base_network_name,
+            'target_folder': export_folder,
+            'network_name': base_network_name,
+        },
+    )
+
+    if with_subgraphs:
+        pipe_graph_rendering.add(
+            cyto.get_subgraph_node_selection,
+            {
+                'network_name': base_network_name,
+            },
+        )
+        pipe_graph_rendering.add(
+            cyto.build_subnetworks,
+            {
+                'export_image': True,
+                'target_folder': export_folder,
+                'network_name': base_network_name,
+            },
+        )
+
+    return pipe_graph_rendering
+
+
+
+
+def build_tk_graph_rescaling_pipe(save_result: bool, exit_point: lang_main.types.EntryPoints) ‑> Pipeline +
+
+
+ +Expand source code + +
def build_tk_graph_rescaling_pipe(
+    save_result: bool,
+    exit_point: EntryPoints,
+) -> Pipeline:
+    pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
+    pipe_graph_rescaling.add(
+        graphs.pipe_rescale_graph_edge_weights,
+    )
+    pipe_graph_rescaling.add(
+        graphs.pipe_add_graph_metrics,
+        save_result=save_result,
+        filename=exit_point,
+    )
+
+    return pipe_graph_rescaling
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/render/cytoscape.html b/docs/lang_main/render/cytoscape.html new file mode 100644 index 0000000..f9c643a --- /dev/null +++ b/docs/lang_main/render/cytoscape.html @@ -0,0 +1,797 @@ + + + + + + +lang_main.render.cytoscape API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.render.cytoscape

+
+
+
+
+
+
+
+
+

Functions

+
+
+def analyse_network(property_degree_weighted: str = 'degree_weighted',
network_name: str = 'token_graph') ‑> None
+
+
+
+ +Expand source code + +
def analyse_network(
+    property_degree_weighted: str = PROPERTY_NAME_DEGREE_WEIGHTED,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> None:
+    node_table = p4c.get_table_columns(table='node', network=network_name)
+    net_analyse_possible: bool = True
+    if len(node_table) < 4:  # pragma: no cover
+        net_analyse_possible = False
+
+    if net_analyse_possible:
+        p4c.analyze_network(directed=False)
+        node_table = p4c.get_table_columns(table='node', network=network_name)
+        node_table['stress_norm'] = node_table['Stress'] / node_table['Stress'].max()
+        node_table[CYTO_SELECTION_PROPERTY] = (
+            node_table[property_degree_weighted]
+            * node_table['BetweennessCentrality']
+            * node_table['stress_norm']
+        )
+    else:  # pragma: no cover
+        node_table[CYTO_SELECTION_PROPERTY] = 1
+
+    p4c.load_table_data(node_table, data_key_column='name', network=network_name)
+
+
+
+
+def apply_style_to_network(style_name: str = 'lang_main',
pth_to_stylesheet: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-main/src/lang_main/cytoscape_config/lang_main.xml'),
network_name: str = 'token_graph',
node_size_property: str = 'node_selection',
min_node_size: int = 15,
max_node_size: int = 40,
sandbox_name: str = 'lang_main') ‑> None
+
+
+
+ +Expand source code + +
def apply_style_to_network(
+    style_name: str = CYTO_STYLESHEET_NAME,
+    pth_to_stylesheet: Path = CYTO_PATH_STYLESHEET,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+    node_size_property: str = CYTO_SELECTION_PROPERTY,
+    min_node_size: int = 15,
+    max_node_size: int = 40,
+    sandbox_name: str = CYTO_SANDBOX_NAME,
+) -> None:
+    """Cytoscape: apply a chosen Cytoscape style to the defined network
+
+    Parameters
+    ----------
+    style_name : str, optional
+        Cytoscape name of the style which should be applied,
+        by default CYTO_STYLESHEET_NAME
+    pth_to_stylesheet : Path, optional
+        path where the stylesheet definition in Cytoscape's XML format can
+        be found,
+        by default CYTO_PATH_STYLESHEET
+    network_name : str, optional
+        network to apply the style on, by default CYTO_BASE_NETWORK_NAME
+
+    Raises
+    ------
+    FileNotFoundError
+        if provided stylesheet can not be found under the provided path
+    """
+    logger.debug('Applying style to network...')
+    styles_avail = cast(list[str], p4c.get_visual_style_names())
+    logger.debug('Available styles: %s', styles_avail)
+    if style_name not in styles_avail:
+        if not pth_to_stylesheet.exists():
+            # existence for standard path verified at import, but not for other
+            # provided paths
+            raise FileNotFoundError(
+                f'Visual stylesheet for Cytoscape not found under: >>{pth_to_stylesheet}<<'
+            )
+        # send to sandbox
+        sandbox_filename = pth_to_stylesheet.name
+        p4c.sandbox_send_to(
+            source_file=pth_to_stylesheet,
+            dest_file=sandbox_filename,
+            overwrite=True,
+            sandbox_name=sandbox_name,
+        )
+        # load stylesheet
+        p4c.import_visual_styles(sandbox_filename)
+
+    p4c.set_visual_style(style_name, network=network_name)
+    # node size mapping, only if needed property is available
+    scheme = p4c.scheme_c_number_continuous(
+        start_value=min_node_size, end_value=max_node_size
+    )
+    node_size_map = p4c.gen_node_size_map(
+        node_size_property,
+        number_scheme=scheme,
+        mapping_type='c',
+        style_name=style_name,
+        default_number=min_node_size,
+    )
+    p4c.set_node_size_mapping(**node_size_map)
+    fit_content(network_name=network_name)
+    logger.debug('Style application to network successful.')
+
+

Cytoscape: apply a chosen Cytoscape style to the defined network

+

Parameters

+
+
style_name : str, optional
+
Cytoscape name of the style which should be applied, +by default CYTO_STYLESHEET_NAME
+
pth_to_stylesheet : Path, optional
+
path where the stylesheet definition in Cytoscape's XML format can +be found, +by default CYTO_PATH_STYLESHEET
+
network_name : str, optional
+
network to apply the style on, by default CYTO_BASE_NETWORK_NAME
+
+

Raises

+
+
FileNotFoundError
+
if provided stylesheet can not be found under the provided path
+
+
+
+def build_subnetworks(nodes_to_analyse: Iterable[int],
network_name: str = 'token_graph',
export_image: bool = True,
target_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out')) ‑> None
+
+
+
+ +Expand source code + +
def build_subnetworks(
+    nodes_to_analyse: Iterable[CytoNodeID],
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+    export_image: bool = True,
+    target_folder: Path = SAVE_PATH_FOLDER,
+) -> None:
+    """Cytoscape: iteratively build subnetworks from a collection of nodes
+    and their respective neighbouring nodes
+
+    Parameters
+    ----------
+    nodes_to_analyse : Iterable[CytoNodeID]
+        collection of nodes to make subnetworks from, for each node a dedicated
+        subnetwork will be generated
+    network_name : str, optional
+        network which contains the provided nodes,
+        by default CYTO_BASE_NETWORK_NAME
+    export_image : bool, optional
+        trigger image export of newly generated subnetworks, by default True
+    """
+    logger.debug('Generating all subnetworks for node selection...')
+    for idx, node in enumerate(nodes_to_analyse):
+        select_neighbours_of_node(node=node, network_name=network_name)
+        make_subnetwork(
+            index=idx,
+            network_name=network_name,
+            export_image=export_image,
+            target_folder=target_folder,
+        )
+    logger.debug('Generation of all subnetworks for node selection successful.')
+
+

Cytoscape: iteratively build subnetworks from a collection of nodes +and their respective neighbouring nodes

+

Parameters

+
+
nodes_to_analyse : Iterable[CytoNodeID]
+
collection of nodes to make subnetworks from, for each node a dedicated +subnetwork will be generated
+
network_name : str, optional
+
network which contains the provided nodes, +by default CYTO_BASE_NETWORK_NAME
+
export_image : bool, optional
+
trigger image export of newly generated subnetworks, by default True
+
+
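+A short sketch pairing this function with get_subgraph_node_selection, mirroring
+the with_subgraphs branch of build_tk_graph_render_pipe (network name and count
+are the documented defaults):
+
+from lang_main.render import cytoscape as cyto
+
+nodes = cyto.get_subgraph_node_selection(network_name='token_graph', num_subgraphs=5)
+cyto.build_subnetworks(nodes, network_name='token_graph', export_image=True)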
+
+def change_default_layout() ‑> None +
+
+
+ +Expand source code + +
def change_default_layout() -> None:
+    """Cytoscape: resets the default layout to `grid` to accelerate the import process
+    (the grid layout is one of the fastest)
+
+    Raises
+    ------
+    RequestException
+        API endpoint not reachable or CyREST operation not successful
+    """
+    body: dict[str, str] = {'value': 'grid', 'key': 'layout.default'}
+    try:
+        p4c.cyrest_put('properties/cytoscape3.props/layout.default', body=body)
+    except RequestException as error:
+        logger.error('[CytoAPIConnection] Property change of default layout not successful.')
+        raise error
+
+

Cytoscape: resets the default layout to grid to accelerate the import process +(the grid layout is one of the fastest)

+

Raises

+
+
RequestException
+
API endpoint not reachable or CyREST operation not successful
+
+
+
+def export_network_to_image(filename: str,
target_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out'),
filetype: Literal['JPEG', 'PDF', 'PNG', 'PS', 'SVG'] = 'SVG',
network_name: str = 'token_graph',
pdf_export_page_size: Literal['A0', 'A1', 'A2', 'A3', 'A4', 'A5', 'Auto', 'Legal', 'Letter', 'Tabloid'] = 'A4',
sandbox_name: str = 'lang_main') ‑> None
+
+
+
+ +Expand source code + +
def export_network_to_image(
+    filename: str,
+    target_folder: Path = SAVE_PATH_FOLDER,
+    filetype: CytoExportFileTypes = 'SVG',
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+    pdf_export_page_size: CytoExportPageSizes = 'A4',
+    sandbox_name: str = CYTO_SANDBOX_NAME,
+) -> None:
+    """Cytoscape: export current selected view as image
+
+    Parameters
+    ----------
+    filename : str
+        export filename
+    filetype : CytoExportFileTypes, optional
+        export filetype supported by Cytoscape, by default 'SVG'
+    network_name : str, optional
+        network to export, by default CYTO_BASE_NETWORK_NAME
+    pdf_export_page_size : CytoExportPageSizes, optional
+        page size which should be used for PDF exports supported by Cytoscape,
+        by default 'A4'
+    """
+    logger.debug('Exporting image to file...')
+    if not target_folder.exists():  # pragma: no cover
+        target_folder.mkdir(parents=True)
+    dst_file_pth = (target_folder / filename).with_suffix(f'.{filetype.lower()}')
+
+    text_as_font = True
+    if filetype == 'SVG':
+        text_as_font = False
+
+    # close non-necessary windows and fit graph in frame before image display
+    fit_content(network_name=network_name)
+    # image is generated in sandbox directory and transferred to target destination
+    # (preparation for remote instances of Cytoscape)
+    p4c.export_image(
+        filename=filename,
+        type=filetype,
+        network=network_name,
+        overwrite_file=True,
+        all_graphics_details=True,
+        export_text_as_font=text_as_font,
+        page_size=pdf_export_page_size,
+    )
+    logger.debug('Exported image to sandbox.')
+    logger.debug('Transferring image from sandbox to target destination...')
+    sandbox_filename = f'{filename}.{filetype.lower()}'
+    p4c.sandbox_get_from(
+        source_file=sandbox_filename,
+        dest_file=str(dst_file_pth),
+        overwrite=True,
+        sandbox_name=sandbox_name,
+    )
+    logger.debug('Transfer of image from sandbox to target destination successful.')
+
+

Cytoscape: export current selected view as image

+

Parameters

+
+
filename : str
+
export filename
+
filetype : CytoExportFileTypes, optional
+
export filetype supported by Cytoscape, by default 'SVG'
+
network_name : str, optional
+
network to export, by default CYTO_BASE_NETWORK_NAME
+
pdf_export_page_size : CytoExportPageSizes, optional
+
page size which should be used for PDF exports supported by Cytoscape, +by default 'A4'
+
+
+
+def fit_content(zoom_factor: float = 0.96, network_name: str = 'token_graph') ‑> None +
+
+
+ +Expand source code + +
def fit_content(
+    zoom_factor: float = CYTO_NETWORK_ZOOM_FACTOR,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> None:
+    p4c.hide_all_panels()
+    p4c.fit_content(selected_only=False, network=network_name)
+    zoom_current = p4c.get_network_zoom(network=network_name)
+    zoom_new = zoom_current * zoom_factor
+    p4c.set_network_zoom_bypass(zoom_new, bypass=False, network=network_name)
+
+
+
+
+def get_subgraph_node_selection(network_name: str = 'token_graph', num_subgraphs: int = 5) ‑> list[int] +
+
+
+ +Expand source code + +
def get_subgraph_node_selection(
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+    num_subgraphs: int = CYTO_NUMBER_SUBGRAPHS,
+) -> list[CytoNodeID]:
+    """Cytoscape: obtain the relevant nodes for iterative subgraph generation
+
+    Parameters
+    ----------
+    network_name : str, optional
+        network to retrieve the nodes from, by default CYTO_BASE_NETWORK_NAME
+    num_subgraphs : int, optional
+        number of relevant nodes which form the basis to generate subgraphs from,
+        by default CYTO_NUMBER_SUBGRAPHS
+
+    Returns
+    -------
+    list[CytoNodeID]
+        list containing all relevant Cytoscape nodes
+    """
+    logger.debug('Selecting nodes for subgraph generation...')
+    node_table = p4c.get_table_columns(table='node', network=network_name)
+    node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False)
+    p4c.load_table_data(node_table, data_key_column='name', network=network_name)
+    node_table_choice = node_table.iloc[:num_subgraphs]
+    logger.debug('Selection of nodes for subgraph generation successful.')
+
+    return node_table_choice['SUID'].to_list()
+
+

Cytoscape: obtain the relevant nodes for iterative subgraph generation

+

Parameters

+
+
network_name : str, optional
+
network to retrieve the nodes from, by default CYTO_BASE_NETWORK_NAME
+
num_subgraphs : int, optional
+
number of relevant nodes which form the basis to generate subgraphs from, +by default CYTO_NUMBER_SUBGRAPHS
+
+

Returns

+
+
list[CytoNodeID]
+
list containing all relevant Cytoscape nodes
+
+
+
+def import_to_cytoscape(graph: networkx.classes.digraph.DiGraph | networkx.classes.graph.Graph,
network_name: str = 'token_graph',
sandbox_name: str = 'lang_main',
reinitialise_sandbox: bool = True) ‑> None
+
+
+
+ +Expand source code + +
def import_to_cytoscape(
+    graph: DiGraph | Graph,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+    sandbox_name: str = CYTO_SANDBOX_NAME,
+    reinitialise_sandbox: bool = True,
+) -> None:
+    """Cytoscape: import NetworkX graph as new network collection
+
+    Parameters
+    ----------
+    graph : DiGraph | Graph
+        NetworkX graph object
+    """
+    logger.debug('Checking Cytoscape connection...')
+    verify_connection()
+    logger.debug('Checking graph size for rendering...')
+    verify_graph_render_size(graph)
+    logger.debug('Setting default layout to improve import speed...')
+    change_default_layout()
+    logger.debug('Setting Cytoscape sandbox...')
+    p4c.sandbox_set(
+        sandbox_name=sandbox_name,
+        reinitialize=reinitialise_sandbox,
+        copy_samples=False,
+    )
+    logger.debug('Importing to and analysing network in Cytoscape...')
+    p4c.delete_all_networks()
+    p4c.create_network_from_networkx(
+        graph,
+        title=network_name,
+        collection=CYTO_COLLECTION_NAME,
+    )
+    analyse_network(network_name=network_name)
+    logger.debug('Import and analysis of network to Cytoscape successful.')
+
+

Cytoscape: import NetworkX graph as new network collection

+

Parameters

+
+
graph : DiGraph | Graph
+
NetworkX graph object
+
+
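+A minimal end-to-end sketch that mirrors the call sequence used by
+build_tk_graph_render_pipe (the example graph is a placeholder small enough to
+render; a running Cytoscape instance with CyREST is required):
+
+import networkx as nx
+
+from lang_main.render import cytoscape as cyto
+
+graph = nx.karate_club_graph()  # placeholder graph, 34 nodes / 78 edges
+cyto.import_to_cytoscape(graph, network_name='token_graph')
+cyto.layout_network(network_name='token_graph')
+cyto.apply_style_to_network(network_name='token_graph')
+cyto.export_network_to_image(filename='token_graph', network_name='token_graph')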
+
+def layout_network(layout_name: Literal['attribute-circle', 'attribute-grid', 'attributes-layout', 'circular', 'cose', 'degree-circle', 'force-directed', 'force-directed-cl', 'fruchterman-rheingold', 'grid', 'hierarchical', 'isom', 'kamada-kawai', 'stacked-node-layout'] = 'force-directed',
layout_properties: dict[str, float | bool] = {'numIterations': 1000, 'defaultSpringCoefficient': 0.0001, 'defaultSpringLength': 45, 'defaultNodeMass': 11, 'isDeterministic': True, 'singlePartition': False},
network_name: str = 'token_graph') ‑> None
+
+
+
+ +Expand source code + +
def layout_network(
+    layout_name: CytoLayouts = CYTO_LAYOUT_NAME,
+    layout_properties: CytoLayoutProperties = CYTO_LAYOUT_PROPERTIES,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> None:
+    """Cytoscape: apply a supported layout algorithm to currently selected
+    network
+
+    Parameters
+    ----------
+    layout_name : CytoLayouts, optional
+        layout algorithm supported by Cytoscape (name of the CyREST API, does not
+        necessarily match the name in the Cytoscape UI),
+        by default CYTO_LAYOUT_NAME
+    layout_properties : CytoLayoutProperties, optional
+        configuration of parameters for the given layout algorithm,
+        by default CYTO_LAYOUT_PROPERTIES
+    network_name : str, optional
+        network to apply the layout algorithm on, by default CYTO_BASE_NETWORK_NAME
+    """
+    logger.debug('Applying layout to network...')
+    p4c.set_layout_properties(layout_name, layout_properties)
+    p4c.layout_network(layout_name=layout_name, network=network_name)
+    fit_content(network_name=network_name)
+    logger.debug('Layout application to network successful.')
+
+

Cytoscape: apply a supported layout algorithm to currently selected +network

+

Parameters

+
+
layout_name : CytoLayouts, optional
+
layout algorithm supported by Cytoscape (name of the CyREST API, does not +necessarily match the name in the Cytoscape UI), +by default CYTO_LAYOUT_NAME
+
layout_properties : CytoLayoutProperties, optional
+
configuration of parameters for the given layout algorithm, +by default CYTO_LAYOUT_PROPERTIES
+
network_name : str, optional
+
network to apply the layout algorithm on, by default CYTO_BASE_NETWORK_NAME
+
+
+
+def make_subnetwork(index: int,
network_name: str = 'token_graph',
export_image: bool = True,
target_folder: pathlib.Path = WindowsPath('A:/Arbeitsaufgaben/lang-data/out')) ‑> None
+
+
+
+ +Expand source code + +
def make_subnetwork(
+    index: int,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+    export_image: bool = True,
+    target_folder: Path = SAVE_PATH_FOLDER,
+) -> None:
+    """Cytoscape: generate a new subnetwork based on the currently
+    selected nodes and edges
+
+    Parameters
+    ----------
+    index : int
+        id-like property to identify the subnetwork relative to its parent
+    network_name : str, optional
+        network to generate subnetwork from, by default CYTO_BASE_NETWORK_NAME
+    export_image : bool, optional
+        trigger image export of newly generated subnetwork, by default True
+    """
+    logger.debug('Generating subnetwork with index %d...', index)
+    subnetwork_name = network_name + f'_sub_{index+1}'
+    p4c.create_subnetwork(
+        nodes='selected',
+        edges='selected',
+        subnetwork_name=subnetwork_name,
+        network=network_name,
+    )
+    p4c.set_current_network(subnetwork_name)
+
+    if export_image:
+        time.sleep(1)
+        export_network_to_image(
+            filename=subnetwork_name,
+            target_folder=target_folder,
+            network_name=subnetwork_name,
+        )
+
+    logger.debug('Generation of subnetwork with index %d successful.', index)
+
+

Cytoscape: generate a new subnetwork based on the currently +selected nodes and edges

+

Parameters

+
+
index : int
+
id-like property to identify the subnetwork relative to its parent
+
network_name : str, optional
+
network to generate subnetwork from, by default CYTO_BASE_NETWORK_NAME
+
export_image : bool, optional
+
trigger image export of newly generated subnetwork, by default True
+
+
+
+def reset_current_network_to_base() ‑> None +
+
+
+ +Expand source code + +
def reset_current_network_to_base() -> None:
+    """resets to currently selected network in Cytoscape back to the base one"""
+    p4c.set_current_network(CYTO_BASE_NETWORK_NAME)
+
+

resets the currently selected network in Cytoscape back to the base one

+
+
+def select_neighbours_of_node(node: int, neighbour_iter_depth: int = 2, network_name: str = 'token_graph') ‑> None +
+
+
+ +Expand source code + +
def select_neighbours_of_node(
+    node: CytoNodeID,
+    neighbour_iter_depth: int = CYTO_ITER_NEIGHBOUR_DEPTH,
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> None:
+    """Cytoscape: iterative selection of a node's neighbouring nodes and
+    their connecting edges
+
+    Parameters
+    ----------
+    node : CytoNodeID
+        node which neighbours should be selected
+    neighbour_iter_depth : int, optional
+        indicates how many levels of neighbours should be chosen, e.g. 1 --> only
+        first-level neighbours are considered which are directly connected to the node,
+        2 --> all nodes with iteration depth of 1 are chosen and additionally their
+        direct neighbours,
+        by default CYTO_ITER_NEIGHBOUR_DEPTH
+    network_name : str, optional
+        network to perform action on, by default CYTO_BASE_NETWORK_NAME
+    """
+    logger.debug('Selecting node neighbours for %s...', node)
+    p4c.clear_selection(network=network_name)
+    p4c.select_nodes(node, network=network_name)
+
+    for _ in range(neighbour_iter_depth):
+        _ = p4c.select_first_neighbors(network=network_name)
+
+    _ = p4c.select_edges_connecting_selected_nodes()
+    logger.debug('Selection of node neighbours for %s successful.', node)
+
+

Cytoscape: iterative selection of a node's neighbouring nodes and +their connecting edges

+

Parameters

+
+
node : CytoNodeID
+
node which neighbours should be selected
+
neighbour_iter_depth : int, optional
+
indicates how many levels of neighbours should be chosen, e.g. 1 –> only +first-level neighbours are considered which are directly connected to the node, +2 –> all nodes with iteration depth of 1 are chosen and additionally their +direct neighbours, +by default CYTO_ITER_NEIGHBOUR_DEPTH
+
network_name : str, optional
+
network to perform action on, by default CYTO_BASE_NETWORK_NAME
+
+
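+For illustration, a manual two-hop selection followed by a subnetwork export
+(the node SUID 12345 is a placeholder):
+
+from lang_main.render import cytoscape as cyto
+
+cyto.select_neighbours_of_node(node=12345, neighbour_iter_depth=2,
+                               network_name='token_graph')
+cyto.make_subnetwork(index=0, network_name='token_graph', export_image=False)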
+
+def verify_connection() ‑> None +
+
+
+ +Expand source code + +
def verify_connection() -> None:
+    """Cytoscape: checks if CyREST and Cytoscape versions are compatible nad
+    if Cytoscape API endpoint is reachable
+
+    Raises
+    ------
+    CyError
+        incompatible CyREST or Cytoscape versions
+    RequestException
+        API endpoint not reachable
+    """
+    try:
+        p4c.cytoscape_ping()
+    except CyError as error:  # pragma: no cover
+        logger.error('[CyError] CyREST or Cytoscape version not supported.')
+        raise error
+    except RequestException as error:
+        logger.error('[CytoAPIConnection] Connection to CyREST API failed.')
+        raise error
+
+

Cytoscape: checks if CyREST and Cytoscape versions are compatible and +if Cytoscape API endpoint is reachable

+

Raises

+
+
CyError
+
incompatible CyREST or Cytoscape versions
+
RequestException
+
API endpoint not reachable
+
+
+
+def verify_graph_render_size(graph: networkx.classes.digraph.DiGraph | networkx.classes.graph.Graph,
max_node_count: int | None = 500,
max_edge_count: int | None = 800) ‑> None
+
+
+
+ +Expand source code + +
def verify_graph_render_size(
+    graph: Graph | DiGraph,
+    max_node_count: int | None = CYTO_MAX_NODE_COUNT,
+    max_edge_count: int | None = CYTO_MAX_EDGE_COUNT,
+) -> None:
+    """verify that the graph size can still be handled within an acceptable time
+    frame for rendering in Cytoscape
+
+    Parameters
+    ----------
+    graph : Graph | DiGraph
+        graph to verify
+    max_node_count : int | None, optional
+        maximum allowed number of nodes, by default CYTO_MAX_NODE_COUNT
+    max_edge_count : int | None, optional
+        maximum allowed number of edges, by default CYTO_MAX_EDGE_COUNT
+
+    Raises
+    ------
+    GraphRenderError
+        if any of the provided limits is exceeded
+    """
+    num_nodes = len(graph.nodes)
+    num_edges = len(graph.edges)
+    if max_node_count is not None and num_nodes > max_node_count:
+        raise GraphRenderError(
+            f'Maximum number of nodes for rendering exceeded. '
+            f'Limit {max_node_count}, Counted: {num_nodes}'
+        )
+
+    if max_edge_count is not None and num_edges > max_edge_count:
+        raise GraphRenderError(
+            f'Maximum number of edges for rendering exceeded. '
+            f'Limit {max_edge_count}, Counted: {num_edges}'
+        )
+
+

verify that the graph size can still be handled within an acceptable time +frame for rendering in Cytoscape

+

Parameters

+
+
graph : Graph | DiGraph
+
graph to verify
+
max_node_count : int | None, optional
+
maximum allowed number of nodes, by default CYTO_MAX_NODE_COUNT
+
max_edge_count : int | None, optional
+
maximum allowed number of edges, by default CYTO_MAX_EDGE_COUNT
+
+

Raises

+
+
GraphRenderError
+
if any of the provided limits is exceeded
+
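+A small sketch of the guard (the graph is a placeholder; a complete graph with
+40 nodes has 780 edges and therefore stays within the documented defaults):
+
+import networkx as nx
+
+from lang_main.render import cytoscape as cyto
+
+graph = nx.complete_graph(40)
+cyto.verify_graph_render_size(graph, max_node_count=500, max_edge_count=800)
+# passes silently; nx.complete_graph(50) would exceed 800 edges and raise
+# GraphRenderError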
+
+
+def verify_table_property(property: str,
table_type: Literal['node', 'edge', 'network'] = 'node',
network_name: str = 'token_graph') ‑> bool
+
+
+
+ +Expand source code + +
def verify_table_property(
+    property: str,
+    table_type: Literal['node', 'edge', 'network'] = 'node',
+    network_name: str = CYTO_BASE_NETWORK_NAME,
+) -> bool:
+    table = p4c.get_table_columns(table=table_type, network=network_name)
+    logger.debug('Table >>%s<< with columns: %s', table, table.columns)
+
+    return property in table.columns
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/render/cytoscape_monkeypatch.html b/docs/lang_main/render/cytoscape_monkeypatch.html new file mode 100644 index 0000000..a7f4893 --- /dev/null +++ b/docs/lang_main/render/cytoscape_monkeypatch.html @@ -0,0 +1,182 @@ + + + + + + +lang_main.render.cytoscape_monkeypatch API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.render.cytoscape_monkeypatch

+
+
+
+
+
+
+
+
+

Functions

+
+
+def select_edges_connecting_selected_nodes(network=None, base_url='http://127.0.0.1:1234/v1') +
+
+
+ +Expand source code + +
@cy_log  # pragma: no cover
+def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL):  # noqa: F405 # pragma: no cover
+    """Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.
+
+    Any edges selected beforehand are deselected before any new edges are selected
+
+    Args:
+        network (SUID or str or None): Name or SUID of a network. Default is the
+            "current" network active in Cytoscape.
+        base_url (str): Ignore unless you need to specify a custom domain,
+            port or version to connect to the CyREST API. Default is http://127.0.0.1:1234
+            and the latest version of the CyREST API supported by this version of py4cytoscape.
+
+    Returns:
+         dict: {'nodes': [node list], 'edges': [edge list]} or None if no selected nodes
+    Raises:
+        CyError: if network name or SUID doesn't exist
+        requests.exceptions.RequestException: if can't connect to Cytoscape or Cytoscape returns an error
+
+    Examples:
+        >>> select_edges_connecting_selected_nodes()
+        None
+        >>> select_edges_connecting_selected_nodes(network='My Network')
+        {'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
+        >>> select_edges_connecting_selected_nodes(network=52)
+        {'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
+
+    Note:
+        In the return value node list is list of all selected nodes, and
+        edge list is the SUIDs of selected edges -- dict is None if no nodes were selected or there were no newly
+        created edges
+    """
+    net_suid = networks.get_network_suid(network, base_url=base_url)
+
+    selected_nodes = get_selected_nodes(network=net_suid, base_url=base_url)
+    # TODO: In R version, NA test is after len() test ... shouldn't it be before?
+    if not selected_nodes:
+        return None
+
+    all_edges = networks.get_all_edges(net_suid, base_url=base_url)
+
+    selected_sources = set()
+    selected_targets = set()
+    for n in selected_nodes:
+        n = re_parenthesis_1.sub(r'\(', n)  # type: ignore
+        n = re_parenthesis_2.sub(r'\)', n)  # type: ignore
+        selected_sources |= set(filter(re.compile('^' + n).search, all_edges))  # type: ignore
+        selected_targets |= set(filter(re.compile(n + '$').search, all_edges))  # type: ignore
+
+    selected_edges = list(selected_sources.intersection(selected_targets))
+
+    if len(selected_edges) == 0:
+        return None
+    res = select_edges(
+        selected_edges,
+        by_col='name',
+        preserve_current_selection=False,
+        network=net_suid,
+        base_url=base_url,
+    )
+    return res
+    # TODO: isn't the pattern match a bit cheesy ... shouldn't it be ^+n+' ('    and    ') '+n+$ ???
+
+

Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.

+

Any edges selected beforehand are deselected before any new edges are selected

+

Args

+
+
network : SUID or str or None
+
Name or SUID of a network. Default is the +"current" network active in Cytoscape.
+
base_url : str
+
Ignore unless you need to specify a custom domain, +port or version to connect to the CyREST API. Default is http://127.0.0.1:1234 +and the latest version of the CyREST API supported by this version of py4cytoscape.
+
+

Returns

+
+
dict
+
{'nodes': [node list], 'edges': [edge list]} or None if no selected nodes
+
+

Raises

+
+
CyError
+
if network name or SUID doesn't exist
+
requests.exceptions.RequestException
+
if can't connect to Cytoscape or Cytoscape returns an error
+
+

Examples

+
>>> select_edges_connecting_selected_nodes()
+None
+>>> select_edges_connecting_selected_nodes(network='My Network')
+{'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
+>>> select_edges_connecting_selected_nodes(network=52)
+{'nodes': [103990, 103991, ...], 'edges': [104432, 104431, ...]}
+
+

Note

+

In the return value node list is list of all selected nodes, and +edge list is the SUIDs of selected edges – dict is None if no nodes were selected or there were no newly +created edges

+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/render/index.html b/docs/lang_main/render/index.html new file mode 100644 index 0000000..a3588dd --- /dev/null +++ b/docs/lang_main/render/index.html @@ -0,0 +1,83 @@ + + + + + + +lang_main.render API documentation + + + + + + + + + + + +
+ + +
+ + + diff --git a/docs/lang_main/search.html b/docs/lang_main/search.html new file mode 100644 index 0000000..f2f18ce --- /dev/null +++ b/docs/lang_main/search.html @@ -0,0 +1,261 @@ + + + + + + +lang_main.search API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.search

+
+
+
+
+
+
+
+
+

Functions

+
+
+def search_base_path(starting_path: pathlib.Path, stop_folder_name: str | None = None) ‑> pathlib.Path | None +
+
+
+ +Expand source code + +
def search_base_path(
+    starting_path: Path,
+    stop_folder_name: str | None = None,
+) -> Path | None:
+    """Iteratively searches the parent directories of the starting path
+    and looks for folders matching the given name. If a match is encountered,
+    the parent path will be returned.
+
+    Example:
+    starting_path = path/to/start/folder
+    stop_folder_name = 'to'
+    returned path = 'path/'
+
+    Parameters
+    ----------
+    starting_path : Path
+        non-inclusive starting path
+    stop_folder_name : str, optional
+        name of the last folder in the directory tree to search, by default None
+
+    Returns
+    -------
+    Path | None
+        Path if corresponding base path was found, None otherwise
+    """
+    stop_folder_path: Path | None = None
+    base_path: Path | None = None
+    for search_path in starting_path.parents:
+        if stop_folder_name is not None and search_path.name == stop_folder_name:
+            # library is placed inside a whole python installation for deployment
+            # only look up to this folder
+            stop_folder_path = search_path
+            break
+
+    if stop_folder_path is not None:
+        base_path = stop_folder_path.parent
+
+    return base_path
+
+

Iteratively searches the parent directories of the starting path +and looks for folders matching the given name. If a match is encountered, +the parent path will be returned.

+

Example: +starting_path = path/to/start/folder +stop_folder_name = 'to' +returned path = 'path/'

+

Parameters

+
+
starting_path : Path
+
non-inclusive starting path
+
stop_folder_name : str, optional
+
name of the last folder in the directory tree to search, by default None
+
+

Returns

+
+
Path | None
+
Path if corresponding base path was found, None otherwise
+
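+A sketch matching the docstring example (the paths are illustrative only):
+
+from pathlib import Path
+
+from lang_main.search import search_base_path
+
+base = search_base_path(Path('path/to/start/folder'), stop_folder_name='to')
+assert base == Path('path')  # parent of the first ancestor folder named 'to'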
+
+
+def search_cwd(glob_pattern: str) ‑> pathlib.Path | None +
+
+
+ +Expand source code + +
def search_cwd(
+    glob_pattern: str,
+) -> Path | None:
+    """Searches the current working directory and looks for files
+    matching the glob pattern.
+    Returns the first match encountered.
+
+    Parameters
+    ----------
+    glob_pattern : str, optional
+        pattern to look for, first match will be returned
+
+    Returns
+    -------
+    Path | None
+        Path if corresponding object was found, None otherwise
+    """
+    path_found: Path | None = None
+    res = tuple(Path.cwd().glob(glob_pattern))
+    if res:
+        path_found = res[0]
+
+    return path_found
+
+

Searches the current working directory and looks for files +matching the glob pattern. +Returns the first match encountered.

+

Parameters

+
+
glob_pattern : str, optional
+
pattern to look for, first match will be returned
+
+

Returns

+
+
Path | None
+
Path if corresponding object was found, None otherwise
+
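+A small sketch (the glob pattern is an assumption for illustration):
+
+from lang_main.search import search_cwd
+
+config_file = search_cwd('*.toml')
+if config_file is None:
+    print('no TOML file in the current working directory')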
+
+
+def search_iterative(starting_path: pathlib.Path,
glob_pattern: str,
stop_folder_name: str | None = None) ‑> pathlib.Path | None
+
+
+
+ +Expand source code + +
def search_iterative(
+    starting_path: Path,
+    glob_pattern: str,
+    stop_folder_name: str | None = None,
+) -> Path | None:
+    """Iteratively searches the parent directories of the starting path
+    and looks for files matching the glob pattern. The starting path is not
+    searched, only its parents. Therefore, the starting path can also point
+    to a file; the folder in which it is placed will be searched.
+    Returns the first match encountered.
+    The parent of the stop folder will be searched if it exists.
+
+    Parameters
+    ----------
+    starting_path : Path
+        non-inclusive starting path
+    glob_pattern : str, optional
+        pattern to look for, first match will be returned
+    stop_folder_name : str, optional
+        name of the last folder in the directory tree to search, by default None
+
+    Returns
+    -------
+    Path | None
+        Path if corresponding object was found, None otherwise
+    """
+    file_path: Path | None = None
+    stop_folder_reached: bool = False
+    for search_path in starting_path.parents:
+        res = tuple(search_path.glob(glob_pattern))
+        if res:
+            file_path = res[0]
+            break
+        elif stop_folder_reached:
+            break
+
+        if stop_folder_name is not None and search_path.name == stop_folder_name:
+            # library is placed inside a whole python installation for deployment
+            # if this folder is reached, only look up one parent above
+            stop_folder_reached = True
+
+    return file_path
+
+

Iteratively searches the parent directories of the starting path +and looks for files matching the glob pattern. The starting path is not +searched, only its parents. Therefore, the starting path can also point +to a file; the folder in which it is placed will be searched. +Returns the first match encountered. +The parent of the stop folder will be searched if it exists.

+

Parameters

+
+
starting_path : Path
+
non-inclusive starting path
+
glob_pattern : str, optional
+
pattern to look for, first match will be returned
+
stop_folder_name : str, optional
+
name of the last folder in the directory tree to search, by default None
+
+

Returns

+
+
Path | None
+
Path if corresponding object was found, None otherwise
+
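+A sketch of an upward search for a configuration file (file name and stop folder
+are assumptions for illustration):
+
+from pathlib import Path
+
+from lang_main.search import search_iterative
+
+cfg = search_iterative(
+    starting_path=Path(__file__),
+    glob_pattern='lang_main_config.toml',
+    stop_folder_name='site-packages',
+)
+if cfg is None:
+    print('no configuration file found above this module')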
+
+
+
+
+
+
+ +
+ + + diff --git a/docs/lang_main/types.html b/docs/lang_main/types.html new file mode 100644 index 0000000..6de1573 --- /dev/null +++ b/docs/lang_main/types.html @@ -0,0 +1,10637 @@ + + + + + + +lang_main.types API documentation + + + + + + + + + + + +
+
+
+

Module lang_main.types

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class SpacyDoc +(...) +
+
+

Doc(Vocab vocab, words=None, spaces=None, user_data=None, *, tags=None, pos=None, morphs=None, lemmas=None, heads=None, deps=None, sent_starts=None, ents=None) +A sequence of Token objects. Access sentences and named entities, export +annotations to numpy arrays, losslessly serialize to compressed binary +strings. The Doc object holds an array of TokenC structs. The +Python-level Token and Span objects are views of this array, i.e. +they don't own the data themselves.

+
EXAMPLE:
+    Construction 1
+    >>> doc = nlp(u'Some text')
+
+    Construction 2
+    >>> from spacy.tokens import Doc
+    >>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
+
+DOCS: <https://spacy.io/api/doc>
+
+

Create a Doc object.

+

vocab (Vocab): A vocabulary object, which must match any models you +want to use (e.g. tokenizer, parser, entity recognizer). +words (Optional[List[Union[str, int]]]): A list of unicode strings or +hash values to add to the document as words. If None, defaults to +empty list. +spaces (Optional[List[bool]]): A list of boolean values, of the same +length as words. True means that the word is followed by a space, +False means it is not. If None, defaults to [True]*len(words) +user_data (dict or None): Optional extra data to attach to the Doc. +tags (Optional[List[str]]): A list of unicode strings, of the same +length as words, to assign as token.tag. Defaults to None. +pos (Optional[List[str]]): A list of unicode strings, of the same +length as words, to assign as token.pos. Defaults to None. +morphs (Optional[List[str]]): A list of unicode strings, of the same +length as words, to assign as token.morph. Defaults to None. +lemmas (Optional[List[str]]): A list of unicode strings, of the same +length as words, to assign as token.lemma. Defaults to None. +heads (Optional[List[int]]): A list of values, of the same length as +words, to assign as heads. Head indices are the position of the +head in the doc. Defaults to None. +deps (Optional[List[str]]): A list of unicode strings, of the same +length as words, to assign as token.dep. Defaults to None. +sent_starts (Optional[List[Union[bool, int, None]]]): A list of values, +of the same length as words, to assign as token.is_sent_start. Will +be overridden by heads if heads is provided. Defaults to None. +ents (Optional[List[str]]): A list of unicode strings, of the same +length as words, as IOB tags to assign as token.ent_iob and +token.ent_type. Defaults to None.

+

DOCS: https://spacy.io/api/doc#init

+

Static methods

+
+
+def from_docs(...) +
+
+

Doc.from_docs(docs, ensure_whitespace=True, attrs=None, *, exclude=tuple()) +Concatenate multiple Doc objects to form a new one. Raises an error +if the Doc objects do not all share the same Vocab.

+
    docs (list): A list of Doc objects.
+    ensure_whitespace (bool): Insert a space between two adjacent docs
+        whenever the first doc does not end in whitespace.
+    attrs (list): Optional list of attribute ID ints or attribute name
+        strings.
+    exclude (Iterable[str]): Doc attributes to exclude. Supported
+        attributes: <code>spans</code>, <code>tensor</code>, <code>user\_data</code>.
+    RETURNS (Doc): A doc that contains the concatenated docs, or None if no
+        docs were given.
+
+    DOCS: <https://spacy.io/api/doc#from_docs>
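+    EXAMPLE (a minimal usage sketch, assuming an `nlp` pipeline is already loaded):
+        >>> doc1 = nlp("Berlin is a city.")
+        >>> doc2 = nlp("It is the capital of Germany.")
+        >>> merged = Doc.from_docs([doc1, doc2])
+        >>> merged.text
+        'Berlin is a city. It is the capital of Germany.'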
+
+
+
+

Instance variables

+
+
var cats
+
+

cats: object

+
+
var doc
+
+
+
+
var ents
+
+

The named entities in the document. Returns a tuple of named entity +Span objects, if the entity recognizer has been applied.

+

RETURNS (tuple): Entities in the document, one Span per entity.

+

DOCS: https://spacy.io/api/doc#ents

+
+
var has_unknown_spaces
+
+

has_unknown_spaces: 'bool'

+
+
var has_vector
+
+

A boolean value indicating whether a word vector is associated with +the object.

+

RETURNS (bool): Whether a word vector is associated with the object.

+

DOCS: https://spacy.io/api/doc#has_vector

+
+
var is_nered
+
+
+
+
var is_parsed
+
+
+
+
var is_sentenced
+
+
+
+
var is_tagged
+
+
+
+
var lang
+
+

RETURNS (uint64): ID of the language of the doc's vocabulary.

+
+
var lang_
+
+

RETURNS (str): Language of the doc's vocabulary, e.g. 'en'.

+
+
var mem
+
+
+
+
var noun_chunks
+
+

Iterate over the base noun phrases in the document. Yields base +noun-phrase Span objects, if the language has a noun chunk iterator. +Raises a NotImplementedError otherwise.

+

A base noun phrase, or "NP chunk", is a noun +phrase that does not permit other NPs to be nested within it – so no +NP-level coordination, no prepositional phrases, and no relative +clauses.

+

YIELDS (Span): Noun chunks in the document.

+

DOCS: https://spacy.io/api/doc#noun_chunks

+
+
var noun_chunks_iterator
+
+

noun_chunks_iterator: object

+
+
var sentiment
+
+

sentiment: 'float'

+
+
var sents
+
+

Iterate over the sentences in the document. Yields sentence Span +objects. Sentence spans have no label.

+

YIELDS (Span): Sentences in the document.

+

DOCS: https://spacy.io/api/doc#sents

+
+
var spans
+
+
+
+
var tensor
+
+

tensor: object

+
+
var text
+
+

A unicode representation of the document text.

+

RETURNS (str): The original verbatim text of the document.

+
+
var text_with_ws
+
+

An alias of Doc.text, provided for duck-type compatibility with +Span and Token.

+

RETURNS (str): The original verbatim text of the document.

+
+
var user_data
+
+

user_data: object

+
+
var user_hooks
+
+

user_hooks: dict

+
+
var user_span_hooks
+
+

user_span_hooks: dict

+
+
var user_token_hooks
+
+

user_token_hooks: dict

+
+
var vector
+
+

A real-valued meaning representation. Defaults to an average of the +token vectors.

+

RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array +representing the document's semantics.

+

DOCS: https://spacy.io/api/doc#vector

+
+
var vector_norm
+
+

The L2 norm of the document's vector representation.

+

RETURNS (float): The L2 norm of the vector representation.

+

DOCS: https://spacy.io/api/doc#vector_norm

+
+
var vocab
+
+
+
+
+

Methods

+
+
+def char_span(...) +
+
+

Doc.char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode='strict', span_id=0) +Create a Span object from the slice +doc.text[start_idx : end_idx]. Returns None if no valid Span can be +created.

+
    doc (Doc): The parent document.
+    start_idx (int): The index of the first character of the span.
+    end_idx (int): The index of the first character after the span.
+    label (Union[int, str]): A label to attach to the Span, e.g. for
+        named entities.
+    kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a
+        named entity.
+    vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
+        the span.
+    alignment_mode (str): How character indices are aligned to token
+        boundaries. Options: "strict" (character indices must be aligned
+        with token boundaries), "contract" (span of all tokens completely
+        within the character span), "expand" (span of all tokens at least
+        partially covered by the character span). Defaults to "strict".
+    span_id (Union[int, str]): An identifier to associate with the span.
+    RETURNS (Span): The newly constructed object.
+
+    DOCS: <https://spacy.io/api/doc#char_span>
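+    EXAMPLE (illustrative, assuming an `nlp` pipeline is loaded):
+        >>> doc = nlp("I like New York")
+        >>> span = doc.char_span(7, 15, label="GPE")
+        >>> span.text
+        'New York'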
+
+
+
+def copy(...) +
+
+

Doc.copy(self)

+
+
+def count_by(...) +
+
+

Doc.count_by(self, attr_id_t attr_id, exclude=None, counts=None) +Count the frequencies of a given attribute. Produces a dict of +{attribute (int): count (ints)} frequencies, keyed by the values of +the given attribute ID.

+
    attr_id (int): The attribute ID to key the counts.
+    RETURNS (dict): A dictionary mapping attributes to integer counts.
+
+    DOCS: <https://spacy.io/api/doc#count_by>
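+    EXAMPLE (illustrative):
+        >>> from spacy.attrs import ORTH
+        >>> doc = nlp("apple apple orange")
+        >>> counts = doc.count_by(ORTH)
+        >>> counts[doc.vocab.strings["apple"]]
+        2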
+
+
+
+def extend_tensor(...) +
+
+

Doc.extend_tensor(self, tensor) +Concatenate a new tensor onto the doc.tensor object.

+
    The doc.tensor attribute holds dense feature vectors
+    computed by the models in the pipeline. Let's say a
+    document with 30 words has a tensor with 128 dimensions
+    per word. doc.tensor.shape will be (30, 128). After
+    calling doc.extend_tensor with an array of shape (30, 64),
+    doc.tensor == (30, 192).
+
+
+
+def from_array(...) +
+
+

Doc.from_array(self, attrs, array) +Load attributes from a numpy array. Write to a Doc object, from an +(M, N) array of attributes.

+
    attrs (list) A list of attribute ID ints.
+    array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
+    RETURNS (Doc): Itself.
+
+    DOCS: <https://spacy.io/api/doc#from_array>
+
+
+
+def from_bytes(...) +
+
+

Doc.from_bytes(self, bytes_data, *, exclude=tuple()) +Deserialize, i.e. import the document contents from a binary string.

+
    data (bytes): The string to load from.
+    exclude (Iterable[str]): String names of serialization fields to exclude.
+    RETURNS (Doc): Itself.
+
+    DOCS: <https://spacy.io/api/doc#from_bytes>
+
+
+
+def from_dict(...) +
+
+

Doc.from_dict(self, msg, *, exclude=tuple()) +Deserialize the document contents from a dictionary representation.

+
    msg (Dict[str, Any]): The dictionary to load from.
+    exclude (Iterable[str]): String names of serialization fields to exclude.
+    RETURNS (Doc): Itself.
+
+
+
+def from_disk(...) +
+
+

Doc.from_disk(self, path, *, exclude=tuple()) +Loads state from a directory. Modifies the object in place and +returns it.

+
    path (str / Path): A path to a directory. Paths may be either
+        strings or <code>Path</code>-like objects.
+    exclude (Iterable[str]): String names of serialization fields to exclude.
+    RETURNS (Doc): The modified <code><a title="lang_main.types.Doc" href="#lang_main.types.Doc">Doc</a></code> object.
+
+    DOCS: <https://spacy.io/api/doc#from_disk>
+
+
+
+def from_json(...) +
+
+

Doc.from_json(self, doc_json, *, validate=False) +Convert a JSON document generated by Doc.to_json() to a Doc.

+
    doc_json (Dict): JSON representation of doc object to load.
+    validate (bool): Whether to validate <code>doc\_json</code> against the expected schema.
+        Defaults to False.
+    RETURNS (Doc): A doc instance corresponding to the specified JSON representation.
+
+
+
+def get_extension(...) +
+
+

Doc.get_extension(type cls, name) +Look up a previously registered extension by name.

+
    name (str): Name of the extension.
+    RETURNS (tuple): A <code>(default, method, getter, setter)</code> tuple.
+
+    DOCS: <https://spacy.io/api/doc#get_extension>
+
+
+
+def get_lca_matrix(...) +
+
+

Doc.get_lca_matrix(self) +Calculates a matrix of Lowest Common Ancestors (LCA) for a given +Doc, where LCA[i, j] is the index of the lowest common ancestor among +token i and j.

+
    RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
+        (n, n), where n = len(self).
+
+    DOCS: <https://spacy.io/api/doc#get_lca_matrix>
+
+
+
+def has_annotation(...) +
+
+

Doc.has_annotation(self, attr, *, require_complete=False) +Check whether the doc contains annotation on a token attribute.

+
    attr (Union[int, str]): The attribute string name or int ID.
+    require_complete (bool): Whether to check that the attribute is set on
+        every token in the doc.
+    RETURNS (bool): Whether annotation is present.
+
+    DOCS: <https://spacy.io/api/doc#has_annotation>
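+    EXAMPLE (illustrative; the result depends on which components have run):
+        >>> doc = nlp("A short sentence.")
+        >>> doc.has_annotation("DEP")  # True only if a dependency parser has run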
+
+
+
+def has_extension(...) +
+
+

Doc.has_extension(type cls, name) +Check whether an extension has been registered.

+
    name (str): Name of the extension.
+    RETURNS (bool): Whether the extension has been registered.
+
+    DOCS: <https://spacy.io/api/doc#has_extension>
+
+
+
+def remove_extension(...) +
+
+

Doc.remove_extension(type cls, name) +Remove a previously registered extension.

+
    name (str): Name of the extension.
+    RETURNS (tuple): A <code>(default, method, getter, setter)</code> tuple of the
+        removed extension.
+
+    DOCS: <https://spacy.io/api/doc#remove_extension>
+
+
+
+def retokenize(...) +
+
+

Doc.retokenize(self) +Context manager to handle retokenization of the Doc. +Modifications to the Doc's tokenization are stored, and then +made all at once when the context manager exits. This is +much more efficient, and less error-prone.

+
    All views of the Doc (Span and Token) created before the
+    retokenization are invalidated, although they may accidentally
+    continue to work.
+
+    DOCS: <https://spacy.io/api/doc#retokenize>
+    USAGE: <https://spacy.io/usage/linguistic-features#retokenization>
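+    EXAMPLE (illustrative):
+        >>> doc = nlp("I live in New York")
+        >>> with doc.retokenize() as retokenizer:
+        ...     retokenizer.merge(doc[3:5])  # merge "New" and "York" into one token
+        >>> len(doc)
+        4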
+
+
+
+def set_ents(...) +
+
+

Doc.set_ents(self, entities, *, blocked=None, missing=None, outside=None, default=SetEntsDefault.outside) +Set entity annotation.

+
    entities (List[Span]): Spans with labels to set as entities.
+    blocked (Optional[List[Span]]): Spans to set as 'blocked' (never an
+        entity) for spacy's built-in NER component. Other components may
+        ignore this setting.
+    missing (Optional[List[Span]]): Spans with missing/unknown entity
+        information.
+    outside (Optional[List[Span]]): Spans outside of entities (O in IOB).
+    default (str): How to set entity annotation for tokens outside of any
+        provided spans. Options: "blocked", "missing", "outside" and
+        "unmodified" (preserve current state). Defaults to "outside".
+
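+    EXAMPLE (illustrative):
+        >>> from spacy.tokens import Span
+        >>> doc = nlp.make_doc("Mark works at Acme")
+        >>> doc.set_ents([Span(doc, 3, 4, label="ORG")])
+        >>> [(ent.text, ent.label_) for ent in doc.ents]
+        [('Acme', 'ORG')]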
+
+
+def set_extension(...) +
+
+

Doc.set_extension(type cls, name, **kwargs) +Define a custom attribute which becomes available as Doc._.

+
    name (str): Name of the attribute to set.
+    default: Optional default value of the attribute.
+    getter (callable): Optional getter function.
+    setter (callable): Optional setter function.
+    method (callable): Optional method for method extension.
+    force (bool): Force overwriting existing attribute.
+
+    DOCS: <https://spacy.io/api/doc#set_extension>
+    USAGE: <https://spacy.io/usage/processing-pipelines#custom-components-attributes>
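+    EXAMPLE (illustrative):
+        >>> from spacy.tokens import Doc
+        >>> Doc.set_extension("is_greeting", default=False)
+        >>> doc = nlp("Hello there!")
+        >>> doc._.is_greeting = True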
+
+
+
+def similarity(...) +
+
+

Doc.similarity(self, other) +Make a semantic similarity estimate. The default estimate is cosine +similarity using an average of word vectors.

+
    other (object): The object to compare with. By default, accepts <code><a title="lang_main.types.Doc" href="#lang_main.types.Doc">Doc</a></code>,
+        <code>Span</code>, <code><a title="lang_main.types.Token" href="#lang_main.types.Token">Token</a></code> and <code>Lexeme</code> objects.
+    RETURNS (float): A scalar similarity score. Higher is more similar.
+
+    DOCS: <https://spacy.io/api/doc#similarity>
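+    EXAMPLE (illustrative; meaningful scores require a pipeline with word vectors,
+        e.g. a *_md or *_lg model):
+        >>> doc1 = nlp("I like apples")
+        >>> doc2 = nlp("I like oranges")
+        >>> score = doc1.similarity(doc2)  # float, higher means more similar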
+
+
+
+def to_array(...) +
+
+

Doc.to_array(self, py_attr_ids) -> ndarray +Export given token attributes to a numpy ndarray. +If attr_ids is a sequence of M attributes, the output array will be +of shape (N, M), where N is the length of the Doc (in tokens). If +attr_ids is a single attribute, the output shape will be (N,). You +can specify attributes by integer ID (e.g. spacy.attrs.LEMMA) or +string name (e.g. 'LEMMA' or 'lemma').

+
    py_attr_ids (list[]): A list of attributes (int IDs or string names).
+    RETURNS (numpy.ndarray[long, ndim=2]): A feature matrix, with one row
+        per word, and one column per attribute indicated in the input
+        <code>attr\_ids</code>.
+
+    EXAMPLE:
+        >>> from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA
+        >>> doc = nlp(text)
+        >>> # All strings mapped to integers, for easy export to numpy
+        >>> np_array = doc.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])
+
+
+
+def to_bytes(...) +
+
+

Doc.to_bytes(self, *, exclude=tuple()) +Serialize, i.e. export the document contents to a binary string.

+
    exclude (Iterable[str]): String names of serialization fields to exclude.
+    RETURNS (bytes): A losslessly serialized copy of the <code><a title="lang_main.types.Doc" href="#lang_main.types.Doc">Doc</a></code>, including
+        all annotations.
+
+    DOCS: <https://spacy.io/api/doc#to_bytes>
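+    EXAMPLE (illustrative round trip with Doc.from_bytes):
+        >>> data = doc.to_bytes()
+        >>> restored = Doc(doc.vocab).from_bytes(data)
+        >>> restored.text == doc.text
+        True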
+
+
+
+def to_dict(...) +
+
+

Doc.to_dict(self, *, exclude=tuple()) +Export the document contents to a dictionary for serialization.

+
    exclude (Iterable[str]): String names of serialization fields to exclude.
+    RETURNS (Dict[str, Any]): A dictionary representation of the <code><a title="lang_main.types.Doc" href="#lang_main.types.Doc">Doc</a></code>
+
+
+
+def to_disk(...) +
+
+

Doc.to_disk(self, path, *, exclude=tuple()) +Save the current state to a directory.

+
    path (str / Path): A path to a directory, which will be created if
+        it doesn't exist. Paths may be either strings or Path-like objects.
+    exclude (Iterable[str]): String names of serialization fields to exclude.
+
+    DOCS: <https://spacy.io/api/doc#to_disk>
+
+
+
+def to_json(...) +
+
+

Doc.to_json(self, underscore=None) +Convert a Doc to JSON.

+
    underscore (list): Optional list of string names of custom doc._.
+    attributes. Attribute values need to be JSON-serializable. Values will
+    be added to an "_" key in the data, e.g. "_": {"foo": "bar"}.
+    RETURNS (dict): The data in JSON format.
+
+
+
+def to_utf8_array(...) +
+
+

Doc.to_utf8_array(self, int nr_char=-1) +Encode word strings to utf8, and export to a fixed-width array +of characters. Characters are placed into the array in the order: +0, -1, 1, -2, etc +For example, if the array is sliced array[:, :8], the array will +contain the first 4 characters and last 4 characters of each word — +with the middle characters clipped out. The value 255 is used as a pad +value.

+
+
+
+
+class SpacyModel +(vocab: spacy.vocab.Vocab | bool = True,
*,
max_length: int = 1000000,
meta: Dict[str, Any] = {},
create_tokenizer: Callable[[ForwardRef('Language')], Callable[[str], spacy.tokens.doc.Doc]] | None = None,
create_vectors: Callable[[ForwardRef('Vocab')], spacy.vectors.BaseVectors] | None = None,
batch_size: int = 1000,
**kwargs)
+
+
+
+ +Expand source code + +
class Language:
+    """A text-processing pipeline. Usually you'll load this once per process,
+    and pass the instance around your application.
+
+    Defaults (class): Settings, data and factory methods for creating the `nlp`
+        object and processing pipeline.
+    lang (str): IETF language code, such as 'en'.
+
+    DOCS: https://spacy.io/api/language
+    """
+
+    Defaults = BaseDefaults
+    lang: Optional[str] = None
+    default_config = DEFAULT_CONFIG
+
+    factories = SimpleFrozenDict(error=Errors.E957)
+    _factory_meta: Dict[str, "FactoryMeta"] = {}  # meta by factory
+
+    def __init__(
+        self,
+        vocab: Union[Vocab, bool] = True,
+        *,
+        max_length: int = 10**6,
+        meta: Dict[str, Any] = {},
+        create_tokenizer: Optional[Callable[["Language"], Callable[[str], Doc]]] = None,
+        create_vectors: Optional[Callable[["Vocab"], BaseVectors]] = None,
+        batch_size: int = 1000,
+        **kwargs,
+    ) -> None:
+        """Initialise a Language object.
+
+        vocab (Vocab): A `Vocab` object. If `True`, a vocab is created.
+        meta (dict): Custom meta data for the Language class. Is written to by
+            models to add model meta data.
+        max_length (int): Maximum number of characters in a single text. The
+            current models may run out of memory on extremely long texts, due to
+            large internal allocations. You should segment these texts into
+            meaningful units, e.g. paragraphs, subsections etc, before passing
+            them to spaCy. Default maximum length is 1,000,000 characters (1 MB). As
+            a rule of thumb, if all pipeline components are enabled, spaCy's
+            default models currently require roughly 1GB of temporary memory per
+            100,000 characters in one text.
+        create_tokenizer (Callable): Function that takes the nlp object and
+            returns a tokenizer.
+        batch_size (int): Default batch size for pipe and evaluate.
+
+        DOCS: https://spacy.io/api/language#init
+        """
+        # We're only calling this to import all factories provided via entry
+        # points. The factory decorator applied to these functions takes care
+        # of the rest.
+        util.registry._entry_point_factories.get_all()
+
+        self._config = DEFAULT_CONFIG.merge(self.default_config)
+        self._meta = dict(meta)
+        self._path = None
+        self._optimizer: Optional[Optimizer] = None
+        # Component meta and configs are only needed on the instance
+        self._pipe_meta: Dict[str, "FactoryMeta"] = {}  # meta by component
+        self._pipe_configs: Dict[str, Config] = {}  # config by component
+
+        if not isinstance(vocab, Vocab) and vocab is not True:
+            raise ValueError(Errors.E918.format(vocab=vocab, vocab_type=type(Vocab)))
+        if vocab is True:
+            vectors_name = meta.get("vectors", {}).get("name")
+            vocab = create_vocab(self.lang, self.Defaults, vectors_name=vectors_name)
+            if not create_vectors:
+                vectors_cfg = {"vectors": self._config["nlp"]["vectors"]}
+                create_vectors = registry.resolve(vectors_cfg)["vectors"]
+            vocab.vectors = create_vectors(vocab)
+        else:
+            if (self.lang and vocab.lang) and (self.lang != vocab.lang):
+                raise ValueError(Errors.E150.format(nlp=self.lang, vocab=vocab.lang))
+        self.vocab: Vocab = vocab
+        if self.lang is None:
+            self.lang = self.vocab.lang
+        self._components: List[Tuple[str, PipeCallable]] = []
+        self._disabled: Set[str] = set()
+        self.max_length = max_length
+        # Create the default tokenizer from the default config
+        if not create_tokenizer:
+            tokenizer_cfg = {"tokenizer": self._config["nlp"]["tokenizer"]}
+            create_tokenizer = registry.resolve(tokenizer_cfg)["tokenizer"]
+        self.tokenizer = create_tokenizer(self)
+        self.batch_size = batch_size
+        self.default_error_handler = raise_error
+
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        cls.default_config = DEFAULT_CONFIG.merge(cls.Defaults.config)
+        cls.default_config["nlp"]["lang"] = cls.lang
+
+    @property
+    def path(self):
+        return self._path
+
+    @property
+    def meta(self) -> Dict[str, Any]:
+        """Custom meta data of the language class. If a model is loaded, this
+        includes details from the model's meta.json.
+
+        RETURNS (Dict[str, Any]): The meta.
+
+        DOCS: https://spacy.io/api/language#meta
+        """
+        spacy_version = util.get_minor_version_range(about.__version__)
+        if self.vocab.lang:
+            self._meta.setdefault("lang", self.vocab.lang)
+        else:
+            self._meta.setdefault("lang", self.lang)
+        self._meta.setdefault("name", "pipeline")
+        self._meta.setdefault("version", "0.0.0")
+        self._meta.setdefault("spacy_version", spacy_version)
+        self._meta.setdefault("description", "")
+        self._meta.setdefault("author", "")
+        self._meta.setdefault("email", "")
+        self._meta.setdefault("url", "")
+        self._meta.setdefault("license", "")
+        self._meta.setdefault("spacy_git_version", GIT_VERSION)
+        self._meta["vectors"] = {
+            "width": self.vocab.vectors_length,
+            "vectors": len(self.vocab.vectors),
+            "keys": self.vocab.vectors.n_keys,
+            "name": self.vocab.vectors.name,
+            "mode": self.vocab.vectors.mode,
+        }
+        self._meta["labels"] = dict(self.pipe_labels)
+        # TODO: Adding this back to prevent breaking people's code etc., but
+        # we should consider removing it
+        self._meta["pipeline"] = list(self.pipe_names)
+        self._meta["components"] = list(self.component_names)
+        self._meta["disabled"] = list(self.disabled)
+        return self._meta
+
+    @meta.setter
+    def meta(self, value: Dict[str, Any]) -> None:
+        self._meta = value
+
+    @property
+    def config(self) -> Config:
+        """Trainable config for the current language instance. Includes the
+        current pipeline components, as well as default training config.
+
+        RETURNS (thinc.api.Config): The config.
+
+        DOCS: https://spacy.io/api/language#config
+        """
+        self._config.setdefault("nlp", {})
+        self._config.setdefault("training", {})
+        self._config["nlp"]["lang"] = self.lang
+        # We're storing the filled config for each pipeline component and so
+        # we can populate the config again later
+        pipeline = {}
+        score_weights = []
+        for pipe_name in self.component_names:
+            pipe_meta = self.get_pipe_meta(pipe_name)
+            pipe_config = self.get_pipe_config(pipe_name)
+            pipeline[pipe_name] = {"factory": pipe_meta.factory, **pipe_config}
+            if pipe_meta.default_score_weights:
+                score_weights.append(pipe_meta.default_score_weights)
+        self._config["nlp"]["pipeline"] = list(self.component_names)
+        self._config["nlp"]["disabled"] = list(self.disabled)
+        self._config["components"] = pipeline
+        # We're merging the existing score weights back into the combined
+        # weights to make sure we're preserving custom settings in the config
+        # but also reflect updates (e.g. new components added)
+        prev_weights = self._config["training"].get("score_weights", {})
+        combined_score_weights = combine_score_weights(score_weights, prev_weights)
+        self._config["training"]["score_weights"] = combined_score_weights
+        if not srsly.is_json_serializable(self._config):
+            raise ValueError(Errors.E961.format(config=self._config))
+        return self._config
+
+    @config.setter
+    def config(self, value: Config) -> None:
+        self._config = value
+
+    @property
+    def disabled(self) -> List[str]:
+        """Get the names of all disabled components.
+
+        RETURNS (List[str]): The disabled components.
+        """
+        # Make sure the disabled components are returned in the order they
+        # appear in the pipeline (which isn't guaranteed by the set)
+        names = [name for name, _ in self._components if name in self._disabled]
+        return SimpleFrozenList(names, error=Errors.E926.format(attr="disabled"))
+
+    @property
+    def factory_names(self) -> List[str]:
+        """Get names of all available factories.
+
+        RETURNS (List[str]): The factory names.
+        """
+        names = list(self.factories.keys())
+        return SimpleFrozenList(names)
+
+    @property
+    def components(self) -> List[Tuple[str, PipeCallable]]:
+        """Get all (name, component) tuples in the pipeline, including the
+        currently disabled components.
+        """
+        return SimpleFrozenList(
+            self._components, error=Errors.E926.format(attr="components")
+        )
+
+    @property
+    def component_names(self) -> List[str]:
+        """Get the names of the available pipeline components. Includes all
+        active and inactive pipeline components.
+
+        RETURNS (List[str]): List of component name strings, in order.
+        """
+        names = [pipe_name for pipe_name, _ in self._components]
+        return SimpleFrozenList(names, error=Errors.E926.format(attr="component_names"))
+
+    @property
+    def pipeline(self) -> List[Tuple[str, PipeCallable]]:
+        """The processing pipeline consisting of (name, component) tuples. The
+        components are called on the Doc in order as it passes through the
+        pipeline.
+
+        RETURNS (List[Tuple[str, Callable[[Doc], Doc]]]): The pipeline.
+        """
+        pipes = [(n, p) for n, p in self._components if n not in self._disabled]
+        return SimpleFrozenList(pipes, error=Errors.E926.format(attr="pipeline"))
+
+    @property
+    def pipe_names(self) -> List[str]:
+        """Get names of available active pipeline components.
+
+        RETURNS (List[str]): List of component name strings, in order.
+        """
+        names = [pipe_name for pipe_name, _ in self.pipeline]
+        return SimpleFrozenList(names, error=Errors.E926.format(attr="pipe_names"))
+
+    @property
+    def pipe_factories(self) -> Dict[str, str]:
+        """Get the component factories for the available pipeline components.
+
+        RETURNS (Dict[str, str]): Factory names, keyed by component names.
+        """
+        factories = {}
+        for pipe_name, pipe in self._components:
+            factories[pipe_name] = self.get_pipe_meta(pipe_name).factory
+        return SimpleFrozenDict(factories)
+
+    @property
+    def pipe_labels(self) -> Dict[str, List[str]]:
+        """Get the labels set by the pipeline components, if available (if
+        the component exposes a labels property and the labels are not
+        hidden).
+
+        RETURNS (Dict[str, List[str]]): Labels keyed by component name.
+        """
+        labels = {}
+        for name, pipe in self._components:
+            if hasattr(pipe, "hide_labels") and pipe.hide_labels is True:
+                continue
+            if hasattr(pipe, "labels"):
+                labels[name] = list(pipe.labels)
+        return SimpleFrozenDict(labels)
+
+    @classmethod
+    def has_factory(cls, name: str) -> bool:
+        """RETURNS (bool): Whether a factory of that name is registered."""
+        internal_name = cls.get_factory_name(name)
+        return name in registry.factories or internal_name in registry.factories
+
+    @classmethod
+    def get_factory_name(cls, name: str) -> str:
+        """Get the internal factory name based on the language subclass.
+
+        name (str): The factory name.
+        RETURNS (str): The internal factory name.
+        """
+        if cls.lang is None:
+            return name
+        return f"{cls.lang}.{name}"
+
+    @classmethod
+    def get_factory_meta(cls, name: str) -> "FactoryMeta":
+        """Get the meta information for a given factory name.
+
+        name (str): The component factory name.
+        RETURNS (FactoryMeta): The meta for the given factory name.
+        """
+        internal_name = cls.get_factory_name(name)
+        if internal_name in cls._factory_meta:
+            return cls._factory_meta[internal_name]
+        if name in cls._factory_meta:
+            return cls._factory_meta[name]
+        raise ValueError(Errors.E967.format(meta="factory", name=name))
+
+    @classmethod
+    def set_factory_meta(cls, name: str, value: "FactoryMeta") -> None:
+        """Set the meta information for a given factory name.
+
+        name (str): The component factory name.
+        value (FactoryMeta): The meta to set.
+        """
+        cls._factory_meta[cls.get_factory_name(name)] = value
+
+    def get_pipe_meta(self, name: str) -> "FactoryMeta":
+        """Get the meta information for a given component name.
+
+        name (str): The component name.
+        RETURNS (FactoryMeta): The meta for the given component name.
+        """
+        if name not in self._pipe_meta:
+            raise ValueError(Errors.E967.format(meta="component", name=name))
+        return self._pipe_meta[name]
+
+    def get_pipe_config(self, name: str) -> Config:
+        """Get the config used to create a pipeline component.
+
+        name (str): The component name.
+        RETURNS (Config): The config used to create the pipeline component.
+        """
+        if name not in self._pipe_configs:
+            raise ValueError(Errors.E960.format(name=name))
+        pipe_config = self._pipe_configs[name]
+        return pipe_config
+
+    @classmethod
+    def factory(
+        cls,
+        name: str,
+        *,
+        default_config: Dict[str, Any] = SimpleFrozenDict(),
+        assigns: Iterable[str] = SimpleFrozenList(),
+        requires: Iterable[str] = SimpleFrozenList(),
+        retokenizes: bool = False,
+        default_score_weights: Dict[str, Optional[float]] = SimpleFrozenDict(),
+        func: Optional[Callable] = None,
+    ) -> Callable:
+        """Register a new pipeline component factory. Can be used as a decorator
+        on a function or classmethod, or called as a function with the factory
+        provided as the func keyword argument. To create a component and add
+        it to the pipeline, you can use nlp.add_pipe(name).
+
+        name (str): The name of the component factory.
+        default_config (Dict[str, Any]): Default configuration, describing the
+            default values of the factory arguments.
+        assigns (Iterable[str]): Doc/Token attributes assigned by this component,
+            e.g. "token.ent_id". Used for pipeline analysis.
+        requires (Iterable[str]): Doc/Token attributes required by this component,
+            e.g. "token.ent_id". Used for pipeline analysis.
+        retokenizes (bool): Whether the component changes the tokenization.
+            Used for pipeline analysis.
+        default_score_weights (Dict[str, Optional[float]]): The scores to report during
+            training, and their default weight towards the final score used to
+            select the best model. Weights should sum to 1.0 per component and
+            will be combined and normalized for the whole pipeline. If None,
+            the score won't be shown in the logs or be weighted.
+        func (Optional[Callable]): Factory function if not used as a decorator.
+
+        DOCS: https://spacy.io/api/language#factory
+        """
+        if not isinstance(name, str):
+            raise ValueError(Errors.E963.format(decorator="factory"))
+        if "." in name:
+            raise ValueError(Errors.E853.format(name=name))
+        if not isinstance(default_config, dict):
+            err = Errors.E962.format(
+                style="default config", name=name, cfg_type=type(default_config)
+            )
+            raise ValueError(err)
+
+        def add_factory(factory_func: Callable) -> Callable:
+            internal_name = cls.get_factory_name(name)
+            if internal_name in registry.factories:
+                # We only check for the internal name here – it's okay if it's a
+                # subclass and the base class has a factory of the same name. We
+                # also only raise if the function is different to prevent raising
+                # if module is reloaded.
+                existing_func = registry.factories.get(internal_name)
+                if not util.is_same_func(factory_func, existing_func):
+                    err = Errors.E004.format(
+                        name=name, func=existing_func, new_func=factory_func
+                    )
+                    raise ValueError(err)
+
+            arg_names = util.get_arg_names(factory_func)
+            if "nlp" not in arg_names or "name" not in arg_names:
+                raise ValueError(Errors.E964.format(name=name))
+            # Officially register the factory so we can later call
+            # registry.resolve and refer to it in the config as
+            # @factories = "spacy.Language.xyz". We use the class name here so
+            # different classes can have different factories.
+            registry.factories.register(internal_name, func=factory_func)
+            factory_meta = FactoryMeta(
+                factory=name,
+                default_config=default_config,
+                assigns=validate_attrs(assigns),
+                requires=validate_attrs(requires),
+                scores=list(default_score_weights.keys()),
+                default_score_weights=default_score_weights,
+                retokenizes=retokenizes,
+            )
+            cls.set_factory_meta(name, factory_meta)
+            # We're overwriting the class attr with a frozen dict to handle
+            # backwards-compat (writing to Language.factories directly). This
+            # wouldn't work with an instance property and just produce a
+            # confusing error – here we can show a custom error
+            cls.factories = SimpleFrozenDict(
+                registry.factories.get_all(), error=Errors.E957
+            )
+            return factory_func
+
+        if func is not None:  # Support non-decorator use cases
+            return add_factory(func)
+        return add_factory
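+    # Illustrative use of the factory decorator (not part of the original
+    # source; `ThresholdComponent` is a hypothetical callable that takes and
+    # returns a Doc):
+    #
+    #     @Language.factory("threshold_component", default_config={"threshold": 0.5})
+    #     def create_threshold_component(nlp, name, threshold: float):
+    #         return ThresholdComponent(threshold)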
+
+    @classmethod
+    def component(
+        cls,
+        name: str,
+        *,
+        assigns: Iterable[str] = SimpleFrozenList(),
+        requires: Iterable[str] = SimpleFrozenList(),
+        retokenizes: bool = False,
+        func: Optional[PipeCallable] = None,
+    ) -> Callable[..., Any]:
+        """Register a new pipeline component. Can be used for stateless function
+        components that don't require a separate factory. Can be used as a
+        decorator on a function or classmethod, or called as a function with the
+        factory provided as the func keyword argument. To create a component and
+        add it to the pipeline, you can use nlp.add_pipe(name).
+
+        name (str): The name of the component factory.
+        assigns (Iterable[str]): Doc/Token attributes assigned by this component,
+            e.g. "token.ent_id". Used for pipeline analysis.
+        requires (Iterable[str]): Doc/Token attributes required by this component,
+            e.g. "token.ent_id". Used for pipeline analysis.
+        retokenizes (bool): Whether the component changes the tokenization.
+            Used for pipeline analysis.
+        func (Optional[Callable[[Doc], Doc]): Factory function if not used as a decorator.
+
+        DOCS: https://spacy.io/api/language#component
+        """
+        if name is not None:
+            if not isinstance(name, str):
+                raise ValueError(Errors.E963.format(decorator="component"))
+            if "." in name:
+                raise ValueError(Errors.E853.format(name=name))
+        component_name = name if name is not None else util.get_object_name(func)
+
+        def add_component(component_func: PipeCallable) -> Callable:
+            if isinstance(func, type):  # function is a class
+                raise ValueError(Errors.E965.format(name=component_name))
+
+            def factory_func(nlp, name: str) -> PipeCallable:
+                return component_func
+
+            internal_name = cls.get_factory_name(name)
+            if internal_name in registry.factories:
+                # We only check for the internal name here – it's okay if it's a
+                # subclass and the base class has a factory of the same name. We
+                # also only raise if the function is different to prevent raising
+                # if module is reloaded. It's hacky, but we need to check the
+                # existing function for a closure and whether that's identical
+                # to the component function (because factory_func created above
+                # will always be different, even for the same function)
+                existing_func = registry.factories.get(internal_name)
+                closure = existing_func.__closure__
+                wrapped = [c.cell_contents for c in closure][0] if closure else None
+                if util.is_same_func(wrapped, component_func):
+                    factory_func = existing_func  # noqa: F811
+
+            cls.factory(
+                component_name,
+                assigns=assigns,
+                requires=requires,
+                retokenizes=retokenizes,
+                func=factory_func,
+            )
+            return component_func
+
+        if func is not None:  # Support non-decorator use cases
+            return add_component(func)
+        return add_component
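+    # Illustrative use of the component decorator for a stateless function
+    # component (not part of the original source):
+    #
+    #     @Language.component("doc_length_logger")
+    #     def doc_length_logger(doc):
+    #         print(f"doc has {len(doc)} tokens")
+    #         return doc
+    #
+    #     nlp.add_pipe("doc_length_logger", first=True)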
+
+    def analyze_pipes(
+        self,
+        *,
+        keys: List[str] = ["assigns", "requires", "scores", "retokenizes"],
+        pretty: bool = False,
+    ) -> Optional[Dict[str, Any]]:
+        """Analyze the current pipeline components, print a summary of what
+        they assign or require and check that all requirements are met.
+
+        keys (List[str]): The meta values to display in the table. Corresponds
+            to values in FactoryMeta, defined by @Language.factory decorator.
+        pretty (bool): Pretty-print the results.
+        RETURNS (dict): The data.
+        """
+        analysis = analyze_pipes(self, keys=keys)
+        if pretty:
+            print_pipe_analysis(analysis, keys=keys)
+        return analysis
+
+    def get_pipe(self, name: str) -> PipeCallable:
+        """Get a pipeline component for a given component name.
+
+        name (str): Name of pipeline component to get.
+        RETURNS (callable): The pipeline component.
+
+        DOCS: https://spacy.io/api/language#get_pipe
+        """
+        for pipe_name, component in self._components:
+            if pipe_name == name:
+                return component
+        raise KeyError(Errors.E001.format(name=name, opts=self.component_names))
+
+    def create_pipe(
+        self,
+        factory_name: str,
+        name: Optional[str] = None,
+        *,
+        config: Dict[str, Any] = SimpleFrozenDict(),
+        raw_config: Optional[Config] = None,
+        validate: bool = True,
+    ) -> PipeCallable:
+        """Create a pipeline component. Mostly used internally. To create and
+        add a component to the pipeline, you can use nlp.add_pipe.
+
+        factory_name (str): Name of component factory.
+        name (Optional[str]): Optional name to assign to component instance.
+            Defaults to factory name if not set.
+        config (Dict[str, Any]): Config parameters to use for this component.
+            Will be merged with default config, if available.
+        raw_config (Optional[Config]): Internals: the non-interpolated config.
+        validate (bool): Whether to validate the component config against the
+            arguments and types expected by the factory.
+        RETURNS (Callable[[Doc], Doc]): The pipeline component.
+
+        DOCS: https://spacy.io/api/language#create_pipe
+        """
+        name = name if name is not None else factory_name
+        if not isinstance(config, dict):
+            err = Errors.E962.format(style="config", name=name, cfg_type=type(config))
+            raise ValueError(err)
+        if not srsly.is_json_serializable(config):
+            raise ValueError(Errors.E961.format(config=config))
+        if not self.has_factory(factory_name):
+            err = Errors.E002.format(
+                name=factory_name,
+                opts=", ".join(self.factory_names),
+                method="create_pipe",
+                lang=util.get_object_name(self),
+                lang_code=self.lang,
+            )
+            raise ValueError(err)
+        pipe_meta = self.get_factory_meta(factory_name)
+        # This is unideal, but the alternative would mean you always need to
+        # specify the full config settings, which is not really viable.
+        if pipe_meta.default_config:
+            config = Config(pipe_meta.default_config).merge(config)
+        internal_name = self.get_factory_name(factory_name)
+        # If the language-specific factory doesn't exist, try again with the
+        # not-specific name
+        if internal_name not in registry.factories:
+            internal_name = factory_name
+        # The name allows components to know their pipe name and use it in the
+        # losses etc. (even if multiple instances of the same factory are used)
+        config = {"nlp": self, "name": name, **config, "@factories": internal_name}
+        # We need to create a top-level key because Thinc doesn't allow resolving
+        # top-level references to registered functions. Also gives nicer errors.
+        cfg = {factory_name: config}
+        # We're calling the internal _fill here to avoid constructing the
+        # registered functions twice
+        resolved = registry.resolve(cfg, validate=validate)
+        filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
+        filled = Config(filled)
+        filled["factory"] = factory_name
+        filled.pop("@factories", None)
+        # Remove the extra values we added because we don't want to keep passing
+        # them around, copying them etc.
+        filled.pop("nlp", None)
+        filled.pop("name", None)
+        # Merge the final filled config with the raw config (including non-
+        # interpolated variables)
+        if raw_config:
+            filled = filled.merge(raw_config)
+        self._pipe_configs[name] = filled
+        return resolved[factory_name]
+
+    def create_pipe_from_source(
+        self, source_name: str, source: "Language", *, name: str
+    ) -> Tuple[PipeCallable, str]:
+        """Create a pipeline component by copying it from an existing model.
+
+        source_name (str): Name of the component in the source pipeline.
+        source (Language): The source nlp object to copy from.
+        name (str): Optional alternative name to use in current pipeline.
+        RETURNS (Tuple[Callable[[Doc], Doc], str]): The component and its factory name.
+        """
+        # Check source type
+        if not isinstance(source, Language):
+            raise ValueError(Errors.E945.format(name=source_name, source=type(source)))
+        if self.vocab.vectors != source.vocab.vectors:
+            warnings.warn(Warnings.W113.format(name=source_name))
+        if source_name not in source.component_names:
+            raise KeyError(
+                Errors.E944.format(
+                    name=source_name,
+                    model=f"{source.meta['lang']}_{source.meta['name']}",
+                    opts=", ".join(source.component_names),
+                )
+            )
+        pipe = source.get_pipe(source_name)
+        # There is no actual solution here. Either the component has the right
+        # name for the source pipeline or the component has the right name for
+        # the current pipeline. This prioritizes the current pipeline.
+        if hasattr(pipe, "name"):
+            pipe.name = name
+        # Make sure the source config is interpolated so we don't end up with
+        # orphaned variables in our final config
+        source_config = source.config.interpolate()
+        pipe_config = util.copy_config(source_config["components"][source_name])
+        self._pipe_configs[name] = pipe_config
+        if self.vocab.strings != source.vocab.strings:
+            for s in source.vocab.strings:
+                self.vocab.strings.add(s)
+        return pipe, pipe_config["factory"]
+
+    def add_pipe(
+        self,
+        factory_name: str,
+        name: Optional[str] = None,
+        *,
+        before: Optional[Union[str, int]] = None,
+        after: Optional[Union[str, int]] = None,
+        first: Optional[bool] = None,
+        last: Optional[bool] = None,
+        source: Optional["Language"] = None,
+        config: Dict[str, Any] = SimpleFrozenDict(),
+        raw_config: Optional[Config] = None,
+        validate: bool = True,
+    ) -> PipeCallable:
+        """Add a component to the processing pipeline. Valid components are
+        callables that take a `Doc` object, modify it and return it. Only one
+        of before/after/first/last can be set. Default behaviour is "last".
+
+        factory_name (str): Name of the component factory.
+        name (str): Name of pipeline component. Overwrites existing
+            component.name attribute if available. If no name is set and
+            the component exposes no name attribute, component.__name__ is
+            used. An error is raised if a name already exists in the pipeline.
+        before (Union[str, int]): Name or index of the component to insert new
+            component directly before.
+        after (Union[str, int]): Name or index of the component to insert new
+            component directly after.
+        first (bool): If True, insert component first in the pipeline.
+        last (bool): If True, insert component last in the pipeline.
+        source (Language): Optional loaded nlp object to copy the pipeline
+            component from.
+        config (Dict[str, Any]): Config parameters to use for this component.
+            Will be merged with default config, if available.
+        raw_config (Optional[Config]): Internals: the non-interpolated config.
+        validate (bool): Whether to validate the component config against the
+            arguments and types expected by the factory.
+        RETURNS (Callable[[Doc], Doc]): The pipeline component.
+
+        DOCS: https://spacy.io/api/language#add_pipe
+        """
+        if not isinstance(factory_name, str):
+            bad_val = repr(factory_name)
+            err = Errors.E966.format(component=bad_val, name=name)
+            raise ValueError(err)
+        name = name if name is not None else factory_name
+        if name in self.component_names:
+            raise ValueError(Errors.E007.format(name=name, opts=self.component_names))
+        # Overriding pipe name in the config is not supported and will be ignored.
+        if "name" in config:
+            warnings.warn(Warnings.W119.format(name_in_config=config.pop("name")))
+        if source is not None:
+            # We're loading the component from a model. After loading the
+            # component, we know its real factory name
+            pipe_component, factory_name = self.create_pipe_from_source(
+                factory_name, source, name=name
+            )
+        else:
+            pipe_component = self.create_pipe(
+                factory_name,
+                name=name,
+                config=config,
+                raw_config=raw_config,
+                validate=validate,
+            )
+        pipe_index = self._get_pipe_index(before, after, first, last)
+        self._pipe_meta[name] = self.get_factory_meta(factory_name)
+        self._components.insert(pipe_index, (name, pipe_component))
+        self._link_components()
+        return pipe_component
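+    # Illustrative usage of add_pipe (not part of the original source; assumes
+    # a blank English pipeline):
+    #
+    #     import spacy
+    #     nlp = spacy.blank("en")
+    #     nlp.add_pipe("sentencizer")
+    #     nlp.add_pipe("ner", last=True)
+    #     assert nlp.pipe_names == ["sentencizer", "ner"]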
+
+    def _get_pipe_index(
+        self,
+        before: Optional[Union[str, int]] = None,
+        after: Optional[Union[str, int]] = None,
+        first: Optional[bool] = None,
+        last: Optional[bool] = None,
+    ) -> int:
+        """Determine where to insert a pipeline component based on the before/
+        after/first/last values.
+
+        before (str): Name or index of the component to insert directly before.
+        after (str): Name or index of component to insert directly after.
+        first (bool): If True, insert component first in the pipeline.
+        last (bool): If True, insert component last in the pipeline.
+        RETURNS (int): The index of the new pipeline component.
+        """
+        all_args = {"before": before, "after": after, "first": first, "last": last}
+        if sum(arg is not None for arg in [before, after, first, last]) >= 2:
+            raise ValueError(
+                Errors.E006.format(args=all_args, opts=self.component_names)
+            )
+        if last or not any(value is not None for value in [first, before, after]):
+            return len(self._components)
+        elif first:
+            return 0
+        elif isinstance(before, str):
+            if before not in self.component_names:
+                raise ValueError(
+                    Errors.E001.format(name=before, opts=self.component_names)
+                )
+            return self.component_names.index(before)
+        elif isinstance(after, str):
+            if after not in self.component_names:
+                raise ValueError(
+                    Errors.E001.format(name=after, opts=self.component_names)
+                )
+            return self.component_names.index(after) + 1
+        # We're only accepting indices referring to components that exist
+        # (can't just do isinstance here because bools are instance of int, too)
+        elif type(before) == int:
+            if before >= len(self._components) or before < 0:
+                err = Errors.E959.format(
+                    dir="before", idx=before, opts=self.component_names
+                )
+                raise ValueError(err)
+            return before
+        elif type(after) == int:
+            if after >= len(self._components) or after < 0:
+                err = Errors.E959.format(
+                    dir="after", idx=after, opts=self.component_names
+                )
+                raise ValueError(err)
+            return after + 1
+        raise ValueError(Errors.E006.format(args=all_args, opts=self.component_names))
+
+    def has_pipe(self, name: str) -> bool:
+        """Check if a component name is present in the pipeline. Equivalent to
+        `name in nlp.pipe_names`.
+
+        name (str): Name of the component.
+        RETURNS (bool): Whether a component of the name exists in the pipeline.
+
+        DOCS: https://spacy.io/api/language#has_pipe
+        """
+        return name in self.pipe_names
+
+    def replace_pipe(
+        self,
+        name: str,
+        factory_name: str,
+        *,
+        config: Dict[str, Any] = SimpleFrozenDict(),
+        validate: bool = True,
+    ) -> PipeCallable:
+        """Replace a component in the pipeline.
+
+        name (str): Name of the component to replace.
+        factory_name (str): Factory name of replacement component.
+        config (Optional[Dict[str, Any]]): Config parameters to use for this
+            component. Will be merged with default config, if available.
+        validate (bool): Whether to validate the component config against the
+            arguments and types expected by the factory.
+        RETURNS (Callable[[Doc], Doc]): The new pipeline component.
+
+        DOCS: https://spacy.io/api/language#replace_pipe
+        """
+        if name not in self.component_names:
+            raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
+        if hasattr(factory_name, "__call__"):
+            err = Errors.E968.format(component=repr(factory_name), name=name)
+            raise ValueError(err)
+        # We need to delegate to Language.add_pipe here instead of just writing
+        # to Language.pipeline to make sure the configs are handled correctly
+        pipe_index = self.component_names.index(name)
+        self.remove_pipe(name)
+        if not len(self._components) or pipe_index == len(self._components):
+            # we have no components to insert before/after, or we're replacing the last component
+            return self.add_pipe(
+                factory_name, name=name, config=config, validate=validate
+            )
+        else:
+            return self.add_pipe(
+                factory_name,
+                name=name,
+                before=pipe_index,
+                config=config,
+                validate=validate,
+            )
+
+    def rename_pipe(self, old_name: str, new_name: str) -> None:
+        """Rename a pipeline component.
+
+        old_name (str): Name of the component to rename.
+        new_name (str): New name of the component.
+
+        DOCS: https://spacy.io/api/language#rename_pipe
+        """
+        if old_name not in self.component_names:
+            raise ValueError(
+                Errors.E001.format(name=old_name, opts=self.component_names)
+            )
+        if new_name in self.component_names:
+            raise ValueError(
+                Errors.E007.format(name=new_name, opts=self.component_names)
+            )
+        i = self.component_names.index(old_name)
+        self._components[i] = (new_name, self._components[i][1])
+        self._pipe_meta[new_name] = self._pipe_meta.pop(old_name)
+        self._pipe_configs[new_name] = self._pipe_configs.pop(old_name)
+        # Make sure [initialize] config is adjusted
+        if old_name in self._config["initialize"]["components"]:
+            init_cfg = self._config["initialize"]["components"].pop(old_name)
+            self._config["initialize"]["components"][new_name] = init_cfg
+        self._link_components()
+
+    def remove_pipe(self, name: str) -> Tuple[str, PipeCallable]:
+        """Remove a component from the pipeline.
+
+        name (str): Name of the component to remove.
+        RETURNS (Tuple[str, Callable[[Doc], Doc]]): A `(name, component)` tuple of the removed component.
+
+        DOCS: https://spacy.io/api/language#remove_pipe
+        """
+        if name not in self.component_names:
+            raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
+        removed = self._components.pop(self.component_names.index(name))
+        # We're only removing the component itself from the metas/configs here
+        # because factory may be used for something else
+        self._pipe_meta.pop(name)
+        self._pipe_configs.pop(name)
+        self.meta.get("_sourced_vectors_hashes", {}).pop(name, None)
+        # Make sure name is removed from the [initialize] config
+        if name in self._config["initialize"]["components"]:
+            self._config["initialize"]["components"].pop(name)
+        # Make sure the name is also removed from the set of disabled components
+        if name in self.disabled:
+            self._disabled.remove(name)
+        self._link_components()
+        return removed
+
+    def disable_pipe(self, name: str) -> None:
+        """Disable a pipeline component. The component will still exist on
+        the nlp object, but it won't be run as part of the pipeline. Does
+        nothing if the component is already disabled.
+
+        name (str): The name of the component to disable.
+        """
+        if name not in self.component_names:
+            raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
+        self._disabled.add(name)
+
+    def enable_pipe(self, name: str) -> None:
+        """Enable a previously disabled pipeline component so it's run as part
+        of the pipeline. Does nothing if the component is already enabled.
+
+        name (str): The name of the component to enable.
+        """
+        if name not in self.component_names:
+            raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
+        if name in self.disabled:
+            self._disabled.remove(name)
+
+    def __call__(
+        self,
+        text: Union[str, Doc],
+        *,
+        disable: Iterable[str] = SimpleFrozenList(),
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+    ) -> Doc:
+        """Apply the pipeline to some text. The text can span multiple sentences,
+        and can contain arbitrary whitespace. Alignment into the original string
+        is preserved.
+
+        text (Union[str, Doc]): If `str`, the text to be processed. If `Doc`,
+            the doc will be passed directly to the pipeline, skipping
+            `Language.make_doc`.
+        disable (List[str]): Names of the pipeline components to disable.
+        component_cfg (Dict[str, dict]): An optional dictionary with extra
+            keyword arguments for specific components.
+        RETURNS (Doc): A container for accessing the annotations.
+
+        DOCS: https://spacy.io/api/language#call
+        """
+        doc = self._ensure_doc(text)
+        if component_cfg is None:
+            component_cfg = {}
+        for name, proc in self.pipeline:
+            if name in disable:
+                continue
+            if not hasattr(proc, "__call__"):
+                raise ValueError(Errors.E003.format(component=type(proc), name=name))
+            error_handler = self.default_error_handler
+            if hasattr(proc, "get_error_handler"):
+                error_handler = proc.get_error_handler()
+            try:
+                doc = proc(doc, **component_cfg.get(name, {}))  # type: ignore[call-arg]
+            except KeyError as e:
+                # This typically happens if a component is not initialized
+                raise ValueError(Errors.E109.format(name=name)) from e
+            except Exception as e:
+                error_handler(name, proc, [doc], e)
+            if not isinstance(doc, Doc):
+                raise ValueError(Errors.E005.format(name=name, returned_type=type(doc)))
+        return doc
+
+    def disable_pipes(self, *names) -> "DisabledPipes":
+        """Disable one or more pipeline components. If used as a context
+        manager, the pipeline will be restored to the initial state at the end
+        of the block. Otherwise, a DisabledPipes object is returned, that has
+        a `.restore()` method you can use to undo your changes.
+
+        This method has been deprecated since 3.0
+        """
+        warnings.warn(Warnings.W096, DeprecationWarning)
+        if len(names) == 1 and isinstance(names[0], (list, tuple)):
+            names = names[0]  # type: ignore[assignment]    # support list of names instead of spread
+        return self.select_pipes(disable=names)
+
+    def select_pipes(
+        self,
+        *,
+        disable: Optional[Union[str, Iterable[str]]] = None,
+        enable: Optional[Union[str, Iterable[str]]] = None,
+    ) -> "DisabledPipes":
+        """Disable one or more pipeline components. If used as a context
+        manager, the pipeline will be restored to the initial state at the end
+        of the block. Otherwise, a DisabledPipes object is returned, that has
+        a `.restore()` method you can use to undo your changes.
+
+        disable (str or iterable): The name(s) of the pipes to disable
+        enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
+
+        DOCS: https://spacy.io/api/language#select_pipes
+        """
+        if enable is None and disable is None:
+            raise ValueError(Errors.E991)
+        if isinstance(disable, str):
+            disable = [disable]
+        if enable is not None:
+            if isinstance(enable, str):
+                enable = [enable]
+            to_disable = [pipe for pipe in self.pipe_names if pipe not in enable]
+            # raise an error if the enable and disable keywords are not consistent
+            if disable is not None and disable != to_disable:
+                raise ValueError(
+                    Errors.E992.format(
+                        enable=enable, disable=disable, names=self.pipe_names
+                    )
+                )
+            disable = to_disable
+        assert disable is not None
+        # DisabledPipes will restore the pipes in 'disable' when it's done, so we need to exclude
+        # those pipes that were already disabled.
+        disable = [d for d in disable if d not in self._disabled]
+        return DisabledPipes(self, disable)
+
+    def make_doc(self, text: str) -> Doc:
+        """Turn a text into a Doc object.
+
+        text (str): The text to process.
+        RETURNS (Doc): The processed doc.
+        """
+        if len(text) > self.max_length:
+            raise ValueError(
+                Errors.E088.format(length=len(text), max_length=self.max_length)
+            )
+        return self.tokenizer(text)
+
+    def _ensure_doc(self, doc_like: Union[str, Doc, bytes]) -> Doc:
+        """Create a Doc if need be, or raise an error if the input is not
+        a Doc, string, or a byte array (generated by Doc.to_bytes())."""
+        if isinstance(doc_like, Doc):
+            return doc_like
+        if isinstance(doc_like, str):
+            return self.make_doc(doc_like)
+        if isinstance(doc_like, bytes):
+            return Doc(self.vocab).from_bytes(doc_like)
+        raise ValueError(Errors.E1041.format(type=type(doc_like)))
+
+    def _ensure_doc_with_context(
+        self, doc_like: Union[str, Doc, bytes], context: _AnyContext
+    ) -> Doc:
+        """Call _ensure_doc to generate a Doc and set its context object."""
+        doc = self._ensure_doc(doc_like)
+        doc._context = context
+        return doc
+
+    def update(
+        self,
+        examples: Iterable[Example],
+        _: Optional[Any] = None,
+        *,
+        drop: float = 0.0,
+        sgd: Optional[Optimizer] = None,
+        losses: Optional[Dict[str, float]] = None,
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        exclude: Iterable[str] = SimpleFrozenList(),
+        annotates: Iterable[str] = SimpleFrozenList(),
+    ):
+        """Update the models in the pipeline.
+
+        examples (Iterable[Example]): A batch of examples
+        _: Should not be set - serves to catch backwards-incompatible scripts.
+        drop (float): The dropout rate.
+        sgd (Optimizer): An optimizer.
+        losses (Dict[str, float]): Dictionary to update with the loss, keyed by
+            component.
+        component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
+            components, keyed by component name.
+        exclude (Iterable[str]): Names of components that shouldn't be updated.
+        annotates (Iterable[str]): Names of components that should set
+            annotations on the predicted examples after updating.
+        RETURNS (Dict[str, float]): The updated losses dictionary
+
+        DOCS: https://spacy.io/api/language#update
+        """
+        if _ is not None:
+            raise ValueError(Errors.E989)
+        if losses is None:
+            losses = {}
+        if isinstance(examples, list) and len(examples) == 0:
+            return losses
+        validate_examples(examples, "Language.update")
+        examples = _copy_examples(examples)
+        if sgd is None:
+            if self._optimizer is None:
+                self._optimizer = self.create_optimizer()
+            sgd = self._optimizer
+        if component_cfg is None:
+            component_cfg = {}
+        pipe_kwargs = {}
+        for i, (name, proc) in enumerate(self.pipeline):
+            component_cfg.setdefault(name, {})
+            pipe_kwargs[name] = deepcopy(component_cfg[name])
+            component_cfg[name].setdefault("drop", drop)
+            pipe_kwargs[name].setdefault("batch_size", self.batch_size)
+        for name, proc in self.pipeline:
+            # ignore statements are used here because mypy ignores hasattr
+            if name not in exclude and hasattr(proc, "update"):
+                proc.update(examples, sgd=None, losses=losses, **component_cfg[name])  # type: ignore
+            if sgd not in (None, False):
+                if (
+                    name not in exclude
+                    and isinstance(proc, ty.TrainableComponent)
+                    and proc.is_trainable
+                    and proc.model not in (True, False, None)
+                ):
+                    proc.finish_update(sgd)
+            if name in annotates:
+                for doc, eg in zip(
+                    _pipe(
+                        (eg.predicted for eg in examples),
+                        proc=proc,
+                        name=name,
+                        default_error_handler=self.default_error_handler,
+                        kwargs=pipe_kwargs[name],
+                    ),
+                    examples,
+                ):
+                    eg.predicted = doc
+        return _replace_numpy_floats(losses)
+
+    def rehearse(
+        self,
+        examples: Iterable[Example],
+        *,
+        sgd: Optional[Optimizer] = None,
+        losses: Optional[Dict[str, float]] = None,
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        exclude: Iterable[str] = SimpleFrozenList(),
+    ) -> Dict[str, float]:
+        """Make a "rehearsal" update to the models in the pipeline, to prevent
+        forgetting. Rehearsal updates run an initial copy of the model over some
+        data, and update the model so its current predictions are more like the
+        initial ones. This is useful for keeping a pretrained model on-track,
+        even if you're updating it with a smaller set of examples.
+
+        examples (Iterable[Example]): A batch of `Example` objects.
+        sgd (Optional[Optimizer]): An optimizer.
+        component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
+            components, keyed by component name.
+        exclude (Iterable[str]): Names of components that shouldn't be updated.
+        RETURNS (dict): Results from the update.
+
+        EXAMPLE:
+            >>> raw_text_batches = minibatch(raw_texts)
+            >>> for labelled_batch in minibatch(examples):
+            >>>     nlp.update(labelled_batch)
+            >>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
+            >>>     nlp.rehearse(raw_batch)
+
+        DOCS: https://spacy.io/api/language#rehearse
+        """
+        if losses is None:
+            losses = {}
+        if isinstance(examples, list) and len(examples) == 0:
+            return losses
+        validate_examples(examples, "Language.rehearse")
+        if sgd is None:
+            if self._optimizer is None:
+                self._optimizer = self.create_optimizer()
+            sgd = self._optimizer
+        pipes = list(self.pipeline)
+        random.shuffle(pipes)
+        if component_cfg is None:
+            component_cfg = {}
+        grads = {}
+
+        def get_grads(key, W, dW):
+            grads[key] = (W, dW)
+            return W, dW
+
+        get_grads.learn_rate = sgd.learn_rate  # type: ignore[attr-defined, union-attr]
+        get_grads.b1 = sgd.b1  # type: ignore[attr-defined, union-attr]
+        get_grads.b2 = sgd.b2  # type: ignore[attr-defined, union-attr]
+        for name, proc in pipes:
+            if name in exclude or not hasattr(proc, "rehearse"):
+                continue
+            grads = {}
+            proc.rehearse(  # type: ignore[attr-defined]
+                examples, sgd=get_grads, losses=losses, **component_cfg.get(name, {})
+            )
+        for key, (W, dW) in grads.items():
+            sgd(key, W, dW)  # type: ignore[call-arg, misc]
+        return losses
+
+    def begin_training(
+        self,
+        get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+        *,
+        sgd: Optional[Optimizer] = None,
+    ) -> Optimizer:
+        warnings.warn(Warnings.W089, DeprecationWarning)
+        return self.initialize(get_examples, sgd=sgd)
+
+    def initialize(
+        self,
+        get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+        *,
+        sgd: Optional[Optimizer] = None,
+    ) -> Optimizer:
+        """Initialize the pipe for training, using data examples if available.
+
+        get_examples (Callable[[], Iterable[Example]]): Optional function that
+            returns gold-standard Example objects.
+        sgd (Optional[Optimizer]): An optimizer to use for updates. If not
+            provided, will be created using the .create_optimizer() method.
+        RETURNS (thinc.api.Optimizer): The optimizer.
+
+        DOCS: https://spacy.io/api/language#initialize
+        """
+        if get_examples is None:
+            util.logger.debug(
+                "No 'get_examples' callback provided to 'Language.initialize', creating dummy examples"
+            )
+            doc = Doc(self.vocab, words=["x", "y", "z"])
+
+            def get_examples():
+                return [Example.from_dict(doc, {})]
+
+        if not hasattr(get_examples, "__call__"):
+            err = Errors.E930.format(
+                method="Language.initialize", obj=type(get_examples)
+            )
+            raise TypeError(err)
+        # Make sure the config is interpolated so we can resolve subsections
+        config = self.config.interpolate()
+        # These are the settings provided in the [initialize] block in the config
+        I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+        before_init = I["before_init"]
+        if before_init is not None:
+            before_init(self)
+        try:
+            init_vocab(
+                self, data=I["vocab_data"], lookups=I["lookups"], vectors=I["vectors"]
+            )
+        except IOError:
+            raise IOError(Errors.E884.format(vectors=I["vectors"]))
+        if self.vocab.vectors.shape[1] >= 1:
+            ops = get_current_ops()
+            self.vocab.vectors.to_ops(ops)
+        if hasattr(self.tokenizer, "initialize"):
+            tok_settings = validate_init_settings(
+                self.tokenizer.initialize,  # type: ignore[union-attr]
+                I["tokenizer"],
+                section="tokenizer",
+                name="tokenizer",
+            )
+            self.tokenizer.initialize(get_examples, nlp=self, **tok_settings)  # type: ignore[union-attr]
+        for name, proc in self.pipeline:
+            if isinstance(proc, ty.InitializableComponent):
+                p_settings = I["components"].get(name, {})
+                p_settings = validate_init_settings(
+                    proc.initialize, p_settings, section="components", name=name
+                )
+                proc.initialize(get_examples, nlp=self, **p_settings)
+        pretrain_cfg = config.get("pretraining")
+        if pretrain_cfg:
+            P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)
+            init_tok2vec(self, P, I)
+        self._link_components()
+        self._optimizer = sgd
+        if sgd is not None:
+            self._optimizer = sgd
+        elif self._optimizer is None:
+            self._optimizer = self.create_optimizer()
+        after_init = I["after_init"]
+        if after_init is not None:
+            after_init(self)
+        return self._optimizer
+
+    def resume_training(self, *, sgd: Optional[Optimizer] = None) -> Optimizer:
+        """Continue training a pretrained model.
+
+        Create and return an optimizer, and initialize "rehearsal" for any pipeline
+        component that has a .rehearse() method. Rehearsal is used to prevent
+        models from "forgetting" their initialized "knowledge". To perform
+        rehearsal, collect samples of text you want the models to retain performance
+        on, and call nlp.rehearse() with a batch of Example objects.
+
+        RETURNS (Optimizer): The optimizer.
+
+        DOCS: https://spacy.io/api/language#resume_training
+        """
+        ops = get_current_ops()
+        if self.vocab.vectors.shape[1] >= 1:
+            self.vocab.vectors.to_ops(ops)
+        for name, proc in self.pipeline:
+            if hasattr(proc, "_rehearsal_model"):
+                proc._rehearsal_model = deepcopy(proc.model)  # type: ignore[attr-defined]
+        if sgd is not None:
+            self._optimizer = sgd
+        elif self._optimizer is None:
+            self._optimizer = self.create_optimizer()
+        return self._optimizer
+
+    def set_error_handler(
+        self,
+        error_handler: Callable[[str, PipeCallable, List[Doc], Exception], NoReturn],
+    ):
+        """Set an error handler object for all the components in the pipeline
+        that implement a set_error_handler function.
+
+        error_handler (Callable[[str, Callable[[Doc], Doc], List[Doc], Exception], NoReturn]):
+            Function that deals with a failing batch of documents. This callable
+            function should take in the component's name, the component itself,
+            the offending batch of documents, and the exception that was thrown.
+        DOCS: https://spacy.io/api/language#set_error_handler
+        """
+        self.default_error_handler = error_handler
+        for name, pipe in self.pipeline:
+            if hasattr(pipe, "set_error_handler"):
+                pipe.set_error_handler(error_handler)
+
+    def evaluate(
+        self,
+        examples: Iterable[Example],
+        *,
+        batch_size: Optional[int] = None,
+        scorer: Optional[Scorer] = None,
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        scorer_cfg: Optional[Dict[str, Any]] = None,
+        per_component: bool = False,
+    ) -> Dict[str, Any]:
+        """Evaluate a model's pipeline components.
+
+        examples (Iterable[Example]): `Example` objects.
+        batch_size (Optional[int]): Batch size to use.
+        scorer (Optional[Scorer]): Scorer to use. If not passed in, a new one
+            will be created.
+        component_cfg (dict): An optional dictionary with extra keyword
+            arguments for specific components.
+        scorer_cfg (dict): An optional dictionary with extra keyword arguments
+            for the scorer.
+        per_component (bool): Whether to return the scores keyed by component
+            name. Defaults to False.
+
+        RETURNS (Scorer): The scorer containing the evaluation results.
+
+        DOCS: https://spacy.io/api/language#evaluate
+        """
+        examples = list(examples)
+        validate_examples(examples, "Language.evaluate")
+        examples = _copy_examples(examples)
+        if batch_size is None:
+            batch_size = self.batch_size
+        if component_cfg is None:
+            component_cfg = {}
+        if scorer_cfg is None:
+            scorer_cfg = {}
+        if scorer is None:
+            kwargs = dict(scorer_cfg)
+            kwargs.setdefault("nlp", self)
+            scorer = Scorer(**kwargs)
+        # reset annotation in predicted docs and time tokenization
+        start_time = timer()
+        # this is purely for timing
+        for eg in examples:
+            self.make_doc(eg.reference.text)
+        # apply all pipeline components
+        docs = self.pipe(
+            (eg.predicted for eg in examples),
+            batch_size=batch_size,
+            component_cfg=component_cfg,
+        )
+        for eg, doc in zip(examples, docs):
+            eg.predicted = doc
+        end_time = timer()
+        results = scorer.score(examples, per_component=per_component)
+        n_words = sum(len(eg.predicted) for eg in examples)
+        results["speed"] = n_words / (end_time - start_time)
+        return _replace_numpy_floats(results)
+
+    def create_optimizer(self):
+        """Create an optimizer, usually using the [training.optimizer] config."""
+        subconfig = {"optimizer": self.config["training"]["optimizer"]}
+        return registry.resolve(subconfig)["optimizer"]
+
+    @contextmanager
+    def use_params(self, params: Optional[dict]):
+        """Replace weights of models in the pipeline with those provided in the
+        params dictionary. Can be used as a contextmanager, in which case,
+        models go back to their original weights after the block.
+
+        params (dict): A dictionary of parameters keyed by model ID.
+
+        EXAMPLE:
+            >>> with nlp.use_params(optimizer.averages):
+            >>>     nlp.to_disk("/tmp/checkpoint")
+
+        DOCS: https://spacy.io/api/language#use_params
+        """
+        if not params:
+            yield
+        else:
+            contexts = [
+                pipe.use_params(params)  # type: ignore[attr-defined]
+                for name, pipe in self.pipeline
+                if hasattr(pipe, "use_params") and hasattr(pipe, "model")
+            ]
+            # TODO: Having trouble with contextlib
+            # Workaround: these aren't actually context managers atm.
+            for context in contexts:
+                try:
+                    next(context)
+                except StopIteration:
+                    pass
+            yield
+            for context in contexts:
+                try:
+                    next(context)
+                except StopIteration:
+                    pass
+
+    @overload
+    def pipe(
+        self,
+        texts: Iterable[Union[str, Doc]],
+        *,
+        as_tuples: Literal[False] = ...,
+        batch_size: Optional[int] = ...,
+        disable: Iterable[str] = ...,
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = ...,
+        n_process: int = ...,
+    ) -> Iterator[Doc]:
+        ...
+
+    @overload
+    def pipe(  # noqa: F811
+        self,
+        texts: Iterable[Tuple[Union[str, Doc], _AnyContext]],
+        *,
+        as_tuples: Literal[True] = ...,
+        batch_size: Optional[int] = ...,
+        disable: Iterable[str] = ...,
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = ...,
+        n_process: int = ...,
+    ) -> Iterator[Tuple[Doc, _AnyContext]]:
+        ...
+
+    def pipe(  # noqa: F811
+        self,
+        texts: Union[
+            Iterable[Union[str, Doc]], Iterable[Tuple[Union[str, Doc], _AnyContext]]
+        ],
+        *,
+        as_tuples: bool = False,
+        batch_size: Optional[int] = None,
+        disable: Iterable[str] = SimpleFrozenList(),
+        component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        n_process: int = 1,
+    ) -> Union[Iterator[Doc], Iterator[Tuple[Doc, _AnyContext]]]:
+        """Process texts as a stream, and yield `Doc` objects in order.
+
+        texts (Iterable[Union[str, Doc]]): A sequence of texts or docs to
+            process.
+        as_tuples (bool): If set to True, inputs should be a sequence of
+            (text, context) tuples. Output will then be a sequence of
+            (doc, context) tuples. Defaults to False.
+        batch_size (Optional[int]): The number of texts to buffer.
+        disable (List[str]): Names of the pipeline components to disable.
+        component_cfg (Dict[str, Dict]): An optional dictionary with extra keyword
+            arguments for specific components.
+        n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
+        YIELDS (Doc): Documents in the order of the original text.
+
+        DOCS: https://spacy.io/api/language#pipe
+        """
+        if as_tuples:
+            texts = cast(Iterable[Tuple[Union[str, Doc], _AnyContext]], texts)
+            docs_with_contexts = (
+                self._ensure_doc_with_context(text, context) for text, context in texts
+            )
+            docs = self.pipe(
+                docs_with_contexts,
+                batch_size=batch_size,
+                disable=disable,
+                n_process=n_process,
+                component_cfg=component_cfg,
+            )
+            for doc in docs:
+                context = doc._context
+                doc._context = None
+                yield (doc, context)
+            return
+
+        texts = cast(Iterable[Union[str, Doc]], texts)
+
+        # Set argument defaults
+        if n_process == -1:
+            n_process = mp.cpu_count()
+        if component_cfg is None:
+            component_cfg = {}
+        if batch_size is None:
+            batch_size = self.batch_size
+
+        pipes = (
+            []
+        )  # contains functools.partial objects to easily create multiprocess worker.
+        for name, proc in self.pipeline:
+            if name in disable:
+                continue
+            kwargs = component_cfg.get(name, {})
+            # Allow component_cfg to overwrite the top-level kwargs.
+            kwargs.setdefault("batch_size", batch_size)
+            f = functools.partial(
+                _pipe,
+                proc=proc,
+                name=name,
+                kwargs=kwargs,
+                default_error_handler=self.default_error_handler,
+            )
+            pipes.append(f)
+
+        if n_process != 1:
+            if self._has_gpu_model(disable):
+                warnings.warn(Warnings.W114)
+
+            docs = self._multiprocessing_pipe(texts, pipes, n_process, batch_size)
+        else:
+            # if n_process == 1, no processes are forked.
+            docs = (self._ensure_doc(text) for text in texts)
+            for pipe in pipes:
+                docs = pipe(docs)
+        for doc in docs:
+            yield doc
+
+    def _has_gpu_model(self, disable: Iterable[str]):
+        for name, proc in self.pipeline:
+            is_trainable = hasattr(proc, "is_trainable") and proc.is_trainable  # type: ignore
+            if name in disable or not is_trainable:
+                continue
+
+            if hasattr(proc, "model") and hasattr(proc.model, "ops") and isinstance(proc.model.ops, CupyOps):  # type: ignore
+                return True
+
+        return False
+
+    def _multiprocessing_pipe(
+        self,
+        texts: Iterable[Union[str, Doc]],
+        pipes: Iterable[Callable[..., Iterator[Doc]]],
+        n_process: int,
+        batch_size: int,
+    ) -> Iterator[Doc]:
+        def prepare_input(
+            texts: Iterable[Union[str, Doc]]
+        ) -> Iterable[Tuple[Union[str, bytes], _AnyContext]]:
+            # Serialize Doc inputs to bytes to avoid incurring pickling
+            # overhead when they are passed to child processes. Also yield
+            # any context objects they might have separately (as they are not serialized).
+            for doc_like in texts:
+                if isinstance(doc_like, Doc):
+                    yield (doc_like.to_bytes(), cast(_AnyContext, doc_like._context))
+                else:
+                    yield (doc_like, cast(_AnyContext, None))
+
+        serialized_texts_with_ctx = prepare_input(texts)  # type: ignore
+        # raw_texts is used later to stop iteration.
+        texts, raw_texts = itertools.tee(serialized_texts_with_ctx)  # type: ignore
+        # for sending texts to worker
+        texts_q: List[mp.Queue] = [mp.Queue() for _ in range(n_process)]
+        # for receiving byte-encoded docs from worker
+        bytedocs_recv_ch, bytedocs_send_ch = zip(
+            *[mp.Pipe(False) for _ in range(n_process)]
+        )
+
+        batch_texts = util.minibatch(texts, batch_size)
+        # Sender sends texts to the workers.
+        # This is necessary to properly handle infinite length of texts.
+        # (In this case, all data cannot be sent to the workers at once)
+        sender = _Sender(batch_texts, texts_q, chunk_size=n_process)
+        # send twice to make process busy
+        sender.send()
+        sender.send()
+
+        procs = [
+            mp.Process(
+                target=_apply_pipes,
+                args=(
+                    self._ensure_doc_with_context,
+                    pipes,
+                    rch,
+                    sch,
+                    Underscore.get_state(),
+                ),
+            )
+            for rch, sch in zip(texts_q, bytedocs_send_ch)
+        ]
+        for proc in procs:
+            proc.start()
+
+        # Close writing-end of channels. This is needed to avoid that reading
+        # from the channel blocks indefinitely when the worker closes the
+        # channel.
+        for tx in bytedocs_send_ch:
+            tx.close()
+
+        # Cycle channels so as not to break the order of docs.
+        # The received object is a batch of byte-encoded docs, so flatten them with chain.from_iterable.
+        byte_tuples = chain.from_iterable(
+            recv.recv() for recv in cycle(bytedocs_recv_ch)
+        )
+        try:
+            for i, (_, (byte_doc, context, byte_error)) in enumerate(
+                zip(raw_texts, byte_tuples), 1
+            ):
+                if byte_doc is not None:
+                    doc = Doc(self.vocab).from_bytes(byte_doc)
+                    doc._context = context
+                    yield doc
+                elif byte_error is not None:
+                    error = srsly.msgpack_loads(byte_error)
+                    self.default_error_handler(
+                        None, None, None, ValueError(Errors.E871.format(error=error))
+                    )
+                if i % batch_size == 0:
+                    # tell `sender` that one batch was consumed.
+                    sender.step()
+        finally:
+            # If we are stopping in an orderly fashion, the workers' queues
+            # are empty. Put the sentinel in their queues to signal that work
+            # is done, so that they can exit gracefully.
+            for q in texts_q:
+                q.put(_WORK_DONE_SENTINEL)
+                q.close()
+
+            # Otherwise, we are stopping because the error handler raised an
+            # exception. The sentinel will be last to go out of the queue.
+            # To avoid doing unnecessary work or hanging on platforms that
+            # block on sending (Windows), we'll close our end of the channel.
+            # This signals to the worker that it can exit the next time it
+            # attempts to send data down the channel.
+            for r in bytedocs_recv_ch:
+                r.close()
+
+            for proc in procs:
+                proc.join()
+
+            if not all(proc.exitcode == 0 for proc in procs):
+                warnings.warn(Warnings.W127)
+
+    def _link_components(self) -> None:
+        """Register 'listeners' within pipeline components, to allow them to
+        effectively share weights.
+        """
+        # I had thought, "Why do we do this inside the Language object? Shouldn't
+        # it be the tok2vec/transformer/etc's job?"
+        # The problem is we need to do it during deserialization...And the
+        # components don't receive the pipeline then. So this does have to be
+        # here :(
+        # First, fix up all the internal component names in case they have
+        # gotten out of sync due to sourcing components from different
+        # pipelines, since find_listeners uses proc2.name for the listener
+        # map.
+        for name, proc in self.pipeline:
+            if hasattr(proc, "name"):
+                proc.name = name
+        for i, (name1, proc1) in enumerate(self.pipeline):
+            if isinstance(proc1, ty.ListenedToComponent):
+                proc1.listener_map = {}
+                for name2, proc2 in self.pipeline[i + 1 :]:
+                    proc1.find_listeners(proc2)
+
+    @classmethod
+    def from_config(
+        cls,
+        config: Union[Dict[str, Any], Config] = {},
+        *,
+        vocab: Union[Vocab, bool] = True,
+        disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+        enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+        exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+        meta: Dict[str, Any] = SimpleFrozenDict(),
+        auto_fill: bool = True,
+        validate: bool = True,
+    ) -> "Language":
+        """Create the nlp object from a loaded config. Will set up the tokenizer
+        and language data, add pipeline components etc. If no config is provided,
+        the default config of the given language is used.
+
+        config (Dict[str, Any] / Config): The loaded config.
+        vocab (Vocab): A Vocab object. If True, a vocab is created.
+        disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
+            Disabled pipes will be loaded but they won't be run unless you
+            explicitly enable them by calling nlp.enable_pipe.
+        enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
+            pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
+        exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
+            Excluded components won't be loaded.
+        meta (Dict[str, Any]): Meta overrides for nlp.meta.
+        auto_fill (bool): Automatically fill in missing values in config based
+            on defaults and function argument annotations.
+        validate (bool): Validate the component config and arguments against
+            the types expected by the factory.
+        RETURNS (Language): The initialized Language class.
+
+        DOCS: https://spacy.io/api/language#from_config
+        """
+        if auto_fill:
+            config = Config(
+                cls.default_config, section_order=CONFIG_SECTION_ORDER
+            ).merge(config)
+        if "nlp" not in config:
+            raise ValueError(Errors.E985.format(config=config))
+        # fill in [nlp.vectors] if not present (as a narrower alternative to
+        # auto-filling [nlp] from the default config)
+        if "vectors" not in config["nlp"]:
+            config["nlp"]["vectors"] = {"@vectors": "spacy.Vectors.v1"}
+        config_lang = config["nlp"].get("lang")
+        if config_lang is not None and config_lang != cls.lang:
+            raise ValueError(
+                Errors.E958.format(
+                    bad_lang_code=config["nlp"]["lang"],
+                    lang_code=cls.lang,
+                    lang=util.get_object_name(cls),
+                )
+            )
+        config["nlp"]["lang"] = cls.lang
+        # This isn't very elegant, but we remove the [components] block here to prevent
+        # it from getting resolved (causes problems because we expect to pass in
+        # the nlp and name args for each component). If we're auto-filling, we're
+        # using the nlp.config with all defaults.
+        config = util.copy_config(config)
+        orig_pipeline = config.pop("components", {})
+        orig_pretraining = config.pop("pretraining", None)
+        config["components"] = {}
+        if auto_fill:
+            filled = registry.fill(config, validate=validate, schema=ConfigSchema)
+        else:
+            filled = config
+        filled["components"] = orig_pipeline
+        config["components"] = orig_pipeline
+        if orig_pretraining is not None:
+            filled["pretraining"] = orig_pretraining
+            config["pretraining"] = orig_pretraining
+        resolved_nlp = registry.resolve(
+            filled["nlp"], validate=validate, schema=ConfigSchemaNlp
+        )
+        create_tokenizer = resolved_nlp["tokenizer"]
+        create_vectors = resolved_nlp["vectors"]
+        before_creation = resolved_nlp["before_creation"]
+        after_creation = resolved_nlp["after_creation"]
+        after_pipeline_creation = resolved_nlp["after_pipeline_creation"]
+        lang_cls = cls
+        if before_creation is not None:
+            lang_cls = before_creation(cls)
+            if (
+                not isinstance(lang_cls, type)
+                or not issubclass(lang_cls, cls)
+                or lang_cls is not cls
+            ):
+                raise ValueError(Errors.E943.format(value=type(lang_cls)))
+
+        # Warn about require_gpu usage in jupyter notebook
+        warn_if_jupyter_cupy()
+
+        # Note that we don't load vectors here, instead they get loaded explicitly
+        # inside stuff like the spacy train function. If we loaded them here,
+        # then we would load them twice at runtime: once when we make from config,
+        # and then again when we load from disk.
+        nlp = lang_cls(
+            vocab=vocab,
+            create_tokenizer=create_tokenizer,
+            create_vectors=create_vectors,
+            meta=meta,
+        )
+        if after_creation is not None:
+            nlp = after_creation(nlp)
+            if not isinstance(nlp, cls):
+                raise ValueError(Errors.E942.format(name="creation", value=type(nlp)))
+        # To create the components we need to use the final interpolated config
+        # so all values are available (if component configs use variables).
+        # Later we replace the component config with the raw config again.
+        interpolated = filled.interpolate() if not filled.is_interpolated else filled
+        pipeline = interpolated.get("components", {})
+        # If components are loaded from a source (existing models), we cache
+        # them here so they're only loaded once
+        source_nlps = {}
+        source_nlp_vectors_hashes = {}
+        vocab_b = None
+        for pipe_name in config["nlp"]["pipeline"]:
+            if pipe_name not in pipeline:
+                opts = ", ".join(pipeline.keys())
+                raise ValueError(Errors.E956.format(name=pipe_name, opts=opts))
+            pipe_cfg = util.copy_config(pipeline[pipe_name])
+            raw_config = Config(filled["components"][pipe_name])
+            if pipe_name not in exclude:
+                if "factory" not in pipe_cfg and "source" not in pipe_cfg:
+                    err = Errors.E984.format(name=pipe_name, config=pipe_cfg)
+                    raise ValueError(err)
+                if "factory" in pipe_cfg:
+                    factory = pipe_cfg.pop("factory")
+                    # The pipe name (key in the config) here is the unique name
+                    # of the component, not necessarily the factory
+                    nlp.add_pipe(
+                        factory,
+                        name=pipe_name,
+                        config=pipe_cfg,
+                        validate=validate,
+                        raw_config=raw_config,
+                    )
+                else:
+                    assert "source" in pipe_cfg
+                    # We need the sourced components to reference the same
+                    # vocab without modifying the current vocab state **AND**
+                    # we still want to load the source model vectors to perform
+                    # the vectors check. Since the source vectors clobber the
+                    # current ones, we save the original vocab state and
+                    # restore after this loop. Existing strings are preserved
+                    # during deserialization, so they do not need any
+                    # additional handling.
+                    if vocab_b is None:
+                        vocab_b = nlp.vocab.to_bytes(exclude=["lookups", "strings"])
+                    model = pipe_cfg["source"]
+                    if model not in source_nlps:
+                        # Load with the same vocab, adding any strings
+                        source_nlps[model] = util.load_model(
+                            model, vocab=nlp.vocab, exclude=["lookups"]
+                        )
+                    source_name = pipe_cfg.get("component", pipe_name)
+                    listeners_replaced = False
+                    if "replace_listeners" in pipe_cfg:
+                        # Make sure that the listened-to component has the
+                        # state of the source pipeline listener map so that the
+                        # replace_listeners method below works as intended.
+                        source_nlps[model]._link_components()
+                        for name, proc in source_nlps[model].pipeline:
+                            if source_name in getattr(proc, "listening_components", []):
+                                source_nlps[model].replace_listeners(
+                                    name, source_name, pipe_cfg["replace_listeners"]
+                                )
+                                listeners_replaced = True
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings("ignore", message="\\[W113\\]")
+                        nlp.add_pipe(
+                            source_name, source=source_nlps[model], name=pipe_name
+                        )
+                        # At this point after nlp.add_pipe, the listener map
+                        # corresponds to the new pipeline.
+                    if model not in source_nlp_vectors_hashes:
+                        source_nlp_vectors_hashes[model] = hash(
+                            source_nlps[model].vocab.vectors.to_bytes(
+                                exclude=["strings"]
+                            )
+                        )
+                    if "_sourced_vectors_hashes" not in nlp.meta:
+                        nlp.meta["_sourced_vectors_hashes"] = {}
+                    nlp.meta["_sourced_vectors_hashes"][
+                        pipe_name
+                    ] = source_nlp_vectors_hashes[model]
+                    # Delete from cache if listeners were replaced
+                    if listeners_replaced:
+                        del source_nlps[model]
+        # Restore the original vocab after sourcing if necessary
+        if vocab_b is not None:
+            nlp.vocab.from_bytes(vocab_b)
+
+        # Resolve disabled/enabled settings.
+        if isinstance(disable, str):
+            disable = [disable]
+        if isinstance(enable, str):
+            enable = [enable]
+        if isinstance(exclude, str):
+            exclude = [exclude]
+
+        # `enable` should not be merged with `enabled` (the opposite is true for `disable`/`disabled`). If the config
+        # specifies values for `enabled` not included in `enable`, emit warning.
+        if id(enable) != id(_DEFAULT_EMPTY_PIPES):
+            enabled = config["nlp"].get("enabled", [])
+            if len(enabled) and not set(enabled).issubset(enable):
+                warnings.warn(
+                    Warnings.W123.format(
+                        enable=enable,
+                        enabled=enabled,
+                    )
+                )
+
+        # Ensure sets of disabled/enabled pipe names are not contradictory.
+        disabled_pipes = cls._resolve_component_status(
+            list({*disable, *config["nlp"].get("disabled", [])}),
+            enable,
+            config["nlp"]["pipeline"],
+        )
+        nlp._disabled = set(p for p in disabled_pipes if p not in exclude)
+
+        nlp.batch_size = config["nlp"]["batch_size"]
+        nlp.config = filled if auto_fill else config
+        if after_pipeline_creation is not None:
+            nlp = after_pipeline_creation(nlp)
+            if not isinstance(nlp, cls):
+                raise ValueError(
+                    Errors.E942.format(name="pipeline_creation", value=type(nlp))
+                )
+        return nlp
+
+    def replace_listeners(
+        self,
+        tok2vec_name: str,
+        pipe_name: str,
+        listeners: Iterable[str],
+    ) -> None:
+        """Find listener layers (connecting to a token-to-vector embedding
+        component) of a given pipeline component model and replace
+        them with a standalone copy of the token-to-vector layer. This can be
+        useful when training a pipeline with components sourced from an existing
+        pipeline: if multiple components (e.g. tagger, parser, NER) listen to
+        the same tok2vec component, but some of them are frozen and not updated,
+        their performance may degrade significantly as the tok2vec component is
+        updated with new data. To prevent this, listeners can be replaced with
+        a standalone tok2vec layer that is owned by the component and doesn't
+        change if the component isn't updated.
+
+        tok2vec_name (str): Name of the token-to-vector component, typically
+            "tok2vec" or "transformer".
+        pipe_name (str): Name of pipeline component to replace listeners for.
+        listeners (Iterable[str]): The paths to the listeners, relative to the
+            component config, e.g. ["model.tok2vec"]. Typically, implementations
+            will only connect to one tok2vec component, [model.tok2vec], but in
+            theory, custom models can use multiple listeners. The value here can
+            either be an empty list to not replace any listeners, or a complete
+            (!) list of the paths to all listener layers used by the model.
+
+        DOCS: https://spacy.io/api/language#replace_listeners
+        """
+        if tok2vec_name not in self.pipe_names:
+            err = Errors.E889.format(
+                tok2vec=tok2vec_name,
+                name=pipe_name,
+                unknown=tok2vec_name,
+                opts=", ".join(self.pipe_names),
+            )
+            raise ValueError(err)
+        if pipe_name not in self.pipe_names:
+            err = Errors.E889.format(
+                tok2vec=tok2vec_name,
+                name=pipe_name,
+                unknown=pipe_name,
+                opts=", ".join(self.pipe_names),
+            )
+            raise ValueError(err)
+        tok2vec = self.get_pipe(tok2vec_name)
+        tok2vec_cfg = self.get_pipe_config(tok2vec_name)
+        if not isinstance(tok2vec, ty.ListenedToComponent):
+            raise ValueError(Errors.E888.format(name=tok2vec_name, pipe=type(tok2vec)))
+        tok2vec_model = tok2vec.model
+        pipe_listeners = tok2vec.listener_map.get(pipe_name, [])
+        pipe = self.get_pipe(pipe_name)
+        pipe_cfg = self._pipe_configs[pipe_name]
+        if listeners:
+            util.logger.debug("Replacing listeners of component '%s'", pipe_name)
+            if len(list(listeners)) != len(pipe_listeners):
+                # The number of listeners defined in the component model doesn't
+                # match the listeners to replace, so we won't be able to update
+                # the nodes and generate a matching config
+                err = Errors.E887.format(
+                    name=pipe_name,
+                    tok2vec=tok2vec_name,
+                    paths=listeners,
+                    n_listeners=len(pipe_listeners),
+                )
+                raise ValueError(err)
+            # Update the config accordingly by copying the tok2vec model to all
+            # sections defined in the listener paths
+            for listener_path in listeners:
+                # Check if the path actually exists in the config
+                try:
+                    util.dot_to_object(pipe_cfg, listener_path)
+                except KeyError:
+                    err = Errors.E886.format(
+                        name=pipe_name, tok2vec=tok2vec_name, path=listener_path
+                    )
+                    raise ValueError(err)
+                new_config = tok2vec_cfg["model"]
+                if "replace_listener_cfg" in tok2vec_model.attrs:
+                    replace_func = tok2vec_model.attrs["replace_listener_cfg"]
+                    new_config = replace_func(
+                        tok2vec_cfg["model"], pipe_cfg["model"]["tok2vec"]
+                    )
+                util.set_dot_to_object(pipe_cfg, listener_path, new_config)
+            # Go over the listener layers and replace them
+            for listener in pipe_listeners:
+                new_model = tok2vec_model.copy()
+                replace_listener_func = tok2vec_model.attrs.get("replace_listener")
+                if replace_listener_func is not None:
+                    # Pass the extra args to the callback without breaking compatibility with
+                    # old library versions that only expect a single parameter.
+                    num_params = len(
+                        inspect.signature(replace_listener_func).parameters
+                    )
+                    if num_params == 1:
+                        new_model = replace_listener_func(new_model)
+                    elif num_params == 3:
+                        new_model = replace_listener_func(new_model, listener, tok2vec)
+                    else:
+                        raise ValueError(Errors.E1055.format(num_params=num_params))
+
+                util.replace_model_node(pipe.model, listener, new_model)  # type: ignore[attr-defined]
+                tok2vec.remove_listener(listener, pipe_name)
+
+    @contextmanager
+    def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]:
+        """Begin a block where all resources allocated during the block will
+        be freed at the end of it. If a resource was created within the
+        memory zone block, accessing it outside the block is invalid.
+        Behaviour of this invalid access is undefined. Memory zones should
+        not be nested.
+
+        The memory zone is helpful for services that need to process large
+        volumes of text with a defined memory budget.
+
+        Example
+        -------
+        >>> with nlp.memory_zone():
+        ...     for doc in nlp.pipe(texts):
+        ...        process_my_doc(doc)
+        >>> # use_doc(doc) <-- Invalid: doc was allocated in the memory zone
+        """
+        if mem is None:
+            mem = Pool()
+        # The ExitStack allows programmatic nested context managers.
+        # We don't know how many we need, so it would be awkward to have
+        # them as nested blocks.
+        with ExitStack() as stack:
+            contexts = [stack.enter_context(self.vocab.memory_zone(mem))]
+            if hasattr(self.tokenizer, "memory_zone"):
+                contexts.append(stack.enter_context(self.tokenizer.memory_zone(mem)))
+            for _, pipe in self.pipeline:
+                if hasattr(pipe, "memory_zone"):
+                    contexts.append(stack.enter_context(pipe.memory_zone(mem)))
+            yield mem
+
+    def to_disk(
+        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
+    ) -> None:
+        """Save the current state to a directory.  If a model is loaded, this
+        will include the model.
+
+        path (str / Path): Path to a directory, which will be created if
+            it doesn't exist.
+        exclude (Iterable[str]): Names of components or serialization fields to exclude.
+
+        DOCS: https://spacy.io/api/language#to_disk
+        """
+        path = util.ensure_path(path)
+        serializers = {}
+        serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(  # type: ignore[union-attr]
+            p, exclude=["vocab"]
+        )
+        serializers["meta.json"] = lambda p: srsly.write_json(
+            p, _replace_numpy_floats(self.meta)
+        )
+        serializers["config.cfg"] = lambda p: self.config.to_disk(p)
+        for name, proc in self._components:
+            if name in exclude:
+                continue
+            if not hasattr(proc, "to_disk"):
+                continue
+            serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"])  # type: ignore[misc]
+        serializers["vocab"] = lambda p: self.vocab.to_disk(p, exclude=exclude)
+        util.to_disk(path, serializers, exclude)
+
+    @staticmethod
+    def _resolve_component_status(
+        disable: Union[str, Iterable[str]],
+        enable: Union[str, Iterable[str]],
+        pipe_names: Iterable[str],
+    ) -> Tuple[str, ...]:
+        """Derives whether (1) `disable` and `enable` values are consistent and (2)
+        resolves those to a single set of disabled components. Raises an error in
+        case of inconsistency.
+
+        disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable.
+        enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable.
+        pipe_names (Iterable[str]): Names of all pipeline components.
+
+        RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
+                                   specified includes and excludes.
+        """
+
+        if isinstance(disable, str):
+            disable = [disable]
+        to_disable = disable
+
+        if enable:
+            if isinstance(enable, str):
+                enable = [enable]
+            to_disable = {
+                *[pipe_name for pipe_name in pipe_names if pipe_name not in enable],
+                *disable,
+            }
+            # If any pipe to be enabled is in to_disable, the specification is inconsistent.
+            if len(set(enable) & to_disable):
+                raise ValueError(Errors.E1042.format(enable=enable, disable=disable))
+
+        return tuple(to_disable)
+
+    def from_disk(
+        self,
+        path: Union[str, Path],
+        *,
+        exclude: Iterable[str] = SimpleFrozenList(),
+        overrides: Dict[str, Any] = SimpleFrozenDict(),
+    ) -> "Language":
+        """Loads state from a directory. Modifies the object in place and
+        returns it. If the saved `Language` object contains a model, the
+        model will be loaded.
+
+        path (str / Path): A path to a directory.
+        exclude (Iterable[str]): Names of components or serialization fields to exclude.
+        RETURNS (Language): The modified `Language` object.
+
+        DOCS: https://spacy.io/api/language#from_disk
+        """
+
+        def deserialize_meta(path: Path) -> None:
+            if path.exists():
+                data = srsly.read_json(path)
+                self.meta.update(data)
+                # self.meta always overrides meta["vectors"] with the metadata
+                # from self.vocab.vectors, so set the name directly
+                self.vocab.vectors.name = data.get("vectors", {}).get("name")
+
+        def deserialize_vocab(path: Path) -> None:
+            if path.exists():
+                self.vocab.from_disk(path, exclude=exclude)
+
+        path = util.ensure_path(path)
+        deserializers = {}
+        if Path(path / "config.cfg").exists():  # type: ignore[operator]
+            deserializers["config.cfg"] = lambda p: self.config.from_disk(
+                p, interpolate=False, overrides=overrides
+            )
+        deserializers["meta.json"] = deserialize_meta  # type: ignore[assignment]
+        deserializers["vocab"] = deserialize_vocab  # type: ignore[assignment]
+        deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]
+            p, exclude=["vocab"]
+        )
+        for name, proc in self._components:
+            if name in exclude:
+                continue
+            if not hasattr(proc, "from_disk"):
+                continue
+            deserializers[name] = lambda p, proc=proc: proc.from_disk(  # type: ignore[misc]
+                p, exclude=["vocab"]
+            )
+        if not (path / "vocab").exists() and "vocab" not in exclude:  # type: ignore[operator]
+            # Convert to list here in case exclude is (default) tuple
+            exclude = list(exclude) + ["vocab"]
+        util.from_disk(path, deserializers, exclude)  # type: ignore[arg-type]
+        self._path = path  # type: ignore[assignment]
+        self._link_components()
+        return self
+
+    def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
+        """Serialize the current state to a binary string.
+
+        exclude (Iterable[str]): Names of components or serialization fields to exclude.
+        RETURNS (bytes): The serialized form of the `Language` object.
+
+        DOCS: https://spacy.io/api/language#to_bytes
+        """
+        serializers: Dict[str, Callable[[], bytes]] = {}
+        serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
+        serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])  # type: ignore[union-attr]
+        serializers["meta.json"] = lambda: srsly.json_dumps(
+            _replace_numpy_floats(self.meta)
+        )
+        serializers["config.cfg"] = lambda: self.config.to_bytes()
+        for name, proc in self._components:
+            if name in exclude:
+                continue
+            if not hasattr(proc, "to_bytes"):
+                continue
+            serializers[name] = lambda proc=proc: proc.to_bytes(exclude=["vocab"])  # type: ignore[misc]
+        return util.to_bytes(serializers, exclude)
+
+    def from_bytes(
+        self, bytes_data: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
+    ) -> "Language":
+        """Load state from a binary string.
+
+        bytes_data (bytes): The data to load from.
+        exclude (Iterable[str]): Names of components or serialization fields to exclude.
+        RETURNS (Language): The `Language` object.
+
+        DOCS: https://spacy.io/api/language#from_bytes
+        """
+
+        def deserialize_meta(b):
+            data = srsly.json_loads(b)
+            self.meta.update(data)
+            # self.meta always overrides meta["vectors"] with the metadata
+            # from self.vocab.vectors, so set the name directly
+            self.vocab.vectors.name = data.get("vectors", {}).get("name")
+
+        deserializers: Dict[str, Callable[[bytes], Any]] = {}
+        deserializers["config.cfg"] = lambda b: self.config.from_bytes(
+            b, interpolate=False
+        )
+        deserializers["meta.json"] = deserialize_meta
+        deserializers["vocab"] = lambda b: self.vocab.from_bytes(b, exclude=exclude)
+        deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(  # type: ignore[union-attr]
+            b, exclude=["vocab"]
+        )
+        for name, proc in self._components:
+            if name in exclude:
+                continue
+            if not hasattr(proc, "from_bytes"):
+                continue
+            deserializers[name] = lambda b, proc=proc: proc.from_bytes(  # type: ignore[misc]
+                b, exclude=["vocab"]
+            )
+        util.from_bytes(bytes_data, deserializers, exclude)
+        self._link_components()
+        return self
+
+

A text-processing pipeline. Usually you'll load this once per process, and pass the instance around your application.

+

Defaults (class): Settings, data and factory methods for creating the nlp object and processing pipeline.
lang (str): IETF language code, such as 'en'.

+

DOCS: https://spacy.io/api/language

+

Initialise a Language object.

+

vocab (Vocab): A Vocab object. If True, a vocab is created.
meta (dict): Custom meta data for the Language class. Is written to by models to add model meta data.
max_length (int): Maximum number of characters in a single text. The current models may run out of memory on extremely long texts, due to large internal allocations. You should segment these texts into meaningful units, e.g. paragraphs, subsections etc., before passing them to spaCy. The default maximum length is 1,000,000 characters (about 1 MB). As a rule of thumb, if all pipeline components are enabled, spaCy's default models currently require roughly 1 GB of temporary memory per 100,000 characters in one text.
create_tokenizer (Callable): Function that takes the nlp object and returns a tokenizer.
batch_size (int): Default batch size for pipe and evaluate.

+

DOCS: https://spacy.io/api/language#init

+
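A minimal usage sketch (assuming spaCy is installed; the blank German pipeline is used purely for illustration and is not part of this project's API):

>>> import spacy
>>> nlp = spacy.blank("de")  # German() instance with an empty pipeline
>>> doc = nlp("Das ist ein Satz.")
>>> [t.text for t in doc]
['Das', 'ist', 'ein', 'Satz', '.']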

Subclasses

+
  • spacy.lang.de.German
  • spacy.lang.ja.Japanese
  • spacy.lang.xx.MultiLanguage

Class variables

+
+
var Defaults
+
+

Language data defaults, available via Language.Defaults. Can be overwritten by language subclasses by defining their own subclasses of Language.Defaults.

+
+
var default_config
+
+
+
+
var factories
+
+
+
+
var lang : str | None
+
+
+
+
+

Static methods

+
+
+def component(name: str,
*,
assigns: Iterable[str] = [],
requires: Iterable[str] = [],
retokenizes: bool = False,
func: Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc] | None = None) ‑> Callable[..., Any]
+
+
+

Register a new pipeline component. Can be used for stateless function components that don't require a separate factory. Can be used as a decorator on a function or classmethod, or called as a function with the factory provided as the func keyword argument. To create a component and add it to the pipeline, you can use nlp.add_pipe(name).

+

name (str): The name of the component factory.
assigns (Iterable[str]): Doc/Token attributes assigned by this component, e.g. "token.ent_id". Used for pipeline analysis.
requires (Iterable[str]): Doc/Token attributes required by this component, e.g. "token.ent_id". Used for pipeline analysis.
retokenizes (bool): Whether the component changes the tokenization. Used for pipeline analysis.
func (Optional[Callable[[Doc], Doc]]): Factory function if not used as a decorator.

+

DOCS: https://spacy.io/api/language#component

+
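As a short sketch of the decorator form described above (the component name "clean_doc" and the existing nlp object are assumptions made for illustration):

>>> from spacy.language import Language
>>> @Language.component("clean_doc")
... def clean_doc(doc):
...     # stateless component: receives a Doc, returns the (possibly modified) Doc
...     return doc
>>> nlp.add_pipe("clean_doc")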
+
+def factory(name: str,
*,
default_config: Dict[str, Any] = {},
assigns: Iterable[str] = [],
requires: Iterable[str] = [],
retokenizes: bool = False,
default_score_weights: Dict[str, float | None] = {},
func: Callable | None = None) ‑> Callable
+
+
+

Register a new pipeline component factory. Can be used as a decorator on a function or classmethod, or called as a function with the factory provided as the func keyword argument. To create a component and add it to the pipeline, you can use nlp.add_pipe(name).

+

name (str): The name of the component factory.
default_config (Dict[str, Any]): Default configuration, describing the default values of the factory arguments.
assigns (Iterable[str]): Doc/Token attributes assigned by this component, e.g. "token.ent_id". Used for pipeline analysis.
requires (Iterable[str]): Doc/Token attributes required by this component, e.g. "token.ent_id". Used for pipeline analysis.
retokenizes (bool): Whether the component changes the tokenization. Used for pipeline analysis.
default_score_weights (Dict[str, Optional[float]]): The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to 1.0 per component and will be combined and normalized for the whole pipeline. If None, the score won't be shown in the logs or be weighted.
func (Optional[Callable]): Factory function if not used as a decorator.

+

DOCS: https://spacy.io/api/language#factory

+
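A hedged sketch of the factory form; the name "length_logger", the config key "log_every" and the LengthLogger class are hypothetical. The factory callable receives nlp and name followed by the arguments declared in default_config:

>>> from spacy.language import Language
>>> @Language.factory("length_logger", default_config={"log_every": 100})
... def create_length_logger(nlp, name, log_every):
...     return LengthLogger(log_every=log_every)  # hypothetical stateful component class
>>> nlp.add_pipe("length_logger", config={"log_every": 10})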
+
+def from_config(config: Dict[str, Any] | confection.Config = {},
*,
vocab: spacy.vocab.Vocab | bool = True,
disable: str | Iterable[str] = [],
enable: str | Iterable[str] = [],
exclude: str | Iterable[str] = [],
meta: Dict[str, Any] = {},
auto_fill: bool = True,
validate: bool = True) ‑> spacy.language.Language
+
+
+

Create the nlp object from a loaded config. Will set up the tokenizer and language data, add pipeline components etc. If no config is provided, the default config of the given language is used.

+

config (Dict[str, Any] / Config): The loaded config.
vocab (Vocab): A Vocab object. If True, a vocab is created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other pipes will be disabled (and can be enabled using nlp.enable_pipe).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
auto_fill (bool): Automatically fill in missing values in config based on defaults and function argument annotations.
validate (bool): Validate the component config and arguments against the types expected by the factory.
RETURNS (Language): The initialized Language class.

+

DOCS: https://spacy.io/api/language#from_config

+
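A minimal sketch (the "./config.cfg" path is illustrative; Config is the confection/thinc config class already referenced in these docs):

>>> from spacy.lang.de import German
>>> from thinc.api import Config
>>> config = Config().from_disk("./config.cfg")
>>> nlp = German.from_config(config)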
+
+def get_factory_meta(name: str) ‑> spacy.language.FactoryMeta +
+
+

Get the meta information for a given factory name.

+

name (str): The component factory name. +RETURNS (FactoryMeta): The meta for the given factory name.

+
+
+def get_factory_name(name: str) ‑> str +
+
+

Get the internal factory name based on the language subclass.

+

name (str): The factory name. +RETURNS (str): The internal factory name.

+
+
+def has_factory(name: str) ‑> bool +
+
+

RETURNS (bool): Whether a factory of that name is registered.

+
+
+def set_factory_meta(name: str, value: FactoryMeta) ‑> None +
+
+

Set the meta information for a given factory name.

+

name (str): The component factory name. +value (FactoryMeta): The meta to set.

+
+
+

Instance variables

+
+
prop component_names : List[str]
+
+
+ +Expand source code + +
@property
+def component_names(self) -> List[str]:
+    """Get the names of the available pipeline components. Includes all
+    active and inactive pipeline components.
+
+    RETURNS (List[str]): List of component name strings, in order.
+    """
+    names = [pipe_name for pipe_name, _ in self._components]
+    return SimpleFrozenList(names, error=Errors.E926.format(attr="component_names"))
+
+

Get the names of the available pipeline components. Includes all active and inactive pipeline components.

+

RETURNS (List[str]): List of component name strings, in order.

+
+
prop components : List[Tuple[str, Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc]]]
+
+
+ +Expand source code + +
@property
+def components(self) -> List[Tuple[str, PipeCallable]]:
+    """Get all (name, component) tuples in the pipeline, including the
+    currently disabled components.
+    """
+    return SimpleFrozenList(
+        self._components, error=Errors.E926.format(attr="components")
+    )
+
+

Get all (name, component) tuples in the pipeline, including the +currently disabled components.

+
+
prop config : confection.Config
+
+
+ +Expand source code + +
@property
+def config(self) -> Config:
+    """Trainable config for the current language instance. Includes the
+    current pipeline components, as well as default training config.
+
+    RETURNS (thinc.api.Config): The config.
+
+    DOCS: https://spacy.io/api/language#config
+    """
+    self._config.setdefault("nlp", {})
+    self._config.setdefault("training", {})
+    self._config["nlp"]["lang"] = self.lang
+    # We're storing the filled config for each pipeline component and so
+    # we can populate the config again later
+    pipeline = {}
+    score_weights = []
+    for pipe_name in self.component_names:
+        pipe_meta = self.get_pipe_meta(pipe_name)
+        pipe_config = self.get_pipe_config(pipe_name)
+        pipeline[pipe_name] = {"factory": pipe_meta.factory, **pipe_config}
+        if pipe_meta.default_score_weights:
+            score_weights.append(pipe_meta.default_score_weights)
+    self._config["nlp"]["pipeline"] = list(self.component_names)
+    self._config["nlp"]["disabled"] = list(self.disabled)
+    self._config["components"] = pipeline
+    # We're merging the existing score weights back into the combined
+    # weights to make sure we're preserving custom settings in the config
+    # but also reflect updates (e.g. new components added)
+    prev_weights = self._config["training"].get("score_weights", {})
+    combined_score_weights = combine_score_weights(score_weights, prev_weights)
+    self._config["training"]["score_weights"] = combined_score_weights
+    if not srsly.is_json_serializable(self._config):
+        raise ValueError(Errors.E961.format(config=self._config))
+    return self._config
+
+

Trainable config for the current language instance. Includes the current pipeline components, as well as default training config.

+

RETURNS (thinc.api.Config): The config.

+

DOCS: https://spacy.io/api/language#config

+
+
prop disabled : List[str]
+
+
+ +Expand source code + +
@property
+def disabled(self) -> List[str]:
+    """Get the names of all disabled components.
+
+    RETURNS (List[str]): The disabled components.
+    """
+    # Make sure the disabled components are returned in the order they
+    # appear in the pipeline (which isn't guaranteed by the set)
+    names = [name for name, _ in self._components if name in self._disabled]
+    return SimpleFrozenList(names, error=Errors.E926.format(attr="disabled"))
+
+

Get the names of all disabled components.

+

RETURNS (List[str]): The disabled components.

+
+
prop factory_names : List[str]
+
+
+ +Expand source code + +
@property
+def factory_names(self) -> List[str]:
+    """Get names of all available factories.
+
+    RETURNS (List[str]): The factory names.
+    """
+    names = list(self.factories.keys())
+    return SimpleFrozenList(names)
+
+

Get names of all available factories.

+

RETURNS (List[str]): The factory names.

+
+
prop meta : Dict[str, Any]
+
+
+ +Expand source code + +
@property
+def meta(self) -> Dict[str, Any]:
+    """Custom meta data of the language class. If a model is loaded, this
+    includes details from the model's meta.json.
+
+    RETURNS (Dict[str, Any]): The meta.
+
+    DOCS: https://spacy.io/api/language#meta
+    """
+    spacy_version = util.get_minor_version_range(about.__version__)
+    if self.vocab.lang:
+        self._meta.setdefault("lang", self.vocab.lang)
+    else:
+        self._meta.setdefault("lang", self.lang)
+    self._meta.setdefault("name", "pipeline")
+    self._meta.setdefault("version", "0.0.0")
+    self._meta.setdefault("spacy_version", spacy_version)
+    self._meta.setdefault("description", "")
+    self._meta.setdefault("author", "")
+    self._meta.setdefault("email", "")
+    self._meta.setdefault("url", "")
+    self._meta.setdefault("license", "")
+    self._meta.setdefault("spacy_git_version", GIT_VERSION)
+    self._meta["vectors"] = {
+        "width": self.vocab.vectors_length,
+        "vectors": len(self.vocab.vectors),
+        "keys": self.vocab.vectors.n_keys,
+        "name": self.vocab.vectors.name,
+        "mode": self.vocab.vectors.mode,
+    }
+    self._meta["labels"] = dict(self.pipe_labels)
+    # TODO: Adding this back to prevent breaking people's code etc., but
+    # we should consider removing it
+    self._meta["pipeline"] = list(self.pipe_names)
+    self._meta["components"] = list(self.component_names)
+    self._meta["disabled"] = list(self.disabled)
+    return self._meta
+
+

Custom meta data of the language class. If a model is loaded, this includes details from the model's meta.json.

+

RETURNS (Dict[str, Any]): The meta.

+

DOCS: https://spacy.io/api/language#meta

+
+
prop path
+
+
+ +Expand source code + +
@property
+def path(self):
+    return self._path
+
+
+
+
prop pipe_factories : Dict[str, str]
+
+
+ +Expand source code + +
@property
+def pipe_factories(self) -> Dict[str, str]:
+    """Get the component factories for the available pipeline components.
+
+    RETURNS (Dict[str, str]): Factory names, keyed by component names.
+    """
+    factories = {}
+    for pipe_name, pipe in self._components:
+        factories[pipe_name] = self.get_pipe_meta(pipe_name).factory
+    return SimpleFrozenDict(factories)
+
+

Get the component factories for the available pipeline components.

+

RETURNS (Dict[str, str]): Factory names, keyed by component names.

+
+
prop pipe_labels : Dict[str, List[str]]
+
+
+ +Expand source code + +
@property
+def pipe_labels(self) -> Dict[str, List[str]]:
+    """Get the labels set by the pipeline components, if available (if
+    the component exposes a labels property and the labels are not
+    hidden).
+
+    RETURNS (Dict[str, List[str]]): Labels keyed by component name.
+    """
+    labels = {}
+    for name, pipe in self._components:
+        if hasattr(pipe, "hide_labels") and pipe.hide_labels is True:
+            continue
+        if hasattr(pipe, "labels"):
+            labels[name] = list(pipe.labels)
+    return SimpleFrozenDict(labels)
+
+

Get the labels set by the pipeline components, if available (if the component exposes a labels property and the labels are not hidden).

+

RETURNS (Dict[str, List[str]]): Labels keyed by component name.

+
+
prop pipe_names : List[str]
+
+
+ +Expand source code + +
@property
+def pipe_names(self) -> List[str]:
+    """Get names of available active pipeline components.
+
+    RETURNS (List[str]): List of component name strings, in order.
+    """
+    names = [pipe_name for pipe_name, _ in self.pipeline]
+    return SimpleFrozenList(names, error=Errors.E926.format(attr="pipe_names"))
+
+

Get names of available active pipeline components.

+

RETURNS (List[str]): List of component name strings, in order.

+
+
prop pipeline : List[Tuple[str, Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc]]]
+
+
+ +Expand source code + +
@property
+def pipeline(self) -> List[Tuple[str, PipeCallable]]:
+    """The processing pipeline consisting of (name, component) tuples. The
+    components are called on the Doc in order as it passes through the
+    pipeline.
+
+    RETURNS (List[Tuple[str, Callable[[Doc], Doc]]]): The pipeline.
+    """
+    pipes = [(n, p) for n, p in self._components if n not in self._disabled]
+    return SimpleFrozenList(pipes, error=Errors.E926.format(attr="pipeline"))
+
+

The processing pipeline consisting of (name, component) tuples. The components are called on the Doc in order as it passes through the pipeline.

+

RETURNS (List[Tuple[str, Callable[[Doc], Doc]]]): The pipeline.

+
+
+

Methods

+
+
+def add_pipe(self,
factory_name: str,
name: str | None = None,
*,
before: str | int | None = None,
after: str | int | None = None,
first: bool | None = None,
last: bool | None = None,
source: ForwardRef('Language') | None = None,
config: Dict[str, Any] = {},
raw_config: confection.Config | None = None,
validate: bool = True) ‑> Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc]
+
+
+
+ +Expand source code + +
def add_pipe(
+    self,
+    factory_name: str,
+    name: Optional[str] = None,
+    *,
+    before: Optional[Union[str, int]] = None,
+    after: Optional[Union[str, int]] = None,
+    first: Optional[bool] = None,
+    last: Optional[bool] = None,
+    source: Optional["Language"] = None,
+    config: Dict[str, Any] = SimpleFrozenDict(),
+    raw_config: Optional[Config] = None,
+    validate: bool = True,
+) -> PipeCallable:
+    """Add a component to the processing pipeline. Valid components are
+    callables that take a `Doc` object, modify it and return it. Only one
+    of before/after/first/last can be set. Default behaviour is "last".
+
+    factory_name (str): Name of the component factory.
+    name (str): Name of pipeline component. Overwrites existing
+        component.name attribute if available. If no name is set and
+        the component exposes no name attribute, component.__name__ is
+        used. An error is raised if a name already exists in the pipeline.
+    before (Union[str, int]): Name or index of the component to insert new
+        component directly before.
+    after (Union[str, int]): Name or index of the component to insert new
+        component directly after.
+    first (bool): If True, insert component first in the pipeline.
+    last (bool): If True, insert component last in the pipeline.
+    source (Language): Optional loaded nlp object to copy the pipeline
+        component from.
+    config (Dict[str, Any]): Config parameters to use for this component.
+        Will be merged with default config, if available.
+    raw_config (Optional[Config]): Internals: the non-interpolated config.
+    validate (bool): Whether to validate the component config against the
+        arguments and types expected by the factory.
+    RETURNS (Callable[[Doc], Doc]): The pipeline component.
+
+    DOCS: https://spacy.io/api/language#add_pipe
+    """
+    if not isinstance(factory_name, str):
+        bad_val = repr(factory_name)
+        err = Errors.E966.format(component=bad_val, name=name)
+        raise ValueError(err)
+    name = name if name is not None else factory_name
+    if name in self.component_names:
+        raise ValueError(Errors.E007.format(name=name, opts=self.component_names))
+    # Overriding pipe name in the config is not supported and will be ignored.
+    if "name" in config:
+        warnings.warn(Warnings.W119.format(name_in_config=config.pop("name")))
+    if source is not None:
+        # We're loading the component from a model. After loading the
+        # component, we know its real factory name
+        pipe_component, factory_name = self.create_pipe_from_source(
+            factory_name, source, name=name
+        )
+    else:
+        pipe_component = self.create_pipe(
+            factory_name,
+            name=name,
+            config=config,
+            raw_config=raw_config,
+            validate=validate,
+        )
+    pipe_index = self._get_pipe_index(before, after, first, last)
+    self._pipe_meta[name] = self.get_factory_meta(factory_name)
+    self._components.insert(pipe_index, (name, pipe_component))
+    self._link_components()
+    return pipe_component
+
+

Add a component to the processing pipeline. Valid components are callables that take a Doc object, modify it and return it. Only one of before/after/first/last can be set. Default behaviour is "last".

+

factory_name (str): Name of the component factory.
name (str): Name of pipeline component. Overwrites existing component.name attribute if available. If no name is set and the component exposes no name attribute, component.__name__ is used. An error is raised if a name already exists in the pipeline.
before (Union[str, int]): Name or index of the component to insert new component directly before.
after (Union[str, int]): Name or index of the component to insert new component directly after.
first (bool): If True, insert component first in the pipeline.
last (bool): If True, insert component last in the pipeline.
source (Language): Optional loaded nlp object to copy the pipeline component from.
config (Dict[str, Any]): Config parameters to use for this component. Will be merged with default config, if available.
raw_config (Optional[Config]): Internals: the non-interpolated config.
validate (bool): Whether to validate the component config against the arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.

+

DOCS: https://spacy.io/api/language#add_pipe

+
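For illustration (assuming an existing nlp object; "sentencizer" is one of spaCy's built-in factories):

>>> nlp.add_pipe("sentencizer", first=True)
>>> "sentencizer" in nlp.pipe_names
True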
+
+def analyze_pipes(self,
*,
keys: List[str] = ['assigns', 'requires', 'scores', 'retokenizes'],
pretty: bool = False) ‑> Dict[str, Any] | None
+
+
+
+ +Expand source code + +
def analyze_pipes(
+    self,
+    *,
+    keys: List[str] = ["assigns", "requires", "scores", "retokenizes"],
+    pretty: bool = False,
+) -> Optional[Dict[str, Any]]:
+    """Analyze the current pipeline components, print a summary of what
+    they assign or require and check that all requirements are met.
+
+    keys (List[str]): The meta values to display in the table. Corresponds
+        to values in FactoryMeta, defined by @Language.factory decorator.
+    pretty (bool): Pretty-print the results.
+    RETURNS (dict): The data.
+    """
+    analysis = analyze_pipes(self, keys=keys)
+    if pretty:
+        print_pipe_analysis(analysis, keys=keys)
+    return analysis
+
+

Analyze the current pipeline components, print a summary of what they assign or require and check that all requirements are met.

+

keys (List[str]): The meta values to display in the table. Corresponds to values in FactoryMeta, defined by @Language.factory decorator.
pretty (bool): Pretty-print the results.
RETURNS (dict): The data.

+
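A quick sketch (assuming an nlp object with one or more components added):

>>> analysis = nlp.analyze_pipes(pretty=True)  # also prints a formatted summary table
>>> isinstance(analysis, dict)
True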
+
+def begin_training(self,
get_examples: Callable[[], Iterable[spacy.training.example.Example]] | None = None,
*,
sgd: thinc.optimizers.Optimizer | None = None) ‑> thinc.optimizers.Optimizer
+
+
+
+ +Expand source code + +
def begin_training(
+    self,
+    get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+    *,
+    sgd: Optional[Optimizer] = None,
+) -> Optimizer:
+    warnings.warn(Warnings.W089, DeprecationWarning)
+    return self.initialize(get_examples, sgd=sgd)
+
+
+
+
+def create_optimizer(self) +
+
+
+ +Expand source code + +
def create_optimizer(self):
+    """Create an optimizer, usually using the [training.optimizer] config."""
+    subconfig = {"optimizer": self.config["training"]["optimizer"]}
+    return registry.resolve(subconfig)["optimizer"]
+
+

Create an optimizer, usually using the [training.optimizer] config.

+
+
+def create_pipe(self,
factory_name: str,
name: str | None = None,
*,
config: Dict[str, Any] = {},
raw_config: confection.Config | None = None,
validate: bool = True) ‑> Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc]
+
+
+
+ +Expand source code + +
def create_pipe(
+    self,
+    factory_name: str,
+    name: Optional[str] = None,
+    *,
+    config: Dict[str, Any] = SimpleFrozenDict(),
+    raw_config: Optional[Config] = None,
+    validate: bool = True,
+) -> PipeCallable:
+    """Create a pipeline component. Mostly used internally. To create and
+    add a component to the pipeline, you can use nlp.add_pipe.
+
+    factory_name (str): Name of component factory.
+    name (Optional[str]): Optional name to assign to component instance.
+        Defaults to factory name if not set.
+    config (Dict[str, Any]): Config parameters to use for this component.
+        Will be merged with default config, if available.
+    raw_config (Optional[Config]): Internals: the non-interpolated config.
+    validate (bool): Whether to validate the component config against the
+        arguments and types expected by the factory.
+    RETURNS (Callable[[Doc], Doc]): The pipeline component.
+
+    DOCS: https://spacy.io/api/language#create_pipe
+    """
+    name = name if name is not None else factory_name
+    if not isinstance(config, dict):
+        err = Errors.E962.format(style="config", name=name, cfg_type=type(config))
+        raise ValueError(err)
+    if not srsly.is_json_serializable(config):
+        raise ValueError(Errors.E961.format(config=config))
+    if not self.has_factory(factory_name):
+        err = Errors.E002.format(
+            name=factory_name,
+            opts=", ".join(self.factory_names),
+            method="create_pipe",
+            lang=util.get_object_name(self),
+            lang_code=self.lang,
+        )
+        raise ValueError(err)
+    pipe_meta = self.get_factory_meta(factory_name)
+    # This is unideal, but the alternative would mean you always need to
+    # specify the full config settings, which is not really viable.
+    if pipe_meta.default_config:
+        config = Config(pipe_meta.default_config).merge(config)
+    internal_name = self.get_factory_name(factory_name)
+    # If the language-specific factory doesn't exist, try again with the
+    # not-specific name
+    if internal_name not in registry.factories:
+        internal_name = factory_name
+    # The name allows components to know their pipe name and use it in the
+    # losses etc. (even if multiple instances of the same factory are used)
+    config = {"nlp": self, "name": name, **config, "@factories": internal_name}
+    # We need to create a top-level key because Thinc doesn't allow resolving
+    # top-level references to registered functions. Also gives nicer errors.
+    cfg = {factory_name: config}
+    # We're calling the internal _fill here to avoid constructing the
+    # registered functions twice
+    resolved = registry.resolve(cfg, validate=validate)
+    filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
+    filled = Config(filled)
+    filled["factory"] = factory_name
+    filled.pop("@factories", None)
+    # Remove the extra values we added because we don't want to keep passing
+    # them around, copying them etc.
+    filled.pop("nlp", None)
+    filled.pop("name", None)
+    # Merge the final filled config with the raw config (including non-
+    # interpolated variables)
+    if raw_config:
+        filled = filled.merge(raw_config)
+    self._pipe_configs[name] = filled
+    return resolved[factory_name]
+
+

Create a pipeline component. Mostly used internally. To create and add a component to the pipeline, you can use nlp.add_pipe.

+

factory_name (str): Name of component factory. +name (Optional[str]): Optional name to assign to component instance. +Defaults to factory name if not set. +config (Dict[str, Any]): Config parameters to use for this component. +Will be merged with default config, if available. +raw_config (Optional[Config]): Internals: the non-interpolated config. +validate (bool): Whether to validate the component config against the +arguments and types expected by the factory. +RETURNS (Callable[[Doc], Doc]): The pipeline component.

+

DOCS: https://spacy.io/api/language#create_pipe

+
+
+def create_pipe_from_source(self,
source_name: str,
source: Language,
*,
name: str) ‑> Tuple[Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc], str]
+
+
+
+ +Expand source code + +
def create_pipe_from_source(
+    self, source_name: str, source: "Language", *, name: str
+) -> Tuple[PipeCallable, str]:
+    """Create a pipeline component by copying it from an existing model.
+
+    source_name (str): Name of the component in the source pipeline.
+    source (Language): The source nlp object to copy from.
+    name (str): Optional alternative name to use in current pipeline.
+    RETURNS (Tuple[Callable[[Doc], Doc], str]): The component and its factory name.
+    """
+    # Check source type
+    if not isinstance(source, Language):
+        raise ValueError(Errors.E945.format(name=source_name, source=type(source)))
+    if self.vocab.vectors != source.vocab.vectors:
+        warnings.warn(Warnings.W113.format(name=source_name))
+    if source_name not in source.component_names:
+        raise KeyError(
+            Errors.E944.format(
+                name=source_name,
+                model=f"{source.meta['lang']}_{source.meta['name']}",
+                opts=", ".join(source.component_names),
+            )
+        )
+    pipe = source.get_pipe(source_name)
+    # There is no actual solution here. Either the component has the right
+    # name for the source pipeline or the component has the right name for
+    # the current pipeline. This prioritizes the current pipeline.
+    if hasattr(pipe, "name"):
+        pipe.name = name
+    # Make sure the source config is interpolated so we don't end up with
+    # orphaned variables in our final config
+    source_config = source.config.interpolate()
+    pipe_config = util.copy_config(source_config["components"][source_name])
+    self._pipe_configs[name] = pipe_config
+    if self.vocab.strings != source.vocab.strings:
+        for s in source.vocab.strings:
+            self.vocab.strings.add(s)
+    return pipe, pipe_config["factory"]
+
+

Create a pipeline component by copying it from an existing model.

+

source_name (str): Name of the component in the source pipeline. +source (Language): The source nlp object to copy from. +name (str): Optional alternative name to use in current pipeline. +RETURNS (Tuple[Callable[[Doc], Doc], str]): The component and its factory name.

+
+
+def disable_pipe(self, name: str) ‑> None +
+
+
+ +Expand source code + +
def disable_pipe(self, name: str) -> None:
+    """Disable a pipeline component. The component will still exist on
+    the nlp object, but it won't be run as part of the pipeline. Does
+    nothing if the component is already disabled.
+
+    name (str): The name of the component to disable.
+    """
+    if name not in self.component_names:
+        raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
+    self._disabled.add(name)
+
+

Disable a pipeline component. The component will still exist on the nlp object, but it won't be run as part of the pipeline. Does nothing if the component is already disabled.

+

name (str): The name of the component to disable.

+
+
+def disable_pipes(self, *names) ‑> spacy.language.DisabledPipes +
+
+
+ +Expand source code + +
def disable_pipes(self, *names) -> "DisabledPipes":
+    """Disable one or more pipeline components. If used as a context
+    manager, the pipeline will be restored to the initial state at the end
+    of the block. Otherwise, a DisabledPipes object is returned, that has
+    a `.restore()` method you can use to undo your changes.
+
+    This method has been deprecated since 3.0
+    """
+    warnings.warn(Warnings.W096, DeprecationWarning)
+    if len(names) == 1 and isinstance(names[0], (list, tuple)):
+        names = names[0]  # type: ignore[assignment]    # support list of names instead of spread
+    return self.select_pipes(disable=names)
+
+

Disable one or more pipeline components. If used as a context manager, the pipeline will be restored to the initial state at the end of the block. Otherwise, a DisabledPipes object is returned, that has a .restore() method you can use to undo your changes.

+

This method has been deprecated since 3.0

+
+
+def enable_pipe(self, name: str) ‑> None +
+
+
+ +Expand source code + +
def enable_pipe(self, name: str) -> None:
+    """Enable a previously disabled pipeline component so it's run as part
+    of the pipeline. Does nothing if the component is already enabled.
+
+    name (str): The name of the component to enable.
+    """
+    if name not in self.component_names:
+        raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
+    if name in self.disabled:
+        self._disabled.remove(name)
+
+

Enable a previously disabled pipeline component so it's run as part of the pipeline. Does nothing if the component is already enabled.

+

name (str): The name of the component to enable.

+
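For illustration, disable_pipe and enable_pipe together (assuming the pipeline contains a component named "ner"):

>>> nlp.disable_pipe("ner")   # kept on the nlp object, but skipped when the pipeline runs
>>> "ner" in nlp.disabled
True
>>> nlp.enable_pipe("ner")    # runs again as part of the pipeline
>>> "ner" in nlp.pipe_names
True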
+
+def evaluate(self,
examples: Iterable[spacy.training.example.Example],
*,
batch_size: int | None = None,
scorer: spacy.scorer.Scorer | None = None,
component_cfg: Dict[str, Dict[str, Any]] | None = None,
scorer_cfg: Dict[str, Any] | None = None,
per_component: bool = False) ‑> Dict[str, Any]
+
+
+
+ +Expand source code + +
def evaluate(
+    self,
+    examples: Iterable[Example],
+    *,
+    batch_size: Optional[int] = None,
+    scorer: Optional[Scorer] = None,
+    component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+    scorer_cfg: Optional[Dict[str, Any]] = None,
+    per_component: bool = False,
+) -> Dict[str, Any]:
+    """Evaluate a model's pipeline components.
+
+    examples (Iterable[Example]): `Example` objects.
+    batch_size (Optional[int]): Batch size to use.
+    scorer (Optional[Scorer]): Scorer to use. If not passed in, a new one
+        will be created.
+    component_cfg (dict): An optional dictionary with extra keyword
+        arguments for specific components.
+    scorer_cfg (dict): An optional dictionary with extra keyword arguments
+        for the scorer.
+    per_component (bool): Whether to return the scores keyed by component
+        name. Defaults to False.
+
+    RETURNS (Scorer): The scorer containing the evaluation results.
+
+    DOCS: https://spacy.io/api/language#evaluate
+    """
+    examples = list(examples)
+    validate_examples(examples, "Language.evaluate")
+    examples = _copy_examples(examples)
+    if batch_size is None:
+        batch_size = self.batch_size
+    if component_cfg is None:
+        component_cfg = {}
+    if scorer_cfg is None:
+        scorer_cfg = {}
+    if scorer is None:
+        kwargs = dict(scorer_cfg)
+        kwargs.setdefault("nlp", self)
+        scorer = Scorer(**kwargs)
+    # reset annotation in predicted docs and time tokenization
+    start_time = timer()
+    # this is purely for timing
+    for eg in examples:
+        self.make_doc(eg.reference.text)
+    # apply all pipeline components
+    docs = self.pipe(
+        (eg.predicted for eg in examples),
+        batch_size=batch_size,
+        component_cfg=component_cfg,
+    )
+    for eg, doc in zip(examples, docs):
+        eg.predicted = doc
+    end_time = timer()
+    results = scorer.score(examples, per_component=per_component)
+    n_words = sum(len(eg.predicted) for eg in examples)
+    results["speed"] = n_words / (end_time - start_time)
+    return _replace_numpy_floats(results)
+
+

Evaluate a model's pipeline components.

+

examples (Iterable[Example]): Example objects.
batch_size (Optional[int]): Batch size to use.
scorer (Optional[Scorer]): Scorer to use. If not passed in, a new one will be created.
component_cfg (dict): An optional dictionary with extra keyword arguments for specific components.
scorer_cfg (dict): An optional dictionary with extra keyword arguments for the scorer.
per_component (bool): Whether to return the scores keyed by component name. Defaults to False.

+

RETURNS (Scorer): The scorer containing the evaluation results.

+

DOCS: https://spacy.io/api/language#evaluate

+
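A hedged sketch (my_dev_data is a hypothetical list of (text, annotations) pairs used to build gold-standard Example objects):

>>> from spacy.training import Example
>>> examples = [Example.from_dict(nlp.make_doc(text), annots) for text, annots in my_dev_data]
>>> scores = nlp.evaluate(examples)
>>> "speed" in scores  # words per second is always reported
True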
+
+def from_bytes(self, bytes_data: bytes, *, exclude: Iterable[str] = []) ‑> spacy.language.Language +
+
+
+ +Expand source code + +
def from_bytes(
+    self, bytes_data: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
+) -> "Language":
+    """Load state from a binary string.
+
+    bytes_data (bytes): The data to load from.
+    exclude (Iterable[str]): Names of components or serialization fields to exclude.
+    RETURNS (Language): The `Language` object.
+
+    DOCS: https://spacy.io/api/language#from_bytes
+    """
+
+    def deserialize_meta(b):
+        data = srsly.json_loads(b)
+        self.meta.update(data)
+        # self.meta always overrides meta["vectors"] with the metadata
+        # from self.vocab.vectors, so set the name directly
+        self.vocab.vectors.name = data.get("vectors", {}).get("name")
+
+    deserializers: Dict[str, Callable[[bytes], Any]] = {}
+    deserializers["config.cfg"] = lambda b: self.config.from_bytes(
+        b, interpolate=False
+    )
+    deserializers["meta.json"] = deserialize_meta
+    deserializers["vocab"] = lambda b: self.vocab.from_bytes(b, exclude=exclude)
+    deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(  # type: ignore[union-attr]
+        b, exclude=["vocab"]
+    )
+    for name, proc in self._components:
+        if name in exclude:
+            continue
+        if not hasattr(proc, "from_bytes"):
+            continue
+        deserializers[name] = lambda b, proc=proc: proc.from_bytes(  # type: ignore[misc]
+            b, exclude=["vocab"]
+        )
+    util.from_bytes(bytes_data, deserializers, exclude)
+    self._link_components()
+    return self
+
+

Load state from a binary string.

+

bytes_data (bytes): The data to load from.
exclude (Iterable[str]): Names of components or serialization fields to exclude.
RETURNS (Language): The Language object.

+

DOCS: https://spacy.io/api/language#from_bytes

+
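A round-trip sketch combining to_bytes and from_bytes (assuming an existing nlp object; the target pipeline is rebuilt from the same config before loading the bytes, which is the pattern the spaCy docs recommend for pipelines with components):

>>> import spacy
>>> bytes_data = nlp.to_bytes()
>>> lang_cls = spacy.util.get_lang_class(nlp.config["nlp"]["lang"])
>>> nlp2 = lang_cls.from_config(nlp.config)
>>> nlp2 = nlp2.from_bytes(bytes_data)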
+
+def from_disk(self,
path: str | pathlib.Path,
*,
exclude: Iterable[str] = [],
overrides: Dict[str, Any] = {}) ‑> spacy.language.Language
+
+
+
+ +Expand source code + +
def from_disk(
+    self,
+    path: Union[str, Path],
+    *,
+    exclude: Iterable[str] = SimpleFrozenList(),
+    overrides: Dict[str, Any] = SimpleFrozenDict(),
+) -> "Language":
+    """Loads state from a directory. Modifies the object in place and
+    returns it. If the saved `Language` object contains a model, the
+    model will be loaded.
+
+    path (str / Path): A path to a directory.
+    exclude (Iterable[str]): Names of components or serialization fields to exclude.
+    RETURNS (Language): The modified `Language` object.
+
+    DOCS: https://spacy.io/api/language#from_disk
+    """
+
+    def deserialize_meta(path: Path) -> None:
+        if path.exists():
+            data = srsly.read_json(path)
+            self.meta.update(data)
+            # self.meta always overrides meta["vectors"] with the metadata
+            # from self.vocab.vectors, so set the name directly
+            self.vocab.vectors.name = data.get("vectors", {}).get("name")
+
+    def deserialize_vocab(path: Path) -> None:
+        if path.exists():
+            self.vocab.from_disk(path, exclude=exclude)
+
+    path = util.ensure_path(path)
+    deserializers = {}
+    if Path(path / "config.cfg").exists():  # type: ignore[operator]
+        deserializers["config.cfg"] = lambda p: self.config.from_disk(
+            p, interpolate=False, overrides=overrides
+        )
+    deserializers["meta.json"] = deserialize_meta  # type: ignore[assignment]
+    deserializers["vocab"] = deserialize_vocab  # type: ignore[assignment]
+    deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]
+        p, exclude=["vocab"]
+    )
+    for name, proc in self._components:
+        if name in exclude:
+            continue
+        if not hasattr(proc, "from_disk"):
+            continue
+        deserializers[name] = lambda p, proc=proc: proc.from_disk(  # type: ignore[misc]
+            p, exclude=["vocab"]
+        )
+    if not (path / "vocab").exists() and "vocab" not in exclude:  # type: ignore[operator]
+        # Convert to list here in case exclude is (default) tuple
+        exclude = list(exclude) + ["vocab"]
+    util.from_disk(path, deserializers, exclude)  # type: ignore[arg-type]
+    self._path = path  # type: ignore[assignment]
+    self._link_components()
+    return self
+
+

Loads state from a directory. Modifies the object in place and returns it. If the saved Language object contains a model, the model will be loaded.

+

path (str / Path): A path to a directory.
exclude (Iterable[str]): Names of components or serialization fields to exclude.
RETURNS (Language): The modified Language object.

+

DOCS: https://spacy.io/api/language#from_disk

+
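A sketch of the disk round trip (the directory path is illustrative):

>>> nlp.to_disk("/tmp/my_pipeline")  # companion method: writes config, vocab, tokenizer and components
>>> import spacy
>>> nlp2 = spacy.load("/tmp/my_pipeline")  # preferred for full pipelines: rebuilds components from the saved config, then loads from disk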
+
+def get_pipe(self, name: str) ‑> Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc] +
+
+
+ +Expand source code + +
def get_pipe(self, name: str) -> PipeCallable:
+    """Get a pipeline component for a given component name.
+
+    name (str): Name of pipeline component to get.
+    RETURNS (callable): The pipeline component.
+
+    DOCS: https://spacy.io/api/language#get_pipe
+    """
+    for pipe_name, component in self._components:
+        if pipe_name == name:
+            return component
+    raise KeyError(Errors.E001.format(name=name, opts=self.component_names))
+
+

Get a pipeline component for a given component name.

+

name (str): Name of pipeline component to get. +RETURNS (callable): The pipeline component.

+

DOCS: https://spacy.io/api/language#get_pipe

+
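For illustration (assuming a component registered under the name "sentencizer"; pipeline components are Doc-to-Doc callables, so the returned component can be applied directly):

>>> sentencizer = nlp.get_pipe("sentencizer")
>>> doc = sentencizer(nlp.make_doc("Erster Satz. Zweiter Satz."))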
+
+def get_pipe_config(self, name: str) ‑> confection.Config +
+
+
+ +Expand source code + +
def get_pipe_config(self, name: str) -> Config:
+    """Get the config used to create a pipeline component.
+
+    name (str): The component name.
+    RETURNS (Config): The config used to create the pipeline component.
+    """
+    if name not in self._pipe_configs:
+        raise ValueError(Errors.E960.format(name=name))
+    pipe_config = self._pipe_configs[name]
+    return pipe_config
+
+

Get the config used to create a pipeline component.

+

name (str): The component name. +RETURNS (Config): The config used to create the pipeline component.

+
+
+def get_pipe_meta(self, name: str) ‑> spacy.language.FactoryMeta +
+
+
+ +Expand source code + +
def get_pipe_meta(self, name: str) -> "FactoryMeta":
+    """Get the meta information for a given component name.
+
+    name (str): The component name.
+    RETURNS (FactoryMeta): The meta for the given component name.
+    """
+    if name not in self._pipe_meta:
+        raise ValueError(Errors.E967.format(meta="component", name=name))
+    return self._pipe_meta[name]
+
+

Get the meta information for a given component name.

+

name (str): The component name. +RETURNS (FactoryMeta): The meta for the given component name.

+
+
+def has_pipe(self, name: str) ‑> bool +
+
+
+ +Expand source code + +
def has_pipe(self, name: str) -> bool:
+    """Check if a component name is present in the pipeline. Equivalent to
+    `name in nlp.pipe_names`.
+
+    name (str): Name of the component.
+    RETURNS (bool): Whether a component of the name exists in the pipeline.
+
+    DOCS: https://spacy.io/api/language#has_pipe
+    """
+    return name in self.pipe_names
+
+

Check if a component name is present in the pipeline. Equivalent to name in nlp.pipe_names.

+

name (str): Name of the component. +RETURNS (bool): Whether a component of the name exists in the pipeline.

+

DOCS: https://spacy.io/api/language#has_pipe

+
+
+def initialize(self,
get_examples: Callable[[], Iterable[spacy.training.example.Example]] | None = None,
*,
sgd: thinc.optimizers.Optimizer | None = None) ‑> thinc.optimizers.Optimizer
+
+
+
+ +Expand source code + +
def initialize(
+    self,
+    get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+    *,
+    sgd: Optional[Optimizer] = None,
+) -> Optimizer:
+    """Initialize the pipe for training, using data examples if available.
+
+    get_examples (Callable[[], Iterable[Example]]): Optional function that
+        returns gold-standard Example objects.
+    sgd (Optional[Optimizer]): An optimizer to use for updates. If not
+        provided, will be created using the .create_optimizer() method.
+    RETURNS (thinc.api.Optimizer): The optimizer.
+
+    DOCS: https://spacy.io/api/language#initialize
+    """
+    if get_examples is None:
+        util.logger.debug(
+            "No 'get_examples' callback provided to 'Language.initialize', creating dummy examples"
+        )
+        doc = Doc(self.vocab, words=["x", "y", "z"])
+
+        def get_examples():
+            return [Example.from_dict(doc, {})]
+
+    if not hasattr(get_examples, "__call__"):
+        err = Errors.E930.format(
+            method="Language.initialize", obj=type(get_examples)
+        )
+        raise TypeError(err)
+    # Make sure the config is interpolated so we can resolve subsections
+    config = self.config.interpolate()
+    # These are the settings provided in the [initialize] block in the config
+    I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+    before_init = I["before_init"]
+    if before_init is not None:
+        before_init(self)
+    try:
+        init_vocab(
+            self, data=I["vocab_data"], lookups=I["lookups"], vectors=I["vectors"]
+        )
+    except IOError:
+        raise IOError(Errors.E884.format(vectors=I["vectors"]))
+    if self.vocab.vectors.shape[1] >= 1:
+        ops = get_current_ops()
+        self.vocab.vectors.to_ops(ops)
+    if hasattr(self.tokenizer, "initialize"):
+        tok_settings = validate_init_settings(
+            self.tokenizer.initialize,  # type: ignore[union-attr]
+            I["tokenizer"],
+            section="tokenizer",
+            name="tokenizer",
+        )
+        self.tokenizer.initialize(get_examples, nlp=self, **tok_settings)  # type: ignore[union-attr]
+    for name, proc in self.pipeline:
+        if isinstance(proc, ty.InitializableComponent):
+            p_settings = I["components"].get(name, {})
+            p_settings = validate_init_settings(
+                proc.initialize, p_settings, section="components", name=name
+            )
+            proc.initialize(get_examples, nlp=self, **p_settings)
+    pretrain_cfg = config.get("pretraining")
+    if pretrain_cfg:
+        P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)
+        init_tok2vec(self, P, I)
+    self._link_components()
+    self._optimizer = sgd
+    if sgd is not None:
+        self._optimizer = sgd
+    elif self._optimizer is None:
+        self._optimizer = self.create_optimizer()
+    after_init = I["after_init"]
+    if after_init is not None:
+        after_init(self)
+    return self._optimizer
+
+

Initialize the pipe for training, using data examples if available.

+

get_examples (Callable[[], Iterable[Example]]): Optional function that returns gold-standard Example objects.
sgd (Optional[Optimizer]): An optimizer to use for updates. If not provided, will be created using the .create_optimizer() method.
RETURNS (thinc.api.Optimizer): The optimizer.

+

DOCS: https://spacy.io/api/language#initialize

+
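A minimal sketch (train_examples is a hypothetical list of Example objects; a callable returning them is passed in, as described above):

>>> optimizer = nlp.initialize(lambda: train_examples)
>>> # or, with no callback, dummy examples are created as described in the docstring:
>>> optimizer = nlp.initialize()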
+
+def make_doc(self, text: str) ‑> spacy.tokens.doc.Doc +
+
+
+ +Expand source code + +
def make_doc(self, text: str) -> Doc:
+    """Turn a text into a Doc object.
+
+    text (str): The text to process.
+    RETURNS (Doc): The processed doc.
+    """
+    if len(text) > self.max_length:
+        raise ValueError(
+            Errors.E088.format(length=len(text), max_length=self.max_length)
+        )
+    return self.tokenizer(text)
+
+

Turn a text into a Doc object.

+

text (str): The text to process. +RETURNS (Doc): The processed doc.

+
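For illustration (assuming nlp is a German pipeline as in the earlier examples; only the tokenizer runs, no pipeline components are applied):

>>> doc = nlp.make_doc("Das ist ein Satz.")
>>> len(doc)
5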
+
+def memory_zone(self, mem: cymem.cymem.Pool | None = None) ‑> Iterator[cymem.cymem.Pool] +
+
+
+ +Expand source code + +
@contextmanager
+def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]:
+    """Begin a block where all resources allocated during the block will
+    be freed at the end of it. If a resources was created within the
+    memory zone block, accessing it outside the block is invalid.
+    Behaviour of this invalid access is undefined. Memory zones should
+    not be nested.
+
+    The memory zone is helpful for services that need to process large
+    volumes of text with a defined memory budget.
+
+    Example
+    -------
+    >>> with nlp.memory_zone():
+    ...     for doc in nlp.pipe(texts):
+    ...        process_my_doc(doc)
+    >>> # use_doc(doc) <-- Invalid: doc was allocated in the memory zone
+    """
+    if mem is None:
+        mem = Pool()
+    # The ExitStack allows programmatic nested context managers.
+    # We don't know how many we need, so it would be awkward to have
+    # them as nested blocks.
+    with ExitStack() as stack:
+        contexts = [stack.enter_context(self.vocab.memory_zone(mem))]
+        if hasattr(self.tokenizer, "memory_zone"):
+            contexts.append(stack.enter_context(self.tokenizer.memory_zone(mem)))
+        for _, pipe in self.pipeline:
+            if hasattr(pipe, "memory_zone"):
+                contexts.append(stack.enter_context(pipe.memory_zone(mem)))
+        yield mem
+
+

Begin a block where all resources allocated during the block will be freed at the end of it. If a resource was created within the memory zone block, accessing it outside the block is invalid. Behaviour of this invalid access is undefined. Memory zones should not be nested.

+

The memory zone is helpful for services that need to process large volumes of text with a defined memory budget.

+

Example

+
>>> with nlp.memory_zone():
+...     for doc in nlp.pipe(texts):
+...        process_my_doc(doc)
+>>> # use_doc(doc) <-- Invalid: doc was allocated in the memory zone
+
+
+
+def pipe(self,
texts: Iterable[str | spacy.tokens.doc.Doc] | Iterable[Tuple[str | spacy.tokens.doc.Doc, ~_AnyContext]],
*,
as_tuples: bool = False,
batch_size: int | None = None,
disable: Iterable[str] = [],
component_cfg: Dict[str, Dict[str, Any]] | None = None,
n_process: int = 1) ‑> Iterator[spacy.tokens.doc.Doc] | Iterator[Tuple[spacy.tokens.doc.Doc, ~_AnyContext]]
+
+
+
+ +Expand source code + +
def pipe(  # noqa: F811
+    self,
+    texts: Union[
+        Iterable[Union[str, Doc]], Iterable[Tuple[Union[str, Doc], _AnyContext]]
+    ],
+    *,
+    as_tuples: bool = False,
+    batch_size: Optional[int] = None,
+    disable: Iterable[str] = SimpleFrozenList(),
+    component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+    n_process: int = 1,
+) -> Union[Iterator[Doc], Iterator[Tuple[Doc, _AnyContext]]]:
+    """Process texts as a stream, and yield `Doc` objects in order.
+
+    texts (Iterable[Union[str, Doc]]): A sequence of texts or docs to
+        process.
+    as_tuples (bool): If set to True, inputs should be a sequence of
+        (text, context) tuples. Output will then be a sequence of
+        (doc, context) tuples. Defaults to False.
+    batch_size (Optional[int]): The number of texts to buffer.
+    disable (List[str]): Names of the pipeline components to disable.
+    component_cfg (Dict[str, Dict]): An optional dictionary with extra keyword
+        arguments for specific components.
+    n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
+    YIELDS (Doc): Documents in the order of the original text.
+
+    DOCS: https://spacy.io/api/language#pipe
+    """
+    if as_tuples:
+        texts = cast(Iterable[Tuple[Union[str, Doc], _AnyContext]], texts)
+        docs_with_contexts = (
+            self._ensure_doc_with_context(text, context) for text, context in texts
+        )
+        docs = self.pipe(
+            docs_with_contexts,
+            batch_size=batch_size,
+            disable=disable,
+            n_process=n_process,
+            component_cfg=component_cfg,
+        )
+        for doc in docs:
+            context = doc._context
+            doc._context = None
+            yield (doc, context)
+        return
+
+    texts = cast(Iterable[Union[str, Doc]], texts)
+
+    # Set argument defaults
+    if n_process == -1:
+        n_process = mp.cpu_count()
+    if component_cfg is None:
+        component_cfg = {}
+    if batch_size is None:
+        batch_size = self.batch_size
+
+    pipes = (
+        []
+    )  # contains functools.partial objects to easily create multiprocess worker.
+    for name, proc in self.pipeline:
+        if name in disable:
+            continue
+        kwargs = component_cfg.get(name, {})
+        # Allow component_cfg to overwrite the top-level kwargs.
+        kwargs.setdefault("batch_size", batch_size)
+        f = functools.partial(
+            _pipe,
+            proc=proc,
+            name=name,
+            kwargs=kwargs,
+            default_error_handler=self.default_error_handler,
+        )
+        pipes.append(f)
+
+    if n_process != 1:
+        if self._has_gpu_model(disable):
+            warnings.warn(Warnings.W114)
+
+        docs = self._multiprocessing_pipe(texts, pipes, n_process, batch_size)
+    else:
+        # if n_process == 1, no processes are forked.
+        docs = (self._ensure_doc(text) for text in texts)
+        for pipe in pipes:
+            docs = pipe(docs)
+    for doc in docs:
+        yield doc
+
+

Process texts as a stream, and yield Doc objects in order.

+

texts (Iterable[Union[str, Doc]]): A sequence of texts or docs to process.
as_tuples (bool): If set to True, inputs should be a sequence of (text, context) tuples. Output will then be a sequence of (doc, context) tuples. Defaults to False.
batch_size (Optional[int]): The number of texts to buffer.
disable (List[str]): Names of the pipeline components to disable.
component_cfg (Dict[str, Dict]): An optional dictionary with extra keyword arguments for specific components.
n_process (int): Number of processes to use for processing texts. If -1, multiprocessing.cpu_count() is used.
YIELDS (Doc): Documents in the order of the original text.

+

DOCS: https://spacy.io/api/language#pipe

+
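A streaming sketch (the texts, batch size and context values are illustrative):

>>> texts = ["Erster Text.", "Zweiter Text."]
>>> for doc in nlp.pipe(texts, batch_size=50):
...     pass  # process each doc as it is yielded
>>> for doc, meta in nlp.pipe([("Ein Text.", {"id": 1})], as_tuples=True):
...     print(meta["id"])
1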
+
+def rehearse(self,
examples: Iterable[spacy.training.example.Example],
*,
sgd: thinc.optimizers.Optimizer | None = None,
losses: Dict[str, float] | None = None,
component_cfg: Dict[str, Dict[str, Any]] | None = None,
exclude: Iterable[str] = []) ‑> Dict[str, float]
+
+
+
+ +Expand source code + +
def rehearse(
+    self,
+    examples: Iterable[Example],
+    *,
+    sgd: Optional[Optimizer] = None,
+    losses: Optional[Dict[str, float]] = None,
+    component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+    exclude: Iterable[str] = SimpleFrozenList(),
+) -> Dict[str, float]:
+    """Make a "rehearsal" update to the models in the pipeline, to prevent
+    forgetting. Rehearsal updates run an initial copy of the model over some
+    data, and update the model so its current predictions are more like the
+    initial ones. This is useful for keeping a pretrained model on-track,
+    even if you're updating it with a smaller set of examples.
+
+    examples (Iterable[Example]): A batch of `Example` objects.
+    sgd (Optional[Optimizer]): An optimizer.
+    component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
+        components, keyed by component name.
+    exclude (Iterable[str]): Names of components that shouldn't be updated.
+    RETURNS (dict): Results from the update.
+
+    EXAMPLE:
+        >>> raw_text_batches = minibatch(raw_texts)
+        >>> for labelled_batch in minibatch(examples):
+        >>>     nlp.update(labelled_batch)
+        >>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
+        >>>     nlp.rehearse(raw_batch)
+
+    DOCS: https://spacy.io/api/language#rehearse
+    """
+    if losses is None:
+        losses = {}
+    if isinstance(examples, list) and len(examples) == 0:
+        return losses
+    validate_examples(examples, "Language.rehearse")
+    if sgd is None:
+        if self._optimizer is None:
+            self._optimizer = self.create_optimizer()
+        sgd = self._optimizer
+    pipes = list(self.pipeline)
+    random.shuffle(pipes)
+    if component_cfg is None:
+        component_cfg = {}
+    grads = {}
+
+    def get_grads(key, W, dW):
+        grads[key] = (W, dW)
+        return W, dW
+
+    get_grads.learn_rate = sgd.learn_rate  # type: ignore[attr-defined, union-attr]
+    get_grads.b1 = sgd.b1  # type: ignore[attr-defined, union-attr]
+    get_grads.b2 = sgd.b2  # type: ignore[attr-defined, union-attr]
+    for name, proc in pipes:
+        if name in exclude or not hasattr(proc, "rehearse"):
+            continue
+        grads = {}
+        proc.rehearse(  # type: ignore[attr-defined]
+            examples, sgd=get_grads, losses=losses, **component_cfg.get(name, {})
+        )
+    for key, (W, dW) in grads.items():
+        sgd(key, W, dW)  # type: ignore[call-arg, misc]
+    return losses
+
+

Make a "rehearsal" update to the models in the pipeline, to prevent forgetting. Rehearsal updates run an initial copy of the model over some data, and update the model so its current predictions are more like the initial ones. This is useful for keeping a pretrained model on-track, even if you're updating it with a smaller set of examples.

+

examples (Iterable[Example]): A batch of Example objects. +sgd (Optional[Optimizer]): An optimizer. +component_cfg (Dict[str, Dict]): Config parameters for specific pipeline +components, keyed by component name. +exclude (Iterable[str]): Names of components that shouldn't be updated. +RETURNS (dict): Results from the update.

+

Example

+
>>> raw_text_batches = minibatch(raw_texts)
+>>> for labelled_batch in minibatch(examples):
+>>>     nlp.update(labelled_batch)
+>>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
+>>>     nlp.rehearse(raw_batch)
+
+

DOCS: https://spacy.io/api/language#rehearse

+
+
+def remove_pipe(self, name: str) ‑> Tuple[str, Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc]] +
+
+
+ +Expand source code + +
def remove_pipe(self, name: str) -> Tuple[str, PipeCallable]:
+    """Remove a component from the pipeline.
+
+    name (str): Name of the component to remove.
+    RETURNS (Tuple[str, Callable[[Doc], Doc]]): A `(name, component)` tuple of the removed component.
+
+    DOCS: https://spacy.io/api/language#remove_pipe
+    """
+    if name not in self.component_names:
+        raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
+    removed = self._components.pop(self.component_names.index(name))
+    # We're only removing the component itself from the metas/configs here
+    # because factory may be used for something else
+    self._pipe_meta.pop(name)
+    self._pipe_configs.pop(name)
+    self.meta.get("_sourced_vectors_hashes", {}).pop(name, None)
+    # Make sure name is removed from the [initialize] config
+    if name in self._config["initialize"]["components"]:
+        self._config["initialize"]["components"].pop(name)
+    # Make sure the name is also removed from the set of disabled components
+    if name in self.disabled:
+        self._disabled.remove(name)
+    self._link_components()
+    return removed
+
+

Remove a component from the pipeline.

+

name (str): Name of the component to remove.
RETURNS (Tuple[str, Callable[[Doc], Doc]]): A (name, component) tuple of the removed component.

+

DOCS: https://spacy.io/api/language#remove_pipe
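A minimal usage sketch; the blank English pipeline and the "ner" component are assumed here purely for illustration:

>>> import spacy
>>> nlp = spacy.blank("en")
>>> nlp.add_pipe("ner")
>>> name, component = nlp.remove_pipe("ner")   # (name, component) tuple of the removed pipe
>>> assert "ner" not in nlp.component_names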

+
+
+def rename_pipe(self, old_name: str, new_name: str) ‑> None +
+
+
+ +Expand source code + +
def rename_pipe(self, old_name: str, new_name: str) -> None:
+    """Rename a pipeline component.
+
+    old_name (str): Name of the component to rename.
+    new_name (str): New name of the component.
+
+    DOCS: https://spacy.io/api/language#rename_pipe
+    """
+    if old_name not in self.component_names:
+        raise ValueError(
+            Errors.E001.format(name=old_name, opts=self.component_names)
+        )
+    if new_name in self.component_names:
+        raise ValueError(
+            Errors.E007.format(name=new_name, opts=self.component_names)
+        )
+    i = self.component_names.index(old_name)
+    self._components[i] = (new_name, self._components[i][1])
+    self._pipe_meta[new_name] = self._pipe_meta.pop(old_name)
+    self._pipe_configs[new_name] = self._pipe_configs.pop(old_name)
+    # Make sure [initialize] config is adjusted
+    if old_name in self._config["initialize"]["components"]:
+        init_cfg = self._config["initialize"]["components"].pop(old_name)
+        self._config["initialize"]["components"][new_name] = init_cfg
+    self._link_components()
+
+

Rename a pipeline component.

+

old_name (str): Name of the component to rename.
new_name (str): New name of the component.

+

DOCS: https://spacy.io/api/language#rename_pipe
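A short sketch, again with an assumed blank pipeline and component name:

>>> import spacy
>>> nlp = spacy.blank("en")
>>> nlp.add_pipe("ner")
>>> nlp.rename_pipe("ner", "my_ner")
>>> assert nlp.pipe_names == ["my_ner"]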

+
+
+def replace_listeners(self, tok2vec_name: str, pipe_name: str, listeners: Iterable[str]) ‑> None +
+
+
+ +Expand source code + +
def replace_listeners(
+    self,
+    tok2vec_name: str,
+    pipe_name: str,
+    listeners: Iterable[str],
+) -> None:
+    """Find listener layers (connecting to a token-to-vector embedding
+    component) of a given pipeline component model and replace
+    them with a standalone copy of the token-to-vector layer. This can be
+    useful when training a pipeline with components sourced from an existing
+    pipeline: if multiple components (e.g. tagger, parser, NER) listen to
+    the same tok2vec component, but some of them are frozen and not updated,
+    their performance may degrade significantly as the tok2vec component is
+    updated with new data. To prevent this, listeners can be replaced with
+    a standalone tok2vec layer that is owned by the component and doesn't
+    change if the component isn't updated.
+
+    tok2vec_name (str): Name of the token-to-vector component, typically
+        "tok2vec" or "transformer".
+    pipe_name (str): Name of pipeline component to replace listeners for.
+    listeners (Iterable[str]): The paths to the listeners, relative to the
+        component config, e.g. ["model.tok2vec"]. Typically, implementations
+        will only connect to one tok2vec component, [model.tok2vec], but in
+        theory, custom models can use multiple listeners. The value here can
+        either be an empty list to not replace any listeners, or a complete
+        (!) list of the paths to all listener layers used by the model.
+
+    DOCS: https://spacy.io/api/language#replace_listeners
+    """
+    if tok2vec_name not in self.pipe_names:
+        err = Errors.E889.format(
+            tok2vec=tok2vec_name,
+            name=pipe_name,
+            unknown=tok2vec_name,
+            opts=", ".join(self.pipe_names),
+        )
+        raise ValueError(err)
+    if pipe_name not in self.pipe_names:
+        err = Errors.E889.format(
+            tok2vec=tok2vec_name,
+            name=pipe_name,
+            unknown=pipe_name,
+            opts=", ".join(self.pipe_names),
+        )
+        raise ValueError(err)
+    tok2vec = self.get_pipe(tok2vec_name)
+    tok2vec_cfg = self.get_pipe_config(tok2vec_name)
+    if not isinstance(tok2vec, ty.ListenedToComponent):
+        raise ValueError(Errors.E888.format(name=tok2vec_name, pipe=type(tok2vec)))
+    tok2vec_model = tok2vec.model
+    pipe_listeners = tok2vec.listener_map.get(pipe_name, [])
+    pipe = self.get_pipe(pipe_name)
+    pipe_cfg = self._pipe_configs[pipe_name]
+    if listeners:
+        util.logger.debug("Replacing listeners of component '%s'", pipe_name)
+        if len(list(listeners)) != len(pipe_listeners):
+            # The number of listeners defined in the component model doesn't
+            # match the listeners to replace, so we won't be able to update
+            # the nodes and generate a matching config
+            err = Errors.E887.format(
+                name=pipe_name,
+                tok2vec=tok2vec_name,
+                paths=listeners,
+                n_listeners=len(pipe_listeners),
+            )
+            raise ValueError(err)
+        # Update the config accordingly by copying the tok2vec model to all
+        # sections defined in the listener paths
+        for listener_path in listeners:
+            # Check if the path actually exists in the config
+            try:
+                util.dot_to_object(pipe_cfg, listener_path)
+            except KeyError:
+                err = Errors.E886.format(
+                    name=pipe_name, tok2vec=tok2vec_name, path=listener_path
+                )
+                raise ValueError(err)
+            new_config = tok2vec_cfg["model"]
+            if "replace_listener_cfg" in tok2vec_model.attrs:
+                replace_func = tok2vec_model.attrs["replace_listener_cfg"]
+                new_config = replace_func(
+                    tok2vec_cfg["model"], pipe_cfg["model"]["tok2vec"]
+                )
+            util.set_dot_to_object(pipe_cfg, listener_path, new_config)
+        # Go over the listener layers and replace them
+        for listener in pipe_listeners:
+            new_model = tok2vec_model.copy()
+            replace_listener_func = tok2vec_model.attrs.get("replace_listener")
+            if replace_listener_func is not None:
+                # Pass the extra args to the callback without breaking compatibility with
+                # old library versions that only expect a single parameter.
+                num_params = len(
+                    inspect.signature(replace_listener_func).parameters
+                )
+                if num_params == 1:
+                    new_model = replace_listener_func(new_model)
+                elif num_params == 3:
+                    new_model = replace_listener_func(new_model, listener, tok2vec)
+                else:
+                    raise ValueError(Errors.E1055.format(num_params=num_params))
+
+            util.replace_model_node(pipe.model, listener, new_model)  # type: ignore[attr-defined]
+            tok2vec.remove_listener(listener, pipe_name)
+
+

Find listener layers (connecting to a token-to-vector embedding component) of a given pipeline component model and replace them with a standalone copy of the token-to-vector layer. This can be useful when training a pipeline with components sourced from an existing pipeline: if multiple components (e.g. tagger, parser, NER) listen to the same tok2vec component, but some of them are frozen and not updated, their performance may degrade significantly as the tok2vec component is updated with new data. To prevent this, listeners can be replaced with a standalone tok2vec layer that is owned by the component and doesn't change if the component isn't updated.

+

tok2vec_name (str): Name of the token-to-vector component, typically "tok2vec" or "transformer".
pipe_name (str): Name of pipeline component to replace listeners for.
listeners (Iterable[str]): The paths to the listeners, relative to the component config, e.g. ["model.tok2vec"]. Typically, implementations will only connect to one tok2vec component, [model.tok2vec], but in theory, custom models can use multiple listeners. The value here can either be an empty list to not replace any listeners, or a complete (!) list of the paths to all listener layers used by the model.

+

DOCS: https://spacy.io/api/language#replace_listeners
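A sketch of the typical call, assuming an installed pretrained package (here "en_core_web_sm") in which the tagger listens to a shared "tok2vec" component via the "model.tok2vec" path:

>>> import spacy
>>> nlp = spacy.load("en_core_web_sm")
>>> nlp.replace_listeners("tok2vec", "tagger", ["model.tok2vec"])
>>> # the tagger now owns a standalone copy of the tok2vec layer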

+
+
+def replace_pipe(self,
name: str,
factory_name: str,
*,
config: Dict[str, Any] = {},
validate: bool = True) ‑> Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc]
+
+
+
+ +Expand source code + +
def replace_pipe(
+    self,
+    name: str,
+    factory_name: str,
+    *,
+    config: Dict[str, Any] = SimpleFrozenDict(),
+    validate: bool = True,
+) -> PipeCallable:
+    """Replace a component in the pipeline.
+
+    name (str): Name of the component to replace.
+    factory_name (str): Factory name of replacement component.
+    config (Optional[Dict[str, Any]]): Config parameters to use for this
+        component. Will be merged with default config, if available.
+    validate (bool): Whether to validate the component config against the
+        arguments and types expected by the factory.
+    RETURNS (Callable[[Doc], Doc]): The new pipeline component.
+
+    DOCS: https://spacy.io/api/language#replace_pipe
+    """
+    if name not in self.component_names:
+        raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
+    if hasattr(factory_name, "__call__"):
+        err = Errors.E968.format(component=repr(factory_name), name=name)
+        raise ValueError(err)
+    # We need to delegate to Language.add_pipe here instead of just writing
+    # to Language.pipeline to make sure the configs are handled correctly
+    pipe_index = self.component_names.index(name)
+    self.remove_pipe(name)
+    if not len(self._components) or pipe_index == len(self._components):
+        # we have no components to insert before/after, or we're replacing the last component
+        return self.add_pipe(
+            factory_name, name=name, config=config, validate=validate
+        )
+    else:
+        return self.add_pipe(
+            factory_name,
+            name=name,
+            before=pipe_index,
+            config=config,
+            validate=validate,
+        )
+
+

Replace a component in the pipeline.

+

name (str): Name of the component to replace.
factory_name (str): Factory name of replacement component.
config (Optional[Dict[str, Any]]): Config parameters to use for this component. Will be merged with default config, if available.
validate (bool): Whether to validate the component config against the arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The new pipeline component.

+

DOCS: https://spacy.io/api/language#replace_pipe
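A minimal sketch; the blank pipeline and the built-in "sentencizer" factory are assumed for illustration, and the component is swapped in place with a new config:

>>> import spacy
>>> nlp = spacy.blank("en")
>>> nlp.add_pipe("sentencizer")
>>> new_sentencizer = nlp.replace_pipe("sentencizer", "sentencizer", config={"punct_chars": ["."]})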

+
+
+def resume_training(self, *, sgd: thinc.optimizers.Optimizer | None = None) ‑> thinc.optimizers.Optimizer +
+
+
+ +Expand source code + +
def resume_training(self, *, sgd: Optional[Optimizer] = None) -> Optimizer:
+    """Continue training a pretrained model.
+
+    Create and return an optimizer, and initialize "rehearsal" for any pipeline
+    component that has a .rehearse() method. Rehearsal is used to prevent
+    models from "forgetting" their initialized "knowledge". To perform
+    rehearsal, collect samples of text you want the models to retain performance
+    on, and call nlp.rehearse() with a batch of Example objects.
+
+    RETURNS (Optimizer): The optimizer.
+
+    DOCS: https://spacy.io/api/language#resume_training
+    """
+    ops = get_current_ops()
+    if self.vocab.vectors.shape[1] >= 1:
+        self.vocab.vectors.to_ops(ops)
+    for name, proc in self.pipeline:
+        if hasattr(proc, "_rehearsal_model"):
+            proc._rehearsal_model = deepcopy(proc.model)  # type: ignore[attr-defined]
+    if sgd is not None:
+        self._optimizer = sgd
+    elif self._optimizer is None:
+        self._optimizer = self.create_optimizer()
+    return self._optimizer
+
+

Continue training a pretrained model.

+

Create and return an optimizer, and initialize "rehearsal" for any pipeline component that has a .rehearse() method. Rehearsal is used to prevent models from "forgetting" their initialized "knowledge". To perform rehearsal, collect samples of text you want the models to retain performance on, and call nlp.rehearse() with a batch of Example objects.

+

RETURNS (Optimizer): The optimizer.

+

DOCS: https://spacy.io/api/language#resume_training
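A sketch of how this pairs with rehearse(), assuming an installed pretrained pipeline:

>>> import spacy
>>> nlp = spacy.load("en_core_web_sm")
>>> optimizer = nlp.resume_training()
>>> # later, inside the training loop:
>>> # nlp.rehearse(raw_batch, sgd=optimizer, losses=losses)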

+
+
+def select_pipes(self,
*,
disable: str | Iterable[str] | None = None,
enable: str | Iterable[str] | None = None) ‑> spacy.language.DisabledPipes
+
+
+
+ +Expand source code + +
def select_pipes(
+    self,
+    *,
+    disable: Optional[Union[str, Iterable[str]]] = None,
+    enable: Optional[Union[str, Iterable[str]]] = None,
+) -> "DisabledPipes":
+    """Disable one or more pipeline components. If used as a context
+    manager, the pipeline will be restored to the initial state at the end
+    of the block. Otherwise, a DisabledPipes object is returned, that has
+    a `.restore()` method you can use to undo your changes.
+
+    disable (str or iterable): The name(s) of the pipes to disable
+    enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
+
+    DOCS: https://spacy.io/api/language#select_pipes
+    """
+    if enable is None and disable is None:
+        raise ValueError(Errors.E991)
+    if isinstance(disable, str):
+        disable = [disable]
+    if enable is not None:
+        if isinstance(enable, str):
+            enable = [enable]
+        to_disable = [pipe for pipe in self.pipe_names if pipe not in enable]
+        # raise an error if the enable and disable keywords are not consistent
+        if disable is not None and disable != to_disable:
+            raise ValueError(
+                Errors.E992.format(
+                    enable=enable, disable=disable, names=self.pipe_names
+                )
+            )
+        disable = to_disable
+    assert disable is not None
+    # DisabledPipes will restore the pipes in 'disable' when it's done, so we need to exclude
+    # those pipes that were already disabled.
+    disable = [d for d in disable if d not in self._disabled]
+    return DisabledPipes(self, disable)
+
+

Disable one or more pipeline components. If used as a context manager, the pipeline will be restored to the initial state at the end of the block. Otherwise, a DisabledPipes object is returned, that has a .restore() method you can use to undo your changes.

+

disable (str or iterable): The name(s) of the pipes to disable
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled

+

DOCS: https://spacy.io/api/language#select_pipes
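A context-manager sketch; the pipeline and component names are assumed for illustration:

>>> import spacy
>>> nlp = spacy.load("en_core_web_sm")
>>> with nlp.select_pipes(disable=["parser", "ner"]):
>>>     doc = nlp("Disabled components are skipped inside this block.")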

+
+
+def set_error_handler(self,
error_handler: Callable[[str, Callable[[spacy.tokens.doc.Doc], spacy.tokens.doc.Doc], List[spacy.tokens.doc.Doc], Exception], NoReturn])
+
+
+
+ +Expand source code + +
def set_error_handler(
+    self,
+    error_handler: Callable[[str, PipeCallable, List[Doc], Exception], NoReturn],
+):
+    """Set an error handler object for all the components in the pipeline
+    that implement a set_error_handler function.
+
+    error_handler (Callable[[str, Callable[[Doc], Doc], List[Doc], Exception], NoReturn]):
+        Function that deals with a failing batch of documents. This callable
+        function should take in the component's name, the component itself,
+        the offending batch of documents, and the exception that was thrown.
+    DOCS: https://spacy.io/api/language#set_error_handler
+    """
+    self.default_error_handler = error_handler
+    for name, pipe in self.pipeline:
+        if hasattr(pipe, "set_error_handler"):
+            pipe.set_error_handler(error_handler)
+
+

Set an error handler object for all the components in the pipeline +that implement a set_error_handler function.

+

error_handler (Callable[[str, Callable[[Doc], Doc], List[Doc], Exception], NoReturn]): Function that deals with a failing batch of documents. This callable function should take in the component's name, the component itself, the offending batch of documents, and the exception that was thrown.
DOCS: https://spacy.io/api/language#set_error_handler
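A minimal sketch of a custom handler; the warn-and-continue function below is an illustrative assumption, not a built-in:

>>> import spacy, warnings
>>> nlp = spacy.blank("en")
>>> def warn_and_continue(name, component, docs, exc):
>>>     warnings.warn(f"component '{name}' failed on {len(docs)} doc(s): {exc}")
>>> nlp.set_error_handler(warn_and_continue)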

+
+
+def to_bytes(self, *, exclude: Iterable[str] = []) ‑> bytes +
+
+
+ +Expand source code + +
def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
+    """Serialize the current state to a binary string.
+
+    exclude (Iterable[str]): Names of components or serialization fields to exclude.
+    RETURNS (bytes): The serialized form of the `Language` object.
+
+    DOCS: https://spacy.io/api/language#to_bytes
+    """
+    serializers: Dict[str, Callable[[], bytes]] = {}
+    serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
+    serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])  # type: ignore[union-attr]
+    serializers["meta.json"] = lambda: srsly.json_dumps(
+        _replace_numpy_floats(self.meta)
+    )
+    serializers["config.cfg"] = lambda: self.config.to_bytes()
+    for name, proc in self._components:
+        if name in exclude:
+            continue
+        if not hasattr(proc, "to_bytes"):
+            continue
+        serializers[name] = lambda proc=proc: proc.to_bytes(exclude=["vocab"])  # type: ignore[misc]
+    return util.to_bytes(serializers, exclude)
+
+

Serialize the current state to a binary string.

+

exclude (Iterable[str]): Names of components or serialization fields to exclude.
RETURNS (bytes): The serialized form of the Language object.

+

DOCS: https://spacy.io/api/language#to_bytes
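A round-trip sketch with a blank pipeline (from_bytes is the documented counterpart):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> data = nlp.to_bytes()
>>> nlp2 = spacy.blank("en").from_bytes(data)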

+
+
+def to_disk(self, path: str | pathlib.Path, *, exclude: Iterable[str] = []) ‑> None +
+
+
+ +Expand source code + +
def to_disk(
+    self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
+) -> None:
+    """Save the current state to a directory.  If a model is loaded, this
+    will include the model.
+
+    path (str / Path): Path to a directory, which will be created if
+        it doesn't exist.
+    exclude (Iterable[str]): Names of components or serialization fields to exclude.
+
+    DOCS: https://spacy.io/api/language#to_disk
+    """
+    path = util.ensure_path(path)
+    serializers = {}
+    serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(  # type: ignore[union-attr]
+        p, exclude=["vocab"]
+    )
+    serializers["meta.json"] = lambda p: srsly.write_json(
+        p, _replace_numpy_floats(self.meta)
+    )
+    serializers["config.cfg"] = lambda p: self.config.to_disk(p)
+    for name, proc in self._components:
+        if name in exclude:
+            continue
+        if not hasattr(proc, "to_disk"):
+            continue
+        serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"])  # type: ignore[misc]
+    serializers["vocab"] = lambda p: self.vocab.to_disk(p, exclude=exclude)
+    util.to_disk(path, serializers, exclude)
+
+

Save the current state to a directory. If a model is loaded, this will include the model.

+

path (str / Path): Path to a directory, which will be created if it doesn't exist.
exclude (Iterable[str]): Names of components or serialization fields to exclude.

+

DOCS: https://spacy.io/api/language#to_disk
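A round-trip sketch; the target directory is an illustrative assumption:

>>> import spacy
>>> nlp = spacy.blank("en")
>>> nlp.to_disk("/tmp/my_pipeline")   # created if it doesn't exist
>>> nlp2 = spacy.load("/tmp/my_pipeline")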

+
+
+def update(self,
examples: Iterable[spacy.training.example.Example],
*,
drop: float = 0.0,
sgd: thinc.optimizers.Optimizer | None = None,
losses: Dict[str, float] | None = None,
component_cfg: Dict[str, Dict[str, Any]] | None = None,
exclude: Iterable[str] = [],
annotates: Iterable[str] = [])
+
+
+
+ +Expand source code + +
def update(
+    self,
+    examples: Iterable[Example],
+    _: Optional[Any] = None,
+    *,
+    drop: float = 0.0,
+    sgd: Optional[Optimizer] = None,
+    losses: Optional[Dict[str, float]] = None,
+    component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+    exclude: Iterable[str] = SimpleFrozenList(),
+    annotates: Iterable[str] = SimpleFrozenList(),
+):
+    """Update the models in the pipeline.
+
+    examples (Iterable[Example]): A batch of examples
+    _: Should not be set - serves to catch backwards-incompatible scripts.
+    drop (float): The dropout rate.
+    sgd (Optimizer): An optimizer.
+    losses (Dict[str, float]): Dictionary to update with the loss, keyed by
+        component.
+    component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
+        components, keyed by component name.
+    exclude (Iterable[str]): Names of components that shouldn't be updated.
+    annotates (Iterable[str]): Names of components that should set
+        annotations on the predicted examples after updating.
+    RETURNS (Dict[str, float]): The updated losses dictionary
+
+    DOCS: https://spacy.io/api/language#update
+    """
+    if _ is not None:
+        raise ValueError(Errors.E989)
+    if losses is None:
+        losses = {}
+    if isinstance(examples, list) and len(examples) == 0:
+        return losses
+    validate_examples(examples, "Language.update")
+    examples = _copy_examples(examples)
+    if sgd is None:
+        if self._optimizer is None:
+            self._optimizer = self.create_optimizer()
+        sgd = self._optimizer
+    if component_cfg is None:
+        component_cfg = {}
+    pipe_kwargs = {}
+    for i, (name, proc) in enumerate(self.pipeline):
+        component_cfg.setdefault(name, {})
+        pipe_kwargs[name] = deepcopy(component_cfg[name])
+        component_cfg[name].setdefault("drop", drop)
+        pipe_kwargs[name].setdefault("batch_size", self.batch_size)
+    for name, proc in self.pipeline:
+        # ignore statements are used here because mypy ignores hasattr
+        if name not in exclude and hasattr(proc, "update"):
+            proc.update(examples, sgd=None, losses=losses, **component_cfg[name])  # type: ignore
+        if sgd not in (None, False):
+            if (
+                name not in exclude
+                and isinstance(proc, ty.TrainableComponent)
+                and proc.is_trainable
+                and proc.model not in (True, False, None)
+            ):
+                proc.finish_update(sgd)
+        if name in annotates:
+            for doc, eg in zip(
+                _pipe(
+                    (eg.predicted for eg in examples),
+                    proc=proc,
+                    name=name,
+                    default_error_handler=self.default_error_handler,
+                    kwargs=pipe_kwargs[name],
+                ),
+                examples,
+            ):
+                eg.predicted = doc
+    return _replace_numpy_floats(losses)
+
+

Update the models in the pipeline.

+

examples (Iterable[Example]): A batch of examples
_: Should not be set - serves to catch backwards-incompatible scripts.
drop (float): The dropout rate.
sgd (Optimizer): An optimizer.
losses (Dict[str, float]): Dictionary to update with the loss, keyed by component.
component_cfg (Dict[str, Dict]): Config parameters for specific pipeline components, keyed by component name.
exclude (Iterable[str]): Names of components that shouldn't be updated.
annotates (Iterable[str]): Names of components that should set annotations on the predicted examples after updating.
RETURNS (Dict[str, float]): The updated losses dictionary

+

DOCS: https://spacy.io/api/language#update
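A minimal training-step sketch; the blank pipeline, the single NER example, and the "CITY" label are illustrative assumptions:

>>> import spacy
>>> from spacy.training import Example
>>> nlp = spacy.blank("en")
>>> nlp.add_pipe("ner")
>>> example = Example.from_dict(nlp.make_doc("I live in Berlin"), {"entities": [(10, 16, "CITY")]})
>>> optimizer = nlp.initialize(lambda: [example])
>>> losses = nlp.update([example], sgd=optimizer)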

+
+
+def use_params(self, params: dict | None) +
+
+
+ +Expand source code + +
@contextmanager
+def use_params(self, params: Optional[dict]):
+    """Replace weights of models in the pipeline with those provided in the
+    params dictionary. Can be used as a contextmanager, in which case,
+    models go back to their original weights after the block.
+
+    params (dict): A dictionary of parameters keyed by model ID.
+
+    EXAMPLE:
+        >>> with nlp.use_params(optimizer.averages):
+        >>>     nlp.to_disk("/tmp/checkpoint")
+
+    DOCS: https://spacy.io/api/language#use_params
+    """
+    if not params:
+        yield
+    else:
+        contexts = [
+            pipe.use_params(params)  # type: ignore[attr-defined]
+            for name, pipe in self.pipeline
+            if hasattr(pipe, "use_params") and hasattr(pipe, "model")
+        ]
+        # TODO: Having trouble with contextlib
+        # Workaround: these aren't actually context managers atm.
+        for context in contexts:
+            try:
+                next(context)
+            except StopIteration:
+                pass
+        yield
+        for context in contexts:
+            try:
+                next(context)
+            except StopIteration:
+                pass
+
+

Replace weights of models in the pipeline with those provided in the params dictionary. Can be used as a contextmanager, in which case, models go back to their original weights after the block.

+

params (dict): A dictionary of parameters keyed by model ID.

+

Example

+
>>> with nlp.use_params(optimizer.averages):
+>>>     nlp.to_disk("/tmp/checkpoint")
+
+

DOCS: https://spacy.io/api/language#use_params

+
+
+
+
+class SentenceTransformer +(model_name_or_path: str | None = None,
modules: Iterable[nn.Module] | None = None,
device: str | None = None,
prompts: dict[str, str] | None = None,
default_prompt_name: str | None = None,
similarity_fn_name: str | SimilarityFunction | None = None,
cache_folder: str | None = None,
trust_remote_code: bool = False,
revision: str | None = None,
local_files_only: bool = False,
token: bool | str | None = None,
use_auth_token: bool | str | None = None,
truncate_dim: int | None = None,
model_kwargs: dict[str, Any] | None = None,
tokenizer_kwargs: dict[str, Any] | None = None,
config_kwargs: dict[str, Any] | None = None,
model_card_data: SentenceTransformerModelCardData | None = None,
backend: "Literal['torch', 'onnx', 'openvino']" = 'torch')
+
+
+
+ +Expand source code + +
class SentenceTransformer(nn.Sequential, FitMixin, PeftAdapterMixin):
+    """
+    Loads or creates a SentenceTransformer model that can be used to map sentences / text to embeddings.
+
+    Args:
+        model_name_or_path (str, optional): If it is a filepath on disc, it loads the model from that path. If it is not a path,
+            it first tries to download a pre-trained SentenceTransformer model. If that fails, tries to construct a model
+            from the Hugging Face Hub with that name.
+        modules (Iterable[nn.Module], optional): A list of torch Modules that should be called sequentially, can be used to create custom
+            SentenceTransformer models from scratch.
+        device (str, optional): Device (like "cuda", "cpu", "mps", "npu") that should be used for computation. If None, checks if a GPU
+            can be used.
+        prompts (Dict[str, str], optional): A dictionary with prompts for the model. The key is the prompt name, the value is the prompt text.
+            The prompt text will be prepended before any text to encode. For example:
+            `{"query": "query: ", "passage": "passage: "}` or `{"clustering": "Identify the main category based on the
+            titles in "}`.
+        default_prompt_name (str, optional): The name of the prompt that should be used by default. If not set,
+            no prompt will be applied.
+        similarity_fn_name (str or SimilarityFunction, optional): The name of the similarity function to use. Valid options are "cosine", "dot",
+            "euclidean", and "manhattan". If not set, it is automatically set to "cosine" if `similarity` or
+            `similarity_pairwise` are called while `model.similarity_fn_name` is still `None`.
+        cache_folder (str, optional): Path to store models. Can also be set by the SENTENCE_TRANSFORMERS_HOME environment variable.
+        trust_remote_code (bool, optional): Whether or not to allow for custom models defined on the Hub in their own modeling files.
+            This option should only be set to True for repositories you trust and in which you have read the code, as it
+            will execute code present on the Hub on your local machine.
+        revision (str, optional): The specific model version to use. It can be a branch name, a tag name, or a commit id,
+            for a stored model on Hugging Face.
+        local_files_only (bool, optional): Whether or not to only look at local files (i.e., do not try to download the model).
+        token (bool or str, optional): Hugging Face authentication token to download private models.
+        use_auth_token (bool or str, optional): Deprecated argument. Please use `token` instead.
+        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` does no truncation. Truncation is
+            only applicable during inference when :meth:`SentenceTransformer.encode` is called.
+        model_kwargs (Dict[str, Any], optional): Additional model configuration parameters to be passed to the Hugging Face Transformers model.
+            Particularly useful options are:
+
+            - ``torch_dtype``: Override the default `torch.dtype` and load the model under a specific `dtype`.
+              The different options are:
+
+                    1. ``torch.float16``, ``torch.bfloat16`` or ``torch.float``: load in a specified
+                    ``dtype``, ignoring the model's ``config.torch_dtype`` if one exists. If not specified - the model will
+                    get loaded in ``torch.float`` (fp32).
+
+                    2. ``"auto"`` - A ``torch_dtype`` entry in the ``config.json`` file of the model will be
+                    attempted to be used. If this entry isn't found then next check the ``dtype`` of the first weight in
+                    the checkpoint that's of a floating point type and use that as ``dtype``. This will load the model
+                    using the ``dtype`` it was saved in at the end of the training. It can't be used as an indicator of how
+                    the model was trained. Since it could be trained in one of half precision dtypes, but saved in fp32.
+            - ``attn_implementation``: The attention implementation to use in the model (if relevant). Can be any of
+              `"eager"` (manual implementation of the attention), `"sdpa"` (using `F.scaled_dot_product_attention
+              <https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html>`_),
+              or `"flash_attention_2"` (using `Dao-AILab/flash-attention <https://github.com/Dao-AILab/flash-attention>`_).
+              By default, if available, SDPA will be used for torch>=2.1.1. The default is otherwise the manual `"eager"`
+              implementation.
+            - ``provider``: If backend is "onnx", this is the provider to use for inference, for example "CPUExecutionProvider",
+              "CUDAExecutionProvider", etc. See https://onnxruntime.ai/docs/execution-providers/ for all ONNX execution providers.
+            - ``file_name``: If backend is "onnx" or "openvino", this is the file name to load, useful for loading optimized
+              or quantized ONNX or OpenVINO models.
+            - ``export``: If backend is "onnx" or "openvino", then this is a boolean flag specifying whether this model should
+              be exported to the backend. If not specified, the model will be exported only if the model repository or directory
+              does not already contain an exported model.
+
+            See the `PreTrainedModel.from_pretrained
+            <https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained>`_
+            documentation for more details.
+        tokenizer_kwargs (Dict[str, Any], optional): Additional tokenizer configuration parameters to be passed to the Hugging Face Transformers tokenizer.
+            See the `AutoTokenizer.from_pretrained
+            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained>`_
+            documentation for more details.
+        config_kwargs (Dict[str, Any], optional): Additional model configuration parameters to be passed to the Hugging Face Transformers config.
+            See the `AutoConfig.from_pretrained
+            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoConfig.from_pretrained>`_
+            documentation for more details.
+        model_card_data (:class:`~sentence_transformers.model_card.SentenceTransformerModelCardData`, optional): A model
+            card data object that contains information about the model. This is used to generate a model card when saving
+            the model. If not set, a default model card data object is created.
+        backend (str): The backend to use for inference. Can be one of "torch" (default), "onnx", or "openvino".
+            See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for benchmarking information
+            on the different backends.
+
+    Example:
+        ::
+
+            from sentence_transformers import SentenceTransformer
+
+            # Load a pre-trained SentenceTransformer model
+            model = SentenceTransformer('all-mpnet-base-v2')
+
+            # Encode some texts
+            sentences = [
+                "The weather is lovely today.",
+                "It's so sunny outside!",
+                "He drove to the stadium.",
+            ]
+            embeddings = model.encode(sentences)
+            print(embeddings.shape)
+            # (3, 768)
+
+            # Get the similarity scores between all sentences
+            similarities = model.similarity(embeddings, embeddings)
+            print(similarities)
+            # tensor([[1.0000, 0.6817, 0.0492],
+            #         [0.6817, 1.0000, 0.0421],
+            #         [0.0492, 0.0421, 1.0000]])
+    """
+
+    def __init__(
+        self,
+        model_name_or_path: str | None = None,
+        modules: Iterable[nn.Module] | None = None,
+        device: str | None = None,
+        prompts: dict[str, str] | None = None,
+        default_prompt_name: str | None = None,
+        similarity_fn_name: str | SimilarityFunction | None = None,
+        cache_folder: str | None = None,
+        trust_remote_code: bool = False,
+        revision: str | None = None,
+        local_files_only: bool = False,
+        token: bool | str | None = None,
+        use_auth_token: bool | str | None = None,
+        truncate_dim: int | None = None,
+        model_kwargs: dict[str, Any] | None = None,
+        tokenizer_kwargs: dict[str, Any] | None = None,
+        config_kwargs: dict[str, Any] | None = None,
+        model_card_data: SentenceTransformerModelCardData | None = None,
+        backend: Literal["torch", "onnx", "openvino"] = "torch",
+    ) -> None:
+        # Note: self._load_sbert_model can also update `self.prompts` and `self.default_prompt_name`
+        self.prompts = prompts or {}
+        self.default_prompt_name = default_prompt_name
+        self.similarity_fn_name = similarity_fn_name
+        self.trust_remote_code = trust_remote_code
+        self.truncate_dim = truncate_dim
+        self.model_card_data = model_card_data or SentenceTransformerModelCardData()
+        self.module_kwargs = None
+        self._model_card_vars = {}
+        self._model_card_text = None
+        self._model_config = {}
+        self.backend = backend
+        if use_auth_token is not None:
+            warnings.warn(
+                "The `use_auth_token` argument is deprecated and will be removed in v4 of SentenceTransformers.",
+                FutureWarning,
+            )
+            if token is not None:
+                raise ValueError(
+                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
+                )
+            token = use_auth_token
+
+        if cache_folder is None:
+            cache_folder = os.getenv("SENTENCE_TRANSFORMERS_HOME")
+
+        if device is None:
+            device = get_device_name()
+            logger.info(f"Use pytorch device_name: {device}")
+
+        if device == "hpu" and importlib.util.find_spec("optimum") is not None:
+            from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+
+            adapt_transformers_to_gaudi()
+
+        if model_name_or_path is not None and model_name_or_path != "":
+            logger.info(f"Load pretrained SentenceTransformer: {model_name_or_path}")
+
+            # Old models that don't belong to any organization
+            basic_transformer_models = [
+                "albert-base-v1",
+                "albert-base-v2",
+                "albert-large-v1",
+                "albert-large-v2",
+                "albert-xlarge-v1",
+                "albert-xlarge-v2",
+                "albert-xxlarge-v1",
+                "albert-xxlarge-v2",
+                "bert-base-cased-finetuned-mrpc",
+                "bert-base-cased",
+                "bert-base-chinese",
+                "bert-base-german-cased",
+                "bert-base-german-dbmdz-cased",
+                "bert-base-german-dbmdz-uncased",
+                "bert-base-multilingual-cased",
+                "bert-base-multilingual-uncased",
+                "bert-base-uncased",
+                "bert-large-cased-whole-word-masking-finetuned-squad",
+                "bert-large-cased-whole-word-masking",
+                "bert-large-cased",
+                "bert-large-uncased-whole-word-masking-finetuned-squad",
+                "bert-large-uncased-whole-word-masking",
+                "bert-large-uncased",
+                "camembert-base",
+                "ctrl",
+                "distilbert-base-cased-distilled-squad",
+                "distilbert-base-cased",
+                "distilbert-base-german-cased",
+                "distilbert-base-multilingual-cased",
+                "distilbert-base-uncased-distilled-squad",
+                "distilbert-base-uncased-finetuned-sst-2-english",
+                "distilbert-base-uncased",
+                "distilgpt2",
+                "distilroberta-base",
+                "gpt2-large",
+                "gpt2-medium",
+                "gpt2-xl",
+                "gpt2",
+                "openai-gpt",
+                "roberta-base-openai-detector",
+                "roberta-base",
+                "roberta-large-mnli",
+                "roberta-large-openai-detector",
+                "roberta-large",
+                "t5-11b",
+                "t5-3b",
+                "t5-base",
+                "t5-large",
+                "t5-small",
+                "transfo-xl-wt103",
+                "xlm-clm-ende-1024",
+                "xlm-clm-enfr-1024",
+                "xlm-mlm-100-1280",
+                "xlm-mlm-17-1280",
+                "xlm-mlm-en-2048",
+                "xlm-mlm-ende-1024",
+                "xlm-mlm-enfr-1024",
+                "xlm-mlm-enro-1024",
+                "xlm-mlm-tlm-xnli15-1024",
+                "xlm-mlm-xnli15-1024",
+                "xlm-roberta-base",
+                "xlm-roberta-large-finetuned-conll02-dutch",
+                "xlm-roberta-large-finetuned-conll02-spanish",
+                "xlm-roberta-large-finetuned-conll03-english",
+                "xlm-roberta-large-finetuned-conll03-german",
+                "xlm-roberta-large",
+                "xlnet-base-cased",
+                "xlnet-large-cased",
+            ]
+
+            if not os.path.exists(model_name_or_path):
+                # Not a path, load from hub
+                if "\\" in model_name_or_path or model_name_or_path.count("/") > 1:
+                    raise ValueError(f"Path {model_name_or_path} not found")
+
+                if "/" not in model_name_or_path and model_name_or_path.lower() not in basic_transformer_models:
+                    # A model from sentence-transformers
+                    model_name_or_path = __MODEL_HUB_ORGANIZATION__ + "/" + model_name_or_path
+
+            if is_sentence_transformer_model(
+                model_name_or_path,
+                token,
+                cache_folder=cache_folder,
+                revision=revision,
+                local_files_only=local_files_only,
+            ):
+                modules, self.module_kwargs = self._load_sbert_model(
+                    model_name_or_path,
+                    token=token,
+                    cache_folder=cache_folder,
+                    revision=revision,
+                    trust_remote_code=trust_remote_code,
+                    local_files_only=local_files_only,
+                    model_kwargs=model_kwargs,
+                    tokenizer_kwargs=tokenizer_kwargs,
+                    config_kwargs=config_kwargs,
+                )
+            else:
+                modules = self._load_auto_model(
+                    model_name_or_path,
+                    token=token,
+                    cache_folder=cache_folder,
+                    revision=revision,
+                    trust_remote_code=trust_remote_code,
+                    local_files_only=local_files_only,
+                    model_kwargs=model_kwargs,
+                    tokenizer_kwargs=tokenizer_kwargs,
+                    config_kwargs=config_kwargs,
+                )
+
+        if modules is not None and not isinstance(modules, OrderedDict):
+            modules = OrderedDict([(str(idx), module) for idx, module in enumerate(modules)])
+
+        super().__init__(modules)
+
+        # Ensure all tensors in the model are of the same dtype as the first tensor
+        # This is necessary if the first module has been given a lower precision via
+        # model_kwargs["torch_dtype"]. The rest of the model should be loaded in the same dtype
+        # See #2887 for more details
+        try:
+            dtype = next(self.parameters()).dtype
+            self.to(dtype)
+        except StopIteration:
+            pass
+
+        self.to(device)
+        self.is_hpu_graph_enabled = False
+
+        if self.default_prompt_name is not None and self.default_prompt_name not in self.prompts:
+            raise ValueError(
+                f"Default prompt name '{self.default_prompt_name}' not found in the configured prompts "
+                f"dictionary with keys {list(self.prompts.keys())!r}."
+            )
+
+        if self.prompts:
+            logger.info(f"{len(self.prompts)} prompts are loaded, with the keys: {list(self.prompts.keys())}")
+        if self.default_prompt_name:
+            logger.warning(
+                f"Default prompt name is set to '{self.default_prompt_name}'. "
+                "This prompt will be applied to all `encode()` calls, except if `encode()` "
+                "is called with `prompt` or `prompt_name` parameters."
+            )
+
+        # Ideally, INSTRUCTOR models should set `include_prompt=False` in their pooling configuration, but
+        # that would be a breaking change for users currently using the InstructorEmbedding project.
+        # So, instead we hardcode setting it for the main INSTRUCTOR models, and otherwise give a warning if we
+        # suspect the user is using an INSTRUCTOR model.
+        if model_name_or_path in ("hkunlp/instructor-base", "hkunlp/instructor-large", "hkunlp/instructor-xl"):
+            self.set_pooling_include_prompt(include_prompt=False)
+        elif (
+            model_name_or_path
+            and "/" in model_name_or_path
+            and "instructor" in model_name_or_path.split("/")[1].lower()
+        ):
+            if any([module.include_prompt for module in self if isinstance(module, Pooling)]):
+                logger.warning(
+                    "Instructor models require `include_prompt=False` in the pooling configuration. "
+                    "Either update the model configuration or call `model.set_pooling_include_prompt(False)` after loading the model."
+                )
+
+        # Pass the model to the model card data for later use in generating a model card upon saving this model
+        self.model_card_data.register_model(self)
+
+    def get_backend(self) -> Literal["torch", "onnx", "openvino"]:
+        """Return the backend used for inference, which can be one of "torch", "onnx", or "openvino".
+
+        Returns:
+            str: The backend used for inference.
+        """
+        return self.backend
+
+    @overload
+    def encode(
+        self,
+        sentences: str,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding", "token_embeddings"] | None = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: Literal[False] = ...,
+        convert_to_tensor: Literal[False] = ...,
+        device: str = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> Tensor: ...
+
+    @overload
+    def encode(
+        self,
+        sentences: str | list[str],
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding", "token_embeddings"] | None = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: Literal[True] = ...,
+        convert_to_tensor: Literal[False] = ...,
+        device: str = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> np.ndarray: ...
+
+    @overload
+    def encode(
+        self,
+        sentences: str | list[str],
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding", "token_embeddings"] | None = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: bool = ...,
+        convert_to_tensor: Literal[True] = ...,
+        device: str = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> Tensor: ...
+
+    @overload
+    def encode(
+        self,
+        sentences: list[str] | np.ndarray,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding", "token_embeddings"] | None = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: Literal[False] = ...,
+        convert_to_tensor: Literal[False] = ...,
+        device: str = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> list[Tensor]: ...
+
+    def encode(
+        self,
+        sentences: str | list[str],
+        prompt_name: str | None = None,
+        prompt: str | None = None,
+        batch_size: int = 32,
+        show_progress_bar: bool | None = None,
+        output_value: Literal["sentence_embedding", "token_embeddings"] | None = "sentence_embedding",
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
+        convert_to_numpy: bool = True,
+        convert_to_tensor: bool = False,
+        device: str = None,
+        normalize_embeddings: bool = False,
+        **kwargs,
+    ) -> list[Tensor] | np.ndarray | Tensor:
+        """
+        Computes sentence embeddings.
+
+        Args:
+            sentences (Union[str, List[str]]): The sentences to embed.
+            prompt_name (Optional[str], optional): The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary,
+                which is either set in the constructor or loaded from the model configuration. For example if
+                ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
+                is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
+                is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
+            prompt (Optional[str], optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
+                sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
+                because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored. Defaults to None.
+            batch_size (int, optional): The batch size used for the computation. Defaults to 32.
+            show_progress_bar (bool, optional): Whether to output a progress bar when encode sentences. Defaults to None.
+            output_value (Optional[Literal["sentence_embedding", "token_embeddings"]], optional): The type of embeddings to return:
+                "sentence_embedding" to get sentence embeddings, "token_embeddings" to get wordpiece token embeddings, and `None`,
+                to get all output values. Defaults to "sentence_embedding".
+            precision (Literal["float32", "int8", "uint8", "binary", "ubinary"], optional): The precision to use for the embeddings.
+                Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions are quantized embeddings.
+                Quantized embeddings are smaller in size and faster to compute, but may have a lower accuracy. They are useful for
+                reducing the size of the embeddings of a corpus for semantic search, among other tasks. Defaults to "float32".
+            convert_to_numpy (bool, optional): Whether the output should be a list of numpy vectors. If False, it is a list of PyTorch tensors.
+                Defaults to True.
+            convert_to_tensor (bool, optional): Whether the output should be one large tensor. Overwrites `convert_to_numpy`.
+                Defaults to False.
+            device (str, optional): Which :class:`torch.device` to use for the computation. Defaults to None.
+            normalize_embeddings (bool, optional): Whether to normalize returned vectors to have length 1. In that case,
+                the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.
+
+        Returns:
+            Union[List[Tensor], ndarray, Tensor]: By default, a 2d numpy array with shape [num_inputs, output_dimension] is returned.
+            If only one string input is provided, then the output is a 1d array with shape [output_dimension]. If ``convert_to_tensor``,
+            a torch Tensor is returned instead. If ``self.truncate_dim <= output_dimension`` then output_dimension is ``self.truncate_dim``.
+
+        Example:
+            ::
+
+                from sentence_transformers import SentenceTransformer
+
+                # Load a pre-trained SentenceTransformer model
+                model = SentenceTransformer('all-mpnet-base-v2')
+
+                # Encode some texts
+                sentences = [
+                    "The weather is lovely today.",
+                    "It's so sunny outside!",
+                    "He drove to the stadium.",
+                ]
+                embeddings = model.encode(sentences)
+                print(embeddings.shape)
+                # (3, 768)
+        """
+        if self.device.type == "hpu" and not self.is_hpu_graph_enabled:
+            import habana_frameworks.torch as ht
+
+            ht.hpu.wrap_in_hpu_graph(self, disable_tensor_cache=True)
+            self.is_hpu_graph_enabled = True
+
+        self.eval()
+        if show_progress_bar is None:
+            show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG)
+
+        if convert_to_tensor:
+            convert_to_numpy = False
+
+        if output_value != "sentence_embedding":
+            convert_to_tensor = False
+            convert_to_numpy = False
+
+        input_was_string = False
+        if isinstance(sentences, str) or not hasattr(
+            sentences, "__len__"
+        ):  # Cast an individual sentence to a list with length 1
+            sentences = [sentences]
+            input_was_string = True
+
+        if prompt is None:
+            if prompt_name is not None:
+                try:
+                    prompt = self.prompts[prompt_name]
+                except KeyError:
+                    raise ValueError(
+                        f"Prompt name '{prompt_name}' not found in the configured prompts dictionary with keys {list(self.prompts.keys())!r}."
+                    )
+            elif self.default_prompt_name is not None:
+                prompt = self.prompts.get(self.default_prompt_name, None)
+        else:
+            if prompt_name is not None:
+                logger.warning(
+                    "Encode with either a `prompt`, a `prompt_name`, or neither, but not both. "
+                    "Ignoring the `prompt_name` in favor of `prompt`."
+                )
+
+        extra_features = {}
+        if prompt is not None:
+            sentences = [prompt + sentence for sentence in sentences]
+
+            # Some models (e.g. INSTRUCTOR, GRIT) require removing the prompt before pooling
+            # Tracking the prompt length allow us to remove the prompt during pooling
+            tokenized_prompt = self.tokenize([prompt])
+            if "input_ids" in tokenized_prompt:
+                extra_features["prompt_length"] = tokenized_prompt["input_ids"].shape[-1] - 1
+
+        if device is None:
+            device = self.device
+
+        self.to(device)
+
+        all_embeddings = []
+        length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences])
+        sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
+
+        for start_index in trange(0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar):
+            sentences_batch = sentences_sorted[start_index : start_index + batch_size]
+            features = self.tokenize(sentences_batch)
+            if self.device.type == "hpu":
+                if "input_ids" in features:
+                    curr_tokenize_len = features["input_ids"].shape
+                    additional_pad_len = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) - curr_tokenize_len[1]
+                    features["input_ids"] = torch.cat(
+                        (
+                            features["input_ids"],
+                            torch.ones((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                        ),
+                        -1,
+                    )
+                    features["attention_mask"] = torch.cat(
+                        (
+                            features["attention_mask"],
+                            torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                        ),
+                        -1,
+                    )
+                    if "token_type_ids" in features:
+                        features["token_type_ids"] = torch.cat(
+                            (
+                                features["token_type_ids"],
+                                torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                            ),
+                            -1,
+                        )
+
+            features = batch_to_device(features, device)
+            features.update(extra_features)
+
+            with torch.no_grad():
+                out_features = self.forward(features, **kwargs)
+                if self.device.type == "hpu":
+                    out_features = copy.deepcopy(out_features)
+
+                out_features["sentence_embedding"] = truncate_embeddings(
+                    out_features["sentence_embedding"], self.truncate_dim
+                )
+
+                if output_value == "token_embeddings":
+                    embeddings = []
+                    for token_emb, attention in zip(out_features[output_value], out_features["attention_mask"]):
+                        last_mask_id = len(attention) - 1
+                        while last_mask_id > 0 and attention[last_mask_id].item() == 0:
+                            last_mask_id -= 1
+
+                        embeddings.append(token_emb[0 : last_mask_id + 1])
+                elif output_value is None:  # Return all outputs
+                    embeddings = []
+                    for sent_idx in range(len(out_features["sentence_embedding"])):
+                        row = {name: out_features[name][sent_idx] for name in out_features}
+                        embeddings.append(row)
+                else:  # Sentence embeddings
+                    embeddings = out_features[output_value]
+                    embeddings = embeddings.detach()
+                    if normalize_embeddings:
+                        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+
+                    # fixes for #522 and #487 to avoid oom problems on gpu with large datasets
+                    if convert_to_numpy:
+                        embeddings = embeddings.cpu()
+
+                all_embeddings.extend(embeddings)
+
+        all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
+
+        if precision and precision != "float32":
+            all_embeddings = quantize_embeddings(all_embeddings, precision=precision)
+
+        if convert_to_tensor:
+            if len(all_embeddings):
+                if isinstance(all_embeddings, np.ndarray):
+                    all_embeddings = torch.from_numpy(all_embeddings)
+                else:
+                    all_embeddings = torch.stack(all_embeddings)
+            else:
+                all_embeddings = torch.Tensor()
+        elif convert_to_numpy:
+            if not isinstance(all_embeddings, np.ndarray):
+                if all_embeddings and all_embeddings[0].dtype == torch.bfloat16:
+                    all_embeddings = np.asarray([emb.float().numpy() for emb in all_embeddings])
+                else:
+                    all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
+        elif isinstance(all_embeddings, np.ndarray):
+            all_embeddings = [torch.from_numpy(embedding) for embedding in all_embeddings]
+
+        if input_was_string:
+            all_embeddings = all_embeddings[0]
+
+        return all_embeddings
+
+    def forward(self, input: dict[str, Tensor], **kwargs) -> dict[str, Tensor]:
+        if self.module_kwargs is None:
+            return super().forward(input)
+
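+        # Pass each keyword argument only to the modules that declared it in `self.module_kwargs`,
+        # so module-specific options reach the right sub-module and nothing else.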
+        for module_name, module in self.named_children():
+            module_kwarg_keys = self.module_kwargs.get(module_name, [])
+            module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
+            input = module(input, **module_kwargs)
+        return input
+
+    @property
+    def similarity_fn_name(self) -> Literal["cosine", "dot", "euclidean", "manhattan"]:
+        """Return the name of the similarity function used by :meth:`SentenceTransformer.similarity` and :meth:`SentenceTransformer.similarity_pairwise`.
+
+        Returns:
+            Optional[str]: The name of the similarity function. Can be None if not set, in which case it will
+                default to "cosine" when first called.
+
+        Example:
+            >>> model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
+            >>> model.similarity_fn_name
+            'dot'
+        """
+        if self._similarity_fn_name is None:
+            self.similarity_fn_name = SimilarityFunction.COSINE
+        return self._similarity_fn_name
+
+    @similarity_fn_name.setter
+    def similarity_fn_name(
+        self, value: Literal["cosine", "dot", "euclidean", "manhattan"] | SimilarityFunction
+    ) -> None:
+        if isinstance(value, SimilarityFunction):
+            value = value.value
+        self._similarity_fn_name = value
+
+        if value is not None:
+            self._similarity = SimilarityFunction.to_similarity_fn(value)
+            self._similarity_pairwise = SimilarityFunction.to_similarity_pairwise_fn(value)
+
+    @overload
+    def similarity(self, embeddings1: Tensor, embeddings2: Tensor) -> Tensor: ...
+
+    @overload
+    def similarity(self, embeddings1: ndarray, embeddings2: ndarray) -> Tensor: ...
+
+    @property
+    def similarity(self) -> Callable[[Tensor | ndarray, Tensor | ndarray], Tensor]:
+        """
+        Compute the similarity between two collections of embeddings. The output will be a matrix with the similarity
+        scores between all embeddings from the first parameter and all embeddings from the second parameter. This
+        differs from `similarity_pairwise` which computes the similarity between each pair of embeddings.
+
+        Args:
+            embeddings1 (Union[Tensor, ndarray]): [num_embeddings_1, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+            embeddings2 (Union[Tensor, ndarray]): [num_embeddings_2, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
+        Returns:
+            Tensor: A [num_embeddings_1, num_embeddings_2]-shaped torch tensor with similarity scores.
+
+        Example:
+            ::
+
+                >>> model = SentenceTransformer("all-mpnet-base-v2")
+                >>> sentences = [
+                ...     "The weather is so nice!",
+                ...     "It's so sunny outside.",
+                ...     "He's driving to the movie theater.",
+                ...     "She's going to the cinema.",
+                ... ]
+                >>> embeddings = model.encode(sentences, normalize_embeddings=True)
+                >>> model.similarity(embeddings, embeddings)
+                tensor([[1.0000, 0.7235, 0.0290, 0.1309],
+                        [0.7235, 1.0000, 0.0613, 0.1129],
+                        [0.0290, 0.0613, 1.0000, 0.5027],
+                        [0.1309, 0.1129, 0.5027, 1.0000]])
+                >>> model.similarity_fn_name
+                "cosine"
+                >>> model.similarity_fn_name = "euclidean"
+                >>> model.similarity(embeddings, embeddings)
+                tensor([[-0.0000, -0.7437, -1.3935, -1.3184],
+                        [-0.7437, -0.0000, -1.3702, -1.3320],
+                        [-1.3935, -1.3702, -0.0000, -0.9973],
+                        [-1.3184, -1.3320, -0.9973, -0.0000]])
+        """
+        if self.similarity_fn_name is None:
+            self.similarity_fn_name = SimilarityFunction.COSINE
+        return self._similarity
+
+    @overload
+    def similarity_pairwise(self, embeddings1: Tensor, embeddings2: Tensor) -> Tensor: ...
+
+    @overload
+    def similarity_pairwise(self, embeddings1: ndarray, embeddings2: ndarray) -> Tensor: ...
+
+    @property
+    def similarity_pairwise(self) -> Callable[[Tensor | ndarray, Tensor | ndarray], Tensor]:
+        """
+        Compute the similarity between two collections of embeddings. The output will be a vector with the similarity
+        scores between each pair of embeddings.
+
+        Args:
+            embeddings1 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+            embeddings2 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
+        Returns:
+            Tensor: A [num_embeddings]-shaped torch tensor with pairwise similarity scores.
+
+        Example:
+            ::
+
+                >>> model = SentenceTransformer("all-mpnet-base-v2")
+                >>> sentences = [
+                ...     "The weather is so nice!",
+                ...     "It's so sunny outside.",
+                ...     "He's driving to the movie theater.",
+                ...     "She's going to the cinema.",
+                ... ]
+                >>> embeddings = model.encode(sentences, normalize_embeddings=True)
+                >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
+                tensor([0.7235, 0.5027])
+                >>> model.similarity_fn_name
+                "cosine"
+                >>> model.similarity_fn_name = "euclidean"
+                >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
+                tensor([-0.7437, -0.9973])
+        """
+        if self.similarity_fn_name is None:
+            self.similarity_fn_name = SimilarityFunction.COSINE
+        return self._similarity_pairwise
+
+    def start_multi_process_pool(
+        self, target_devices: list[str] | None = None
+    ) -> dict[Literal["input", "output", "processes"], Any]:
+        """
+        Starts a multi-process pool to process the encoding with several independent processes
+        via :meth:`SentenceTransformer.encode_multi_process <sentence_transformers.SentenceTransformer.encode_multi_process>`.
+
+        This method is recommended if you want to encode on multiple GPUs or CPUs. It is advised
+        to start only one process per GPU. This method works together with encode_multi_process
+        and stop_multi_process_pool.
+
+        Args:
+            target_devices (List[str], optional): PyTorch target devices, e.g. ["cuda:0", "cuda:1", ...],
+                ["npu:0", "npu:1", ...], or ["cpu", "cpu", "cpu", "cpu"]. If target_devices is None and CUDA/NPU
+                is available, then all available CUDA/NPU devices will be used. If target_devices is None and
+                CUDA/NPU is not available, then 4 CPU devices will be used.
+
+        Returns:
+            Dict[str, Any]: A dictionary with the target processes, an input queue, and an output queue.
+        """
+        if target_devices is None:
+            if torch.cuda.is_available():
+                target_devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
+            elif is_torch_npu_available():
+                target_devices = [f"npu:{i}" for i in range(torch.npu.device_count())]
+            else:
+                logger.info("CUDA/NPU is not available. Starting 4 CPU workers")
+                target_devices = ["cpu"] * 4
+
+        logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices))))
+
+        self.to("cpu")
+        self.share_memory()
+        ctx = mp.get_context("spawn")
+        input_queue = ctx.Queue()
+        output_queue = ctx.Queue()
+        processes = []
+
+        for device_id in target_devices:
+            p = ctx.Process(
+                target=SentenceTransformer._encode_multi_process_worker,
+                args=(device_id, self, input_queue, output_queue),
+                daemon=True,
+            )
+            p.start()
+            processes.append(p)
+
+        return {"input": input_queue, "output": output_queue, "processes": processes}
+
+    @staticmethod
+    def stop_multi_process_pool(pool: dict[Literal["input", "output", "processes"], Any]) -> None:
+        """
+        Stops all processes started with start_multi_process_pool.
+
+        Args:
+            pool (Dict[str, object]): A dictionary containing the input queue, output queue, and process list.
+
+        Returns:
+            None
+        """
+        for p in pool["processes"]:
+            p.terminate()
+
+        for p in pool["processes"]:
+            p.join()
+            p.close()
+
+        pool["input"].close()
+        pool["output"].close()
+
+    def encode_multi_process(
+        self,
+        sentences: list[str],
+        pool: dict[Literal["input", "output", "processes"], Any],
+        prompt_name: str | None = None,
+        prompt: str | None = None,
+        batch_size: int = 32,
+        chunk_size: int | None = None,
+        show_progress_bar: bool | None = None,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
+        normalize_embeddings: bool = False,
+    ) -> np.ndarray:
+        """
+        Encodes a list of sentences using multiple processes and GPUs via
+        :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>`.
+        The sentences are chunked into smaller packages and sent to individual processes, which encode them on different
+        GPUs or CPUs. This method is only suitable for encoding large sets of sentences.
+
+        Args:
+            sentences (List[str]): List of sentences to encode.
+            pool (Dict[Literal["input", "output", "processes"], Any]): A pool of workers started with
+                :meth:`SentenceTransformer.start_multi_process_pool <sentence_transformers.SentenceTransformer.start_multi_process_pool>`.
+            prompt_name (Optional[str], optional): The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary,
+                which is either set in the constructor or loaded from the model configuration. For example if
+                ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
+                is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
+                is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
+            prompt (Optional[str], optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
+                sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
+                because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored. Defaults to None.
+            batch_size (int): Encode sentences with this batch size. Defaults to 32.
+            chunk_size (int): Sentences are chunked and sent to the individual processes. If None, a sensible
+                chunk size is determined automatically. Defaults to None.
+            show_progress_bar (bool, optional): Whether to output a progress bar when encoding sentences. Defaults to None.
+            precision (Literal["float32", "int8", "uint8", "binary", "ubinary"]): The precision to use for the
+                embeddings. Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions
+                are quantized embeddings. Quantized embeddings are smaller in size and faster to compute, but may
+                have lower accuracy. They are useful for reducing the size of the embeddings of a corpus for
+                semantic search, among other tasks. Defaults to "float32".
+            normalize_embeddings (bool): Whether to normalize returned vectors to have length 1. In that case,
+                the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.
+
+        Returns:
+            np.ndarray: A 2D numpy array with shape [num_inputs, output_dimension].
+
+        Example:
+            ::
+
+                from sentence_transformers import SentenceTransformer
+
+                def main():
+                    model = SentenceTransformer("all-mpnet-base-v2")
+                    sentences = ["The weather is so nice!", "It's so sunny outside.", "He's driving to the movie theater.", "She's going to the cinema."] * 1000
+
+                    pool = model.start_multi_process_pool()
+                    embeddings = model.encode_multi_process(sentences, pool)
+                    model.stop_multi_process_pool(pool)
+
+                    print(embeddings.shape)
+                    # => (4000, 768)
+
+                if __name__ == "__main__":
+                    main()
+        """
+
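+        # Choose a chunk size that yields roughly ten chunks per worker process, capped at 5000 sentences,
+        # so the work is spread evenly across the pool without overly large queue items.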
+        if chunk_size is None:
+            chunk_size = min(math.ceil(len(sentences) / len(pool["processes"]) / 10), 5000)
+
+        if show_progress_bar is None:
+            show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG)
+
+        logger.debug(f"Chunk data into {math.ceil(len(sentences) / chunk_size)} packages of size {chunk_size}")
+
+        input_queue = pool["input"]
+        last_chunk_id = 0
+        chunk = []
+
+        for sentence in sentences:
+            chunk.append(sentence)
+            if len(chunk) >= chunk_size:
+                input_queue.put(
+                    [last_chunk_id, batch_size, chunk, prompt_name, prompt, precision, normalize_embeddings]
+                )
+                last_chunk_id += 1
+                chunk = []
+
+        if len(chunk) > 0:
+            input_queue.put([last_chunk_id, batch_size, chunk, prompt_name, prompt, precision, normalize_embeddings])
+            last_chunk_id += 1
+
+        output_queue = pool["output"]
+        results_list = sorted(
+            [output_queue.get() for _ in trange(last_chunk_id, desc="Chunks", disable=not show_progress_bar)],
+            key=lambda x: x[0],
+        )
+        embeddings = np.concatenate([result[1] for result in results_list])
+        return embeddings
+
+    @staticmethod
+    def _encode_multi_process_worker(
+        target_device: str, model: SentenceTransformer, input_queue: Queue, results_queue: Queue
+    ) -> None:
+        """
+        Internal worker process that encodes sentences in the multi-process setup
+        """
+        while True:
+            try:
+                chunk_id, batch_size, sentences, prompt_name, prompt, precision, normalize_embeddings = (
+                    input_queue.get()
+                )
+                embeddings = model.encode(
+                    sentences,
+                    prompt_name=prompt_name,
+                    prompt=prompt,
+                    device=target_device,
+                    show_progress_bar=False,
+                    precision=precision,
+                    convert_to_numpy=True,
+                    batch_size=batch_size,
+                    normalize_embeddings=normalize_embeddings,
+                )
+
+                results_queue.put([chunk_id, embeddings])
+            except queue.Empty:
+                break
+
+    def set_pooling_include_prompt(self, include_prompt: bool) -> None:
+        """
+        Sets the `include_prompt` attribute in the pooling layer in the model, if there is one.
+
+        This is useful for INSTRUCTOR models, as the prompt should be excluded from the pooling strategy
+        for these models.
+
+        Args:
+            include_prompt (bool): Whether to include the prompt in the pooling layer.
+
+        Returns:
+            None
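+
+        Example:
+            A minimal sketch; ``"hkunlp/instructor-base"`` merely stands in for an INSTRUCTOR-style checkpoint::
+
+                from sentence_transformers import SentenceTransformer
+
+                model = SentenceTransformer("hkunlp/instructor-base")
+                # Exclude the instruction prompt from pooling, as recommended for INSTRUCTOR models
+                model.set_pooling_include_prompt(False)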
+        """
+        for module in self:
+            if isinstance(module, Pooling):
+                module.include_prompt = include_prompt
+                break
+
+    def get_max_seq_length(self) -> int | None:
+        """
+        Returns the maximal sequence length that the model accepts. Longer inputs will be truncated.
+
+        Returns:
+            Optional[int]: The maximal sequence length that the model accepts, or None if it is not defined.
+        """
+        if hasattr(self._first_module(), "max_seq_length"):
+            return self._first_module().max_seq_length
+
+        return None
+
+    def tokenize(self, texts: list[str] | list[dict] | list[tuple[str, str]]) -> dict[str, Tensor]:
+        """
+        Tokenizes the texts.
+
+        Args:
+            texts (Union[List[str], List[Dict], List[Tuple[str, str]]]): A list of texts to be tokenized.
+
+        Returns:
+            Dict[str, Tensor]: A dictionary of tensors with the tokenized texts. Common keys are "input_ids",
+                "attention_mask", and "token_type_ids".
+        """
+        return self._first_module().tokenize(texts)
+
+    def get_sentence_features(self, *features) -> dict[Literal["sentence_embedding"], Tensor]:
+        return self._first_module().get_sentence_features(*features)
+
+    def get_sentence_embedding_dimension(self) -> int | None:
+        """
+        Returns the number of dimensions in the output of :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>`.
+
+        Returns:
+            Optional[int]: The number of dimensions in the output of `encode`. If it's not known, it's `None`.
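+
+        Example:
+            A minimal sketch; ``"all-mpnet-base-v2"`` is used purely as an illustrative model that outputs
+            768-dimensional embeddings::
+
+                from sentence_transformers import SentenceTransformer
+
+                model = SentenceTransformer("all-mpnet-base-v2")
+                model.get_sentence_embedding_dimension()
+                # => 768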
+        """
+        output_dim = None
+        for mod in reversed(self._modules.values()):
+            sent_embedding_dim_method = getattr(mod, "get_sentence_embedding_dimension", None)
+            if callable(sent_embedding_dim_method):
+                output_dim = sent_embedding_dim_method()
+                break
+        if self.truncate_dim is not None:
+            # The user requested truncation. If they set it to a dim greater than output_dim,
+            # no truncation will actually happen. So return output_dim instead of self.truncate_dim
+            return min(output_dim or np.inf, self.truncate_dim)
+        return output_dim
+
+    @contextmanager
+    def truncate_sentence_embeddings(self, truncate_dim: int | None) -> Iterator[None]:
+        """
+        In this context, :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>` outputs
+        sentence embeddings truncated at dimension ``truncate_dim``.
+
+        This may be useful when you are using the same model for different applications where different dimensions
+        are needed.
+
+        Args:
+            truncate_dim (int, optional): The dimension to truncate sentence embeddings to. ``None`` does no truncation.
+
+        Example:
+            ::
+
+                from sentence_transformers import SentenceTransformer
+
+                model = SentenceTransformer("all-mpnet-base-v2")
+
+                with model.truncate_sentence_embeddings(truncate_dim=16):
+                    embeddings_truncated = model.encode(["hello there", "hiya"])
+                assert embeddings_truncated.shape[-1] == 16
+        """
+        original_output_dim = self.truncate_dim
+        try:
+            self.truncate_dim = truncate_dim
+            yield
+        finally:
+            self.truncate_dim = original_output_dim
+
+    def _first_module(self) -> torch.nn.Module:
+        """Returns the first module of this sequential embedder"""
+        return self._modules[next(iter(self._modules))]
+
+    def _last_module(self) -> torch.nn.Module:
+        """Returns the last module of this sequential embedder"""
+        return self._modules[next(reversed(self._modules))]
+
+    def save(
+        self,
+        path: str,
+        model_name: str | None = None,
+        create_model_card: bool = True,
+        train_datasets: list[str] | None = None,
+        safe_serialization: bool = True,
+    ) -> None:
+        """
+        Saves a model and its configuration files to a directory, so that it can be loaded
+        with ``SentenceTransformer(path)`` again.
+
+        Args:
+            path (str): Path on disc where the model will be saved.
+            model_name (str, optional): Optional model name.
+            create_model_card (bool, optional): If True, create a README.md with basic information about this model.
+            train_datasets (List[str], optional): Optional list with the names of the datasets used to train the model.
+            safe_serialization (bool, optional): If True, save the model using safetensors. If False, save the model
+                the traditional (but unsafe) PyTorch way.
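+
+        Example:
+            A minimal save-and-reload sketch; the output directory is just an illustrative local path::
+
+                from sentence_transformers import SentenceTransformer
+
+                model = SentenceTransformer("all-mpnet-base-v2")
+                model.save("output/my-local-model")
+                reloaded_model = SentenceTransformer("output/my-local-model")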
+        """
+        if path is None:
+            return
+
+        os.makedirs(path, exist_ok=True)
+
+        logger.info(f"Save model to {path}")
+        modules_config = []
+
+        # Save some model info
+        self._model_config["__version__"] = {
+            "sentence_transformers": __version__,
+            "transformers": transformers.__version__,
+            "pytorch": torch.__version__,
+        }
+
+        with open(os.path.join(path, "config_sentence_transformers.json"), "w") as fOut:
+            config = self._model_config.copy()
+            config["prompts"] = self.prompts
+            config["default_prompt_name"] = self.default_prompt_name
+            config["similarity_fn_name"] = self.similarity_fn_name
+            json.dump(config, fOut, indent=2)
+
+        # Save modules
+        for idx, name in enumerate(self._modules):
+            module = self._modules[name]
+            if idx == 0 and hasattr(module, "save_in_root"):  # Save first module in the main folder
+                model_path = path + "/"
+            else:
+                model_path = os.path.join(path, str(idx) + "_" + type(module).__name__)
+
+            os.makedirs(model_path, exist_ok=True)
+            # Try to save with safetensors, but fall back to the traditional PyTorch way if the module doesn't support it
+            try:
+                module.save(model_path, safe_serialization=safe_serialization)
+            except TypeError:
+                module.save(model_path)
+
+            # "module" only works for Sentence Transformers as the modules have the same names as the classes
+            class_ref = type(module).__module__
+            # For remote modules, we want to remove "transformers_modules.{repo_name}":
+            if class_ref.startswith("transformers_modules."):
+                class_file = sys.modules[class_ref].__file__
+
+                # Save the custom module file
+                dest_file = Path(model_path) / (Path(class_file).name)
+                shutil.copy(class_file, dest_file)
+
+                # Save all files imported in the custom module file
+                for needed_file in get_relative_import_files(class_file):
+                    dest_file = Path(model_path) / (Path(needed_file).name)
+                    shutil.copy(needed_file, dest_file)
+
+                # For remote modules, we want to ignore the "transformers_modules.{repo_id}" part,
+                # i.e. we only want the filename
+                class_ref = f"{class_ref.split('.')[-1]}.{type(module).__name__}"
+            # For other cases, we want to add the class name:
+            elif not class_ref.startswith("sentence_transformers."):
+                class_ref = f"{class_ref}.{type(module).__name__}"
+            modules_config.append({"idx": idx, "name": name, "path": os.path.basename(model_path), "type": class_ref})
+
+        with open(os.path.join(path, "modules.json"), "w") as fOut:
+            json.dump(modules_config, fOut, indent=2)
+
+        # Create model card
+        if create_model_card:
+            self._create_model_card(path, model_name, train_datasets)
+
+    def save_pretrained(
+        self,
+        path: str,
+        model_name: str | None = None,
+        create_model_card: bool = True,
+        train_datasets: list[str] | None = None,
+        safe_serialization: bool = True,
+    ) -> None:
+        """
+        Saves a model and its configuration files to a directory, so that it can be loaded
+        with ``SentenceTransformer(path)`` again.
+
+        Args:
+            path (str): Path on disc where the model will be saved.
+            model_name (str, optional): Optional model name.
+            create_model_card (bool, optional): If True, create a README.md with basic information about this model.
+            train_datasets (List[str], optional): Optional list with the names of the datasets used to train the model.
+            safe_serialization (bool, optional): If True, save the model using safetensors. If False, save the model
+                the traditional (but unsafe) PyTorch way.
+        """
+        self.save(
+            path,
+            model_name=model_name,
+            create_model_card=create_model_card,
+            train_datasets=train_datasets,
+            safe_serialization=safe_serialization,
+        )
+
+    def _create_model_card(
+        self, path: str, model_name: str | None = None, train_datasets: list[str] | None = "deprecated"
+    ) -> None:
+        """
+        Creates an automatic model card and stores it in the specified path. If no training was done and the loaded
+        model was already a Sentence Transformer model, then its model card is reused.
+
+        Args:
+            path (str): The path where the model card will be stored.
+            model_name (Optional[str], optional): The name of the model. Defaults to None.
+            train_datasets (Optional[List[str]], optional): Deprecated argument. Defaults to "deprecated".
+
+        Returns:
+            None
+        """
+        if model_name:
+            model_path = Path(model_name)
+            if not model_path.exists() and not self.model_card_data.model_id:
+                self.model_card_data.model_id = model_name
+
+        # If we loaded a Sentence Transformer model from the Hub, and no training was done, then
+        # we don't generate a new model card, but reuse the old one instead.
+        if self._model_card_text and self.model_card_data.trainer is None:
+            model_card = self._model_card_text
+            if self.model_card_data.model_id:
+                # The original model card contains a placeholder model_id; replace it with the new model_id
+                model_card = model_card.replace(
+                    'model = SentenceTransformer("sentence_transformers_model_id"',
+                    f'model = SentenceTransformer("{self.model_card_data.model_id}"',
+                )
+        else:
+            try:
+                model_card = generate_model_card(self)
+            except Exception:
+                logger.error(
+                    f"Error while generating model card:\n{traceback.format_exc()}"
+                    "Consider opening an issue on https://github.com/UKPLab/sentence-transformers/issues with this traceback.\n"
+                    "Skipping model card creation."
+                )
+                return
+
+        with open(os.path.join(path, "README.md"), "w", encoding="utf8") as fOut:
+            fOut.write(model_card)
+
+    @save_to_hub_args_decorator
+    def save_to_hub(
+        self,
+        repo_id: str,
+        organization: str | None = None,
+        token: str | None = None,
+        private: bool | None = None,
+        safe_serialization: bool = True,
+        commit_message: str = "Add new SentenceTransformer model.",
+        local_model_path: str | None = None,
+        exist_ok: bool = False,
+        replace_model_card: bool = False,
+        train_datasets: list[str] | None = None,
+    ) -> str:
+        """
+        DEPRECATED, use `push_to_hub` instead.
+
+        Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.
+
+        Args:
+            repo_id (str): Repository name for your model in the Hub, including the user or organization.
+            token (str, optional): An authentication token (See https://huggingface.co/settings/token)
+            private (bool, optional): Set to true, for hosting a private model
+            safe_serialization (bool, optional): If true, save the model using safetensors. If false, save the model the traditional PyTorch way
+            commit_message (str, optional): Message to commit while pushing.
+            local_model_path (str, optional): Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
+            exist_ok (bool, optional): If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
+            replace_model_card (bool, optional): If true, replace an existing model card in the hub with the automatically created model card
+            train_datasets (List[str], optional): Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
+
+        Returns:
+            str: The url of the commit of your model in the repository on the Hugging Face Hub.
+        """
+        logger.warning(
+            "The `save_to_hub` method is deprecated and will be removed in a future version of SentenceTransformers."
+            " Please use `push_to_hub` instead for future model uploads."
+        )
+
+        if organization:
+            if "/" not in repo_id:
+                logger.warning(
+                    f'Providing an `organization` to `save_to_hub` is deprecated, please use `repo_id="{organization}/{repo_id}"` instead.'
+                )
+                repo_id = f"{organization}/{repo_id}"
+            elif repo_id.split("/")[0] != organization:
+                raise ValueError(
+                    "Providing an `organization` to `save_to_hub` is deprecated, please only use `repo_id`."
+                )
+            else:
+                logger.warning(
+                    f'Providing an `organization` to `save_to_hub` is deprecated, please only use `repo_id="{repo_id}"` instead.'
+                )
+
+        return self.push_to_hub(
+            repo_id=repo_id,
+            token=token,
+            private=private,
+            safe_serialization=safe_serialization,
+            commit_message=commit_message,
+            local_model_path=local_model_path,
+            exist_ok=exist_ok,
+            replace_model_card=replace_model_card,
+            train_datasets=train_datasets,
+        )
+
+    def push_to_hub(
+        self,
+        repo_id: str,
+        token: str | None = None,
+        private: bool | None = None,
+        safe_serialization: bool = True,
+        commit_message: str | None = None,
+        local_model_path: str | None = None,
+        exist_ok: bool = False,
+        replace_model_card: bool = False,
+        train_datasets: list[str] | None = None,
+        revision: str | None = None,
+        create_pr: bool = False,
+    ) -> str:
+        """
+        Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.
+
+        Args:
+            repo_id (str): Repository name for your model in the Hub, including the user or organization.
+            token (str, optional): An authentication token (See https://huggingface.co/settings/token)
+            private (bool, optional): Set to true, for hosting a private model
+            safe_serialization (bool, optional): If true, save the model using safetensors. If false, save the model the traditional PyTorch way
+            commit_message (str, optional): Message to commit while pushing.
+            local_model_path (str, optional): Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
+            exist_ok (bool, optional): If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
+            replace_model_card (bool, optional): If true, replace an existing model card in the hub with the automatically created model card
+            train_datasets (List[str], optional): Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
+            revision (str, optional): Branch to push the uploaded files to
+            create_pr (bool, optional): If True, create a pull request instead of pushing directly to the main branch
+
+        Returns:
+            str: The url of the commit of your model in the repository on the Hugging Face Hub.
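+
+        Example:
+            A minimal sketch; ``"username/my-model"`` is a placeholder repository id and assumes you are
+            already authenticated (e.g. via ``huggingface-cli login`` or the ``token`` argument)::
+
+                from sentence_transformers import SentenceTransformer
+
+                model = SentenceTransformer("all-mpnet-base-v2")
+                model.push_to_hub("username/my-model", private=True, commit_message="Initial upload")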
+        """
+        api = HfApi(token=token)
+        repo_url = api.create_repo(
+            repo_id=repo_id,
+            private=private,
+            repo_type=None,
+            exist_ok=exist_ok or create_pr,
+        )
+        repo_id = repo_url.repo_id  # Update the repo_id in case the old repo_id didn't contain a user or organization
+        self.model_card_data.set_model_id(repo_id)
+        if revision is not None:
+            api.create_branch(repo_id=repo_id, branch=revision, exist_ok=True)
+
+        if commit_message is None:
+            backend = self.get_backend()
+            if backend == "torch":
+                commit_message = "Add new SentenceTransformer model"
+            else:
+                commit_message = f"Add new SentenceTransformer model with an {backend} backend"
+
+        commit_description = ""
+        if create_pr:
+            commit_description = f"""\
+Hello!
+
+*This pull request has been automatically generated from the [`push_to_hub`](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html#sentence_transformers.SentenceTransformer.push_to_hub) method from the Sentence Transformers library.*
+
+## Full Model Architecture:
+```
+{self}
+```
+
+## Tip:
+Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
+```python
+from sentence_transformers import SentenceTransformer
+
+# TODO: Fill in the PR number
+pr_number = 2
+model = SentenceTransformer(
+    "{repo_id}",
+    revision=f"refs/pr/{{pr_number}}",
+    backend="{self.get_backend()}",
+)
+
+# Verify that everything works as expected
+embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
+print(embeddings.shape)
+
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+```
+"""
+
+        if local_model_path:
+            folder_url = api.upload_folder(
+                repo_id=repo_id,
+                folder_path=local_model_path,
+                commit_message=commit_message,
+                commit_description=commit_description,
+                revision=revision,
+                create_pr=create_pr,
+            )
+        else:
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                create_model_card = replace_model_card or not os.path.exists(os.path.join(tmp_dir, "README.md"))
+                self.save_pretrained(
+                    tmp_dir,
+                    model_name=repo_url.repo_id,
+                    create_model_card=create_model_card,
+                    train_datasets=train_datasets,
+                    safe_serialization=safe_serialization,
+                )
+                folder_url = api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=tmp_dir,
+                    commit_message=commit_message,
+                    commit_description=commit_description,
+                    revision=revision,
+                    create_pr=create_pr,
+                )
+
+        if create_pr:
+            return folder_url.pr_url
+        return folder_url.commit_url
+
+    def _text_length(self, text: list[int] | list[list[int]]) -> int:
+        """
+        Helper function to get the length of the input text. The text can be a dict of tokenized features,
+        a single tokenized text (a list of ints), or several texts (a list of lists of ints or strings),
+        in which case the summed length of the individual items is returned.
+        """
+
+        if isinstance(text, dict):  # {key: value} case
+            return len(next(iter(text.values())))
+        elif not hasattr(text, "__len__"):  # Object has no len() method
+            return 1
+        elif len(text) == 0 or isinstance(text[0], int):  # Empty string or list of ints
+            return len(text)
+        else:
+            return sum([len(t) for t in text])  # Sum of length of individual strings
+
+    def evaluate(self, evaluator: SentenceEvaluator, output_path: str | None = None) -> dict[str, float] | float:
+        """
+        Evaluate the model based on an evaluator
+
+        Args:
+            evaluator (SentenceEvaluator): The evaluator used to evaluate the model.
+            output_path (str, optional): The path where the evaluator can write the results. Defaults to None.
+
+        Returns:
+            The evaluation results.
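+
+        Example:
+            A minimal sketch with an embedding-similarity evaluator; the sentence pairs and scores are made up
+            purely for illustration::
+
+                from sentence_transformers import SentenceTransformer
+                from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
+
+                model = SentenceTransformer("all-mpnet-base-v2")
+                evaluator = EmbeddingSimilarityEvaluator(
+                    sentences1=["The weather is nice.", "He drove to work."],
+                    sentences2=["It is sunny outside.", "She took the bus."],
+                    scores=[0.9, 0.3],
+                )
+                results = model.evaluate(evaluator)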
+        """
+        if output_path is not None:
+            os.makedirs(output_path, exist_ok=True)
+        return evaluator(self, output_path)
+
+    def _load_auto_model(
+        self,
+        model_name_or_path: str,
+        token: bool | str | None,
+        cache_folder: str | None,
+        revision: str | None = None,
+        trust_remote_code: bool = False,
+        local_files_only: bool = False,
+        model_kwargs: dict[str, Any] | None = None,
+        tokenizer_kwargs: dict[str, Any] | None = None,
+        config_kwargs: dict[str, Any] | None = None,
+    ) -> list[nn.Module]:
+        """
+        Creates a simple Transformer + Mean Pooling model and returns the modules
+
+        Args:
+            model_name_or_path (str): The name or path of the pre-trained model.
+            token (Optional[Union[bool, str]]): The token to use for the model.
+            cache_folder (Optional[str]): The folder to cache the model.
+            revision (Optional[str], optional): The revision of the model. Defaults to None.
+            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
+            local_files_only (bool, optional): Whether to use only local files. Defaults to False.
+            model_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the model. Defaults to None.
+            tokenizer_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the tokenizer. Defaults to None.
+            config_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the config. Defaults to None.
+
+        Returns:
+            List[nn.Module]: A list containing the transformer model and the pooling model.
+        """
+        logger.warning(
+            f"No sentence-transformers model found with name {model_name_or_path}. Creating a new one with mean pooling."
+        )
+
+        shared_kwargs = {
+            "token": token,
+            "trust_remote_code": trust_remote_code,
+            "revision": revision,
+            "local_files_only": local_files_only,
+        }
+        model_kwargs = shared_kwargs if model_kwargs is None else {**shared_kwargs, **model_kwargs}
+        tokenizer_kwargs = shared_kwargs if tokenizer_kwargs is None else {**shared_kwargs, **tokenizer_kwargs}
+        config_kwargs = shared_kwargs if config_kwargs is None else {**shared_kwargs, **config_kwargs}
+
+        transformer_model = Transformer(
+            model_name_or_path,
+            cache_dir=cache_folder,
+            model_args=model_kwargs,
+            tokenizer_args=tokenizer_kwargs,
+            config_args=config_kwargs,
+            backend=self.backend,
+        )
+        pooling_model = Pooling(transformer_model.get_word_embedding_dimension(), "mean")
+        self.model_card_data.set_base_model(model_name_or_path, revision=revision)
+        return [transformer_model, pooling_model]
+
+    def _load_module_class_from_ref(
+        self,
+        class_ref: str,
+        model_name_or_path: str,
+        trust_remote_code: bool,
+        revision: str | None,
+        model_kwargs: dict[str, Any] | None,
+    ) -> nn.Module:
+        # If the class is from sentence_transformers, we can directly import it,
+        # otherwise, we try to import it dynamically, and if that fails, we fall back to the default import
+        if class_ref.startswith("sentence_transformers."):
+            return import_from_string(class_ref)
+
+        if trust_remote_code:
+            code_revision = model_kwargs.pop("code_revision", None) if model_kwargs else None
+            try:
+                return get_class_from_dynamic_module(
+                    class_ref,
+                    model_name_or_path,
+                    revision=revision,
+                    code_revision=code_revision,
+                )
+            except OSError:
+                # Ignore the error if the file does not exist, and fall back to the default import
+                pass
+
+        return import_from_string(class_ref)
+
+    def _load_sbert_model(
+        self,
+        model_name_or_path: str,
+        token: bool | str | None,
+        cache_folder: str | None,
+        revision: str | None = None,
+        trust_remote_code: bool = False,
+        local_files_only: bool = False,
+        model_kwargs: dict[str, Any] | None = None,
+        tokenizer_kwargs: dict[str, Any] | None = None,
+        config_kwargs: dict[str, Any] | None = None,
+    ) -> dict[str, nn.Module]:
+        """
+        Loads a full SentenceTransformer model using the modules.json file.
+
+        Args:
+            model_name_or_path (str): The name or path of the pre-trained model.
+            token (Optional[Union[bool, str]]): The token to use for the model.
+            cache_folder (Optional[str]): The folder to cache the model.
+            revision (Optional[str], optional): The revision of the model. Defaults to None.
+            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
+            local_files_only (bool, optional): Whether to use only local files. Defaults to False.
+            model_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the model. Defaults to None.
+            tokenizer_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the tokenizer. Defaults to None.
+            config_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the config. Defaults to None.
+
+        Returns:
+            OrderedDict[str, nn.Module]: An ordered dictionary containing the modules of the model.
+        """
+        # Check if the config_sentence_transformers.json file exists (exists since v2 of the framework)
+        config_sentence_transformers_json_path = load_file_path(
+            model_name_or_path,
+            "config_sentence_transformers.json",
+            token=token,
+            cache_folder=cache_folder,
+            revision=revision,
+            local_files_only=local_files_only,
+        )
+        if config_sentence_transformers_json_path is not None:
+            with open(config_sentence_transformers_json_path) as fIn:
+                self._model_config = json.load(fIn)
+
+            if (
+                "__version__" in self._model_config
+                and "sentence_transformers" in self._model_config["__version__"]
+                and self._model_config["__version__"]["sentence_transformers"] > __version__
+            ):
+                logger.warning(
+                    "You are trying to use a model that was created with version {}, but your installed version is {}. This might cause unexpected behavior or errors. In that case, try to update to the latest version.\n\n\n".format(
+                        self._model_config["__version__"]["sentence_transformers"], __version__
+                    )
+                )
+
+            # Set score functions & prompts if not already overridden by the __init__ calls
+            if self._similarity_fn_name is None:
+                self.similarity_fn_name = self._model_config.get("similarity_fn_name", None)
+            if not self.prompts:
+                self.prompts = self._model_config.get("prompts", {})
+            if not self.default_prompt_name:
+                self.default_prompt_name = self._model_config.get("default_prompt_name", None)
+
+        # Check if a readme exists
+        model_card_path = load_file_path(
+            model_name_or_path,
+            "README.md",
+            token=token,
+            cache_folder=cache_folder,
+            revision=revision,
+            local_files_only=local_files_only,
+        )
+        if model_card_path is not None:
+            try:
+                with open(model_card_path, encoding="utf8") as fIn:
+                    self._model_card_text = fIn.read()
+            except Exception:
+                pass
+
+        # Load the modules of sentence transformer
+        modules_json_path = load_file_path(
+            model_name_or_path,
+            "modules.json",
+            token=token,
+            cache_folder=cache_folder,
+            revision=revision,
+            local_files_only=local_files_only,
+        )
+        with open(modules_json_path) as fIn:
+            modules_config = json.load(fIn)
+
+        modules = OrderedDict()
+        module_kwargs = OrderedDict()
+        for module_config in modules_config:
+            class_ref = module_config["type"]
+            module_class = self._load_module_class_from_ref(
+                class_ref, model_name_or_path, trust_remote_code, revision, model_kwargs
+            )
+
+            # For Transformer, don't load the full directory, rely on `transformers` instead
+            # But, do load the config file first.
+            if module_config["path"] == "":
+                kwargs = {}
+                for config_name in [
+                    "sentence_bert_config.json",
+                    "sentence_roberta_config.json",
+                    "sentence_distilbert_config.json",
+                    "sentence_camembert_config.json",
+                    "sentence_albert_config.json",
+                    "sentence_xlm-roberta_config.json",
+                    "sentence_xlnet_config.json",
+                ]:
+                    config_path = load_file_path(
+                        model_name_or_path,
+                        config_name,
+                        token=token,
+                        cache_folder=cache_folder,
+                        revision=revision,
+                        local_files_only=local_files_only,
+                    )
+                    if config_path is not None:
+                        with open(config_path) as fIn:
+                            kwargs = json.load(fIn)
+                            # Don't allow configs to set trust_remote_code
+                            if "model_args" in kwargs and "trust_remote_code" in kwargs["model_args"]:
+                                kwargs["model_args"].pop("trust_remote_code")
+                            if "tokenizer_args" in kwargs and "trust_remote_code" in kwargs["tokenizer_args"]:
+                                kwargs["tokenizer_args"].pop("trust_remote_code")
+                            if "config_args" in kwargs and "trust_remote_code" in kwargs["config_args"]:
+                                kwargs["config_args"].pop("trust_remote_code")
+                        break
+
+                hub_kwargs = {
+                    "token": token,
+                    "trust_remote_code": trust_remote_code,
+                    "revision": revision,
+                    "local_files_only": local_files_only,
+                }
+                # 3rd priority: config file
+                if "model_args" not in kwargs:
+                    kwargs["model_args"] = {}
+                if "tokenizer_args" not in kwargs:
+                    kwargs["tokenizer_args"] = {}
+                if "config_args" not in kwargs:
+                    kwargs["config_args"] = {}
+
+                # 2nd priority: hub_kwargs
+                kwargs["model_args"].update(hub_kwargs)
+                kwargs["tokenizer_args"].update(hub_kwargs)
+                kwargs["config_args"].update(hub_kwargs)
+
+                # 1st priority: kwargs passed to SentenceTransformer
+                if model_kwargs:
+                    kwargs["model_args"].update(model_kwargs)
+                if tokenizer_kwargs:
+                    kwargs["tokenizer_args"].update(tokenizer_kwargs)
+                if config_kwargs:
+                    kwargs["config_args"].update(config_kwargs)
+
+                # Try to initialize the module with a lot of kwargs, but only if the module supports them
+                # Otherwise we fall back to the load method
+                try:
+                    module = module_class(model_name_or_path, cache_dir=cache_folder, backend=self.backend, **kwargs)
+                except TypeError:
+                    module = module_class.load(model_name_or_path)
+            else:
+                # Normalize does not require any files to be loaded
+                if module_class == Normalize:
+                    module_path = None
+                else:
+                    module_path = load_dir_path(
+                        model_name_or_path,
+                        module_config["path"],
+                        token=token,
+                        cache_folder=cache_folder,
+                        revision=revision,
+                        local_files_only=local_files_only,
+                    )
+                module = module_class.load(module_path)
+
+            modules[module_config["name"]] = module
+            module_kwargs[module_config["name"]] = module_config.get("kwargs", [])
+
+        if revision is None:
+            path_parts = Path(modules_json_path)
+            if len(path_parts.parts) >= 2:
+                revision_path_part = Path(modules_json_path).parts[-2]
+                if len(revision_path_part) == 40:
+                    revision = revision_path_part
+        self.model_card_data.set_base_model(model_name_or_path, revision=revision)
+        return modules, module_kwargs
+
+    @staticmethod
+    def load(input_path) -> SentenceTransformer:
+        return SentenceTransformer(input_path)
+
+    @property
+    def device(self) -> device:
+        """
+        Get torch.device from module, assuming that the whole module has one device.
+        In case there are no PyTorch parameters, fall back to CPU.
+        """
+        if isinstance(self[0], Transformer):
+            return self[0].auto_model.device
+
+        try:
+            return next(self.parameters()).device
+        except StopIteration:
+            # For nn.DataParallel compatibility in PyTorch 1.5
+
+            def find_tensor_attributes(module: nn.Module) -> list[tuple[str, Tensor]]:
+                tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
+                return tuples
+
+            gen = self._named_members(get_members_fn=find_tensor_attributes)
+            try:
+                first_tuple = next(gen)
+                return first_tuple[1].device
+            except StopIteration:
+                return torch.device("cpu")
+
+    @property
+    def tokenizer(self) -> Any:
+        """
+        Property to get the tokenizer that is used by this model
+        """
+        return self._first_module().tokenizer
+
+    @tokenizer.setter
+    def tokenizer(self, value) -> None:
+        """
+        Property to set the tokenizer that should be used by this model
+        """
+        self._first_module().tokenizer = value
+
+    @property
+    def max_seq_length(self) -> int:
+        """
+        Returns the maximal input sequence length for the model. Longer inputs will be truncated.
+
+        Returns:
+            int: The maximal input sequence length.
+
+        Example:
+            ::
+
+                from sentence_transformers import SentenceTransformer
+
+                model = SentenceTransformer("all-mpnet-base-v2")
+                print(model.max_seq_length)
+                # => 384
+        """
+        return self._first_module().max_seq_length
+
+    @max_seq_length.setter
+    def max_seq_length(self, value) -> None:
+        """
+        Property to set the maximal input sequence length for the model. Longer inputs will be truncated.
+        """
+        self._first_module().max_seq_length = value
+
+    @property
+    def _target_device(self) -> torch.device:
+        logger.warning(
+            "`SentenceTransformer._target_device` has been deprecated, please use `SentenceTransformer.device` instead.",
+        )
+        return self.device
+
+    @_target_device.setter
+    def _target_device(self, device: int | str | torch.device | None = None) -> None:
+        self.to(device)
+
+    @property
+    def _no_split_modules(self) -> list[str]:
+        try:
+            return self._first_module()._no_split_modules
+        except AttributeError:
+            return []
+
+    @property
+    def _keys_to_ignore_on_save(self) -> list[str]:
+        try:
+            return self._first_module()._keys_to_ignore_on_save
+        except AttributeError:
+            return []
+
+    def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None) -> None:
+        # Propagate the gradient checkpointing to the transformer model
+        for module in self:
+            if isinstance(module, Transformer):
+                return module.auto_model.gradient_checkpointing_enable(gradient_checkpointing_kwargs)
+
+

Loads or creates a SentenceTransformer model that can be used to map sentences / text to embeddings.

+

Args

+
+
model_name_or_path : str, optional
+
If it is a filepath on disc, it loads the model from that path. If it is not a path, +it first tries to download a pre-trained SentenceTransformer model. If that fails, tries to construct a model +from the Hugging Face Hub with that name.
+
modules : Iterable[nn.Module], optional
+
A list of torch Modules that should be called sequentially, can be used to create custom +SentenceTransformer models from scratch.
+
device : str, optional
+
Device (like "cuda", "cpu", "mps", "npu") that should be used for computation. If None, checks if a GPU +can be used.
+
prompts : Dict[str, str], optional
+
A dictionary with prompts for the model. The key is the prompt name, the value is the prompt text. +The prompt text will be prepended before any text to encode. For example: +{"query": "query: ", "passage": "passage: "} or {"clustering": "Identify the main category based on the +titles in "}.
+
default_prompt_name : str, optional
+
The name of the prompt that should be used by default. If not set, +no prompt will be applied.
+
similarity_fn_name : str or SimilarityFunction, optional
+
The name of the similarity function to use. Valid options are "cosine", "dot", +"euclidean", and "manhattan". If not set, it is automatically set to "cosine" if similarity or +similarity_pairwise are called while model.similarity_fn_name is still None.
+
cache_folder : str, optional
+
Path to store models. Can also be set by the SENTENCE_TRANSFORMERS_HOME environment variable.
+
trust_remote_code : bool, optional
+
Whether or not to allow for custom models defined on the Hub in their own modeling files. +This option should only be set to True for repositories you trust and in which you have read the code, as it +will execute code present on the Hub on your local machine.
+
revision : str, optional
+
The specific model version to use. It can be a branch name, a tag name, or a commit id, +for a stored model on Hugging Face.
+
local_files_only : bool, optional
+
Whether or not to only look at local files (i.e., do not try to download the model).
+
token : bool or str, optional
+
Hugging Face authentication token to download private models.
+
use_auth_token : bool or str, optional
+
Deprecated argument. Please use token instead.
+
truncate_dim : int, optional
+
The dimension to truncate sentence embeddings to. None does no truncation. Truncation is +only applicable during inference when :meth:SentenceTransformer.encode() is called.
+
model_kwargs : Dict[str, Any], optional
+
+

Additional model configuration parameters to be passed to the Hugging Face Transformers model. +Particularly useful options are:

+
  • torch_dtype: Override the default torch.dtype and load the model under a specific dtype. The options are:
    1. torch.float16, torch.bfloat16 or torch.float: load in the specified dtype, ignoring the model's config.torch_dtype if one exists. If not specified, the model is loaded in torch.float (fp32).
    2. "auto": a torch_dtype entry in the model's config.json is used if present; otherwise the dtype of the first floating-point weight in the checkpoint is used. This loads the model in the dtype it was saved in at the end of training, which is not a reliable indicator of how it was trained, since a model can be trained in a half-precision dtype but saved in fp32.
  • attn_implementation: The attention implementation to use in the model (if relevant). Can be any of "eager" (manual implementation of the attention), "sdpa" (using F.scaled_dot_product_attention <https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html>), or "flash_attention_2" (using Dao-AILab/flash-attention <https://github.com/Dao-AILab/flash-attention>). By default, SDPA is used for torch>=2.1.1 if available; otherwise the manual "eager" implementation is the default.
  • provider: If backend is "onnx", this is the provider to use for inference, for example "CPUExecutionProvider", "CUDAExecutionProvider", etc. See <https://onnxruntime.ai/docs/execution-providers/> for all ONNX execution providers.
  • file_name: If backend is "onnx" or "openvino", this is the file name to load, useful for loading optimized or quantized ONNX or OpenVINO models.
  • export: If backend is "onnx" or "openvino", this is a boolean flag specifying whether the model should be exported to the backend. If not specified, the model is exported only if the model repository or directory does not already contain an exported model.

A minimal loading sketch that uses these options follows the constructor example below.

See the PreTrainedModel.from_pretrained +<https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained>_ +documentation for more details.

+
+
tokenizer_kwargs : Dict[str, Any], optional
+
Additional tokenizer configuration parameters to be passed to the Hugging Face Transformers tokenizer. +See the AutoTokenizer.from_pretrained +<https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained>_ +documentation for more details.
+
config_kwargs : Dict[str, Any], optional
+
Additional model configuration parameters to be passed to the Hugging Face Transformers config. +See the AutoConfig.from_pretrained +<https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoConfig.from_pretrained>_ +documentation for more details.
+
model_card_data : SentenceTransformerModelCardData, optional
+
A model card data object that contains information about the model. This is used to generate a model card when saving the model. If not set, a default model card data object is created.
+
backend : str
+
The backend to use for inference. Can be one of "torch" (default), "onnx", or "openvino". +See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for benchmarking information +on the different backends.
+
+

Example

+

::

+
from sentence_transformers import SentenceTransformer
+
+# Load a pre-trained SentenceTransformer model
+model = SentenceTransformer('all-mpnet-base-v2')
+
+# Encode some texts
+sentences = [
+    "The weather is lovely today.",
+    "It's so sunny outside!",
+    "He drove to the stadium.",
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# (3, 768)
+
+# Get the similarity scores between all sentences
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[1.0000, 0.6817, 0.0492],
+#         [0.6817, 1.0000, 0.0421],
+#         [0.0492, 0.0421, 1.0000]])
+
+
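The model_kwargs options listed above can be combined at construction time. The following is a minimal sketch, not part of the library documentation: it assumes a CUDA-capable machine and that the (illustrative) checkpoint supports the SDPA attention implementation.

import torch
from sentence_transformers import SentenceTransformer

# Hedged sketch: load in half precision with the SDPA attention implementation
model = SentenceTransformer(
    "all-mpnet-base-v2",                       # illustrative model name
    device="cuda",                             # assumes a GPU is available
    model_kwargs={
        "torch_dtype": torch.float16,          # override the default dtype
        "attn_implementation": "sdpa",         # scaled_dot_product_attention backend
    },
)
print(model.encode(["A quick smoke test."]).shape)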

Initialize internal Module state, shared by both nn.Module and ScriptModule.

+

Ancestors

+
    +
  • torch.nn.modules.container.Sequential
  • torch.nn.modules.module.Module
  • sentence_transformers.fit_mixin.FitMixin
  • sentence_transformers.peft_mixin.PeftAdapterMixin
+

Static methods

+
+
+def load(input_path) ‑> sentence_transformers.SentenceTransformer.SentenceTransformer +
+
+
+ +Expand source code + +
@staticmethod
+def load(input_path) -> SentenceTransformer:
+    return SentenceTransformer(input_path)
+
+
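load simply delegates to the constructor, so both calls below are equivalent; a minimal sketch with an illustrative path:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer.load("./my-model-dir")   # static helper
model = SentenceTransformer("./my-model-dir")        # equivalent constructor call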
+
+
+def stop_multi_process_pool(pool: "dict[Literal['input', 'output', 'processes'], Any]") ‑> None +
+
+
+ +Expand source code + +
@staticmethod
+def stop_multi_process_pool(pool: dict[Literal["input", "output", "processes"], Any]) -> None:
+    """
+    Stops all processes started with start_multi_process_pool.
+
+    Args:
+        pool (Dict[str, object]): A dictionary containing the input queue, output queue, and process list.
+
+    Returns:
+        None
+    """
+    for p in pool["processes"]:
+        p.terminate()
+
+    for p in pool["processes"]:
+        p.join()
+        p.close()
+
+    pool["input"].close()
+    pool["output"].close()
+
+

Stops all processes started with start_multi_process_pool.

+

Args

+
+
pool : Dict[str, object]
+
A dictionary containing the input queue, output queue, and process list.
+
+

Returns

+

None

+
+
+

Instance variables

+
+
prop device : device
+
+
+ +Expand source code + +
@property
+def device(self) -> device:
+    """
+    Get torch.device from module, assuming that the whole module has one device.
+    In case there are no PyTorch parameters, fall back to CPU.
+    """
+    if isinstance(self[0], Transformer):
+        return self[0].auto_model.device
+
+    try:
+        return next(self.parameters()).device
+    except StopIteration:
+        # For nn.DataParallel compatibility in PyTorch 1.5
+
+        def find_tensor_attributes(module: nn.Module) -> list[tuple[str, Tensor]]:
+            tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
+            return tuples
+
+        gen = self._named_members(get_members_fn=find_tensor_attributes)
+        try:
+            first_tuple = next(gen)
+            return first_tuple[1].device
+        except StopIteration:
+            return torch.device("cpu")
+
+

Get torch.device from module, assuming that the whole module has one device. +In case there are no PyTorch parameters, fall back to CPU.

+
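A small, hedged illustration of the property above (the model name is illustrative and the second step assumes a CUDA device is present):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")
print(model.device)      # e.g. device(type='cpu') right after loading on a CPU-only machine

model.to("cuda")         # moving the module changes what the property reports
print(model.device)      # device(type='cuda', index=0)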
+
prop max_seq_length : int
+
+
+ +Expand source code + +
@property
+def max_seq_length(self) -> int:
+    """
+    Returns the maximal input sequence length for the model. Longer inputs will be truncated.
+
+    Returns:
+        int: The maximal input sequence length.
+
+    Example:
+        ::
+
+            from sentence_transformers import SentenceTransformer
+
+            model = SentenceTransformer("all-mpnet-base-v2")
+            print(model.max_seq_length)
+            # => 384
+    """
+    return self._first_module().max_seq_length
+
+

Returns the maximal input sequence length for the model. Longer inputs will be truncated.

+

Returns

+
+
int
+
The maximal input sequence length.
+
+

Example

+

::

+
from sentence_transformers import SentenceTransformer
+
+model = SentenceTransformer("all-mpnet-base-v2")
+print(model.max_seq_length)
+# => 384
+
+
+
prop similarity : Callable[[Tensor | ndarray, Tensor | ndarray], Tensor]
+
+
+ +Expand source code + +
@property
+def similarity(self) -> Callable[[Tensor | ndarray, Tensor | ndarray], Tensor]:
+    """
+    Compute the similarity between two collections of embeddings. The output will be a matrix with the similarity
+    scores between all embeddings from the first parameter and all embeddings from the second parameter. This
+    differs from `similarity_pairwise` which computes the similarity between each pair of embeddings.
+
+    Args:
+        embeddings1 (Union[Tensor, ndarray]): [num_embeddings_1, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+        embeddings2 (Union[Tensor, ndarray]): [num_embeddings_2, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
+    Returns:
+        Tensor: A [num_embeddings_1, num_embeddings_2]-shaped torch tensor with similarity scores.
+
+    Example:
+        ::
+
+            >>> model = SentenceTransformer("all-mpnet-base-v2")
+            >>> sentences = [
+            ...     "The weather is so nice!",
+            ...     "It's so sunny outside.",
+            ...     "He's driving to the movie theater.",
+            ...     "She's going to the cinema.",
+            ... ]
+            >>> embeddings = model.encode(sentences, normalize_embeddings=True)
+            >>> model.similarity(embeddings, embeddings)
+            tensor([[1.0000, 0.7235, 0.0290, 0.1309],
+                    [0.7235, 1.0000, 0.0613, 0.1129],
+                    [0.0290, 0.0613, 1.0000, 0.5027],
+                    [0.1309, 0.1129, 0.5027, 1.0000]])
+            >>> model.similarity_fn_name
+            "cosine"
+            >>> model.similarity_fn_name = "euclidean"
+            >>> model.similarity(embeddings, embeddings)
+            tensor([[-0.0000, -0.7437, -1.3935, -1.3184],
+                    [-0.7437, -0.0000, -1.3702, -1.3320],
+                    [-1.3935, -1.3702, -0.0000, -0.9973],
+                    [-1.3184, -1.3320, -0.9973, -0.0000]])
+    """
+    if self.similarity_fn_name is None:
+        self.similarity_fn_name = SimilarityFunction.COSINE
+    return self._similarity
+
+

Compute the similarity between two collections of embeddings. The output will be a matrix with the similarity +scores between all embeddings from the first parameter and all embeddings from the second parameter. This +differs from similarity_pairwise which computes the similarity between each pair of embeddings.

+

Args

+
+
embeddings1 : Union[Tensor, ndarray]
+
[num_embeddings_1, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
embeddings2 : Union[Tensor, ndarray]
+
[num_embeddings_2, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
+

Returns

+
+
Tensor
+
A [num_embeddings_1, num_embeddings_2]-shaped torch tensor with similarity scores.
+
+

Example

+

::

+
>>> model = SentenceTransformer("all-mpnet-base-v2")
+>>> sentences = [
+...     "The weather is so nice!",
+...     "It's so sunny outside.",
+...     "He's driving to the movie theater.",
+...     "She's going to the cinema.",
+... ]
+>>> embeddings = model.encode(sentences, normalize_embeddings=True)
+>>> model.similarity(embeddings, embeddings)
+tensor([[1.0000, 0.7235, 0.0290, 0.1309],
+        [0.7235, 1.0000, 0.0613, 0.1129],
+        [0.0290, 0.0613, 1.0000, 0.5027],
+        [0.1309, 0.1129, 0.5027, 1.0000]])
+>>> model.similarity_fn_name
+"cosine"
+>>> model.similarity_fn_name = "euclidean"
+>>> model.similarity(embeddings, embeddings)
+tensor([[-0.0000, -0.7437, -1.3935, -1.3184],
+        [-0.7437, -0.0000, -1.3702, -1.3320],
+        [-1.3935, -1.3702, -0.0000, -0.9973],
+        [-1.3184, -1.3320, -0.9973, -0.0000]])
+
+
+
prop similarity_fn_name : Literal['cosine', 'dot', 'euclidean', 'manhattan']
+
+
+ +Expand source code + +
@property
+def similarity_fn_name(self) -> Literal["cosine", "dot", "euclidean", "manhattan"]:
+    """Return the name of the similarity function used by :meth:`SentenceTransformer.similarity` and :meth:`SentenceTransformer.similarity_pairwise`.
+
+    Returns:
+        Optional[str]: The name of the similarity function. Can be None if not set, in which case it will
+            default to "cosine" when first called.
+
+    Example:
+        >>> model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
+        >>> model.similarity_fn_name
+        'dot'
+    """
+    if self._similarity_fn_name is None:
+        self.similarity_fn_name = SimilarityFunction.COSINE
+    return self._similarity_fn_name
+
+

Return the name of the similarity function used by :meth:SentenceTransformer.similarity and :meth:SentenceTransformer.similarity_pairwise.

+

Returns

+
+
Optional[str]
+
The name of the similarity function. Can be None if not set, in which case it will +default to "cosine" when first called.
+
+

Example

+
>>> model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
+>>> model.similarity_fn_name
+'dot'
+
+
+
prop similarity_pairwise : Callable[[Tensor | ndarray, Tensor | ndarray], Tensor]
+
+
+ +Expand source code + +
@property
+def similarity_pairwise(self) -> Callable[[Tensor | ndarray, Tensor | ndarray], Tensor]:
+    """
+    Compute the similarity between two collections of embeddings. The output will be a vector with the similarity
+    scores between each pair of embeddings.
+
+    Args:
+        embeddings1 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+        embeddings2 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
+    Returns:
+        Tensor: A [num_embeddings]-shaped torch tensor with pairwise similarity scores.
+
+    Example:
+        ::
+
+            >>> model = SentenceTransformer("all-mpnet-base-v2")
+            >>> sentences = [
+            ...     "The weather is so nice!",
+            ...     "It's so sunny outside.",
+            ...     "He's driving to the movie theater.",
+            ...     "She's going to the cinema.",
+            ... ]
+            >>> embeddings = model.encode(sentences, normalize_embeddings=True)
+            >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
+            tensor([0.7235, 0.5027])
+            >>> model.similarity_fn_name
+            "cosine"
+            >>> model.similarity_fn_name = "euclidean"
+            >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
+            tensor([-0.7437, -0.9973])
+    """
+    if self.similarity_fn_name is None:
+        self.similarity_fn_name = SimilarityFunction.COSINE
+    return self._similarity_pairwise
+
+

Compute the similarity between two collections of embeddings. The output will be a vector with the similarity +scores between each pair of embeddings.

+

Args

+
+
embeddings1 : Union[Tensor, ndarray]
+
[num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
embeddings2 : Union[Tensor, ndarray]
+
[num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
+
+

Returns

+
+
Tensor
+
A [num_embeddings]-shaped torch tensor with pairwise similarity scores.
+
+

Example

+

::

+
>>> model = SentenceTransformer("all-mpnet-base-v2")
+>>> sentences = [
+...     "The weather is so nice!",
+...     "It's so sunny outside.",
+...     "He's driving to the movie theater.",
+...     "She's going to the cinema.",
+... ]
+>>> embeddings = model.encode(sentences, normalize_embeddings=True)
+>>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
+tensor([0.7235, 0.5027])
+>>> model.similarity_fn_name
+"cosine"
+>>> model.similarity_fn_name = "euclidean"
+>>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
+tensor([-0.7437, -0.9973])
+
+
+
prop tokenizer : Any
+
+
+ +Expand source code + +
@property
+def tokenizer(self) -> Any:
+    """
+    Property to get the tokenizer that is used by this model
+    """
+    return self._first_module().tokenizer
+
+

Property to get the tokenizer that is used by this model

+
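A brief, hedged sketch of using the property directly; the model name is illustrative and the exact output keys depend on the underlying Hugging Face tokenizer:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")
encoded = model.tokenizer("The weather is lovely today.")
print(encoded["input_ids"][:8])   # raw wordpiece ids from the underlying tokenizer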
+
+

Methods

+
+
+def encode(self,
sentences: str | list[str],
prompt_name: str | None = None,
prompt: str | None = None,
batch_size: int = 32,
show_progress_bar: bool | None = None,
output_value: "Literal['sentence_embedding', 'token_embeddings'] | None" = 'sentence_embedding',
precision: "Literal['float32', 'int8', 'uint8', 'binary', 'ubinary']" = 'float32',
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
device: str = None,
normalize_embeddings: bool = False,
**kwargs) ‑> list[torch.Tensor] | numpy.ndarray | torch.Tensor
+
+
+
+ +Expand source code + +
def encode(
+    self,
+    sentences: str | list[str],
+    prompt_name: str | None = None,
+    prompt: str | None = None,
+    batch_size: int = 32,
+    show_progress_bar: bool | None = None,
+    output_value: Literal["sentence_embedding", "token_embeddings"] | None = "sentence_embedding",
+    precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
+    convert_to_numpy: bool = True,
+    convert_to_tensor: bool = False,
+    device: str = None,
+    normalize_embeddings: bool = False,
+    **kwargs,
+) -> list[Tensor] | np.ndarray | Tensor:
+    """
+    Computes sentence embeddings.
+
+    Args:
+        sentences (Union[str, List[str]]): The sentences to embed.
+        prompt_name (Optional[str], optional): The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary,
+            which is either set in the constructor or loaded from the model configuration. For example if
+            ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
+            is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
+            is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
+        prompt (Optional[str], optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
+            sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
+            because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored. Defaults to None.
+        batch_size (int, optional): The batch size used for the computation. Defaults to 32.
+        show_progress_bar (bool, optional): Whether to output a progress bar when encoding sentences. Defaults to None.
+        output_value (Optional[Literal["sentence_embedding", "token_embeddings"]], optional): The type of embeddings to return:
+            "sentence_embedding" to get sentence embeddings, "token_embeddings" to get wordpiece token embeddings, and `None`,
+            to get all output values. Defaults to "sentence_embedding".
+        precision (Literal["float32", "int8", "uint8", "binary", "ubinary"], optional): The precision to use for the embeddings.
+            Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions are quantized embeddings.
+            Quantized embeddings are smaller in size and faster to compute, but may have a lower accuracy. They are useful for
+            reducing the size of the embeddings of a corpus for semantic search, among other tasks. Defaults to "float32".
+        convert_to_numpy (bool, optional): Whether the output should be a list of numpy vectors. If False, it is a list of PyTorch tensors.
+            Defaults to True.
+        convert_to_tensor (bool, optional): Whether the output should be one large tensor. Overwrites `convert_to_numpy`.
+            Defaults to False.
+        device (str, optional): Which :class:`torch.device` to use for the computation. Defaults to None.
+        normalize_embeddings (bool, optional): Whether to normalize returned vectors to have length 1. In that case,
+            the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.
+
+    Returns:
+        Union[List[Tensor], ndarray, Tensor]: By default, a 2d numpy array with shape [num_inputs, output_dimension] is returned.
+        If only one string input is provided, then the output is a 1d array with shape [output_dimension]. If ``convert_to_tensor``,
+        a torch Tensor is returned instead. If ``self.truncate_dim <= output_dimension`` then output_dimension is ``self.truncate_dim``.
+
+    Example:
+        ::
+
+            from sentence_transformers import SentenceTransformer
+
+            # Load a pre-trained SentenceTransformer model
+            model = SentenceTransformer('all-mpnet-base-v2')
+
+            # Encode some texts
+            sentences = [
+                "The weather is lovely today.",
+                "It's so sunny outside!",
+                "He drove to the stadium.",
+            ]
+            embeddings = model.encode(sentences)
+            print(embeddings.shape)
+            # (3, 768)
+    """
+    if self.device.type == "hpu" and not self.is_hpu_graph_enabled:
+        import habana_frameworks.torch as ht
+
+        ht.hpu.wrap_in_hpu_graph(self, disable_tensor_cache=True)
+        self.is_hpu_graph_enabled = True
+
+    self.eval()
+    if show_progress_bar is None:
+        show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG)
+
+    if convert_to_tensor:
+        convert_to_numpy = False
+
+    if output_value != "sentence_embedding":
+        convert_to_tensor = False
+        convert_to_numpy = False
+
+    input_was_string = False
+    if isinstance(sentences, str) or not hasattr(
+        sentences, "__len__"
+    ):  # Cast an individual sentence to a list with length 1
+        sentences = [sentences]
+        input_was_string = True
+
+    if prompt is None:
+        if prompt_name is not None:
+            try:
+                prompt = self.prompts[prompt_name]
+            except KeyError:
+                raise ValueError(
+                    f"Prompt name '{prompt_name}' not found in the configured prompts dictionary with keys {list(self.prompts.keys())!r}."
+                )
+        elif self.default_prompt_name is not None:
+            prompt = self.prompts.get(self.default_prompt_name, None)
+    else:
+        if prompt_name is not None:
+            logger.warning(
+                "Encode with either a `prompt`, a `prompt_name`, or neither, but not both. "
+                "Ignoring the `prompt_name` in favor of `prompt`."
+            )
+
+    extra_features = {}
+    if prompt is not None:
+        sentences = [prompt + sentence for sentence in sentences]
+
+        # Some models (e.g. INSTRUCTOR, GRIT) require removing the prompt before pooling
+        # Tracking the prompt length allows us to remove the prompt during pooling
+        tokenized_prompt = self.tokenize([prompt])
+        if "input_ids" in tokenized_prompt:
+            extra_features["prompt_length"] = tokenized_prompt["input_ids"].shape[-1] - 1
+
+    if device is None:
+        device = self.device
+
+    self.to(device)
+
+    all_embeddings = []
+    length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences])
+    sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
+
+    for start_index in trange(0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar):
+        sentences_batch = sentences_sorted[start_index : start_index + batch_size]
+        features = self.tokenize(sentences_batch)
+        if self.device.type == "hpu":
+            if "input_ids" in features:
+                curr_tokenize_len = features["input_ids"].shape
+                additional_pad_len = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) - curr_tokenize_len[1]
+                features["input_ids"] = torch.cat(
+                    (
+                        features["input_ids"],
+                        torch.ones((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                    ),
+                    -1,
+                )
+                features["attention_mask"] = torch.cat(
+                    (
+                        features["attention_mask"],
+                        torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                    ),
+                    -1,
+                )
+                if "token_type_ids" in features:
+                    features["token_type_ids"] = torch.cat(
+                        (
+                            features["token_type_ids"],
+                            torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                        ),
+                        -1,
+                    )
+
+        features = batch_to_device(features, device)
+        features.update(extra_features)
+
+        with torch.no_grad():
+            out_features = self.forward(features, **kwargs)
+            if self.device.type == "hpu":
+                out_features = copy.deepcopy(out_features)
+
+            out_features["sentence_embedding"] = truncate_embeddings(
+                out_features["sentence_embedding"], self.truncate_dim
+            )
+
+            if output_value == "token_embeddings":
+                embeddings = []
+                for token_emb, attention in zip(out_features[output_value], out_features["attention_mask"]):
+                    last_mask_id = len(attention) - 1
+                    while last_mask_id > 0 and attention[last_mask_id].item() == 0:
+                        last_mask_id -= 1
+
+                    embeddings.append(token_emb[0 : last_mask_id + 1])
+            elif output_value is None:  # Return all outputs
+                embeddings = []
+                for sent_idx in range(len(out_features["sentence_embedding"])):
+                    row = {name: out_features[name][sent_idx] for name in out_features}
+                    embeddings.append(row)
+            else:  # Sentence embeddings
+                embeddings = out_features[output_value]
+                embeddings = embeddings.detach()
+                if normalize_embeddings:
+                    embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+
+                # fixes for #522 and #487 to avoid oom problems on gpu with large datasets
+                if convert_to_numpy:
+                    embeddings = embeddings.cpu()
+
+            all_embeddings.extend(embeddings)
+
+    all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
+
+    if precision and precision != "float32":
+        all_embeddings = quantize_embeddings(all_embeddings, precision=precision)
+
+    if convert_to_tensor:
+        if len(all_embeddings):
+            if isinstance(all_embeddings, np.ndarray):
+                all_embeddings = torch.from_numpy(all_embeddings)
+            else:
+                all_embeddings = torch.stack(all_embeddings)
+        else:
+            all_embeddings = torch.Tensor()
+    elif convert_to_numpy:
+        if not isinstance(all_embeddings, np.ndarray):
+            if all_embeddings and all_embeddings[0].dtype == torch.bfloat16:
+                all_embeddings = np.asarray([emb.float().numpy() for emb in all_embeddings])
+            else:
+                all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
+    elif isinstance(all_embeddings, np.ndarray):
+        all_embeddings = [torch.from_numpy(embedding) for embedding in all_embeddings]
+
+    if input_was_string:
+        all_embeddings = all_embeddings[0]
+
+    return all_embeddings
+
+

Computes sentence embeddings.

+

Args

+
+
sentences : Union[str, List[str]]
+
The sentences to embed.
+
prompt_name : Optional[str], optional
+
The name of the prompt to use for encoding. Must be a key in the prompts dictionary, +which is either set in the constructor or loaded from the model configuration. For example if +prompt_name is "query" and the prompts is {"query": "query: ", …}, then the sentence "What +is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence +is appended to the prompt. If prompt is also set, this argument is ignored. Defaults to None.
+
prompt : Optional[str], optional
+
The prompt to use for encoding. For example, if the prompt is "query: ", then the +sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?" +because the sentence is appended to the prompt. If prompt is set, prompt_name is ignored. Defaults to None.
+
batch_size : int, optional
+
The batch size used for the computation. Defaults to 32.
+
show_progress_bar : bool, optional
+
Whether to output a progress bar when encoding sentences. Defaults to None.
+
output_value : Optional[Literal["sentence_embedding", "token_embeddings"]], optional
+
The type of embeddings to return: "sentence_embedding" to get sentence embeddings, "token_embeddings" to get wordpiece token embeddings, and None to get all output values. Defaults to "sentence_embedding".
+
precision : Literal["float32", "int8", "uint8", "binary", "ubinary"], optional
+
The precision to use for the embeddings. Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions are quantized embeddings. Quantized embeddings are smaller in size and faster to compute, but may have lower accuracy. They are useful for reducing the size of the embeddings of a corpus for semantic search, among other tasks. Defaults to "float32". A short sketch exercising this argument follows the example below.
+
convert_to_numpy : bool, optional
+
Whether the output should be a list of numpy vectors. If False, it is a list of PyTorch tensors. +Defaults to True.
+
convert_to_tensor : bool, optional
+
Whether the output should be one large tensor. Overwrites convert_to_numpy. +Defaults to False.
+
device : str, optional
+
Which :class:torch.device to use for the computation. Defaults to None.
+
normalize_embeddings : bool, optional
+
Whether to normalize returned vectors to have length 1. In that case, +the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.
+
+

Returns

+
+
Union[List[Tensor], ndarray, Tensor]
+
By default, a 2d numpy array with shape [num_inputs, output_dimension] is returned.
+
+

If only one string input is provided, then the output is a 1d array with shape [output_dimension]. If convert_to_tensor, +a torch Tensor is returned instead. If self.truncate_dim <= output_dimension then output_dimension is self.truncate_dim.

+

Example

+

::

+
from sentence_transformers import SentenceTransformer
+
+# Load a pre-trained SentenceTransformer model
+model = SentenceTransformer('all-mpnet-base-v2')
+
+# Encode some texts
+sentences = [
+    "The weather is lovely today.",
+    "It's so sunny outside!",
+    "He drove to the stadium.",
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# (3, 768)
+
+
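To complement the example above, the precision and normalize_embeddings arguments can be exercised as sketched below; the model name is illustrative and int8 quantization is calibrated from the batch itself, so exact values will vary:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")
sentences = ["The weather is lovely today.", "He drove to the stadium."]

# Unit-length float32 embeddings: dot product then equals cosine similarity
normalized = model.encode(sentences, normalize_embeddings=True)

# Quantized int8 embeddings: smaller and faster, potentially less accurate
quantized = model.encode(sentences, precision="int8")

print(normalized.dtype, quantized.dtype)   # float32 int8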
+
+def encode_multi_process(self,
sentences: list[str],
pool: "dict[Literal['input', 'output', 'processes'], Any]",
prompt_name: str | None = None,
prompt: str | None = None,
batch_size: int = 32,
chunk_size: int = None,
show_progress_bar: bool | None = None,
precision: "Literal['float32', 'int8', 'uint8', 'binary', 'ubinary']" = 'float32',
normalize_embeddings: bool = False) ‑> numpy.ndarray
+
+
+
+ +Expand source code + +
def encode_multi_process(
+    self,
+    sentences: list[str],
+    pool: dict[Literal["input", "output", "processes"], Any],
+    prompt_name: str | None = None,
+    prompt: str | None = None,
+    batch_size: int = 32,
+    chunk_size: int = None,
+    show_progress_bar: bool | None = None,
+    precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
+    normalize_embeddings: bool = False,
+) -> np.ndarray:
+    """
+    Encodes a list of sentences using multiple processes and GPUs via
+    :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>`.
+    The sentences are chunked into smaller packages and sent to individual processes, which encode them on different
+    GPUs or CPUs. This method is only suitable for encoding large sets of sentences.
+
+    Args:
+        sentences (List[str]): List of sentences to encode.
+        pool (Dict[Literal["input", "output", "processes"], Any]): A pool of workers started with
+            :meth:`SentenceTransformer.start_multi_process_pool <sentence_transformers.SentenceTransformer.start_multi_process_pool>`.
+        prompt_name (Optional[str], optional): The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary,
+            which is either set in the constructor or loaded from the model configuration. For example if
+            ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
+            is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
+            is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
+        prompt (Optional[str], optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
+            sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
+            because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored. Defaults to None.
+        batch_size (int): Encode sentences with batch size. (default: 32)
+        chunk_size (int): Sentences are chunked and sent to the individual processes. If None, it determines a
+            sensible size. Defaults to None.
+        show_progress_bar (bool, optional): Whether to output a progress bar when encoding sentences. Defaults to None.
+        precision (Literal["float32", "int8", "uint8", "binary", "ubinary"]): The precision to use for the
+            embeddings. Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions
+            are quantized embeddings. Quantized embeddings are smaller in size and faster to compute, but may
+            have lower accuracy. They are useful for reducing the size of the embeddings of a corpus for
+            semantic search, among other tasks. Defaults to "float32".
+        normalize_embeddings (bool): Whether to normalize returned vectors to have length 1. In that case,
+            the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.
+
+    Returns:
+        np.ndarray: A 2D numpy array with shape [num_inputs, output_dimension].
+
+    Example:
+        ::
+
+            from sentence_transformers import SentenceTransformer
+
+            def main():
+                model = SentenceTransformer("all-mpnet-base-v2")
+                sentences = ["The weather is so nice!", "It's so sunny outside.", "He's driving to the movie theater.", "She's going to the cinema."] * 1000
+
+                pool = model.start_multi_process_pool()
+                embeddings = model.encode_multi_process(sentences, pool)
+                model.stop_multi_process_pool(pool)
+
+                print(embeddings.shape)
+                # => (4000, 768)
+
+            if __name__ == "__main__":
+                main()
+    """
+
+    if chunk_size is None:
+        chunk_size = min(math.ceil(len(sentences) / len(pool["processes"]) / 10), 5000)
+
+    if show_progress_bar is None:
+        show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG)
+
+    logger.debug(f"Chunk data into {math.ceil(len(sentences) / chunk_size)} packages of size {chunk_size}")
+
+    input_queue = pool["input"]
+    last_chunk_id = 0
+    chunk = []
+
+    for sentence in sentences:
+        chunk.append(sentence)
+        if len(chunk) >= chunk_size:
+            input_queue.put(
+                [last_chunk_id, batch_size, chunk, prompt_name, prompt, precision, normalize_embeddings]
+            )
+            last_chunk_id += 1
+            chunk = []
+
+    if len(chunk) > 0:
+        input_queue.put([last_chunk_id, batch_size, chunk, prompt_name, prompt, precision, normalize_embeddings])
+        last_chunk_id += 1
+
+    output_queue = pool["output"]
+    results_list = sorted(
+        [output_queue.get() for _ in trange(last_chunk_id, desc="Chunks", disable=not show_progress_bar)],
+        key=lambda x: x[0],
+    )
+    embeddings = np.concatenate([result[1] for result in results_list])
+    return embeddings
+
+

Encodes a list of sentences using multiple processes and GPUs via +:meth:SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>. +The sentences are chunked into smaller packages and sent to individual processes, which encode them on different +GPUs or CPUs. This method is only suitable for encoding large sets of sentences.

+

Args

+
+
sentences : List[str]
+
List of sentences to encode.
+
pool : Dict[Literal["input", "output", "processes"], Any]
+
A pool of workers started with :meth:SentenceTransformer.start_multi_process_pool <sentence_transformers.SentenceTransformer.start_multi_process_pool>.
+
prompt_name : Optional[str], optional
+
The name of the prompt to use for encoding. Must be a key in the prompts dictionary, +which is either set in the constructor or loaded from the model configuration. For example if +prompt_name is "query" and the prompts is {"query": "query: ", …}, then the sentence "What +is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence +is appended to the prompt. If prompt is also set, this argument is ignored. Defaults to None.
+
prompt : Optional[str], optional
+
The prompt to use for encoding. For example, if the prompt is "query: ", then the +sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?" +because the sentence is appended to the prompt. If prompt is set, prompt_name is ignored. Defaults to None.
+
batch_size : int
+
Encode sentences with batch size. (default: 32)
+
chunk_size : int
+
Sentences are chunked and sent to the individual processes. If None, it determines a +sensible size. Defaults to None.
+
show_progress_bar : bool, optional
+
Whether to output a progress bar when encoding sentences. Defaults to None.
+
precision : Literal["float32", "int8", "uint8", "binary", "ubinary"]
+
The precision to use for the embeddings. Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions are quantized embeddings. Quantized embeddings are smaller in size and faster to compute, but may have lower accuracy. They are useful for reducing the size of the embeddings of a corpus for semantic search, among other tasks. Defaults to "float32".
+
normalize_embeddings : bool
+
Whether to normalize returned vectors to have length 1. In that case, +the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.
+
+

Returns

+
+
np.ndarray
+
A 2D numpy array with shape [num_inputs, output_dimension].
+
+

Example

+

::

+
from sentence_transformers import SentenceTransformer
+
+def main():
+    model = SentenceTransformer("all-mpnet-base-v2")
+    sentences = ["The weather is so nice!", "It's so sunny outside.", "He's driving to the movie theater.", "She's going to the cinema."] * 1000
+
+    pool = model.start_multi_process_pool()
+    embeddings = model.encode_multi_process(sentences, pool)
+    model.stop_multi_process_pool(pool)
+
+    print(embeddings.shape)
+    # => (4000, 768)
+
+if __name__ == "__main__":
+    main()
+
+
+
+def evaluate(self, evaluator: SentenceEvaluator, output_path: str = None) ‑> dict[str, float] | float +
+
+
+ +Expand source code + +
def evaluate(self, evaluator: SentenceEvaluator, output_path: str = None) -> dict[str, float] | float:
+    """
+    Evaluate the model based on an evaluator
+
+    Args:
+        evaluator (SentenceEvaluator): The evaluator used to evaluate the model.
+        output_path (str, optional): The path where the evaluator can write the results. Defaults to None.
+
+    Returns:
+        The evaluation results.
+    """
+    if output_path is not None:
+        os.makedirs(output_path, exist_ok=True)
+    return evaluator(self, output_path)
+
+

Evaluate the model based on an evaluator

+

Args

+
+
evaluator : SentenceEvaluator
+
The evaluator used to evaluate the model.
+
output_path : str, optional
+
The path where the evaluator can write the results. Defaults to None.
+
+

Returns

+

The evaluation results.

+
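A hedged sketch of calling evaluate with one of the library's built-in evaluators (EmbeddingSimilarityEvaluator); the sentence pairs and gold scores are made up for illustration:

from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

model = SentenceTransformer("all-mpnet-base-v2")

evaluator = EmbeddingSimilarityEvaluator(
    sentences1=["The weather is lovely today.", "He drove to the stadium."],
    sentences2=["It's so sunny outside!", "She walked to the cinema."],
    scores=[0.9, 0.3],                     # gold similarities in [0, 1]
)

results = model.evaluate(evaluator, output_path="eval_results")
print(results)                             # correlation metrics, also written to eval_results/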
+
+def forward(self,
input: dict[str, Tensor],
**kwargs) ‑> dict[str, torch.Tensor]
+
+
+
+ +Expand source code + +
def forward(self, input: dict[str, Tensor], **kwargs) -> dict[str, Tensor]:
+    if self.module_kwargs is None:
+        return super().forward(input)
+
+    for module_name, module in self.named_children():
+        module_kwarg_keys = self.module_kwargs.get(module_name, [])
+        module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
+        input = module(input, **module_kwargs)
+    return input
+
+

Define the computation performed at every call.

+

Should be overridden by all subclasses.

+
+

Note

+

Although the recipe for forward pass needs to be defined within +this function, one should call the :class:Module instance afterwards +instead of this since the former takes care of running the +registered hooks while the latter silently ignores them.

+
+
+
+def get_backend(self) ‑> Literal['torch', 'onnx', 'openvino'] +
+
+
+ +Expand source code + +
def get_backend(self) -> Literal["torch", "onnx", "openvino"]:
+    """Return the backend used for inference, which can be one of "torch", "onnx", or "openvino".
+
+    Returns:
+        str: The backend used for inference.
+    """
+    return self.backend
+
+

Return the backend used for inference, which can be one of "torch", "onnx", or "openvino".

+

Returns

+
+
str
+
The backend used for inference.
+
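A small sketch, assuming the optional ONNX extras are installed (the model name is illustrative):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")
print(model.get_backend())         # "torch" (the default backend)

onnx_model = SentenceTransformer("all-mpnet-base-v2", backend="onnx")
print(onnx_model.get_backend())    # "onnx"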
+
+
+def get_max_seq_length(self) ‑> int | None +
+
+
+ +Expand source code + +
def get_max_seq_length(self) -> int | None:
+    """
+    Returns the maximal sequence length that the model accepts. Longer inputs will be truncated.
+
+    Returns:
+        Optional[int]: The maximal sequence length that the model accepts, or None if it is not defined.
+    """
+    if hasattr(self._first_module(), "max_seq_length"):
+        return self._first_module().max_seq_length
+
+    return None
+
+

Returns the maximal sequence length that the model accepts. Longer inputs will be truncated.

+

Returns

+
+
Optional[int]
+
The maximal sequence length that the model accepts, or None if it is not defined.
+
+
+
+def get_sentence_embedding_dimension(self) ‑> int | None +
+
+
+ +Expand source code + +
def get_sentence_embedding_dimension(self) -> int | None:
+    """
+    Returns the number of dimensions in the output of :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>`.
+
+    Returns:
+        Optional[int]: The number of dimensions in the output of `encode`. If it's not known, it's `None`.
+    """
+    output_dim = None
+    for mod in reversed(self._modules.values()):
+        sent_embedding_dim_method = getattr(mod, "get_sentence_embedding_dimension", None)
+        if callable(sent_embedding_dim_method):
+            output_dim = sent_embedding_dim_method()
+            break
+    if self.truncate_dim is not None:
+        # The user requested truncation. If they set it to a dim greater than output_dim,
+        # no truncation will actually happen. So return output_dim instead of self.truncate_dim
+        return min(output_dim or np.inf, self.truncate_dim)
+    return output_dim
+
+

Returns the number of dimensions in the output of :meth:SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>.

+

Returns

+
+
Optional[int]
+
The number of dimensions in the output of encode. If it's not known, it's None.
+
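A hedged sketch of how truncate_dim interacts with the reported dimension, as described above; the model name is illustrative and its native output dimension is assumed to be 768:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")
print(model.get_sentence_embedding_dimension())      # 768 for this model

truncated = SentenceTransformer("all-mpnet-base-v2", truncate_dim=256)
print(truncated.get_sentence_embedding_dimension())  # 256, capped by truncate_dim
print(truncated.encode(["hello"]).shape)             # (1, 256)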
+
+
+def get_sentence_features(self, *features) ‑> dict[typing.Literal['sentence_embedding'], torch.Tensor] +
+
+
+ +Expand source code + +
def get_sentence_features(self, *features) -> dict[Literal["sentence_embedding"], Tensor]:
+    return self._first_module().get_sentence_features(*features)
+
+
+
+
+def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None) ‑> None +
+
+
+ +Expand source code + +
def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None) -> None:
+    # Propagate the gradient checkpointing to the transformer model
+    for module in self:
+        if isinstance(module, Transformer):
+            return module.auto_model.gradient_checkpointing_enable(gradient_checkpointing_kwargs)
+
+
+
+
+def push_to_hub(self,
repo_id: str,
token: str | None = None,
private: bool | None = None,
safe_serialization: bool = True,
commit_message: str | None = None,
local_model_path: str | None = None,
exist_ok: bool = False,
replace_model_card: bool = False,
train_datasets: list[str] | None = None,
revision: str | None = None,
create_pr: bool = False) ‑> str
+
+
+
+ +Expand source code + +
    def push_to_hub(
+        self,
+        repo_id: str,
+        token: str | None = None,
+        private: bool | None = None,
+        safe_serialization: bool = True,
+        commit_message: str | None = None,
+        local_model_path: str | None = None,
+        exist_ok: bool = False,
+        replace_model_card: bool = False,
+        train_datasets: list[str] | None = None,
+        revision: str | None = None,
+        create_pr: bool = False,
+    ) -> str:
+        """
+        Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.
+
+        Args:
+            repo_id (str): Repository name for your model in the Hub, including the user or organization.
+            token (str, optional): An authentication token (See https://huggingface.co/settings/token)
+            private (bool, optional): Set to true, for hosting a private model
+            safe_serialization (bool, optional): If true, save the model using safetensors. If false, save the model the traditional PyTorch way
+            commit_message (str, optional): Message to commit while pushing.
+            local_model_path (str, optional): Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
+            exist_ok (bool, optional): If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
+            replace_model_card (bool, optional): If true, replace an existing model card in the hub with the automatically created model card
+            train_datasets (List[str], optional): Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
+            revision (str, optional): Branch to push the uploaded files to
+            create_pr (bool, optional): If True, create a pull request instead of pushing directly to the main branch
+
+        Returns:
+            str: The url of the commit of your model in the repository on the Hugging Face Hub.
+        """
+        api = HfApi(token=token)
+        repo_url = api.create_repo(
+            repo_id=repo_id,
+            private=private,
+            repo_type=None,
+            exist_ok=exist_ok or create_pr,
+        )
+        repo_id = repo_url.repo_id  # Update the repo_id in case the old repo_id didn't contain a user or organization
+        self.model_card_data.set_model_id(repo_id)
+        if revision is not None:
+            api.create_branch(repo_id=repo_id, branch=revision, exist_ok=True)
+
+        if commit_message is None:
+            backend = self.get_backend()
+            if backend == "torch":
+                commit_message = "Add new SentenceTransformer model"
+            else:
+                commit_message = f"Add new SentenceTransformer model with an {backend} backend"
+
+        commit_description = ""
+        if create_pr:
+            commit_description = f"""\
+Hello!
+
+*This pull request has been automatically generated from the [`push_to_hub`](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html#sentence_transformers.SentenceTransformer.push_to_hub) method from the Sentence Transformers library.*
+
+## Full Model Architecture:
+```
+{self}
+```
+
+## Tip:
+Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
+```python
+from sentence_transformers import SentenceTransformer
+
+# TODO: Fill in the PR number
+pr_number = 2
+model = SentenceTransformer(
+    "{repo_id}",
+    revision=f"refs/pr/{{pr_number}}",
+    backend="{self.get_backend()}",
+)
+
+# Verify that everything works as expected
+embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
+print(embeddings.shape)
+
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+```
+"""
+
+        if local_model_path:
+            folder_url = api.upload_folder(
+                repo_id=repo_id,
+                folder_path=local_model_path,
+                commit_message=commit_message,
+                commit_description=commit_description,
+                revision=revision,
+                create_pr=create_pr,
+            )
+        else:
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                create_model_card = replace_model_card or not os.path.exists(os.path.join(tmp_dir, "README.md"))
+                self.save_pretrained(
+                    tmp_dir,
+                    model_name=repo_url.repo_id,
+                    create_model_card=create_model_card,
+                    train_datasets=train_datasets,
+                    safe_serialization=safe_serialization,
+                )
+                folder_url = api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=tmp_dir,
+                    commit_message=commit_message,
+                    commit_description=commit_description,
+                    revision=revision,
+                    create_pr=create_pr,
+                )
+
+        if create_pr:
+            return folder_url.pr_url
+        return folder_url.commit_url
+
+

Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.

+

Args

+
+
repo_id : str
+
Repository name for your model in the Hub, including the user or organization.
+
token : str, optional
+
An authentication token (See https://huggingface.co/settings/token)
+
private : bool, optional
+
Set to true, for hosting a private model
+
safe_serialization : bool, optional
+
If true, save the model using safetensors. If false, save the model the traditional PyTorch way
+
commit_message : str, optional
+
Message to commit while pushing.
+
local_model_path : str, optional
+
Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
+
exist_ok : bool, optional
+
If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
+
replace_model_card : bool, optional
+
If true, replace an existing model card in the hub with the automatically created model card
+
train_datasets : List[str], optional
+
Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
+
revision : str, optional
+
Branch to push the uploaded files to
+
create_pr : bool, optional
+
If True, create a pull request instead of pushing directly to the main branch
+
+

Returns

+
+
str
+
The url of the commit of your model in the repository on the Hugging Face Hub.
+
+
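A hedged usage sketch of push_to_hub; the repository id is illustrative and an authenticated Hugging Face account (via huggingface-cli login or the token argument) is assumed:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-mpnet-base-v2")

# Returns the commit URL, or the pull request URL when create_pr=True
url = model.push_to_hub(
    "my-user/my-finetuned-model",      # illustrative repo id
    commit_message="Add fine-tuned SentenceTransformer model",
    exist_ok=True,
    create_pr=False,
)
print(url)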
+
+def save(self,
path: str,
model_name: str | None = None,
create_model_card: bool = True,
train_datasets: list[str] | None = None,
safe_serialization: bool = True) ‑> None
+
+
+
+ +Expand source code + +
def save(
+    self,
+    path: str,
+    model_name: str | None = None,
+    create_model_card: bool = True,
+    train_datasets: list[str] | None = None,
+    safe_serialization: bool = True,
+) -> None:
+    """
+    Saves a model and its configuration files to a directory, so that it can be loaded
+    with ``SentenceTransformer(path)`` again.
+
+    Args:
+        path (str): Path on disc where the model will be saved.
+        model_name (str, optional): Optional model name.
+        create_model_card (bool, optional): If True, create a README.md with basic information about this model.
+        train_datasets (List[str], optional): Optional list with the names of the datasets used to train the model.
+        safe_serialization (bool, optional): If True, save the model using safetensors. If False, save the model
+            the traditional (but unsafe) PyTorch way.
+    """
+    if path is None:
+        return
+
+    os.makedirs(path, exist_ok=True)
+
+    logger.info(f"Save model to {path}")
+    modules_config = []
+
+    # Save some model info
+    self._model_config["__version__"] = {
+        "sentence_transformers": __version__,
+        "transformers": transformers.__version__,
+        "pytorch": torch.__version__,
+    }
+
+    with open(os.path.join(path, "config_sentence_transformers.json"), "w") as fOut:
+        config = self._model_config.copy()
+        config["prompts"] = self.prompts
+        config["default_prompt_name"] = self.default_prompt_name
+        config["similarity_fn_name"] = self.similarity_fn_name
+        json.dump(config, fOut, indent=2)
+
+    # Save modules
+    for idx, name in enumerate(self._modules):
+        module = self._modules[name]
+        if idx == 0 and hasattr(module, "save_in_root"):  # Save first module in the main folder
+            model_path = path + "/"
+        else:
+            model_path = os.path.join(path, str(idx) + "_" + type(module).__name__)
+
+        os.makedirs(model_path, exist_ok=True)
+        # Try to save with safetensors, but fall back to the traditional PyTorch way if the module doesn't support it
+        try:
+            module.save(model_path, safe_serialization=safe_serialization)
+        except TypeError:
+            module.save(model_path)
+
+        # "module" only works for Sentence Transformers as the modules have the same names as the classes
+        class_ref = type(module).__module__
+        # For remote modules, we want to remove "transformers_modules.{repo_name}":
+        if class_ref.startswith("transformers_modules."):
+            class_file = sys.modules[class_ref].__file__
+
+            # Save the custom module file
+            dest_file = Path(model_path) / (Path(class_file).name)
+            shutil.copy(class_file, dest_file)
+
+            # Save all files imported in the custom module file
+            for needed_file in get_relative_import_files(class_file):
+                dest_file = Path(model_path) / (Path(needed_file).name)
+                shutil.copy(needed_file, dest_file)
+
+            # For remote modules, we want to ignore the "transformers_modules.{repo_id}" part,
+            # i.e. we only want the filename
+            class_ref = f"{class_ref.split('.')[-1]}.{type(module).__name__}"
+        # For other cases, we want to add the class name:
+        elif not class_ref.startswith("sentence_transformers."):
+            class_ref = f"{class_ref}.{type(module).__name__}"
+        modules_config.append({"idx": idx, "name": name, "path": os.path.basename(model_path), "type": class_ref})
+
+    with open(os.path.join(path, "modules.json"), "w") as fOut:
+        json.dump(modules_config, fOut, indent=2)
+
+    # Create model card
+    if create_model_card:
+        self._create_model_card(path, model_name, train_datasets)
+
+

Saves a model and its configuration files to a directory, so that it can be loaded +with SentenceTransformer(path) again.

+

Args

+
+
path : str
+
Path on disc where the model will be saved.
+
model_name : str, optional
+
Optional model name.
+
create_model_card : bool, optional
+
If True, create a README.md with basic information about this model.
+
train_datasets : List[str], optional
+
Optional list with the names of the datasets used to train the model.
+
safe_serialization : bool, optional
+
If True, save the model using safetensors. If False, save the model +the traditional (but unsafe) PyTorch way.
+
+
+
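A short, hedged usage sketch of save; the output directory and model id are placeholder assumptions:

>>> from sentence_transformers import SentenceTransformer
>>> model = SentenceTransformer("all-MiniLM-L6-v2")       # placeholder model
>>> model.save("output/my-model")                         # writes modules.json and the module folders
>>> reloaded = SentenceTransformer("output/my-model")     # load the saved model back from disk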
+def save_pretrained(self,
path: str,
model_name: str | None = None,
create_model_card: bool = True,
train_datasets: list[str] | None = None,
safe_serialization: bool = True) ‑> None
+
+
+
+ +Expand source code + +
def save_pretrained(
+    self,
+    path: str,
+    model_name: str | None = None,
+    create_model_card: bool = True,
+    train_datasets: list[str] | None = None,
+    safe_serialization: bool = True,
+) -> None:
+    """
+    Saves a model and its configuration files to a directory, so that it can be loaded
+    with ``SentenceTransformer(path)`` again.
+
+    Args:
+        path (str): Path on disc where the model will be saved.
+        model_name (str, optional): Optional model name.
+        create_model_card (bool, optional): If True, create a README.md with basic information about this model.
+        train_datasets (List[str], optional): Optional list with the names of the datasets used to train the model.
+        safe_serialization (bool, optional): If True, save the model using safetensors. If False, save the model
+            the traditional (but unsafe) PyTorch way.
+    """
+    self.save(
+        path,
+        model_name=model_name,
+        create_model_card=create_model_card,
+        train_datasets=train_datasets,
+        safe_serialization=safe_serialization,
+    )
+
+

Saves a model and its configuration files to a directory, so that it can be loaded +with SentenceTransformer(path) again.

+

Args

+
+
path : str
+
Path on disc where the model will be saved.
+
model_name : str, optional
+
Optional model name.
+
create_model_card : bool, optional
+
If True, create a README.md with basic information about this model.
+
train_datasets : List[str], optional
+
Optional list with the names of the datasets used to train the model.
+
safe_serialization : bool, optional
+
If True, save the model using safetensors. If False, save the model +the traditional (but unsafe) PyTorch way.
+
+
+
+def save_to_hub(self,
repo_id: str,
organization: str | None = None,
token: str | None = None,
private: bool | None = None,
safe_serialization: bool = True,
commit_message: str = 'Add new SentenceTransformer model.',
local_model_path: str | None = None,
exist_ok: bool = False,
replace_model_card: bool = False,
train_datasets: list[str] | None = None) ‑> str
+
+
+
+ +Expand source code + +
@save_to_hub_args_decorator
+def save_to_hub(
+    self,
+    repo_id: str,
+    organization: str | None = None,
+    token: str | None = None,
+    private: bool | None = None,
+    safe_serialization: bool = True,
+    commit_message: str = "Add new SentenceTransformer model.",
+    local_model_path: str | None = None,
+    exist_ok: bool = False,
+    replace_model_card: bool = False,
+    train_datasets: list[str] | None = None,
+) -> str:
+    """
+    DEPRECATED, use `push_to_hub` instead.
+
+    Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.
+
+    Args:
+        repo_id (str): Repository name for your model in the Hub, including the user or organization.
+        token (str, optional): An authentication token (See https://huggingface.co/settings/token)
+        private (bool, optional): Set to true, for hosting a private model
+        safe_serialization (bool, optional): If true, save the model using safetensors. If false, save the model the traditional PyTorch way
+        commit_message (str, optional): Message to commit while pushing.
+        local_model_path (str, optional): Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
+        exist_ok (bool, optional): If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
+        replace_model_card (bool, optional): If true, replace an existing model card in the hub with the automatically created model card
+        train_datasets (List[str], optional): Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
+
+    Returns:
+        str: The url of the commit of your model in the repository on the Hugging Face Hub.
+    """
+    logger.warning(
+        "The `save_to_hub` method is deprecated and will be removed in a future version of SentenceTransformers."
+        " Please use `push_to_hub` instead for future model uploads."
+    )
+
+    if organization:
+        if "/" not in repo_id:
+            logger.warning(
+                f'Providing an `organization` to `save_to_hub` is deprecated, please use `repo_id="{organization}/{repo_id}"` instead.'
+            )
+            repo_id = f"{organization}/{repo_id}"
+        elif repo_id.split("/")[0] != organization:
+            raise ValueError(
+                "Providing an `organization` to `save_to_hub` is deprecated, please only use `repo_id`."
+            )
+        else:
+            logger.warning(
+                f'Providing an `organization` to `save_to_hub` is deprecated, please only use `repo_id="{repo_id}"` instead.'
+            )
+
+    return self.push_to_hub(
+        repo_id=repo_id,
+        token=token,
+        private=private,
+        safe_serialization=safe_serialization,
+        commit_message=commit_message,
+        local_model_path=local_model_path,
+        exist_ok=exist_ok,
+        replace_model_card=replace_model_card,
+        train_datasets=train_datasets,
+    )
+
+

DEPRECATED, use push_to_hub instead.

+

Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.

+

Args

+
+
repo_id : str
+
Repository name for your model in the Hub, including the user or organization.
+
token : str, optional
+
An authentication token (See https://huggingface.co/settings/token)
+
private : bool, optional
+
Set to true, for hosting a private model
+
safe_serialization : bool, optional
+
If true, save the model using safetensors. If false, save the model the traditional PyTorch way
+
commit_message : str, optional
+
Message to commit while pushing.
+
local_model_path : str, optional
+
Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
+
exist_ok : bool, optional
+
If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
+
replace_model_card : bool, optional
+
If true, replace an existing model card in the hub with the automatically created model card
+
train_datasets : List[str], optional
+
Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
+
+

Returns

+
+
str
+
The url of the commit of your model in the repository on the Hugging Face Hub.
+
+
+
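Because save_to_hub is deprecated, a hedged migration sketch (the repository id is a placeholder):

>>> # deprecated call:
>>> # model.save_to_hub("my-model", organization="my-org")
>>> # preferred equivalent:
>>> model.push_to_hub("my-org/my-model")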
+def set_pooling_include_prompt(self, include_prompt: bool) ‑> None +
+
+
+ +Expand source code + +
def set_pooling_include_prompt(self, include_prompt: bool) -> None:
+    """
+    Sets the `include_prompt` attribute in the pooling layer in the model, if there is one.
+
+    This is useful for INSTRUCTOR models, as the prompt should be excluded from the pooling strategy
+    for these models.
+
+    Args:
+        include_prompt (bool): Whether to include the prompt in the pooling layer.
+
+    Returns:
+        None
+    """
+    for module in self:
+        if isinstance(module, Pooling):
+            module.include_prompt = include_prompt
+            break
+
+

Sets the include_prompt attribute in the pooling layer in the model, if there is one.

+

This is useful for INSTRUCTOR models, as the prompt should be excluded from the pooling strategy +for these models.

+

Args

+
+
include_prompt : bool
+
Whether to include the prompt in the pooling layer.
+
+

Returns

+

None

+
+
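A brief sketch of the call; the INSTRUCTOR-style model id is a placeholder assumption:

>>> from sentence_transformers import SentenceTransformer
>>> model = SentenceTransformer("hkunlp/instructor-base")    # placeholder INSTRUCTOR-style model
>>> model.set_pooling_include_prompt(False)                  # exclude the prompt from pooling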
+def start_multi_process_pool(self, target_devices: list[str] = None) ‑> dict[typing.Literal['input', 'output', 'processes'], typing.Any] +
+
+
+ +Expand source code + +
def start_multi_process_pool(
+    self, target_devices: list[str] = None
+) -> dict[Literal["input", "output", "processes"], Any]:
+    """
+    Starts a multi-process pool to process the encoding with several independent processes
+    via :meth:`SentenceTransformer.encode_multi_process <sentence_transformers.SentenceTransformer.encode_multi_process>`.
+
+    This method is recommended if you want to encode on multiple GPUs or CPUs. It is advised
+    to start only one process per GPU. This method works together with encode_multi_process
+    and stop_multi_process_pool.
+
+    Args:
+        target_devices (List[str], optional): PyTorch target devices, e.g. ["cuda:0", "cuda:1", ...],
+            ["npu:0", "npu:1", ...], or ["cpu", "cpu", "cpu", "cpu"]. If target_devices is None and CUDA/NPU
+            is available, then all available CUDA/NPU devices will be used. If target_devices is None and
+            CUDA/NPU is not available, then 4 CPU devices will be used.
+
+    Returns:
+        Dict[str, Any]: A dictionary with the target processes, an input queue, and an output queue.
+    """
+    if target_devices is None:
+        if torch.cuda.is_available():
+            target_devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
+        elif is_torch_npu_available():
+            target_devices = [f"npu:{i}" for i in range(torch.npu.device_count())]
+        else:
+            logger.info("CUDA/NPU is not available. Starting 4 CPU workers")
+            target_devices = ["cpu"] * 4
+
+    logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices))))
+
+    self.to("cpu")
+    self.share_memory()
+    ctx = mp.get_context("spawn")
+    input_queue = ctx.Queue()
+    output_queue = ctx.Queue()
+    processes = []
+
+    for device_id in target_devices:
+        p = ctx.Process(
+            target=SentenceTransformer._encode_multi_process_worker,
+            args=(device_id, self, input_queue, output_queue),
+            daemon=True,
+        )
+        p.start()
+        processes.append(p)
+
+    return {"input": input_queue, "output": output_queue, "processes": processes}
+
+

Starts a multi-process pool to process the encoding with several independent processes +via :meth:SentenceTransformer.encode_multi_process <sentence_transformers.SentenceTransformer.encode_multi_process>.

+

This method is recommended if you want to encode on multiple GPUs or CPUs. It is advised +to start only one process per GPU. This method works together with encode_multi_process +and stop_multi_process_pool.

+

Args

+
+
target_devices : List[str], optional
+
PyTorch target devices, e.g. ["cuda:0", "cuda:1", …], +["npu:0", "npu:1", …], or ["cpu", "cpu", "cpu", "cpu"]. If target_devices is None and CUDA/NPU +is available, then all available CUDA/NPU devices will be used. If target_devices is None and +CUDA/NPU is not available, then 4 CPU devices will be used.
+
+

Returns

+
+
Dict[str, Any]
+
A dictionary with the target processes, an input queue, and an output queue.
+
+
+
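A hedged end-to-end sketch combining this method with encode_multi_process and stop_multi_process_pool; the sentence list and worker count are assumptions:

>>> model = SentenceTransformer("all-MiniLM-L6-v2")
>>> pool = model.start_multi_process_pool(["cpu", "cpu"])    # two CPU workers
>>> embeddings = model.encode_multi_process(["first text", "second text"], pool)
>>> model.stop_multi_process_pool(pool)
>>> embeddings.shape                                         # (2, embedding_dimension)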
+def tokenize(self, texts: list[str] | list[dict] | list[tuple[str, str]]) ‑> dict[str, torch.Tensor] +
+
+
+ +Expand source code + +
def tokenize(self, texts: list[str] | list[dict] | list[tuple[str, str]]) -> dict[str, Tensor]:
+    """
+    Tokenizes the texts.
+
+    Args:
+        texts (Union[List[str], List[Dict], List[Tuple[str, str]]]): A list of texts to be tokenized.
+
+    Returns:
+        Dict[str, Tensor]: A dictionary of tensors with the tokenized texts. Common keys are "input_ids",
+            "attention_mask", and "token_type_ids".
+    """
+    return self._first_module().tokenize(texts)
+
+

Tokenizes the texts.

+

Args

+
+
texts : Union[List[str], List[Dict], List[Tuple[str, str]]]
+
A list of texts to be tokenized.
+
+

Returns

+
+
Dict[str, Tensor]
+
A dictionary of tensors with the tokenized texts. Common keys are "input_ids", +"attention_mask", and "token_type_ids".
+
+
+
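A small sketch of tokenize; the input strings are arbitrary examples and the exact keys depend on the underlying tokenizer:

>>> features = model.tokenize(["Hello world", "How are you?"])
>>> features["input_ids"].shape         # (batch_size, sequence_length)
>>> features["attention_mask"].shape    # same shape as input_ids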
+def truncate_sentence_embeddings(self, truncate_dim: int | None) ‑> Iterator[None] +
+
+
+ +Expand source code + +
@contextmanager
+def truncate_sentence_embeddings(self, truncate_dim: int | None) -> Iterator[None]:
+    """
+    In this context, :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>` outputs
+    sentence embeddings truncated at dimension ``truncate_dim``.
+
+    This may be useful when you are using the same model for different applications where different dimensions
+    are needed.
+
+    Args:
+        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. ``None`` does no truncation.
+
+    Example:
+        ::
+
+            from sentence_transformers import SentenceTransformer
+
+            model = SentenceTransformer("all-mpnet-base-v2")
+
+            with model.truncate_sentence_embeddings(truncate_dim=16):
+                embeddings_truncated = model.encode(["hello there", "hiya"])
+            assert embeddings_truncated.shape[-1] == 16
+    """
+    original_output_dim = self.truncate_dim
+    try:
+        self.truncate_dim = truncate_dim
+        yield
+    finally:
+        self.truncate_dim = original_output_dim
+
+

In this context, :meth:SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode> outputs +sentence embeddings truncated at dimension truncate_dim.

+

This may be useful when you are using the same model for different applications where different dimensions +are needed.

+

Args

+
+
truncate_dim : int, optional
+
The dimension to truncate sentence embeddings to. None does no truncation.
+
+

Example

+

::

+
from sentence_transformers import SentenceTransformer
+
+model = SentenceTransformer("all-mpnet-base-v2")
+
+with model.truncate_sentence_embeddings(truncate_dim=16):
+    embeddings_truncated = model.encode(["hello there", "hiya"])
+assert embeddings_truncated.shape[-1] == 16
+
+
+
+
+
+class Tensor +(...) +
+
+
+

Ancestors

+
    +
  • torch._C.TensorBase
  • +
+

Subclasses

+
    +
  • torch._subclasses.fake_tensor.FakeTensor
  • +
  • torch._subclasses.functional_tensor.FunctionalTensor
  • +
  • torch.masked.maskedtensor.core.MaskedTensor
  • +
  • torch.nn.parameter.Buffer
  • +
  • torch.nn.parameter.Parameter
  • +
  • torch.nn.parameter.UninitializedBuffer
  • +
  • torch.sparse.semi_structured.SparseSemiStructuredTensor
  • +
  • torch.testing._internal.logging_tensor.LoggingTensor
  • +
+

Methods

+
+
+def align_to(self, *names) +
+
+
+ +Expand source code + +
def align_to(self, *names):
+    r"""Permutes the dimensions of the :attr:`self` tensor to match the order
+    specified in :attr:`names`, adding size-one dims for any new names.
+
+    All of the dims of :attr:`self` must be named in order to use this method.
+    The resulting tensor is a view on the original tensor.
+
+    All dimension names of :attr:`self` must be present in :attr:`names`.
+    :attr:`names` may contain additional names that are not in ``self.names``;
+    the output tensor has a size-one dimension for each of those new names.
+
+    :attr:`names` may contain up to one Ellipsis (``...``).
+    The Ellipsis is expanded to be equal to all dimension names of :attr:`self`
+    that are not mentioned in :attr:`names`, in the order that they appear
+    in :attr:`self`.
+
+    Python 2 does not support Ellipsis but one may use a string literal
+    instead (``'...'``).
+
+    Args:
+        names (iterable of str): The desired dimension ordering of the
+            output tensor. May contain up to one Ellipsis that is expanded
+            to all unmentioned dim names of :attr:`self`.
+
+    Examples::
+
+        >>> tensor = torch.randn(2, 2, 2, 2, 2, 2)
+        >>> named_tensor = tensor.refine_names('A', 'B', 'C', 'D', 'E', 'F')
+
+        # Move the F and E dims to the front while keeping the rest in order
+        >>> named_tensor.align_to('F', 'E', ...)
+
+    .. warning::
+        The named tensor API is experimental and subject to change.
+
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.align_to, (self,), self, *names)
+    ellipsis_idx = single_ellipsis_index(names, "align_to")
+    if ellipsis_idx is None:
+        return super().align_to(names)
+    return super().align_to(
+        [name for name in names if not is_ellipsis(name)], ellipsis_idx
+    )
+
+

Permutes the dimensions of the :attr:self tensor to match the order +specified in :attr:names, adding size-one dims for any new names.

+

All of the dims of :attr:self must be named in order to use this method. +The resulting tensor is a view on the original tensor.

+

All dimension names of :attr:self must be present in :attr:names. +:attr:names may contain additional names that are not in self.names; +the output tensor has a size-one dimension for each of those new names.

+

:attr:names may contain up to one Ellipsis (...). +The Ellipsis is expanded to be equal to all dimension names of :attr:self +that are not mentioned in :attr:names, in the order that they appear +in :attr:self.

+

Python 2 does not support Ellipsis but one may use a string literal +instead ('...').

+

Args

+
+
names : iterable of str
+
The desired dimension ordering of the +output tensor. May contain up to one Ellipsis that is expanded +to all unmentioned dim names of :attr:self.
+
+

Examples::

+
>>> tensor = torch.randn(2, 2, 2, 2, 2, 2)
+>>> named_tensor = tensor.refine_names('A', 'B', 'C', 'D', 'E', 'F')
+
+# Move the F and E dims to the front while keeping the rest in order
+>>> named_tensor.align_to('F', 'E', ...)
+
+
+

Warning

+

The named tensor API is experimental and subject to change.

+
+
+
+def backward(self, gradient=None, retain_graph=None, create_graph=False, inputs=None) +
+
+
+ +Expand source code + +
def backward(
+    self, gradient=None, retain_graph=None, create_graph=False, inputs=None
+):
+    r"""Computes the gradient of current tensor wrt graph leaves.
+
+    The graph is differentiated using the chain rule. If the tensor is
+    non-scalar (i.e. its data has more than one element) and requires
+    gradient, the function additionally requires specifying a ``gradient``.
+    It should be a tensor of matching type and shape, that represents
+    the gradient of the differentiated function w.r.t. ``self``.
+
+    This function accumulates gradients in the leaves - you might need to zero
+    ``.grad`` attributes or set them to ``None`` before calling it.
+    See :ref:`Default gradient layouts<default-grad-layouts>`
+    for details on the memory layout of accumulated gradients.
+
+    .. note::
+
+        If you run any forward ops, create ``gradient``, and/or call ``backward``
+        in a user-specified CUDA stream context, see
+        :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.
+
+    .. note::
+
+        When ``inputs`` are provided and a given input is not a leaf,
+        the current implementation will call its grad_fn (though it is not strictly needed to get these gradients).
+        It is an implementation detail on which the user should not rely.
+        See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
+
+    Args:
+        gradient (Tensor, optional): The gradient of the function
+            being differentiated w.r.t. ``self``.
+            This argument can be omitted if ``self`` is a scalar.
+        retain_graph (bool, optional): If ``False``, the graph used to compute
+            the grads will be freed. Note that in nearly all cases setting
+            this option to True is not needed and often can be worked around
+            in a much more efficient way. Defaults to the value of
+            ``create_graph``.
+        create_graph (bool, optional): If ``True``, graph of the derivative will
+            be constructed, allowing to compute higher order derivative
+            products. Defaults to ``False``.
+        inputs (sequence of Tensor, optional): Inputs w.r.t. which the gradient will be
+            accumulated into ``.grad``. All other tensors will be ignored. If not
+            provided, the gradient is accumulated into all the leaf Tensors that were
+            used to compute the :attr:`tensors`.
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.backward,
+            (self,),
+            self,
+            gradient=gradient,
+            retain_graph=retain_graph,
+            create_graph=create_graph,
+            inputs=inputs,
+        )
+    torch.autograd.backward(
+        self, gradient, retain_graph, create_graph, inputs=inputs
+    )
+
+

Computes the gradient of current tensor wrt graph leaves.

+

The graph is differentiated using the chain rule. If the tensor is +non-scalar (i.e. its data has more than one element) and requires +gradient, the function additionally requires specifying a gradient. +It should be a tensor of matching type and shape, that represents +the gradient of the differentiated function w.r.t. self.

+

This function accumulates gradients in the leaves - you might need to zero +.grad attributes or set them to None before calling it. +See :ref:Default gradient layouts<default-grad-layouts> +for details on the memory layout of accumulated gradients.

+
+

Note

+

If you run any forward ops, create gradient, and/or call backward +in a user-specified CUDA stream context, see +:ref:Stream semantics of backward passes<bwd-cuda-stream-semantics>.

+
+
+

Note

+

When inputs are provided and a given input is not a leaf, +the current implementation will call its grad_fn (though it is not strictly needed to get these gradients). +It is an implementation detail on which the user should not rely. +See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.

+
+

Args

+
+
gradient : Tensor, optional
+
The gradient of the function +being differentiated w.r.t. self. +This argument can be omitted if self is a scalar.
+
retain_graph : bool, optional
+
If False, the graph used to compute +the grads will be freed. Note that in nearly all cases setting +this option to True is not needed and often can be worked around +in a much more efficient way. Defaults to the value of +create_graph.
+
create_graph : bool, optional
+
If True, graph of the derivative will +be constructed, allowing to compute higher order derivative +products. Defaults to False.
+
inputs : sequence of Tensor, optional
+
Inputs w.r.t. which the gradient will be +accumulated into .grad. All other tensors will be ignored. If not +provided, the gradient is accumulated into all the leaf Tensors that were +used to compute the :attr:tensors.
+
+
+
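A hedged sketch showing the gradient argument that non-scalar tensors require:

>>> x = torch.tensor([1., 2., 3.], requires_grad=True)
>>> y = x * 2                         # non-scalar result
>>> y.backward(torch.ones_like(y))    # gradient w.r.t. y must be supplied
>>> x.grad
tensor([2., 2., 2.])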
+def detach(...) +
+
+

Returns a new Tensor, detached from the current graph.

+

The result will never require gradient.

+

This method also affects forward mode AD gradients and the result will never +have forward mode AD gradients.

+
+

Note

+

Returned Tensor shares the same storage with the original one. +In-place modifications on either of them will be seen, and may trigger +errors in correctness checks.

+
+
+
+def detach_(...) +
+
+

Detaches the Tensor from the graph that created it, making it a leaf. +Views cannot be detached in-place.

+

This method also affects forward mode AD gradients and the result will never +have forward mode AD gradients.

+
+
+def dim_order(self) +
+
+
+ +Expand source code + +
def dim_order(self):
+    """
+
+    dim_order() -> tuple
+
+    Returns a tuple of int describing the dim order or physical layout of :attr:`self`.
+
+    Args:
+        None
+
+    Dim order represents how dimensions are laid out in memory,
+    starting from the outermost to the innermost dimension.
+
+    Example::
+        >>> torch.empty((2, 3, 5, 7)).dim_order()
+        (0, 1, 2, 3)
+        >>> torch.empty((2, 3, 5, 7), memory_format=torch.channels_last).dim_order()
+        (0, 2, 3, 1)
+
+    .. warning::
+        The dim_order tensor API is experimental and subject to change.
+
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.dim_order, (self,), self)
+
+    import torch._prims_common as utils
+
+    return tuple(utils.compute_elementwise_output_logical_to_physical_perm(self))
+
+

dim_order() -> tuple

+

Returns a tuple of int describing the dim order or physical layout of :attr:self.

+

Args

+

None +Dim order represents how dimensions are laid out in memory, +starting from the outermost to the innermost dimension.

+

Example:: +>>> torch.empty((2, 3, 5, 7)).dim_order() +(0, 1, 2, 3) +>>> torch.empty((2, 3, 5, 7), memory_format=torch.channels_last).dim_order() +(0, 2, 3, 1)

+
+

Warning

+

The dim_order tensor API is experimental and subject to change.

+
+
+
+def eig(self, eigenvectors=False) +
+
+
+ +Expand source code + +
def eig(self, eigenvectors=False):
+    from torch._linalg_utils import eig
+
+    return eig(self, eigenvectors=eigenvectors)
+
+
+
+
+def is_shared(self) +
+
+
+ +Expand source code + +
def is_shared(self):
+    r"""Checks if tensor is in shared memory.
+
+    This is always ``True`` for CUDA tensors.
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.is_shared, (self,), self)
+    return self._typed_storage()._is_shared()
+
+

Checks if tensor is in shared memory.

+

This is always True for CUDA tensors.

+
+
+def istft(self,
n_fft: int,
hop_length: int | None = None,
win_length: int | None = None,
window: Optional[Tensor] = None,
center: bool = True,
normalized: bool = False,
onesided: bool | None = None,
length: int | None = None,
return_complex: bool = False)
+
+
+
+ +Expand source code + +
def istft(
+    self,
+    n_fft: int,
+    hop_length: Optional[int] = None,
+    win_length: Optional[int] = None,
+    window: "Optional[Tensor]" = None,
+    center: bool = True,
+    normalized: bool = False,
+    onesided: Optional[bool] = None,
+    length: Optional[int] = None,
+    return_complex: bool = False,
+):
+    r"""See :func:`torch.istft`"""
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.istft,
+            (self,),
+            self,
+            n_fft,
+            hop_length=hop_length,
+            win_length=win_length,
+            window=window,
+            center=center,
+            normalized=normalized,
+            onesided=onesided,
+            length=length,
+            return_complex=return_complex,
+        )
+    return torch.istft(
+        self,
+        n_fft,
+        hop_length,
+        win_length,
+        window,
+        center,
+        normalized,
+        onesided,
+        length,
+        return_complex=return_complex,
+    )
+
+

See :func:torch.istft

+
+
+def lstsq(self, other) +
+
+
+ +Expand source code + +
def lstsq(self, other):
+    from torch._linalg_utils import lstsq
+
+    return lstsq(self, other)
+
+
+
+
+def lu(self, pivot=True, get_infos=False) +
+
+
+ +Expand source code + +
def lu(self, pivot=True, get_infos=False):
+    r"""See :func:`torch.lu`"""
+    # If get_infos is True, then we don't need to check for errors and vice versa
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.lu, (self,), self, pivot=pivot, get_infos=get_infos
+        )
+
+    LU, pivots, infos = torch._lu_with_info(
+        self, pivot=pivot, check_errors=(not get_infos)
+    )
+    if get_infos:
+        return LU, pivots, infos
+    else:
+        return LU, pivots
+
+

See :func:torch.lu

+
+
+def module_load(self, other, assign=False) +
+
+
+ +Expand source code + +
def module_load(self, other, assign=False):
+    r"""Defines how to transform ``other`` when loading it into ``self`` in :meth:`~nn.Module.load_state_dict`.
+
+    Used when :func:`~torch.__future__.get_swap_module_params_on_conversion` is ``True``.
+
+    It is expected that ``self`` is a parameter or buffer in an ``nn.Module`` and ``other`` is the
+    value in the state dictionary with the corresponding key, this method defines
+    how ``other`` is remapped before being swapped with ``self`` via
+    :func:`~torch.utils.swap_tensors` in :meth:`~nn.Module.load_state_dict`.
+
+    .. note::
+        This method should always return a new object that is not ``self`` or ``other``.
+        For example, the default implementation returns ``self.copy_(other).detach()``
+        if ``assign`` is ``False`` or ``other.detach()`` if ``assign`` is ``True``.
+
+    Args:
+        other (Tensor): value in state dict with key corresponding to ``self``
+        assign (bool): the assign argument passed to :meth:`nn.Module.load_state_dict`
+
+    """
+    if has_torch_function_variadic(self, other):
+        return handle_torch_function(
+            Tensor.module_load, (self, other), self, other, assign=assign
+        )
+
+    if assign:
+        return other.detach()
+    else:
+        return self.copy_(other).detach()
+
+

Defines how to transform other when loading it into self in :meth:~nn.Module.load_state_dict.

+

Used when :func:~torch.__future__.get_swap_module_params_on_conversion is True.

+

It is expected that self is a parameter or buffer in an nn.Module and other is the +value in the state dictionary with the corresponding key, this method defines +how other is remapped before being swapped with self via +:func:~torch.utils.swap_tensors in :meth:~nn.Module.load_state_dict.

+
+

Note

+

This method should always return a new object that is not self or other. +For example, the default implementation returns self.copy_(other).detach() +if assign is False or other.detach() if assign is True.

+
+

Args

+
+
other : Tensor
+
value in state dict with key corresponding to self
+
assign : bool
+
the assign argument passed to :meth:nn.Module.load_state_dict
+
+
+
+def norm(self, p: float | str | None = 'fro', dim=None, keepdim=False, dtype=None) +
+
+
+ +Expand source code + +
def norm(
+    self,
+    p: Optional[Union[float, str]] = "fro",
+    dim=None,
+    keepdim=False,
+    dtype=None,
+):
+    r"""See :func:`torch.norm`"""
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.norm, (self,), self, p=p, dim=dim, keepdim=keepdim, dtype=dtype
+        )
+    return torch.norm(self, p, dim, keepdim, dtype=dtype)
+
+

See :func:torch.norm

+
+
+def refine_names(self, *names) +
+
+
+ +Expand source code + +
def refine_names(self, *names):
+    r"""Refines the dimension names of :attr:`self` according to :attr:`names`.
+
+    Refining is a special case of renaming that "lifts" unnamed dimensions.
+    A ``None`` dim can be refined to have any name; a named dim can only be
+    refined to have the same name.
+
+    Because named tensors can coexist with unnamed tensors, refining names
+    gives a nice way to write named-tensor-aware code that works with both
+    named and unnamed tensors.
+
+    :attr:`names` may contain up to one Ellipsis (``...``).
+    The Ellipsis is expanded greedily; it is expanded in-place to fill
+    :attr:`names` to the same length as ``self.dim()`` using names from the
+    corresponding indices of ``self.names``.
+
+    Python 2 does not support Ellipsis but one may use a string literal
+    instead (``'...'``).
+
+    Args:
+        names (iterable of str): The desired names of the output tensor. May
+            contain up to one Ellipsis.
+
+    Examples::
+
+        >>> imgs = torch.randn(32, 3, 128, 128)
+        >>> named_imgs = imgs.refine_names('N', 'C', 'H', 'W')
+        >>> named_imgs.names
+        ('N', 'C', 'H', 'W')
+
+        >>> tensor = torch.randn(2, 3, 5, 7, 11)
+        >>> tensor = tensor.refine_names('A', ..., 'B', 'C')
+        >>> tensor.names
+        ('A', None, None, 'B', 'C')
+
+    .. warning::
+        The named tensor API is experimental and subject to change.
+
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.refine_names, (self,), self, *names)
+    names = resolve_ellipsis(names, self.names, "refine_names")
+    return super().refine_names(names)
+
+

Refines the dimension names of :attr:self according to :attr:names.

+

Refining is a special case of renaming that "lifts" unnamed dimensions. +A None dim can be refined to have any name; a named dim can only be +refined to have the same name.

+

Because named tensors can coexist with unnamed tensors, refining names +gives a nice way to write named-tensor-aware code that works with both +named and unnamed tensors.

+

:attr:names may contain up to one Ellipsis (...). +The Ellipsis is expanded greedily; it is expanded in-place to fill +:attr:names to the same length as self.dim() using names from the +corresponding indices of self.names.

+

Python 2 does not support Ellipsis but one may use a string literal +instead ('...').

+

Args

+
+
names : iterable of str
+
The desired names of the output tensor. May +contain up to one Ellipsis.
+
+

Examples::

+
>>> imgs = torch.randn(32, 3, 128, 128)
+>>> named_imgs = imgs.refine_names('N', 'C', 'H', 'W')
+>>> named_imgs.names
+('N', 'C', 'H', 'W')
+
+>>> tensor = torch.randn(2, 3, 5, 7, 11)
+>>> tensor = tensor.refine_names('A', ..., 'B', 'C')
+>>> tensor.names
+('A', None, None, 'B', 'C')
+
+
+

Warning

+

The named tensor API is experimental and subject to change.

+
+
+
+def register_hook(self, hook) +
+
+
+ +Expand source code + +
def register_hook(self, hook):
+    r"""Registers a backward hook.
+
+    The hook will be called every time a gradient with respect to the
+    Tensor is computed. The hook should have the following signature::
+
+        hook(grad) -> Tensor or None
+
+
+    The hook should not modify its argument, but it can optionally return
+    a new gradient which will be used in place of :attr:`grad`.
+
+    This function returns a handle with a method ``handle.remove()``
+    that removes the hook from the module.
+
+    .. note::
+        See :ref:`backward-hooks-execution` for more information on when this hook
+        is executed, and how its execution is ordered relative to other hooks.
+
+    Example::
+
+        >>> v = torch.tensor([0., 0., 0.], requires_grad=True)
+        >>> h = v.register_hook(lambda grad: grad * 2)  # double the gradient
+        >>> v.backward(torch.tensor([1., 2., 3.]))
+        >>> v.grad
+
+         2
+         4
+         6
+        [torch.FloatTensor of size (3,)]
+
+        >>> h.remove()  # removes the hook
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.register_hook, (self,), self, hook)
+    if not self.requires_grad:
+        raise RuntimeError(
+            "cannot register a hook on a tensor that doesn't require gradient"
+        )
+    if self._backward_hooks is None:
+        self._backward_hooks = OrderedDict()
+        if self.grad_fn is not None:
+            self.grad_fn._register_hook_dict(self)
+
+    from torch.utils.hooks import RemovableHandle
+
+    handle = RemovableHandle(self._backward_hooks)
+    self._backward_hooks[handle.id] = hook
+    return handle
+
+

Registers a backward hook.

+

The hook will be called every time a gradient with respect to the +Tensor is computed. The hook should have the following signature::

+
hook(grad) -> Tensor or None
+
+

The hook should not modify its argument, but it can optionally return +a new gradient which will be used in place of :attr:grad.

+

This function returns a handle with a method handle.remove() +that removes the hook from the module.

+
+

Note

+

See :ref:backward-hooks-execution for more information on when this hook +is executed, and how its execution is ordered relative to other hooks.

+
+

Example::

+
>>> v = torch.tensor([0., 0., 0.], requires_grad=True)
+>>> h = v.register_hook(lambda grad: grad * 2)  # double the gradient
+>>> v.backward(torch.tensor([1., 2., 3.]))
+>>> v.grad
+
+ 2
+ 4
+ 6
+[torch.FloatTensor of size (3,)]
+
+>>> h.remove()  # removes the hook
+
+
+
+def register_post_accumulate_grad_hook(self, hook) +
+
+
+ +Expand source code + +
def register_post_accumulate_grad_hook(self, hook):
+    r"""Registers a backward hook that runs after grad accumulation.
+
+    The hook will be called after all gradients for a tensor have been accumulated,
+    meaning that the .grad field has been updated on that tensor. The post
+    accumulate grad hook is ONLY applicable for leaf tensors (tensors without a
+    .grad_fn field). Registering this hook on a non-leaf tensor will error!
+
+    The hook should have the following signature::
+
+        hook(param: Tensor) -> None
+
+    Note that, unlike other autograd hooks, this hook operates on the tensor
+    that requires grad and not the grad itself. The hook can in-place modify
+    and access its Tensor argument, including its .grad field.
+
+    This function returns a handle with a method ``handle.remove()``
+    that removes the hook from the module.
+
+    .. note::
+        See :ref:`backward-hooks-execution` for more information on when this hook
+        is executed, and how its execution is ordered relative to other hooks. Since
+        this hook runs during the backward pass, it will run in no_grad mode (unless
+        create_graph is True). You can use torch.enable_grad() to re-enable autograd
+        within the hook if you need it.
+
+    Example::
+
+        >>> v = torch.tensor([0., 0., 0.], requires_grad=True)
+        >>> lr = 0.01
+        >>> # simulate a simple SGD update
+        >>> h = v.register_post_accumulate_grad_hook(lambda p: p.add_(p.grad, alpha=-lr))
+        >>> v.backward(torch.tensor([1., 2., 3.]))
+        >>> v
+        tensor([-0.0100, -0.0200, -0.0300], requires_grad=True)
+
+        >>> h.remove()  # removes the hook
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.register_post_accumulate_grad_hook, (self,), self, hook
+        )
+    if not self.requires_grad:
+        raise RuntimeError(
+            "cannot register a hook on a tensor that doesn't require gradient"
+        )
+    if self.grad_fn is not None:
+        raise RuntimeError(
+            "post accumulate grad hooks cannot be registered on non-leaf tensors"
+        )
+    if self._post_accumulate_grad_hooks is None:
+        self._post_accumulate_grad_hooks: Dict[Any, Any] = OrderedDict()
+
+    from torch.utils.hooks import RemovableHandle
+
+    handle = RemovableHandle(self._post_accumulate_grad_hooks)
+    self._post_accumulate_grad_hooks[handle.id] = hook
+    return handle
+
+

Registers a backward hook that runs after grad accumulation.

+

The hook will be called after all gradients for a tensor have been accumulated, +meaning that the .grad field has been updated on that tensor. The post +accumulate grad hook is ONLY applicable for leaf tensors (tensors without a +.grad_fn field). Registering this hook on a non-leaf tensor will error!

+

The hook should have the following signature::

+
hook(param: Tensor) -> None
+
+

Note that, unlike other autograd hooks, this hook operates on the tensor +that requires grad and not the grad itself. The hook can in-place modify +and access its Tensor argument, including its .grad field.

+

This function returns a handle with a method handle.remove() +that removes the hook from the module.

+
+

Note

+

See :ref:backward-hooks-execution for more information on when this hook +is executed, and how its execution is ordered relative to other hooks. Since +this hook runs during the backward pass, it will run in no_grad mode (unless +create_graph is True). You can use torch.enable_grad() to re-enable autograd +within the hook if you need it.

+
+

Example::

+
>>> v = torch.tensor([0., 0., 0.], requires_grad=True)
+>>> lr = 0.01
+>>> # simulate a simple SGD update
+>>> h = v.register_post_accumulate_grad_hook(lambda p: p.add_(p.grad, alpha=-lr))
+>>> v.backward(torch.tensor([1., 2., 3.]))
+>>> v
+tensor([-0.0100, -0.0200, -0.0300], requires_grad=True)
+
+>>> h.remove()  # removes the hook
+
+
+
+def reinforce(self, reward) +
+
+
+ +Expand source code + +
def reinforce(self, reward):
+    def trim(str):
+        return "\n".join([line.strip() for line in str.split("\n")])
+
+    raise RuntimeError(
+        trim(
+            r"""reinforce() was removed.
+        Use torch.distributions instead.
+        See https://pytorch.org/docs/main/distributions.html
+
+        Instead of:
+
+        probs = policy_network(state)
+        action = probs.multinomial()
+        next_state, reward = env.step(action)
+        action.reinforce(reward)
+        action.backward()
+
+        Use:
+
+        probs = policy_network(state)
+        # NOTE: categorical is equivalent to what used to be called multinomial
+        m = torch.distributions.Categorical(probs)
+        action = m.sample()
+        next_state, reward = env.step(action)
+        loss = -m.log_prob(action) * reward
+        loss.backward()
+    """
+        )
+    )
+
+
+
+
+def rename(self, *names, **rename_map) +
+
+
+ +Expand source code + +
def rename(self, *names, **rename_map):
+    """Renames dimension names of :attr:`self`.
+
+    There are two main usages:
+
+    ``self.rename(**rename_map)`` returns a view on tensor that has dims
+    renamed as specified in the mapping :attr:`rename_map`.
+
+    ``self.rename(*names)`` returns a view on tensor, renaming all
+    dimensions positionally using :attr:`names`.
+    Use ``self.rename(None)`` to drop names on a tensor.
+
+    One cannot specify both positional args :attr:`names` and keyword args
+    :attr:`rename_map`.
+
+    Examples::
+
+        >>> imgs = torch.rand(2, 3, 5, 7, names=('N', 'C', 'H', 'W'))
+        >>> renamed_imgs = imgs.rename(N='batch', C='channels')
+        >>> renamed_imgs.names
+        ('batch', 'channels', 'H', 'W')
+
+        >>> renamed_imgs = imgs.rename(None)
+        >>> renamed_imgs.names
+        (None, None, None, None)
+
+        >>> renamed_imgs = imgs.rename('batch', 'channel', 'height', 'width')
+        >>> renamed_imgs.names
+        ('batch', 'channel', 'height', 'width')
+
+    .. warning::
+        The named tensor API is experimental and subject to change.
+
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.rename, (self,), self, *names, **rename_map
+        )
+
+    # See Note [rename_ / rename API]
+    return update_names(self, names, rename_map, inplace=False)
+
+

Renames dimension names of :attr:self.

+

There are two main usages:

+

self.rename(**rename_map) returns a view on tensor that has dims +renamed as specified in the mapping :attr:rename_map.

+

self.rename(*names) returns a view on tensor, renaming all +dimensions positionally using :attr:names. +Use self.rename(None) to drop names on a tensor.

+

One cannot specify both positional args :attr:names and keyword args +:attr:rename_map.

+

Examples::

+
>>> imgs = torch.rand(2, 3, 5, 7, names=('N', 'C', 'H', 'W'))
+>>> renamed_imgs = imgs.rename(N='batch', C='channels')
+>>> renamed_imgs.names
+('batch', 'channels', 'H', 'W')
+
+>>> renamed_imgs = imgs.rename(None)
+>>> renamed_imgs.names
+(None, None, None, None)
+
+>>> renamed_imgs = imgs.rename('batch', 'channel', 'height', 'width')
+>>> renamed_imgs.names
+('batch', 'channel', 'height', 'width')
+
+
+

Warning

+

The named tensor API is experimental and subject to change.

+
+
+
+def rename_(self, *names, **rename_map) +
+
+
+ +Expand source code + +
def rename_(self, *names, **rename_map):
+    """In-place version of :meth:`~Tensor.rename`."""
+
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.rename_, (self,), self, *names, **rename_map
+        )
+
+    # Note [rename_ / rename API]
+    # The Python API for these is different from the C++ API. In Python:
+    # 1) tensor.rename(*names) takes a vararglist of names
+    # 2) tensor.rename(**rename_map) takes a map of names to rename.
+    # C++ is static, making it difficult to implement similar behavior.
+    return update_names(self, names, rename_map, inplace=True)
+
+

In-place version of :meth:~Tensor.rename.

+
+
+def resize(self, *sizes) +
+
+
+ +Expand source code + +
def resize(self, *sizes):
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.resize, (self,), self, *sizes)
+    warnings.warn("non-inplace resize is deprecated")
+    from torch.autograd._functions import Resize
+
+    return Resize.apply(self, sizes)
+
+
+
+
+def resize_as(self, tensor) +
+
+
+ +Expand source code + +
def resize_as(self, tensor):
+    if has_torch_function_variadic(self, tensor):
+        return handle_torch_function(Tensor.resize_as, (self, tensor), self, tensor)
+    warnings.warn("non-inplace resize_as is deprecated")
+    from torch.autograd._functions import Resize
+
+    return Resize.apply(self, tensor.size())
+
+
+
+
+def share_memory_(self) +
+
+
+ +Expand source code + +
def share_memory_(self):
+    r"""Moves the underlying storage to shared memory.
+
+    This is a no-op if the underlying storage is already in shared memory
+    and for CUDA tensors. Tensors in shared memory cannot be resized.
+
+    See :meth:`torch.UntypedStorage.share_memory_` for more details.
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.share_memory_, (self,), self)
+    self._typed_storage()._share_memory_()
+    return self
+
+

Moves the underlying storage to shared memory.

+

This is a no-op if the underlying storage is already in shared memory +and for CUDA tensors. Tensors in shared memory cannot be resized.

+

See :meth:torch.UntypedStorage.share_memory_ for more details.

+
+
+def solve(self, other) +
+
+
+ +Expand source code + +
def solve(self, other):
+    from torch._linalg_utils import solve
+
+    return solve(self, other)
+
+
+
+
+def split(self, split_size, dim=0) +
+
+
+ +Expand source code + +
def split(self, split_size, dim=0):
+    r"""See :func:`torch.split`"""
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.split, (self,), self, split_size, dim=dim
+        )
+    if isinstance(split_size, Tensor):
+        try:
+            split_size = int(split_size)
+        except ValueError:
+            pass
+
+    if isinstance(split_size, (int, torch.SymInt)):
+        return torch._VF.split(self, split_size, dim)  # type: ignore[attr-defined]
+    else:
+        return torch._VF.split_with_sizes(self, split_size, dim)
+
+

See :func:torch.split

+
+
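A short sketch of both split modes (fixed chunk size vs. explicit sizes):

>>> t = torch.arange(10)
>>> t.split(4)            # equal chunks of size 4; the last chunk may be smaller
(tensor([0, 1, 2, 3]), tensor([4, 5, 6, 7]), tensor([8, 9]))
>>> t.split([3, 7])       # explicit chunk sizes along dim 0
(tensor([0, 1, 2]), tensor([3, 4, 5, 6, 7, 8, 9]))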
+def stft(self,
n_fft: int,
hop_length: int | None = None,
win_length: int | None = None,
window: Optional[Tensor] = None,
center: bool = True,
pad_mode: str = 'reflect',
normalized: bool = False,
onesided: bool | None = None,
return_complex: bool | None = None)
+
+
+
+ +Expand source code + +
def stft(
+    self,
+    n_fft: int,
+    hop_length: Optional[int] = None,
+    win_length: Optional[int] = None,
+    window: "Optional[Tensor]" = None,
+    center: bool = True,
+    pad_mode: str = "reflect",
+    normalized: bool = False,
+    onesided: Optional[bool] = None,
+    return_complex: Optional[bool] = None,
+):
+    r"""See :func:`torch.stft`
+
+    .. warning::
+      This function changed signature at version 0.4.1. Calling with
+      the previous signature may cause error or return incorrect result.
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.stft,
+            (self,),
+            self,
+            n_fft,
+            hop_length=hop_length,
+            win_length=win_length,
+            window=window,
+            center=center,
+            pad_mode=pad_mode,
+            normalized=normalized,
+            onesided=onesided,
+            return_complex=return_complex,
+        )
+    return torch.stft(
+        self,
+        n_fft,
+        hop_length,
+        win_length,
+        window,
+        center,
+        pad_mode,
+        normalized,
+        onesided,
+        return_complex=return_complex,
+    )
+
+

See :func:torch.stft

+
+

Warning

+

This function changed signature at version 0.4.1. Calling with +the previous signature may cause error or return incorrect result.

+
+
+
+def storage(self) +
+
+
+ +Expand source code + +
def storage(self):
+    r"""
+    storage() -> torch.TypedStorage
+
+    Returns the underlying :class:`TypedStorage`.
+
+    .. warning::
+
+        :class:`TypedStorage` is deprecated. It will be removed in the future, and
+        :class:`UntypedStorage` will be the only storage class. To access the
+        :class:`UntypedStorage` directly, use :attr:`Tensor.untyped_storage()`.
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.storage, (self,), self)
+
+    torch.storage._warn_typed_storage_removal(stacklevel=2)
+    return self._typed_storage()
+
+

storage() -> torch.TypedStorage

+

Returns the underlying :class:TypedStorage.

+
+

Warning

+

:class:TypedStorage is deprecated. It will be removed in the future, and +:class:UntypedStorage will be the only storage class. To access the +:class:UntypedStorage directly, use :attr:Tensor.untyped_storage().

+
+
+
+def storage_type(self) +
+
+
+ +Expand source code + +
def storage_type(self):
+    r"""storage_type() -> type
+
+    Returns the type of the underlying storage.
+
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.storage_type, (self,), self)
+
+    torch.storage._warn_typed_storage_removal()
+
+    return self._typed_storage()._get_legacy_storage_class()
+
+

storage_type() -> type

+

Returns the type of the underlying storage.

+
+
+def symeig(self, eigenvectors=False) +
+
+
+ +Expand source code + +
def symeig(self, eigenvectors=False):
+    from torch._linalg_utils import _symeig
+
+    return _symeig(self, eigenvectors=eigenvectors)
+
+
+
+
+def to_sparse_coo(self) +
+
+
+ +Expand source code + +
def to_sparse_coo(self):
+    """Convert a tensor to :ref:`coordinate format <sparse-coo-docs>`.
+
+    Examples::
+
+         >>> dense = torch.randn(5, 5)
+         >>> sparse = dense.to_sparse_coo()
+         >>> sparse._nnz()
+         25
+
+    """
+    return self.to_sparse()
+
+

Convert a tensor to :ref:coordinate format <sparse-coo-docs>.

+

Examples::

+
 >>> dense = torch.randn(5, 5)
+ >>> sparse = dense.to_sparse_coo()
+ >>> sparse._nnz()
+ 25
+
+
+
+def unflatten(self, dim, sizes) +
+
+
+ +Expand source code + +
def unflatten(self, dim, sizes):
+    r"""
+    unflatten(dim, sizes) -> Tensor
+
+    See :func:`torch.unflatten`.
+
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(Tensor.unflatten, (self,), self, dim, sizes)
+
+    if not sizes:
+        raise RuntimeError("unflatten: sizes must be non-empty")
+
+    names = None
+    if isinstance(sizes, OrderedDict) or (
+        isinstance(sizes, (tuple, list)) and isinstance(sizes[0], (tuple, list))
+    ):
+        names, sizes = unzip_namedshape(sizes)
+        return super().unflatten(dim, sizes, names)
+    else:
+        return super().unflatten(dim, sizes)
+
+

unflatten(dim, sizes) -> Tensor

+

See :func:torch.unflatten.

+
+
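A one-line sketch of unflatten; the shape values are arbitrary:

>>> t = torch.randn(3, 4, 1)
>>> t.unflatten(1, (2, 2)).shape    # split dim 1 of size 4 into (2, 2)
torch.Size([3, 2, 2, 1])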
+def unique(self, sorted=True, return_inverse=False, return_counts=False, dim=None) +
+
+
+ +Expand source code + +
def unique(self, sorted=True, return_inverse=False, return_counts=False, dim=None):
+    r"""Returns the unique elements of the input tensor.
+
+    See :func:`torch.unique`
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.unique,
+            (self,),
+            self,
+            sorted=sorted,
+            return_inverse=return_inverse,
+            return_counts=return_counts,
+            dim=dim,
+        )
+    return torch.unique(
+        self,
+        sorted=sorted,
+        return_inverse=return_inverse,
+        return_counts=return_counts,
+        dim=dim,
+    )
+
+

Returns the unique elements of the input tensor.

+

See :func:torch.unique

+
+
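A brief sketch of unique with counts; the input values are arbitrary:

>>> t = torch.tensor([1, 3, 2, 3, 1])
>>> t.unique()                                     # sorted unique values
tensor([1, 2, 3])
>>> values, counts = t.unique(return_counts=True)
>>> counts
tensor([2, 1, 2])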
+def unique_consecutive(self, return_inverse=False, return_counts=False, dim=None) +
+
+
+ +Expand source code + +
def unique_consecutive(self, return_inverse=False, return_counts=False, dim=None):
+    r"""Eliminates all but the first element from every consecutive group of equivalent elements.
+
+    See :func:`torch.unique_consecutive`
+    """
+    if has_torch_function_unary(self):
+        return handle_torch_function(
+            Tensor.unique_consecutive,
+            (self,),
+            self,
+            return_inverse=return_inverse,
+            return_counts=return_counts,
+            dim=dim,
+        )
+    return torch.unique_consecutive(
+        self, return_inverse=return_inverse, return_counts=return_counts, dim=dim
+    )
+
+

Eliminates all but the first element from every consecutive group of equivalent elements.

+

See :func:torch.unique_consecutive

+
+
+
+
+class SpacyToken +(...) +
+
+

An individual token – i.e. a word, punctuation symbol, whitespace, +etc.

+

DOCS: https://spacy.io/api/token

+
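A hedged sketch of reading a few of the attributes documented below; the pipeline name en_core_web_sm is an assumption and the parse output depends on the loaded model:

>>> import spacy
>>> nlp = spacy.load("en_core_web_sm")             # placeholder pipeline
>>> doc = nlp("The quick fox jumps")
>>> token = doc[3]                                 # "jumps"
>>> [t.text for t in token.children]               # immediate syntactic children
>>> [t.text for t in token.ancestors]              # syntactic ancestors
>>> token.is_alpha, token.lemma_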

Instance variables

+
+
var ancestors
+
+

A sequence of this token's syntactic ancestors.

+

YIELDS (Token): A sequence of ancestor tokens such that +ancestor.is_ancestor(self).

+

DOCS: https://spacy.io/api/token#ancestors

+
+
var children
+
+

A sequence of the token's immediate syntactic children.

+

YIELDS (Token): A child token such that child.head==self.

+

DOCS: https://spacy.io/api/token#children

+
+
var cluster
+
+

RETURNS (int): Brown cluster ID.

+
+
var conjuncts
+
+

A sequence of coordinated tokens, including the token itself.

+

RETURNS (tuple): The coordinated tokens.

+

DOCS: https://spacy.io/api/token#conjuncts

+
+
var dep
+
+

RETURNS (uint64): ID of syntactic dependency label.

+
+
var dep_
+
+

RETURNS (str): The syntactic dependency label.

+
+
var doc
+
+
+
+
var ent_id
+
+

RETURNS (uint64): ID of the entity the token is an instance of, +if any.

+
+
var ent_id_
+
+

RETURNS (str): ID of the entity the token is an instance of, +if any.

+
+
var ent_iob
+
+

IOB code of named entity tag. 1="I", 2="O", 3="B". 0 means no tag +is assigned.

+

RETURNS (uint64): IOB code of named entity tag.

+
+
var ent_iob_
+
+

IOB code of named entity tag. "B" means the token begins an entity, +"I" means it is inside an entity, "O" means it is outside an entity, +and "" means no entity tag is set. "B" with an empty ent_type +means that the token is blocked from further processing by NER.

+

RETURNS (str): IOB code of named entity tag.

+
+
var ent_kb_id
+
+

RETURNS (uint64): Named entity KB ID.

+
+
var ent_kb_id_
+
+

RETURNS (str): Named entity KB ID.

+
+
var ent_type
+
+

RETURNS (uint64): Named entity type.

+
+
var ent_type_
+
+

RETURNS (str): Named entity type.

+
+
var has_vector
+
+

A boolean value indicating whether a word vector is associated with +the object.

+

RETURNS (bool): Whether a word vector is associated with the object.

+

DOCS: https://spacy.io/api/token#has_vector

+
+
var head
+
+

The syntactic parent, or "governor", of this token. If token.has_head() is False, this method will return itself.

+

RETURNS (Token): The token predicted by the parser to be the head of the current token.

+
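A minimal sketch of head, assuming en_core_web_sm; parses vary by model. Every token points at its syntactic governor, and the sentence root is its own head.

import spacy

nlp = spacy.load('en_core_web_sm')
doc = nlp('She ate the green apple')
for token in doc:
    print(token.text, '<--', token.dep_, '--', token.head.text)
# the sentence root typically satisfies token.head == token
root = [t for t in doc if t.head == t][0]
print(root.text)  # e.g. 'ate'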
+
var i
+
+
+
+
var idx
+
+

RETURNS (int): The character offset of the token within the parent +document.

+
+
var is_alpha
+
+

RETURNS (bool): Whether the token consists of alpha characters. +Equivalent to token.text.isalpha().

+
+
var is_ascii
+
+

RETURNS (bool): Whether the token consists of ASCII characters. Equivalent to all(ord(c) < 128 for c in token.text).

+
+
var is_bracket
+
+

RETURNS (bool): Whether the token is a bracket.

+
+
var is_currency
+
+

RETURNS (bool): Whether the token is a currency symbol.

+
+
var is_digit
+
+

RETURNS (bool): Whether the token consists of digits. Equivalent to +token.text.isdigit().

+
+
var is_left_punct
+
+

RETURNS (bool): Whether the token is a left punctuation mark.

+
+
var is_lower
+
+

RETURNS (bool): Whether the token is in lowercase. Equivalent to +token.text.islower().

+
+
var is_oov
+
+

RETURNS (bool): Whether the token is out-of-vocabulary.

+
+
var is_punct
+
+

RETURNS (bool): Whether the token is punctuation.

+
+
var is_quote
+
+

RETURNS (bool): Whether the token is a quotation mark.

+
+
var is_right_punct
+
+

RETURNS (bool): Whether the token is a right punctuation mark.

+
+
var is_sent_end
+
+

A boolean value indicating whether the token ends a sentence. +None if unknown. Defaults to True for the last token in the Doc.

+

RETURNS (bool / None): Whether the token ends a sentence. +None if unknown.

+

DOCS: https://spacy.io/api/token#is_sent_end

+
+
var is_sent_start
+
+

A boolean value indicating whether the token starts a sentence. None if unknown. Defaults to True for the first token in the Doc.

+

RETURNS (bool / None): Whether the token starts a sentence. None if unknown.

+
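A minimal sketch, assuming en_core_web_sm (which sets sentence boundaries via its parser): is_sent_start marks the first token of each sentence.

import spacy

nlp = spacy.load('en_core_web_sm')
doc = nlp('This is the first sentence. Here is the second one.')
starts = [token.text for token in doc if token.is_sent_start]
print(starts)  # ['This', 'Here']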
+
var is_space
+
+

RETURNS (bool): Whether the token consists of whitespace characters. +Equivalent to token.text.isspace().

+
+
var is_stop
+
+

RETURNS (bool): Whether the token is a stop word, i.e. part of a +"stop list" defined by the language data.

+
+
var is_title
+
+

RETURNS (bool): Whether the token is in titlecase. Equivalent to +token.text.istitle().

+
+
var is_upper
+
+

RETURNS (bool): Whether the token is in uppercase. Equivalent to +token.text.isupper()

+
+
var lang
+
+

RETURNS (uint64): ID of the language of the parent document's +vocabulary.

+
+
var lang_
+
+

RETURNS (str): Language of the parent document's vocabulary, +e.g. 'en'.

+
+
var left_edge
+
+

The leftmost token of this token's syntactic descendents.

+

RETURNS (Token): The first token such that self.is_ancestor(token).

+
+
var lefts
+
+

The leftward immediate children of the word, in the syntactic +dependency parse.

+

YIELDS (Token): A left-child of the token.

+

DOCS: https://spacy.io/api/token#lefts

+
+
var lemma
+
+

RETURNS (uint64): ID of the base form of the word, with no +inflectional suffixes.

+
+
var lemma_
+
+

RETURNS (str): The token lemma, i.e. the base form of the word, +with no inflectional suffixes.

+
+
var lex
+
+

RETURNS (Lexeme): The underlying lexeme.

+
+
var lex_id
+
+

RETURNS (int): Sequential ID of the token's lexical type.

+
+
var like_email
+
+

RETURNS (bool): Whether the token resembles an email address.

+
+
var like_num
+
+

RETURNS (bool): Whether the token resembles a number, e.g. "10.9", +"10", "ten", etc.

+
+
var like_url
+
+

RETURNS (bool): Whether the token resembles a URL.

+
+
var lower
+
+

RETURNS (uint64): ID of the lowercase token text.

+
+
var lower_
+
+

RETURNS (str): The lowercase token text. Equivalent to +Token.text.lower().

+
+
var morph
+
+
+
+
var n_lefts
+
+

The number of leftward immediate children of the word, in the +syntactic dependency parse.

+

RETURNS (int): The number of leftward immediate children of the +word, in the syntactic dependency parse.

+

DOCS: https://spacy.io/api/token#n_lefts

+
+
var n_rights
+
+

The number of rightward immediate children of the word, in the syntactic dependency parse.

+

RETURNS (int): The number of rightward immediate children of the word, in the syntactic dependency parse.

+

DOCS: https://spacy.io/api/token#n_rights

+
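A minimal sketch relating lefts, rights, n_lefts and n_rights, assuming en_core_web_sm; exact attachments depend on the parser.

import spacy

nlp = spacy.load('en_core_web_sm')
doc = nlp('I like New York in Autumn.')
token = doc[3]  # 'York'
print([t.text for t in token.lefts])   # e.g. ['New']
print([t.text for t in token.rights])  # e.g. ['in']
print(token.n_lefts, token.n_rights)   # e.g. 1 1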
+
var norm
+
+

RETURNS (uint64): ID of the token's norm, i.e. a normalised form of +the token text. Usually set in the language's tokenizer exceptions +or norm exceptions.

+
+
var norm_
+
+

RETURNS (str): The token's norm, i.e. a normalised form of the +token text. Usually set in the language's tokenizer exceptions or +norm exceptions.

+
+
var orth
+
+

RETURNS (uint64): ID of the verbatim text content.

+
+
var orth_
+
+

RETURNS (str): Verbatim text content (identical to +Token.text). Exists mostly for consistency with the other +attributes.

+
+
var pos
+
+

RETURNS (uint64): ID of coarse-grained part-of-speech tag.

+
+
var pos_
+
+

RETURNS (str): Coarse-grained part-of-speech tag.

+
+
var prefix
+
+

RETURNS (uint64): ID of a length-N substring from the start of the +token. Defaults to N=1.

+
+
var prefix_
+
+

RETURNS (str): A length-N substring from the start of the token. +Defaults to N=1.

+
+
var prob
+
+

RETURNS (float): Smoothed log probability estimate of token type.

+
+
var rank
+
+

RETURNS (int): Sequential ID of the token's lexical type, used to +index into tables, e.g. for word vectors.

+
+
var right_edge
+
+

The rightmost token of this token's syntactic descendents.

+

RETURNS (Token): The last token such that self.is_ancestor(token).

+
+
var rights
+
+

The rightward immediate children of the word, in the syntactic +dependency parse.

+

YIELDS (Token): A right-child of the token.

+

DOCS: https://spacy.io/api/token#rights

+
+
var sent
+
+

RETURNS (Span): The sentence span that the token is a part of.

+
+
var sent_start
+
+

Deprecated: use Token.is_sent_start instead.

+
+
var sentiment
+
+

RETURNS (float): A scalar value indicating the positivity or +negativity of the token.

+
+
var shape
+
+

RETURNS (uint64): ID of the token's shape, a transform of the +token's string, to show orthographic features (e.g. "Xxxx", "dd").

+
+
var shape_
+
+

RETURNS (str): Transform of the token's string, to show +orthographic features. For example, "Xxxx" or "dd".

+
+
var subtree
+
+

A sequence containing the token and all the token's syntactic descendants.

+

YIELDS (Token): A descendent token such that self.is_ancestor(descendent) or token == self.

+

DOCS: https://spacy.io/api/token#subtree

+
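A minimal sketch of subtree, assuming en_core_web_sm: subtree yields the token plus all of its descendants in document order (left_edge and right_edge give its boundaries).

import spacy

nlp = spacy.load('en_core_web_sm')
doc = nlp('Credit and mortgage account holders must submit their requests')
holders = doc[4]
# e.g. ['Credit', 'and', 'mortgage', 'account', 'holders']
print([t.text for t in holders.subtree])
print(holders.left_edge.text, holders.right_edge.text)  # e.g. 'Credit' 'holders'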
+
var suffix
+
+

RETURNS (uint64): ID of a length-N substring from the end of the +token. Defaults to N=3.

+
+
var suffix_
+
+

RETURNS (str): A length-N substring from the end of the token. +Defaults to N=3.

+
+
var tag
+
+

RETURNS (uint64): ID of fine-grained part-of-speech tag.

+
+
var tag_
+
+

RETURNS (str): Fine-grained part-of-speech tag.

+
+
var tensor
+
+
+
+
var text
+
+

RETURNS (str): The original verbatim text of the token.

+
+
var text_with_ws
+
+

RETURNS (str): The text content of the span (with trailing +whitespace).

+
+
var vector
+
+

A real-valued meaning representation.

+

RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array representing the token's semantics.

+

DOCS: https://spacy.io/api/token#vector

+
+
var vector_norm
+
+

The L2 norm of the token's vector representation.

+

RETURNS (float): The L2 norm of the vector representation.

+

DOCS: https://spacy.io/api/token#vector_norm

+
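A minimal sketch of has_vector, vector and vector_norm, assuming a pipeline that ships static word vectors, e.g. en_core_web_md (the small models do not include them).

import spacy

nlp = spacy.load('en_core_web_md')
doc = nlp('apples and oranges')
token = doc[0]
print(token.has_vector)    # True
print(token.vector.shape)  # e.g. (300,)
print(float(token.vector_norm))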
+
var vocab
+
+
+
+
var whitespace_
+
+

RETURNS (str): The trailing whitespace character, if present.

+
+
+

Methods

+
+
+def check_flag(...) +
+
+

Token.check_flag(self, attr_id_t flag_id) -> bool +Check the value of a boolean flag.

+
    flag_id (int): The ID of the flag attribute.
+    RETURNS (bool): Whether the flag is set.
+
+    DOCS: <https://spacy.io/api/token#check_flag>
+
+
+
+def get_extension(...) +
+
+

Token.get_extension(type cls, name) +Look up a previously registered extension by name.

+
    name (str): Name of the extension.
+    RETURNS (tuple): A <code>(default, method, getter, setter)</code> tuple.
+
+    DOCS: <https://spacy.io/api/token#get_extension>
+
+
+
+def has_dep(...) +
+
+

Token.has_dep(self) +Check whether the token has annotated dep information. +Returns False when the dep label is unset/missing.

+
    RETURNS (bool): Whether the dep label is valid or not.
+
+
+
+def has_extension(...) +
+
+

Token.has_extension(type cls, name) +Check whether an extension has been registered.

+
    name (str): Name of the extension.
+    RETURNS (bool): Whether the extension has been registered.
+
+    DOCS: <https://spacy.io/api/token#has_extension>
+
+
+
+def has_head(...) +
+
+

Token.has_head(self) +Check whether the token has annotated head information. +Return False when the head annotation is unset/missing.

+
    RETURNS (bool): Whether the head annotation is valid or not.
+
+
+
+def has_morph(...) +
+
+

Token.has_morph(self) +Check whether the token has annotated morph information. +Return False when the morph annotation is unset/missing.

+
    RETURNS (bool): Whether the morph annotation is set.
+
+
+
+def iob_strings(...) +
+
+

Token.iob_strings(type cls)

+
+
+def is_ancestor(...) +
+
+

Token.is_ancestor(self, descendant) +Check whether this token is a parent, grandparent, etc. of another +in the dependency tree.

+
    descendant (Token): Another token.
+    RETURNS (bool): Whether this token is the ancestor of the descendant.
+
+    DOCS: <https://spacy.io/api/token#is_ancestor>
+
+
+
+def nbor(...) +
+
+

Token.nbor(self, int i=1) +Get a neighboring token.

+
    i (int): The relative position of the token to get. Defaults to 1.
+    RETURNS (Token): The token at position `self.doc[self.i+i]`.
+
+    DOCS: <https://spacy.io/api/token#nbor>
+
+
+
+def remove_extension(...) +
+
+

Token.remove_extension(type cls, name) +Remove a previously registered extension.

+
    name (str): Name of the extension.
+    RETURNS (tuple): A <code>(default, method, getter, setter)</code> tuple of the
+        removed extension.
+
+    DOCS: <https://spacy.io/api/token#remove_extension>
+
+
+
+def set_extension(...) +
+
+

Token.set_extension(type cls, name, **kwargs)
Define a custom attribute which becomes available as Token._.

+
    name (str): Name of the attribute to set.
+    default: Optional default value of the attribute.
+    getter (callable): Optional getter function.
+    setter (callable): Optional setter function.
+    method (callable): Optional method for method extension.
+    force (bool): Force overwriting existing attribute.
+
+    DOCS: <https://spacy.io/api/token#set_extension>
+    USAGE: <https://spacy.io/usage/processing-pipelines#custom-components-attributes>
+
+
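A minimal sketch of registering and using a custom extension attribute; the attribute name is only an illustration.

import spacy
from spacy.tokens import Token

# register once, then read/write the value via Token._
Token.set_extension('is_fruit', default=False, force=True)

nlp = spacy.load('en_core_web_sm')
doc = nlp('I like apples')
doc[2]._.is_fruit = True
print(doc[2]._.is_fruit, Token.has_extension('is_fruit'))  # True True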
+
+def set_morph(...) +
+
+

Token.set_morph(self, features)

+
+
+def similarity(...) +
+
+

Token.similarity(self, other)
Make a semantic similarity estimate. The default estimate is cosine similarity using an average of word vectors.

+
    other (object): The object to compare with. By default, accepts <code><a title="lang_main.types.Doc" href="#lang_main.types.Doc">Doc</a></code>,
+        <code>Span</code>, <code><a title="lang_main.types.Token" href="#lang_main.types.Token">Token</a></code> and <code>Lexeme</code> objects.
+    RETURNS (float): A scalar similarity score. Higher is more similar.
+
+    DOCS: <https://spacy.io/api/token#similarity>
+
+
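A minimal sketch of token-level similarity, assuming a pipeline with word vectors such as en_core_web_md; without vectors the scores are not meaningful.

import spacy

nlp = spacy.load('en_core_web_md')
doc = nlp('apples oranges cars')
apples, oranges, cars = doc
print(apples.similarity(oranges))  # relatively high
print(apples.similarity(cars))     # lower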
+
+
+
+
+
+ +
diff --git a/src/lang_main/analysis/shared.py b/src/lang_main/analysis/shared.py
index a90df48..04cb674 100644
--- a/src/lang_main/analysis/shared.py
+++ b/src/lang_main/analysis/shared.py
@@ -5,9 +5,6 @@ from typing import cast
 import networkx as nx
 import numpy as np
 import numpy.typing as npt
-
-# import sentence_transformers  # TODO check removal
-# import sentence_transformers.util  # TODO check removal
 from networkx import Graph
 from pandas import DataFrame, Series
 from sentence_transformers import SentenceTransformer
diff --git a/src/lang_main/analysis/timeline.py b/src/lang_main/analysis/timeline.py
index 0c8c0cd..5819678 100644
--- a/src/lang_main/analysis/timeline.py
+++ b/src/lang_main/analysis/timeline.py
@@ -47,7 +47,7 @@ def _non_relevant_obj_ids(
         feats_per_obj_id = feats_per_obj_id.dropna()
         unique_feats_per_obj_id = len(feats_per_obj_id.unique())
 
-        if unique_feats_per_obj_id > thresh_unique_feat_per_id:
+        if unique_feats_per_obj_id >= thresh_unique_feat_per_id:
             ids_to_ignore.add(obj_id)
 
     return tuple(ids_to_ignore)
diff --git a/src/lang_main/model_loader.py b/src/lang_main/model_loader.py
index a00c3d2..0ac98c0 100644
--- a/src/lang_main/model_loader.py
+++ b/src/lang_main/model_loader.py
@@ -119,7 +119,7 @@ def _preprocess_STFR_model_name(
             raise FileNotFoundError(
                 f'Target model >{model_name}< not found under {model_path}'
             )
-        model_name_or_path = str(model_path)
+        model_name_or_path = str(model_path)  # pragma: no cover
     else:
         model_name_or_path = model_name
diff --git a/src/lang_main/pipelines/predefined.py b/src/lang_main/pipelines/predefined.py
index 8a5e6d0..4a399f3 100644
--- a/src/lang_main/pipelines/predefined.py
+++ b/src/lang_main/pipelines/predefined.py
@@ -30,11 +30,12 @@ from lang_main.constants import (
     DATE_COLS,
     FEATURE_NAME_OBJ_ID,
     FEATURE_NAME_OBJ_TEXT,
+    MAX_EDGE_NUMBER,
     MODEL_INPUT_FEATURES,
     NAME_DELTA_FEAT_TO_REPAIR,
     SAVE_PATH_FOLDER,
+    TARGET_FEATURE,
     THRESHOLD_AMOUNT_CHARACTERS,
-    THRESHOLD_EDGE_NUMBER,
     THRESHOLD_NUM_ACTIVITIES,
     THRESHOLD_SIMILARITY,
     THRESHOLD_TIMELINE_SIMILARITY,
@@ -72,7 +73,7 @@ def build_base_target_feature_pipe() -> Pipeline:
     pipe_target_feat.add(
         entry_wise_cleansing,
         {
-            'target_features': ('VorgangsBeschreibung',),
+            'target_features': (TARGET_FEATURE,),
             'cleansing_func': clean_string_slim,
         },
         save_result=True,
     )
     pipe_target_feat.add(
         analyse_feature,
         {
-            'target_feature': 'VorgangsBeschreibung',
+            'target_feature': TARGET_FEATURE,
         },
         save_result=True,
     )
@@ -140,7 +141,7 @@ def build_tk_graph_post_pipe() -> Pipeline:
     pipe_graph_postprocessing.add(
         graphs.filter_graph_by_number_edges,
         {
-            'limit': THRESHOLD_EDGE_NUMBER,
+            'limit': MAX_EDGE_NUMBER,
             'property': 'weight',
         },
     )
diff --git a/tests/analysis/test_graphs.py b/tests/analysis/test_graphs.py
index 1145b46..929c679 100644
--- a/tests/analysis/test_graphs.py
+++ b/tests/analysis/test_graphs.py
@@ -321,7 +321,7 @@ def test_pipe_add_graph_metrics():
 def test_pipe_rescale_graph_edge_weights(tk_graph):
     rescaled_tkg, rescaled_undir = graphs.pipe_rescale_graph_edge_weights(tk_graph)
     assert rescaled_tkg[2][1]['weight'] == pytest.approx(1.0)
-    assert rescaled_tkg[1][2]['weight'] == pytest.approx(0.0952)
+    assert rescaled_tkg[1][2]['weight'] == pytest.approx(0.095238)
     assert rescaled_undir[2][1]['weight'] == pytest.approx(1.0)
     assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0)
@@ -331,7 +331,7 @@ def test_rescale_edge_weights(import_graph, request):
     test_graph = request.getfixturevalue(import_graph)
     rescaled_graph = graphs.rescale_edge_weights(test_graph)
     assert rescaled_graph[2][1]['weight'] == pytest.approx(1.0)
-    assert rescaled_graph[1][2]['weight'] == pytest.approx(0.0952)
+    assert rescaled_graph[1][2]['weight'] == pytest.approx(0.095238)
 
 
 @pytest.mark.parametrize('import_graph', ['graph', 'tk_graph'])
diff --git a/tests/analysis/test_timeline.py b/tests/analysis/test_timeline.py
index 374b882..1ab6bbc 100644
--- a/tests/analysis/test_timeline.py
+++ b/tests/analysis/test_timeline.py
@@ -72,7 +72,7 @@ def test_calc_delta_to_repair(data_pre_cleaned, convert_to_days):
 def test_non_relevant_obj_ids(data_pre_cleaned):
     feature_uniqueness = 'HObjektText'
     feature_obj_id = 'ObjektID'
-    threshold = 1
+    threshold = 2
     data = data_pre_cleaned.copy()
     data.at[0, feature_obj_id] = 1
     ids_to_ignore = tl._non_relevant_obj_ids(
@@ -88,7 +88,7 @@ def test_remove_non_relevant_obj_ids(data_pre_cleaned):
     feature_uniqueness = 'HObjektText'
     feature_obj_id = 'ObjektID'
-    threshold = 1
+    threshold = 2
     data = data_pre_cleaned.copy()
     data.at[0, feature_obj_id] = 1
diff --git a/tests/test_model_loader.py b/tests/test_model_loader.py
index 1127383..09179bc 100644
--- a/tests/test_model_loader.py
+++ b/tests/test_model_loader.py
@@ -25,8 +25,6 @@ from lang_main.types import LanguageModels
 @pytest.mark.parametrize(
     'model_name',
     [
-        STFRModelTypes.ALL_DISTILROBERTA_V1,
-        STFRModelTypes.ALL_MINI_LM_L12_V2,
         STFRModelTypes.ALL_MINI_LM_L6_V2,
         STFRModelTypes.ALL_MPNET_BASE_V2,
     ],
@@ -47,6 +45,25 @@ def test_load_sentence_transformer(
     assert isinstance(model, SentenceTransformer)
 
 
+def test_preprocess_STFR_model_name() -> None:
+    model_name_not_exist = 'TestModel'
+    ret_model_name = model_loader._preprocess_STFR_model_name(
+        model_name=model_name_not_exist, backend=STFRBackends.TORCH, force_download=True
+    )
+    assert ret_model_name == model_name_not_exist
+    ret_model_name = model_loader._preprocess_STFR_model_name(
+        model_name=model_name_not_exist, backend=STFRBackends.TORCH, force_download=False
+    )
+    assert ret_model_name == model_name_not_exist
+
+    model_name_exist = STFRModelTypes.E5_BASE_STS_EN_DE
+    backend_exist = STFRBackends.ONNX
+    with pytest.raises(FileNotFoundError):
+        _ = model_loader._preprocess_STFR_model_name(
+            model_name=model_name_exist, backend=backend_exist, force_download=False
+        )
+
+
 @pytest.mark.parametrize(
     'similarity_func',
     [
@@ -57,8 +74,6 @@ def test_load_sentence_transformer(
 @pytest.mark.parametrize(
     'model_name',
     [
-        STFRModelTypes.ALL_DISTILROBERTA_V1,
-        STFRModelTypes.ALL_MINI_LM_L12_V2,
         STFRModelTypes.ALL_MINI_LM_L6_V2,
         STFRModelTypes.ALL_MPNET_BASE_V2,
     ],
@@ -108,6 +123,14 @@ def test_instantiate_spacy_model():
     assert isinstance(model, Language)
 
 
+def test_fail_instantiate_spacy_model():
+    with pytest.raises(KeyError):
+        _ = model_loader.instantiate_model(
+            model_load_map=model_loader.MODEL_LOADER_MAP,
+            model='test',  # type: ignore
+        )  # type: ignore
+
+
 @pytest.mark.mload
 def test_instantiate_stfr_model():
     model = model_loader.instantiate_model(