<!--
Repository viewer metadata captured together with this file (not part of the generated page):

2025-01-22 16:54:15 +01:00

334 lines
19 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-->

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc3 0.11.5">
<title>lang_main.analysis.timeline API documentation</title>
<meta name="description" content="API documentation for the lang_main.analysis.timeline module.">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:1.5em;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:2em 0 .50em 0}h3{font-size:1.4em;margin:1.6em 0 .7em 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .2s ease-in-out}a:visited{color:#503}a:hover{color:#b62}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900;font-weight:bold}pre code{font-size:.8em;line-height:1.4em;padding:1em;display:block}code{background:#f3f3f3;font-family:"DejaVu Sans Mono",monospace;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target 
.name{background:var(--highlight-color)}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source > summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible;min-width:max-content}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em 1em;margin:1em 0}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul ul{padding-left:1em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js" integrity="sha512-D9gUyxqja7hBtkWpPWGt9wfbfaMGVt9gnyCvYa+jojwwPHLCzUm5i8rpk7vD7wNee9bA35eYIjobYPaQuKS1MQ==" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => {
hljs.configure({languages: ['bash', 'css', 'diff', 'graphql', 'ini', 'javascript', 'json', 'plaintext', 'python', 'python-repl', 'rust', 'shell', 'sql', 'typescript', 'xml', 'yaml']});
hljs.highlightAll();
/* Collapse source docstrings */
setTimeout(() => {
[...document.querySelectorAll('.hljs.language-python > .hljs-string')]
.filter(el => el.innerHTML.length > 200 && ['"""', "'''"].includes(el.innerHTML.substring(0, 3)))
.forEach(el => {
let d = document.createElement('details');
d.classList.add('hljs-string');
d.innerHTML = '<summary>"""</summary>' + el.innerHTML.substring(3);
el.replaceWith(d);
});
}, 100);
})</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>lang_main.analysis.timeline</code></h1>
</header>
<section id="section-intro">
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="lang_main.analysis.timeline.calc_delta_to_next_failure"><code class="name flex">
<span>def <span class="ident">calc_delta_to_next_failure</span></span>(<span>data: pandas.core.frame.DataFrame,<br>date_feature: str = 'ErstellungsDatum',<br>name_delta_feature: str = 'Zeitspanne bis zum nächsten Ereignis [Tage]',<br>convert_to_days: bool = True) -&gt; pandas.core.frame.DataFrame</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def calc_delta_to_next_failure(
    data: DataFrameTLFiltered,
    date_feature: str = &#39;ErstellungsDatum&#39;,
    name_delta_feature: str = NAME_DELTA_FEAT_TO_NEXT_FAILURE,
    convert_to_days: bool = True,
) -&gt; DataFrameTLFiltered:
    data = data.copy()
    last_val = data[date_feature].iat[-1]
    shifted = data[date_feature].shift(-1, fill_value=last_val)
    data[name_delta_feature] = shifted - data[date_feature]
    data = data.sort_values(by=name_delta_feature, ascending=False)
    if convert_to_days:
        data[name_delta_feature] = data[name_delta_feature].dt.days
    return data</code></pre>
</details>
<div class="desc"><p>Returns a copy of <code>data</code> with a new column <code>name_delta_feature</code> that holds, for each row, the next row's <code>date_feature</code> value minus its own. The last row is compared with its own date, so its difference is zero. The result is sorted by that column in descending order; if <code>convert_to_days</code> is true, the differences are replaced by their <code>.dt.days</code> value.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.calc_delta_to_repair"><code class="name flex">
<span>def <span class="ident">calc_delta_to_repair</span></span>(<span>data: pandas.core.frame.DataFrame,<br>date_feature_start: str = 'ErstellungsDatum',<br>date_feature_end: str = 'ErledigungsDatum',<br>name_delta_feature: str = 'Zeitspanne bis zur Behebung [Tage]',<br>convert_to_days: bool = True) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def calc_delta_to_repair(
    data: DataFrame,
    date_feature_start: str = &#39;ErstellungsDatum&#39;,
    date_feature_end: str = &#39;ErledigungsDatum&#39;,
    name_delta_feature: str = NAME_DELTA_FEAT_TO_REPAIR,
    convert_to_days: bool = True,
) -&gt; tuple[DataFrame]:
    logger.info(&#39;Calculating time differences between start and end of operations...&#39;)
    data = data.copy()
    data[name_delta_feature] = data[date_feature_end] - data[date_feature_start]
    if convert_to_days:
        data[name_delta_feature] = data[name_delta_feature].dt.days
    logger.info(&#39;Calculation successful.&#39;)
    return (data,)</code></pre>
</details>
<div class="desc"><p>Returns a one-element tuple containing a copy of <code>data</code> with a new column <code>name_delta_feature</code> set to <code>date_feature_end</code> minus <code>date_feature_start</code>. If <code>convert_to_days</code> is true, the differences are replaced by their <code>.dt.days</code> value.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.cleanup_descriptions"><code class="name flex">
<span>def <span class="ident">cleanup_descriptions</span></span>(<span>data: pandas.core.frame.DataFrame,<br>properties: Collection[str] = ('VorgangsBeschreibung', 'ErledigungsBeschreibung')) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def cleanup_descriptions(
    data: DataFrame,
    properties: Collection[str] = (
        &#39;VorgangsBeschreibung&#39;,
        &#39;ErledigungsBeschreibung&#39;,
    ),
) -&gt; tuple[DataFrame]:
    logger.info(&#39;Cleaning necessary descriptions...&#39;)
    data = data.copy()
    features = list(properties)
    data[features] = data[features].fillna(&#39;N.V.&#39;)
    (data,) = entry_wise_cleansing(data, target_features=features)
    logger.info(&#39;Cleansing successful.&#39;)
    return (data.copy(),)</code></pre>
</details>
<div class="desc"><p>Works on a copy of <code>data</code>: missing values in the <code>properties</code> columns are filled with <code>'N.V.'</code>, then those columns are passed to <code>entry_wise_cleansing</code> as <code>target_features</code>. Returns a one-element tuple containing a copy of the resulting frame.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.filter_activities_per_obj_id"><code class="name flex">
<span>def <span class="ident">filter_activities_per_obj_id</span></span>(<span>data: pandas.core.frame.DataFrame,<br>activity_feature: str = 'VorgangsTypName',<br>relevant_activity_types: Iterable[str] = ('Reparaturauftrag (Portal)',),<br>feature_obj_id: str = 'ObjektID',<br>threshold_num_activities: int = 1) -&gt; tuple[pandas.core.frame.DataFrame, pandas.core.series.Series]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def filter_activities_per_obj_id(
    data: DataFrame,
    activity_feature: str = &#39;VorgangsTypName&#39;,
    relevant_activity_types: Iterable[str] = (&#39;Reparaturauftrag (Portal)&#39;,),
    feature_obj_id: str = &#39;ObjektID&#39;,
    threshold_num_activities: int = 1,
) -&gt; tuple[DataFrame, Series]:
    data = data.copy()
    # filter only relevant activities, count occurrences for each ObjectID
    logger.info(&#39;Filtering activities per ObjectID...&#39;)
    filt_rel_activities = data[activity_feature].isin(relevant_activity_types)
    data_filter_activities = data.loc[filt_rel_activities].copy()
    num_activities_per_obj_id = cast(
        Series, data_filter_activities[feature_obj_id].value_counts(sort=True)
    )
    # filter for ObjectIDs with more than given number of activities
    filt_below_thresh = num_activities_per_obj_id &lt;= threshold_num_activities
    # index of series contains ObjectIDs
    obj_ids_below_thresh = num_activities_per_obj_id[filt_below_thresh].index
    filt_entries_below_thresh = data_filter_activities[feature_obj_id].isin(
        obj_ids_below_thresh
    )
    num_activities_per_obj_id = num_activities_per_obj_id.loc[~filt_below_thresh]
    data_filter_activities = data_filter_activities.loc[~filt_entries_below_thresh]
    logger.info(&#39;Activities per ObjectID filtered successfully.&#39;)
    return data_filter_activities, num_activities_per_obj_id</code></pre>
</details>
<div class="desc"><p>Keeps only the rows whose <code>activity_feature</code> value is contained in <code>relevant_activity_types</code> and counts the remaining rows per <code>feature_obj_id</code> value. Object IDs whose count is less than or equal to <code>threshold_num_activities</code> are removed from both results. Returns the filtered rows and the per-ID counts of the retained object IDs.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.filter_timeline_cands"><code class="name flex">
<span>def <span class="ident">filter_timeline_cands</span></span>(<span>data: pandas.core.frame.DataFrame,<br>cands: dict[int, tuple[tuple[int | numpy.int64, ...], ...]],<br>obj_id: int,<br>entry_idx: int,<br>sort_feature: str = 'ErstellungsDatum') -&gt; pandas.core.frame.DataFrame</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def filter_timeline_cands(
    data: DataFrame,
    cands: TimelineCandidates,
    obj_id: ObjectID,
    entry_idx: int,
    sort_feature: str = &#39;ErstellungsDatum&#39;,
) -&gt; DataFrameTLFiltered:
    data = data.copy()
    cands_for_obj_id = cands[obj_id]
    cands_choice = cands_for_obj_id[entry_idx]
    data = data.loc[list(cands_choice)].sort_values(
        by=sort_feature,
        ascending=True,
    )
    return data</code></pre>
</details>
<div class="desc"><p>Selects from a copy of <code>data</code> the rows whose index labels are listed in <code>cands[obj_id][entry_idx]</code> and returns them sorted by <code>sort_feature</code> in ascending order.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.generate_model_input"><code class="name flex">
<span>def <span class="ident">generate_model_input</span></span>(<span>data: pandas.core.frame.DataFrame,<br>target_feature_name: str = 'nlp_model_input',<br>model_input_features: Iterable[str] = ('VorgangsTypName', 'VorgangsArtText', 'VorgangsBeschreibung')) -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def generate_model_input(
    data: DataFrame,
    target_feature_name: str = &#39;nlp_model_input&#39;,
    model_input_features: Iterable[str] = (
        &#39;VorgangsTypName&#39;,
        &#39;VorgangsArtText&#39;,
        &#39;VorgangsBeschreibung&#39;,
    ),
) -&gt; tuple[DataFrame]:
    logger.info(&#39;Generating concatenation of model input features...&#39;)
    data = data.copy()
    model_input_features = list(model_input_features)
    input_features = data[model_input_features].fillna(&#39;&#39;).astype(str)
    data[target_feature_name] = input_features.apply(
        lambda x: &#39; - &#39;.join(x),
        axis=1,
    )
    logger.info(&#39;Model input generated successfully.&#39;)
    return (data,)</code></pre>
</details>
<div class="desc"><p>Returns a one-element tuple containing a copy of <code>data</code> with a new column <code>target_feature_name</code>. For each row, the values of the <code>model_input_features</code> columns are joined with <code>' - '</code>, after missing values are replaced by empty strings and all values are converted to <code>str</code>.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.get_timeline_candidates"><code class="name flex">
<span>def <span class="ident">get_timeline_candidates</span></span>(<span>data: pandas.core.frame.DataFrame,<br>num_activities_per_obj_id: pandas.core.series.Series,<br>*,<br>model: sentence_transformers.SentenceTransformer.SentenceTransformer,<br>cos_sim_threshold: float,<br>feature_obj_id: str = 'ObjektID',<br>feature_obj_text: str = 'HObjektText',<br>model_input_feature: str = 'nlp_model_input') -&gt; tuple[dict[int, tuple[tuple[int | numpy.int64, ...], ...]], dict[int, str]]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_timeline_candidates(
    data: DataFrame,
    num_activities_per_obj_id: Series,
    *,
    model: SentenceTransformer,
    cos_sim_threshold: float,
    feature_obj_id: str = &#39;ObjektID&#39;,
    feature_obj_text: str = &#39;HObjektText&#39;,
    model_input_feature: str = &#39;nlp_model_input&#39;,
) -&gt; tuple[TimelineCandidates, dict[ObjectID, str]]:
    logger.info(&#39;Obtaining timeline candidates...&#39;)
    candidates = _get_timeline_candidates_index(
        data=data,
        num_activities_per_obj_id=num_activities_per_obj_id,
        model=model,
        cos_sim_threshold=cos_sim_threshold,
        feature_obj_id=feature_obj_id,
        model_input_feature=model_input_feature,
    )
    tl_candidates = _transform_timeline_candidates(candidates)
    logger.info(&#39;Timeline candidates obtained successfully.&#39;)
    # text mapping to obtain object descriptors
    logger.info(&#39;Mapping ObjectIDs to their respective text descriptor...&#39;)
    map_obj_text = _map_obj_id_to_texts(
        data=data,
        feature_obj_id=feature_obj_id,
        feature_obj_text=feature_obj_text,
    )
    logger.info(&#39;ObjectIDs successfully mapped to text descriptors.&#39;)
    return tl_candidates, map_obj_text</code></pre>
</details>
<div class="desc"><p>Obtains the timeline candidates by passing the output of <code>_get_timeline_candidates_index</code> through <code>_transform_timeline_candidates</code>, and builds an object-ID-to-text mapping with <code>_map_obj_id_to_texts</code>. Returns both as a tuple: the candidates first, the mapping second.</p></div>
</dd>
<dt id="lang_main.analysis.timeline.remove_non_relevant_obj_ids"><code class="name flex">
<span>def <span class="ident">remove_non_relevant_obj_ids</span></span>(<span>data: pandas.core.frame.DataFrame,<br>thresh_unique_feat_per_id: int,<br>*,<br>feature_uniqueness: str = 'HObjektText',<br>feature_obj_id: str = 'ObjektID') -&gt; tuple[pandas.core.frame.DataFrame]</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def remove_non_relevant_obj_ids(
    data: DataFrame,
    thresh_unique_feat_per_id: int,
    *,
    feature_uniqueness: str = &#39;HObjektText&#39;,
    feature_obj_id: str = &#39;ObjektID&#39;,
) -&gt; tuple[DataFrame]:
    logger.info(&#39;Removing non-relevant ObjectIDs from dataset...&#39;)
    data = data.copy()
    ids_to_ignore = _non_relevant_obj_ids(
        data=data,
        thresh_unique_feat_per_id=thresh_unique_feat_per_id,
        feature_uniqueness=feature_uniqueness,
        feature_obj_id=feature_obj_id,
    )
    # only retain entries with ObjectIDs not in IDs to ignore
    data = data.loc[~(data[feature_obj_id].isin(ids_to_ignore))]
    logger.debug(&#39;Ignored ObjectIDs: %s&#39;, ids_to_ignore)
    logger.info(&#39;Non-relevant ObjectIDs removed successfully.&#39;)
    return (data,)</code></pre>
</details>
<div class="desc"><p>Works on a copy of <code>data</code>: drops every row whose <code>feature_obj_id</code> value is among the IDs returned by <code>_non_relevant_obj_ids</code> and returns the remaining rows in a one-element tuple.</p></div>
</dd>
</dl>
</section>
<section>
</section>
</article>
<nav id="sidebar">
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="lang_main.analysis" href="index.html">lang_main.analysis</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="lang_main.analysis.timeline.calc_delta_to_next_failure" href="#lang_main.analysis.timeline.calc_delta_to_next_failure">calc_delta_to_next_failure</a></code></li>
<li><code><a title="lang_main.analysis.timeline.calc_delta_to_repair" href="#lang_main.analysis.timeline.calc_delta_to_repair">calc_delta_to_repair</a></code></li>
<li><code><a title="lang_main.analysis.timeline.cleanup_descriptions" href="#lang_main.analysis.timeline.cleanup_descriptions">cleanup_descriptions</a></code></li>
<li><code><a title="lang_main.analysis.timeline.filter_activities_per_obj_id" href="#lang_main.analysis.timeline.filter_activities_per_obj_id">filter_activities_per_obj_id</a></code></li>
<li><code><a title="lang_main.analysis.timeline.filter_timeline_cands" href="#lang_main.analysis.timeline.filter_timeline_cands">filter_timeline_cands</a></code></li>
<li><code><a title="lang_main.analysis.timeline.generate_model_input" href="#lang_main.analysis.timeline.generate_model_input">generate_model_input</a></code></li>
<li><code><a title="lang_main.analysis.timeline.get_timeline_candidates" href="#lang_main.analysis.timeline.get_timeline_candidates">get_timeline_candidates</a></code></li>
<li><code><a title="lang_main.analysis.timeline.remove_non_relevant_obj_ids" href="#lang_main.analysis.timeline.remove_non_relevant_obj_ids">remove_non_relevant_obj_ids</a></code></li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.11.5</a>.</p>
</footer>
</body>
</html>