improved imports, dummy dataset generation
This commit is contained in:
3095
notebooks/Analyse_5-1_Timeline.ipynb
Normal file
3095
notebooks/Analyse_5-1_Timeline.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1085
notebooks/Preprocess_Pipeline.ipynb
Normal file
1085
notebooks/Preprocess_Pipeline.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
2275
notebooks/Token_Analysis.ipynb
Normal file
2275
notebooks/Token_Analysis.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
8228
notebooks/archive/Analyse.ipynb
Normal file
8228
notebooks/archive/Analyse.ipynb
Normal file
File diff suppressed because one or more lines are too long
12132
notebooks/archive/Analyse_2-2.ipynb
Normal file
12132
notebooks/archive/Analyse_2-2.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
11660
notebooks/archive/Analyse_2.ipynb
Normal file
11660
notebooks/archive/Analyse_2.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1790
notebooks/archive/Analyse_3.ipynb
Normal file
1790
notebooks/archive/Analyse_3.ipynb
Normal file
File diff suppressed because one or more lines are too long
5501
notebooks/archive/Analyse_4-1.ipynb
Normal file
5501
notebooks/archive/Analyse_4-1.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
3898
notebooks/archive/Analyse_4-2.ipynb
Normal file
3898
notebooks/archive/Analyse_4-2.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1469
notebooks/archive/test_new_dupl_merge.ipynb
Normal file
1469
notebooks/archive/test_new_dupl_merge.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
243
notebooks/archive/test_sentence_trf.ipynb
Normal file
243
notebooks/archive/test_sentence_trf.ipynb
Normal file
@@ -0,0 +1,243 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text1 = \"Betriebssicherheitsüberprüfung\"\n",
|
||||
"text3 = \"Ich habe die Betriebssicherheitsüberprüfung durchgeführt.\"\n",
|
||||
"text2 = \"die Betriebssicherheitsüberprüfung durchgeführt\"\n",
|
||||
"#text2 = \"Nach dem Batterie-Wechsel gingen alle Lichter aus\"\n",
|
||||
"sentences = [text1, text2]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text1 = \"Wöchentliche Sichtkontrolle / Reinigung\"\n",
|
||||
"text3 = \"3-monatliche Sichtkontrolle / Reinigung\"\n",
|
||||
"text2 = \"Wöchentliche Sichtkontrolle / Reinigun\"\n",
|
||||
"#text2 = \"Nach dem Batterie-Wechsel gingen alle Lichter aus\"\n",
|
||||
"sentences = [text1, text2]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text1 = \"Wöchentliche Sichtkontrolle / Reinigung\"\n",
|
||||
"text3 = \"Tägliche Kontrolle der Wasseraufbereitungsanlagen\"\n",
|
||||
"text2 = \"Wöchentliche Kontrolle der Wasseraufbereitungsanlagen\"\n",
|
||||
"text4 = \"Täglihce Kontolle der Wasseraufberitungsanlagen\"\n",
|
||||
"#text2 = \"Nach dem Batterie-Wechsel gingen alle Lichter aus\"\n",
|
||||
"sentences = [text1, text2]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#text1 = 'Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers\\n'\n",
|
||||
"#text3 = 'Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\foersterflorian\\mambaforge\\envs\\test\\Lib\\site-packages\\torch\\_utils.py:776: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
|
||||
" return self.fget.__get__(instance, owner)()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cosine-Similarity t1+2: tensor([[0.4740]])\n",
|
||||
"Cosine-Similarity t1+3: tensor([[0.4360]])\n",
|
||||
"Cosine-Similarity t2+3: tensor([[0.9494]])\n",
|
||||
"Cosine-Similarity t2+4: tensor([[0.7007]])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n# Print the embeddings\\nfor sentence, embedding in zip(sentences, sentence_embeddings):\\n print(\"Sentence:\", sentence)\\n print(\"Embedding:\", embedding)\\n print(\"\")\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sentence_transformers import SentenceTransformer, util\n",
|
||||
"\n",
|
||||
"#model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
|
||||
"#model = SentenceTransformer(\"all-mpnet-base-v2 \")\n",
|
||||
"model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')\n",
|
||||
"\n",
|
||||
"# Our sentences we like to encode\n",
|
||||
"\"\"\"\n",
|
||||
"sentences = [\n",
|
||||
" \"This framework generates embeddings for each input sentence\",\n",
|
||||
" \"Sentences are passed as a list of string.\",\n",
|
||||
" \"The quick brown fox jumps over the lazy dog.\",\n",
|
||||
"]\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Sentences are encoded by calling model.encode()\n",
|
||||
"sentence_embeddings = model.encode(sentences)\n",
|
||||
"t1 = model.encode(text1)\n",
|
||||
"t2 = model.encode(text2)\n",
|
||||
"t3 = model.encode(text3)\n",
|
||||
"t4 = model.encode(text4)\n",
|
||||
"\n",
|
||||
"cos_sim = util.cos_sim(t1, t2)\n",
|
||||
"print(\"Cosine-Similarity t1+2:\", cos_sim)\n",
|
||||
"cos_sim = util.cos_sim(t1, t3)\n",
|
||||
"print(\"Cosine-Similarity t1+3:\", cos_sim)\n",
|
||||
"cos_sim = util.cos_sim(t2, t3)\n",
|
||||
"print(\"Cosine-Similarity t2+3:\", cos_sim)\n",
|
||||
"cos_sim = util.cos_sim(t2, t4)\n",
|
||||
"print(\"Cosine-Similarity t2+4:\", cos_sim)\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"# Print the embeddings\n",
|
||||
"for sentence, embedding in zip(sentences, sentence_embeddings):\n",
|
||||
" print(\"Sentence:\", sentence)\n",
|
||||
" print(\"Embedding:\", embedding)\n",
|
||||
" print(\"\")\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"numpy.ndarray"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"type(t4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"False"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"isinstance(model, int)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.7007368206977844"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cos_sim.item()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cosine-Similarity: tensor([[0.6153]])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sentence_transformers import SentenceTransformer, util\n",
|
||||
"\n",
|
||||
"model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
|
||||
"\n",
|
||||
"# Sentences are encoded by calling model.encode()\n",
|
||||
"emb1 = model.encode(\"This is a red cat with a hat.\")\n",
|
||||
"emb2 = model.encode(\"Have you seen my red cat?\")\n",
|
||||
"\n",
|
||||
"cos_sim = util.cos_sim(emb1, emb2)\n",
|
||||
"print(\"Cosine-Similarity:\", cos_sim)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
1660
notebooks/display_results.ipynb
Normal file
1660
notebooks/display_results.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1244
notebooks/dummy_data_generation.ipynb
Normal file
1244
notebooks/dummy_data_generation.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
128
notebooks/lang_main.xml
Normal file
128
notebooks/lang_main.xml
Normal file
@@ -0,0 +1,128 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<vizmap id="VizMap-2024_07_12-08_08" documentVersion="3.1">
|
||||
<visualStyle name="lang_main">
|
||||
<network>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
|
||||
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
|
||||
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
|
||||
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
|
||||
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
|
||||
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
|
||||
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
|
||||
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
|
||||
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
|
||||
<visualProperty default="" name="NETWORK_TITLE"/>
|
||||
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
|
||||
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
|
||||
</network>
|
||||
<node>
|
||||
<dependency value="true" name="nodeCustomGraphicsSizeSync"/>
|
||||
<dependency value="true" name="nodeSizeLocked"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
|
||||
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
|
||||
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
|
||||
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
|
||||
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
|
||||
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
|
||||
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
|
||||
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
|
||||
<visualProperty default="40.0" name="NODE_HEIGHT"/>
|
||||
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
|
||||
<visualProperty default="false" name="NODE_SELECTED"/>
|
||||
<visualProperty default="0.0" name="NODE_DEPTH"/>
|
||||
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
|
||||
<visualProperty default="" name="NODE_TOOLTIP"/>
|
||||
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
|
||||
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
|
||||
<visualProperty default="18.0" name="NODE_SIZE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
|
||||
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
|
||||
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
|
||||
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
|
||||
<visualProperty default="60.0" name="NODE_WIDTH"/>
|
||||
<visualProperty default="" name="NODE_LABEL">
|
||||
<passthroughMapping attributeName="name" attributeType="string"/>
|
||||
</visualProperty>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
|
||||
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
|
||||
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
|
||||
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
|
||||
<visualProperty default="true" name="NODE_VISIBLE"/>
|
||||
<visualProperty default="255" name="NODE_TRANSPARENCY"/>
|
||||
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
|
||||
</node>
|
||||
<edge>
|
||||
<dependency value="true" name="arrowColorMatchesEdge"/>
|
||||
<visualProperty default="false" name="EDGE_SELECTED"/>
|
||||
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
|
||||
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
|
||||
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="" name="EDGE_LABEL"/>
|
||||
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
|
||||
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
|
||||
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
|
||||
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
|
||||
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
|
||||
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
|
||||
<visualProperty default="true" name="EDGE_VISIBLE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
|
||||
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
|
||||
<visualProperty default="" name="EDGE_TOOLTIP"/>
|
||||
<visualProperty default="0.0" name="EDGE_Z_ORDER"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
|
||||
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
|
||||
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
|
||||
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
|
||||
<visualProperty default="true" name="EDGE_CURVED"/>
|
||||
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
|
||||
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
|
||||
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
|
||||
<visualProperty default="3.0" name="EDGE_WIDTH">
|
||||
<continuousMapping attributeName="weight" attributeType="float">
|
||||
<continuousMappingPoint attrValue="0.09520000219345093" equalValue="2.0" greaterValue="2.0" lesserValue="1.0"/>
|
||||
<continuousMappingPoint attrValue="1.0" equalValue="10.0" greaterValue="1.0" lesserValue="10.0"/>
|
||||
</continuousMapping>
|
||||
</visualProperty>
|
||||
</edge>
|
||||
</visualStyle>
|
||||
</vizmap>
|
||||
59
notebooks/lang_main_config.toml
Normal file
59
notebooks/lang_main_config.toml
Normal file
@@ -0,0 +1,59 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = '../scripts/results/test_20240619/'
|
||||
dataset = '../data/02_202307/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
# debug-only switches for skipping stages; production-ready pipelines should always
|
||||
# be fully executed
|
||||
[control]
|
||||
preprocessing_skip = true
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing_skip = false
|
||||
graph_rescaling_skip = false
|
||||
graph_static_rendering_skip = false
|
||||
time_analysis_skip = true
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
[preprocess]
|
||||
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.model_input]
|
||||
# input_features = [
|
||||
# 'VorgangsTypName',
|
||||
# 'VorgangsArtText',
|
||||
# 'VorgangsBeschreibung',
|
||||
# ]
|
||||
input_features = [
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
10689
notebooks/misc.ipynb
Normal file
10689
notebooks/misc.ipynb
Normal file
File diff suppressed because one or more lines are too long
123
notebooks/styles_template.xml
Normal file
123
notebooks/styles_template.xml
Normal file
@@ -0,0 +1,123 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<vizmap id="VizMap-2024_07_10-16_50" documentVersion="3.1">
|
||||
<visualStyle name="template">
|
||||
<network>
|
||||
<visualProperty default="1.0" name="NETWORK_SCALE_FACTOR"/>
|
||||
<visualProperty default="true" name="NETWORK_NODE_SELECTION"/>
|
||||
<visualProperty default="#F7FFFF" name="NETWORK_BACKGROUND_PAINT"/>
|
||||
<visualProperty default="false" name="NETWORK_ANNOTATION_SELECTION"/>
|
||||
<visualProperty default="false" name="NETWORK_NODE_LABEL_SELECTION"/>
|
||||
<visualProperty default="" name="NETWORK_TITLE"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_X_LOCATION"/>
|
||||
<visualProperty default="true" name="NETWORK_EDGE_SELECTION"/>
|
||||
<visualProperty default="550.0" name="NETWORK_WIDTH"/>
|
||||
<visualProperty default="0.0" name="NETWORK_DEPTH"/>
|
||||
<visualProperty default="400.0" name="NETWORK_HEIGHT"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Z_LOCATION"/>
|
||||
<visualProperty default="0.0" name="NETWORK_CENTER_Y_LOCATION"/>
|
||||
<visualProperty default="false" name="NETWORK_FORCE_HIGH_DETAIL"/>
|
||||
</network>
|
||||
<node>
|
||||
<dependency value="true" name="nodeCustomGraphicsSizeSync"/>
|
||||
<dependency value="true" name="nodeSizeLocked"/>
|
||||
<visualProperty default="0.0" name="NODE_LABEL_ROTATION"/>
|
||||
<visualProperty default="14" name="NODE_LABEL_FONT_SIZE"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_3"/>
|
||||
<visualProperty default="10.0" name="COMPOUND_NODE_PADDING"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_6"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_1"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_9"/>
|
||||
<visualProperty default="0.0" name="NODE_Z_LOCATION"/>
|
||||
<visualProperty default="true" name="NODE_VISIBLE"/>
|
||||
<visualProperty default="" name="NODE_TOOLTIP"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_1"/>
|
||||
<visualProperty default="500.0" name="NODE_LABEL_WIDTH"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_5"/>
|
||||
<visualProperty default="#FE9929" name="NODE_FILL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_2"/>
|
||||
<visualProperty default="#A63C06" name="NODE_LABEL_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_DEPTH"/>
|
||||
<visualProperty default="7.0" name="NODE_BORDER_WIDTH"/>
|
||||
<visualProperty default="#FFFF00" name="NODE_SELECTED_PAINT"/>
|
||||
<visualProperty default="60.0" name="NODE_WIDTH"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_3"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_7"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_4"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_1"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_8"/>
|
||||
<visualProperty default="SE,NW,c,-2.00,3.00" name="NODE_LABEL_POSITION"/>
|
||||
<visualProperty default="SOLID" name="NODE_BORDER_STROKE"/>
|
||||
<visualProperty default="255" name="NODE_BORDER_TRANSPARENCY"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="NODE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_8"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_7"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_8"/>
|
||||
<visualProperty default="18.0" name="NODE_SIZE"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_5"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_4"/>
|
||||
<visualProperty default="" name="NODE_LABEL">
|
||||
<passthroughMapping attributeName="name" attributeType="string"/>
|
||||
</visualProperty>
|
||||
<visualProperty default="255" name="NODE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_6"/>
|
||||
<visualProperty default="ELLIPSE" name="NODE_SHAPE"/>
|
||||
<visualProperty default="#D1F5BE" name="NODE_BORDER_PAINT"/>
|
||||
<visualProperty default="true" name="NODE_NESTED_NETWORK_IMAGE_VISIBLE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_7"/>
|
||||
<visualProperty default="false" name="NODE_SELECTED"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_9"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_3"/>
|
||||
<visualProperty default="SansSerif.plain,plain,12" name="NODE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="NODE_CUSTOMGRAPHICS_POSITION_2"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_4"/>
|
||||
<visualProperty default="#E1E1E1" name="NODE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="0.0" name="NODE_X_LOCATION"/>
|
||||
<visualProperty default="org.cytoscape.cg.model.NullCustomGraphics,0,[ Remove Graphics ]," name="NODE_CUSTOMGRAPHICS_2"/>
|
||||
<visualProperty default="ROUND_RECTANGLE" name="COMPOUND_NODE_SHAPE"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_6"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_5"/>
|
||||
<visualProperty default="0.0" name="NODE_CUSTOMGRAPHICS_SIZE_9"/>
|
||||
<visualProperty default="175" name="NODE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="255" name="NODE_TRANSPARENCY"/>
|
||||
<visualProperty default="40.0" name="NODE_HEIGHT"/>
|
||||
<visualProperty default="0.0" name="NODE_Y_LOCATION"/>
|
||||
</node>
|
||||
<edge>
|
||||
<dependency value="true" name="arrowColorMatchesEdge"/>
|
||||
<visualProperty default="NONE" name="EDGE_LABEL_BACKGROUND_SHAPE"/>
|
||||
<visualProperty default="" name="EDGE_TOOLTIP"/>
|
||||
<visualProperty default="AUTO_BEND" name="EDGE_STACKING"/>
|
||||
<visualProperty default="#B6B6B6" name="EDGE_LABEL_BACKGROUND_COLOR"/>
|
||||
<visualProperty default="C,C,c,0.00,0.00" name="EDGE_LABEL_POSITION"/>
|
||||
<visualProperty default="0.728545744495502,-0.684997151948455,0.6456513365424503" name="EDGE_BEND"/>
|
||||
<visualProperty default="10" name="EDGE_LABEL_FONT_SIZE"/>
|
||||
<visualProperty default="NONE" name="EDGE_TARGET_ARROW_SHAPE"/>
|
||||
<visualProperty default="false" name="EDGE_SELECTED"/>
|
||||
<visualProperty default="#000000" name="EDGE_LABEL_COLOR"/>
|
||||
<visualProperty default="#FFFFFF" name="EDGE_STROKE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="#000000" name="EDGE_TARGET_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_TRANSPARENCY"/>
|
||||
<visualProperty default="255" name="EDGE_LABEL_BACKGROUND_TRANSPARENCY"/>
|
||||
<visualProperty default="true" name="EDGE_CURVED"/>
|
||||
<visualProperty default="NONE" name="EDGE_SOURCE_ARROW_SHAPE"/>
|
||||
<visualProperty default="0.0" name="EDGE_LABEL_ROTATION"/>
|
||||
<visualProperty default="SansSerif.plain,plain,10" name="EDGE_LABEL_FONT_FACE"/>
|
||||
<visualProperty default="0.5" name="EDGE_STACKING_DENSITY"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_SOURCE_ARROW_SELECTED_PAINT"/>
|
||||
<visualProperty default="false" name="EDGE_LABEL_AUTOROTATE"/>
|
||||
<visualProperty default="3.0" name="EDGE_WIDTH"/>
|
||||
<visualProperty default="#FF0000" name="EDGE_STROKE_SELECTED_PAINT"/>
|
||||
<visualProperty default="true" name="EDGE_VISIBLE"/>
|
||||
<visualProperty default="#577399" name="EDGE_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="#000000" name="EDGE_SOURCE_ARROW_UNSELECTED_PAINT"/>
|
||||
<visualProperty default="" name="EDGE_LABEL"/>
|
||||
<visualProperty default="255" name="EDGE_TRANSPARENCY"/>
|
||||
<visualProperty default="SOLID" name="EDGE_LINE_TYPE"/>
|
||||
<visualProperty default="6.0" name="EDGE_TARGET_ARROW_SIZE"/>
|
||||
<visualProperty default="200.0" name="EDGE_LABEL_WIDTH"/>
|
||||
<visualProperty default="0.0" name="EDGE_Z_ORDER"/>
|
||||
<visualProperty default="6.0" name="EDGE_SOURCE_ARROW_SIZE"/>
|
||||
<visualProperty default="#FFFF00" name="EDGE_TARGET_ARROW_SELECTED_PAINT"/>
|
||||
</edge>
|
||||
</visualStyle>
|
||||
</vizmap>
|
||||
2335
notebooks/timeline_analysis.ipynb
Normal file
2335
notebooks/timeline_analysis.ipynb
Normal file
File diff suppressed because one or more lines are too long
824
notebooks/truncate_dataset.ipynb
Normal file
824
notebooks/truncate_dataset.ipynb
Normal file
@@ -0,0 +1,824 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "79034f9b-adae-4066-a35f-b0e7fd38055f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\foersterflorian\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.helpers:Loaded TOML config file successfully.\n",
|
||||
"INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
|
||||
"INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from ihm_analyse.lib.preprocess import load_raw_data\n",
|
||||
"from ihm_analyse import load_pickle\n",
|
||||
"from ihm_analyse.predefined_pipes import pipe_merge"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "af94968f-ae6c-402b-b866-cb6c15b81cef",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Raw-data folder, resolved relative to the current working directory.\n",
|
||||
"pwd = Path.cwd()\n",
|
||||
"p = pwd / '01_03_Rohdaten_202403/'\n",
|
||||
"p"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "753daf9e-0209-4a13-b458-1048c8b2bfbf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export6 - 43306 Zeilen.csv'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export8 - 708 Zeilen.csv'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export9 - 8176 Zeilen.csv'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_trunc.csv')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"folder = list(p.glob(r'*.csv'))\n",
|
||||
"folder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "50d9ef9c-c56b-4d5b-9dfd-c02b68a29288",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "52186a59-69f2-4ed2-8d19-dac76e50526a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "5b76284b-bcc3-4b31-9ece-bde35b22b717",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_59499_Zeilen.csv')"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Pick the raw export by name: glob() yields entries in filesystem order, so a\n",
|
||||
"# positional index into `folder` can silently select a different CSV.\n",
|
||||
"path_to_dataset = p / 'Export7_59499_Zeilen.csv'\n",
|
||||
"path_to_dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "2701b8d9-657c-4d7a-b103-b8c8b1865224",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.preprocess:Loaded dataset successfully.\n",
|
||||
"INFO:ihm_analyse.preprocess:Dataset properties: number of entries: 59499, number of features 20\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"(data,) = load_raw_data(path_to_dataset)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "74d7b72e-3cab-46e2-bca2-67c70b9221c7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"17849"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"percentage_trunc = 0.3\n",
|
||||
"num_entries_trunc = int(len(data) * percentage_trunc)\n",
|
||||
"num_entries_trunc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "62d2fa0e-baa6-4d7c-bd37-5fdbe21005d3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 17849 entries, 0 to 17848\n",
|
||||
"Data columns (total 20 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 VorgangsID 17849 non-null int64 \n",
|
||||
" 1 ObjektID 17849 non-null int64 \n",
|
||||
" 2 HObjektText 17848 non-null object \n",
|
||||
" 3 ObjektArtID 17849 non-null int64 \n",
|
||||
" 4 ObjektArtText 17849 non-null object \n",
|
||||
" 5 VorgangsTypID 17849 non-null int64 \n",
|
||||
" 6 VorgangsTypName 17849 non-null object \n",
|
||||
" 7 VorgangsDatum 17849 non-null datetime64[ns]\n",
|
||||
" 8 VorgangsStatusId 17849 non-null int64 \n",
|
||||
" 9 VorgangsPrioritaet 17849 non-null int64 \n",
|
||||
" 10 VorgangsBeschreibung 15988 non-null object \n",
|
||||
" 11 VorgangsOrt 0 non-null float64 \n",
|
||||
" 12 VorgangsArtText 17849 non-null object \n",
|
||||
" 13 ErledigungsDatum 17849 non-null datetime64[ns]\n",
|
||||
" 14 ErledigungsArtText 11879 non-null object \n",
|
||||
" 15 ErledigungsBeschreibung 9916 non-null object \n",
|
||||
" 16 MPMelderArbeitsplatz 3 non-null object \n",
|
||||
" 17 MPAbteilungBezeichnung 3 non-null object \n",
|
||||
" 18 Arbeitsbeginn 1920 non-null datetime64[ns]\n",
|
||||
" 19 ErstellungsDatum 17849 non-null datetime64[ns]\n",
|
||||
"dtypes: datetime64[ns](4), float64(1), int64(6), object(9)\n",
|
||||
"memory usage: 2.7+ MB\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data_trunc = data.iloc[:num_entries_trunc].copy()\n",
|
||||
"data_trunc.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "6a15fb8c-e3b7-4c92-b73d-788b337d6251",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/01_03_Rohdaten_202403/Export7_trunc.csv')"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"saving_path = p / 'Export7_trunc.csv'\n",
|
||||
"saving_path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "fb912634-cefa-4b8d-a370-37f6c8178f5a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Write the truncated frame as a semicolon-separated, cp1252-encoded CSV.\n",
|
||||
"data_trunc.to_csv(saving_path, sep=';', encoding='cp1252')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "30085691-aa23-478c-8d65-d3e6800c7c77",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85733e19-6c52-479c-a8f0-3872bdbd5bfd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ae65a019-26a8-45c9-bfb9-2662b84ff2f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7ba722ae-51b8-4e8d-9a1a-917098a3f70e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "356f7d32-446e-4dc1-aa83-a0b816742087",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Token_Analysis-TokenGraph.pickle')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res_path = pwd / 'results/Export7_trunc/'\n",
|
||||
"contents = list(res_path.glob(r'*.pickle'))\n",
|
||||
"contents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"id": "e4415e9c-6ebb-46d9-b06a-eb67df56689e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Address the pickles by file name instead of by position in `contents`:\n",
|
||||
"# with the token-graph files present, contents[-1] is the TokenGraph pickle,\n",
|
||||
"# and indexing the loaded graph with [0] raises KeyError.\n",
|
||||
"# NOTE(review): the Embedding1 step-3 candidate list is assumed to hold the\n",
|
||||
"# index pairs passed to the merge pipeline - confirm.\n",
|
||||
"preproc_data = res_path / 'Pipe-TargetFeature_Step-5_analyse_feature.pickle'\n",
|
||||
"last_step = res_path / 'Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"id": "2b29672a-c573-4d09-8601-e468a23bad0c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n",
|
||||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ret_preproc_data = load_pickle(preproc_data)\n",
|
||||
"ret_idx_paris = load_pickle(last_step)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"id": "20c807da-e64f-4a48-8306-28a0a3dcfae9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyError",
|
||||
"evalue": "0",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[53], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m idx_pairs \u001b[38;5;241m=\u001b[39m \u001b[43mret_idx_paris\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[0;32m 2\u001b[0m preproc_data \u001b[38;5;241m=\u001b[39m ret_preproc_data[\u001b[38;5;241m0\u001b[39m]\n",
|
||||
"File \u001b[1;32m~\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\networkx\\classes\\graph.py:513\u001b[0m, in \u001b[0;36mGraph.__getitem__\u001b[1;34m(self, n)\u001b[0m\n\u001b[0;32m 489\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, n):\n\u001b[0;32m 490\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a dict of neighbors of node n. Use: 'G[n]'.\u001b[39;00m\n\u001b[0;32m 491\u001b[0m \n\u001b[0;32m 492\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 511\u001b[0m \u001b[38;5;124;03m AtlasView({1: {}})\u001b[39;00m\n\u001b[0;32m 512\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 513\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mn\u001b[49m\u001b[43m]\u001b[49m\n",
|
||||
"File \u001b[1;32m~\\mambaforge\\envs\\ihm2\\Lib\\site-packages\\networkx\\classes\\coreviews.py:81\u001b[0m, in \u001b[0;36mAdjacencyView.__getitem__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name):\n\u001b[1;32m---> 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m AtlasView(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_atlas\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m)\n",
|
||||
"\u001b[1;31mKeyError\u001b[0m: 0"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"idx_pairs = ret_idx_paris[0]\n",
|
||||
"preproc_data = ret_preproc_data[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "1fca8442-c0e9-420f-9ad4-22a3b672dda3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.pipelines:Starting processing pipeline >>Merge_Duplicates<<...\n",
|
||||
"INFO:ihm_analyse.preprocess:Start merging of similarity candidates...\n",
|
||||
"INFO:ihm_analyse.graphs:Graph properties: 5465 Nodes, 71087 Edges\n",
|
||||
"INFO:ihm_analyse.graphs:Node memory: 149.43 KB\n",
|
||||
"INFO:ihm_analyse.graphs:Edge memory: 3887.57 KB\n",
|
||||
"INFO:ihm_analyse.graphs:Total memory: 4037.00 KB\n",
|
||||
"INFO:ihm_analyse.preprocess:Similarity candidates merged successfully.\n",
|
||||
"INFO:ihm_analyse.helpers:Saved file successfully under results\\Export7_trunc\\Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle\n",
|
||||
"INFO:ihm_analyse.pipelines:Processing pipeline >>Merge_Duplicates<< successfully ended.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ret = pipe_merge.run(starting_values=(preproc_data, idx_pairs))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "87bd8bba-b0c3-45a1-a9a8-b6bd279cf51f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>entry</th>\n",
|
||||
" <th>len</th>\n",
|
||||
" <th>num_occur</th>\n",
|
||||
" <th>assoc_obj_ids</th>\n",
|
||||
" <th>num_assoc_obj_ids</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>445</th>\n",
|
||||
" <td>Wartung nach Arbeitsplan, siehe Extradaten / A...</td>\n",
|
||||
" <td>52</td>\n",
|
||||
" <td>3435</td>\n",
|
||||
" <td>[563, 604, 616, 617, 15089, 15226, 15276, 1533...</td>\n",
|
||||
" <td>36</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>26</th>\n",
|
||||
" <td>I/W nach Liste</td>\n",
|
||||
" <td>14</td>\n",
|
||||
" <td>238</td>\n",
|
||||
" <td>[2363, 2364, 2367, 2368, 2369, 2370, 2371, 237...</td>\n",
|
||||
" <td>85</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2377</th>\n",
|
||||
" <td>1 Wöchentliche Wartung aller 3 Etikettendrucke...</td>\n",
|
||||
" <td>91</td>\n",
|
||||
" <td>535</td>\n",
|
||||
" <td>[111, 121, 127, 209, 219, 220, 221, 222, 236, ...</td>\n",
|
||||
" <td>73</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2380</th>\n",
|
||||
" <td>Infratech Meet Di + DO JourFix PT/InT</td>\n",
|
||||
" <td>38</td>\n",
|
||||
" <td>183</td>\n",
|
||||
" <td>28526</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4337</th>\n",
|
||||
" <td>24.05.2022 10:28:01 (Halm, Karl-Josef) Aktione...</td>\n",
|
||||
" <td>579</td>\n",
|
||||
" <td>3817</td>\n",
|
||||
" <td>[5, 7, 9, 13, 14, 15, 17, 18, 24, 25, 30, 32, ...</td>\n",
|
||||
" <td>754</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3061</th>\n",
|
||||
" <td>stopper schaltet nicht.</td>\n",
|
||||
" <td>23</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[15280]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3059</th>\n",
|
||||
" <td>12.09.2022 13:48:24 (Struzyna, Christian) Temp...</td>\n",
|
||||
" <td>127</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[12671]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3054</th>\n",
|
||||
" <td>08.09.2022 12:56:33 (Unruh, Jakob) Neue Serie ...</td>\n",
|
||||
" <td>262</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[273]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3053</th>\n",
|
||||
" <td>Preset-Punkt überprüfen und ggf. nachjustieren...</td>\n",
|
||||
" <td>148</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[273]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3050</th>\n",
|
||||
" <td>13.09.2022 08:05:40 (Betke, Gennadi) Griefer ...</td>\n",
|
||||
" <td>79</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>[15785]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>3627 rows × 5 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" entry len num_occur \\\n",
|
||||
"445 Wartung nach Arbeitsplan, siehe Extradaten / A... 52 3435 \n",
|
||||
"26 I/W nach Liste 14 238 \n",
|
||||
"2377 1 Wöchentliche Wartung aller 3 Etikettendrucke... 91 535 \n",
|
||||
"2380 Infratech Meet Di + DO JourFix PT/InT 38 183 \n",
|
||||
"4337 24.05.2022 10:28:01 (Halm, Karl-Josef) Aktione... 579 3817 \n",
|
||||
"... ... ... ... \n",
|
||||
"3061 stopper schaltet nicht. 23 1 \n",
|
||||
"3059 12.09.2022 13:48:24 (Struzyna, Christian) Temp... 127 1 \n",
|
||||
"3054 08.09.2022 12:56:33 (Unruh, Jakob) Neue Serie ... 262 1 \n",
|
||||
"3053 Preset-Punkt überprüfen und ggf. nachjustieren... 148 1 \n",
|
||||
"3050 13.09.2022 08:05:40 (Betke, Gennadi) Griefer ... 79 1 \n",
|
||||
"\n",
|
||||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||||
"445 [563, 604, 616, 617, 15089, 15226, 15276, 1533... 36 \n",
|
||||
"26 [2363, 2364, 2367, 2368, 2369, 2370, 2371, 237... 85 \n",
|
||||
"2377 [111, 121, 127, 209, 219, 220, 221, 222, 236, ... 73 \n",
|
||||
"2380 28526 1 \n",
|
||||
"4337 [5, 7, 9, 13, 14, 15, 17, 18, 24, 25, 30, 32, ... 754 \n",
|
||||
"... ... ... \n",
|
||||
"3061 [15280] 1 \n",
|
||||
"3059 [12671] 1 \n",
|
||||
"3054 [273] 1 \n",
|
||||
"3053 [273] 1 \n",
|
||||
"3050 [15785] 1 \n",
|
||||
"\n",
|
||||
"[3627 rows x 5 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ret[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "43e6d41c-7a49-4756-9629-0ec0ee6c5b7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "1a9969fa-6b0d-466a-bd4f-1ba5f4868873",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" int('23456')\n",
|
||||
"except ValueError:\n",
|
||||
" print('went wrong')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "76af32de-5f0a-4d7e-9751-5f2a38a7a69e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bd5cffa-0b09-45c7-bc15-0cd3082353d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e8423609-c95d-42c8-99f3-95274fa52ae8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-3_remove_NA.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-TargetFeature_Step-5_analyse_feature.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-1_build_cosSim_matrix.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-2_filt_thresh_cosSim_matrix.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Embedding1_Step-3_list_cosSim_dupl_candidates.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Merge_Duplicates_Step-1_merge_similarity_dupl.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Pipe-Token_Analysis_Step-1_build_token_graph.pickle'),\n",
|
||||
" WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Token_Analysis-TokenGraph.pickle')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res_path = pwd / 'results/Export7_trunc/'\n",
|
||||
"contents = list(res_path.glob(r'*.pickle'))\n",
|
||||
"contents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "71fa1c2e-22cf-483a-964c-a5cca2bd3790",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc/Token_Analysis-TokenGraph.pickle')"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Name the file explicitly; contents[-1] only works while this pickle happens\n",
|
||||
"# to be listed last by glob().\n",
|
||||
"path_to_graph = res_path / 'Token_Analysis-TokenGraph.pickle'\n",
|
||||
"path_to_graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "9101beaf-6a7c-4987-9c44-141386966291",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.helpers:Loaded file successfully.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tk_graph = load_pickle(path_to_graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "e08954c3-9a5f-43c8-a98e-f9b8a74c3ff5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"TokenGraph(name: TokenGraph, number of nodes: 10536, number of edges: 48562)"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tk_graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "2a8d0abb-d68b-4c6e-80e9-b3b27998c8d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'num_nodes': 10536,\n",
|
||||
" 'num_edges': 46393,\n",
|
||||
" 'min_edge_weight': 1,\n",
|
||||
" 'max_edge_weight': 15374,\n",
|
||||
" 'node_memory': 652596,\n",
|
||||
" 'edge_memory': 2598008,\n",
|
||||
" 'total_memory': 3250604}"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tk_graph.metadata_undirected"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "cc34c667-5a33-4061-a83a-50fc8c537b19",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Apply the edge-weight filter (100) followed by the node-degree filter (1).\n",
|
||||
"tk_graph_filtered = tk_graph.filter_by_edge_weight(100).filter_by_node_degree(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"id": "67df971c-fe7a-4f88-89ae-ba1366da1166",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'num_nodes': 289,\n",
|
||||
" 'num_edges': 457,\n",
|
||||
" 'min_edge_weight': 100,\n",
|
||||
" 'max_edge_weight': 15369,\n",
|
||||
" 'node_memory': 17674,\n",
|
||||
" 'edge_memory': 25592,\n",
|
||||
" 'total_memory': 43266}"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tk_graph_filtered.metadata_undirected"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "8c524312-aff4-47f4-801e-ad8112aa2a70",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WindowsPath('A:/Arbeitsaufgaben/Instandhaltung/results/Export7_trunc')"
|
||||
]
|
||||
},
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"save_path_graph = res_path\n",
|
||||
"save_path_graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "b62c888f-620d-4b29-924b-45ea17d99bc1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:ihm_analyse.graphs:Successfully saved graph as GraphML file under A:\\Arbeitsaufgaben\\Instandhaltung\\results\\Export7_trunc\\TokenGraph-filtered.graphml.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tk_graph_filtered.save_graph(save_path_graph, filename='TokenGraph-filtered')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ca21e2d3-dc5a-4117-8be9-d132ba2c8d28",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user