5035 lines
231 KiB
Plaintext
5035 lines
231 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "6983ff64-9fc3-4ed1-bcf3-4a17a5a89661",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Looking iteratively for config file. Start: A:\\Arbeitsaufgaben\\tom-plugin\\.venv\\Lib\\site-packages\\lang_main, stop folder: tom-plugin\n",
|
||
"Loaded TOML config file successfully.\n",
|
||
"Loaded config from: >>A:\\Arbeitsaufgaben\\tom-plugin\\lang_main_config.toml<<\n",
|
||
"Library path is: A:\\Arbeitsaufgaben\n",
|
||
"Root path is: A:\\Arbeitsaufgaben\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from pathlib import Path\n",
|
||
"import time\n",
|
||
"\n",
|
||
"from stfr import load_models\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "9a957bc4-b2b8-49e3-ae5b-0ef5a02e87d5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from lang_main.types import STFRModelTypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "92415402-0cbe-4a52-b177-ac2e3cf6903d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from lang_main.pipelines import predefined\n",
|
||
"from lang_main.analysis import preprocessing as preproc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "0583b0f0-ba00-4c82-9272-0b4062bcd777",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"DATA_PTH = Path(r'A:\\Arbeitsaufgaben\\lang-data\\in\\02_202307\\Export4.csv')\n",
|
||
"\n",
|
||
"def preprocess_data(path, num_entries):\n",
"    \"\"\"Run the base target-feature pipeline on `path` and return its first entries.\n",
"\n",
"    Args:\n",
"        path: Path object of the input file; it must exist.\n",
"        num_entries: Number of leading rows to keep.\n",
"\n",
"    Returns:\n",
"        Tuple with the values of the 'entry' column for the first `num_entries` rows.\n",
"    \"\"\"\n",
"    assert path.exists()\n",
"    # NOTE(review): the first pipeline result is assumed to be a pandas DataFrame\n",
"    # with an 'entry' column -- confirm against lang_main.pipelines.predefined.\n",
"    pipeline = predefined.build_base_target_feature_pipe()\n",
"    results = pipeline.run(starting_values=(path,))\n",
"    head = results[0].iloc[:num_entries]\n",
"\n",
"    return tuple(head['entry'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3fc75632-2a66-41d8-8c79-4ab9bfa348b6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "d3912bad-3f8f-4012-8890-5b653fef416f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"models_benchmark = (\n",
|
||
" # STFRModelTypes.ALL_MPNET_BASE_V2,\n",
|
||
" # STFRModelTypes.PARAPHRASE_MULTI_MPNET_BASE_V2,\n",
|
||
" # STFRModelTypes.JINAAI_BASE_DE_V2,\n",
|
||
" STFRModelTypes.GERMAN_SEMANTIC_STS_V2,\n",
|
||
" STFRModelTypes.E5_BASE_STS_EN_DE,\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"id": "de89dbd6-6999-4434-b30d-f17bae169f3a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def benchmark_sims(model, docs, batch_size=32, max_seq_length=1024):\n",
"    \"\"\"Encode `docs` and time the all-pairs similarity computation.\n",
"\n",
"    Args:\n",
"        model: Object exposing `max_seq_length`, `encode()` and `similarity()`\n",
"            (used in this notebook with SentenceTransformer instances).\n",
"        docs: Sequence of texts to embed.\n",
"        batch_size: Batch size forwarded to `model.encode`.\n",
"        max_seq_length: Upper bound applied to `model.max_seq_length`.\n",
"\n",
"    Returns:\n",
"        Tuple `(sims, encoding_dur)`: the similarity matrix converted with\n",
"        `.numpy()` and the elapsed seconds for encoding plus similarity computation.\n",
"\n",
"    Note:\n",
"        The sequence-length cap is written back to `model` and stays in effect\n",
"        after the call.\n",
"    \"\"\"\n",
"    if model.max_seq_length > max_seq_length:\n",
"        model.max_seq_length = max_seq_length\n",
"\n",
"    start = time.perf_counter()\n",
"    embds = model.encode(docs, convert_to_numpy=False, convert_to_tensor=True, batch_size=batch_size)\n",
"    # Move the result to host memory first; `.numpy()` raises for tensors on an accelerator.\n",
"    sims = model.similarity(embds, embds).cpu().numpy()\n",
"    encoding_dur = time.perf_counter() - start\n",
"\n",
"    return sims, encoding_dur"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "a5d79042-7696-4523-8db5-41778da6cdca",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def run_benchmark(models, docs, iterations_per_model):\n",
"    \"\"\"Benchmark every model in `models` on `docs` and print average timings.\n",
"\n",
"    Two averages over `iterations_per_model` runs are printed per model:\n",
"    'with loading' (model loading plus `benchmark_sims`) and 'encoding'\n",
"    (the duration reported by `benchmark_sims`).\n",
"\n",
"    Args:\n",
"        models: Iterable of model identifiers accepted by `load_models`.\n",
"        docs: Sized collection of texts passed to `benchmark_sims`.\n",
"        iterations_per_model: Number of timed repetitions per model.\n",
"    \"\"\"\n",
"    print(f'Benchmark for number of entries: {len(docs)}')\n",
"    print(f'Iterations per model: {iterations_per_model}')\n",
"\n",
"    for model_name in models:\n",
"        times_with_load = []\n",
"        times_encoding = []\n",
"\n",
"        for _ in range(iterations_per_model):\n",
"            # Load inside the timed region so the 'with loading' figure really contains\n",
"            # the load time instead of duplicating the encoding time.\n",
"            # NOTE(review): if `load_models` caches its result, only the first\n",
"            # iteration pays the full load cost -- confirm.\n",
"            t1 = time.perf_counter()\n",
"            model = load_models(model_name, trust_remote=True)\n",
"            _, encoding_dur = benchmark_sims(model, docs)\n",
"            t2 = time.perf_counter()\n",
"            times_with_load.append(t2 - t1)\n",
"            times_encoding.append(encoding_dur)\n",
"\n",
"        avg_time_with_load = np.mean(times_with_load)\n",
"        avg_time_encoding = np.mean(times_encoding)\n",
"        print(f'Avg time for model >{model_name}< was:\\t\\twith loading: {avg_time_with_load:.6f} s\\tencoding: {avg_time_encoding:.6f} s')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "492b138e-f9fc-45d5-bcb7-9ae4cf63c572",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2025-01-15 12:04:37 +0000 | lang_main:base:INFO | Starting pipeline >>Target_Feature<<...\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | Loaded dataset successfully.\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | Dataset properties: number of entries: 129020, number of features 20\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | Number of duplicates over all features: 84\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | Number of duplicates over subset >>['VorgangsID', 'ObjektID']<<: 725\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | Removed all duplicates from dataset successfully.\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | New Dataset properties: number of entries: 128211, number of features 20\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:preprocessing:INFO | Removed NA entries for features >>['VorgangsBeschreibung']<< from dataset successfully.\n",
|
||
"2025-01-15 12:04:38 +0000 | lang_main:io:INFO | Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\Pipe-Target_Feature_Step-3_remove_NA.pkl\n",
|
||
"2025-01-15 12:04:39 +0000 | lang_main:shared:INFO | Successfully applied entry-wise cleansing procedure >>clean_string_slim<< for features >>['VorgangsBeschreibung']<<\n",
|
||
"2025-01-15 12:04:39 +0000 | lang_main:io:INFO | Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\TIMELINE.pkl\n",
|
||
"2025-01-15 12:04:39 +0000 | lang_main:preprocessing:INFO | Number of entries for feature >>VorgangsBeschreibung<<: 123457\n",
|
||
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6772/6772 [00:33<00:00, 202.39it/s]\n",
|
||
"2025-01-15 12:05:12 +0000 | lang_main:io:INFO | Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\Pipe-Target_Feature_Step-5_analyse_feature.pkl\n",
|
||
"2025-01-15 12:05:12 +0000 | lang_main:base:INFO | Processing pipeline >>Target_Feature<< successfully ended after 5 steps.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"docs = preprocess_data(DATA_PTH, num_entries=1000)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9c03e73e-f0b1-4189-984f-5201b45beb8f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "17644baf-6328-49ff-a75d-d9e80fe4ba7b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ITERATIONS_PER_MODEL = 3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"id": "bbc25e1e-ba41-48ad-b28c-08e1ec11e731",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"No sentence-transformers model found with name aari1995/German_Semantic_STS_V2. Creating a new one with mean pooling.\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Benchmark for number of entries: 1000\n",
|
||
"Iterations per model: 3\n",
|
||
"Avg time for model >aari1995/German_Semantic_STS_V2< was:\t\twith loading: 194.963239 s\tencoding: 194.963216 s\n",
|
||
"Avg time for model >danielheinz/e5-base-sts-en-de< was:\t\twith loading: 52.009722 s\tencoding: 52.009704 s\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"run_benchmark(models_benchmark, docs, ITERATIONS_PER_MODEL)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"id": "af7bdda8-96b3-447b-a0c2-69a6dc0de6eb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sentence_transformers.SentenceTransformer import SentenceTransformer\n",
|
||
"from sentence_transformers.backend import export_optimized_onnx_model, export_dynamic_quantized_onnx_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"id": "da2185f2-0f1b-4cb5-8b82-b5a0e040b26d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_name = STFRModelTypes.E5_BASE_STS_EN_DE\n",
|
||
"save_path = Path(r'A:\\Arbeitsaufgaben\\lang-models\\self-onnx')\n",
|
||
"assert save_path.exists()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "932a3d30-2208-4724-9f59-7f0b80e0183e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"No 'model.onnx' found in 'danielheinz/e5-base-sts-en-de'. Exporting the model to ONNX.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "3e9c67b867954bbbb582a0e159f4261e",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"tokenizer_config.json: 0%| | 0.00/1.15k [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "b2f440c838c746679512196ed29335b7",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "f9485bf6598e4a3dab62b057072d74b0",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"tokenizer.json: 0%| | 0.00/17.1M [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "72d382d4b62243c0a4fac64202940f7e",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"special_tokens_map.json: 0%| | 0.00/280 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "04f82e46ad554feeb79bba45813b0762",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"config.json: 0%| | 0.00/756 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Saving the exported ONNX model is heavily recommended to avoid having to export it again. Do so with `model.push_to_hub(<STFRModelTypes.E5_BASE_STS_EN_DE: 'danielheinz/e5-base-sts-en-de'>, create_pr=True)`.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"model_onnx = SentenceTransformer(model_name, backend='onnx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "cce0e867-4a6f-4fed-affe-e111f408dcf4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"save_base = save_path / 'base'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "9ecf8adf-0f82-40d5-9c99-0cafa287adc6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_onnx.save_pretrained(str(save_base), safe_serialization=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "8f40b663-e2c3-4437-8595-4ae192e8f725",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#save_optimised = save_path / 'optimised'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "5358b1b7-f6a3-4b07-9f59-5dbe4d93b8de",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"A:\\Arbeitsaufgaben\\tom-plugin\\.venv\\Lib\\site-packages\\optimum\\onnxruntime\\configuration.py:779: FutureWarning: disable_embed_layer_norm will be deprecated soon, use disable_embed_layer_norm_fusion instead, disable_embed_layer_norm_fusion is set to True.\n",
|
||
" warnings.warn(\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"export_optimized_onnx_model(model_onnx, optimization_config='O3', model_name_or_path=str(save_base))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"id": "40550829-38b6-474f-bfa8-df933aafd210",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"onnx_path = save_base / 'onnx/model_O3.onnx'\n",
|
||
"assert onnx_path.exists()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"id": "e10d1cf5-245e-4086-a116-0ced74ee6a4c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_kwargs = {'file_name': 'onnx/model.onnx', 'provider': 'CPUExecutionProvider', 'export': False}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"id": "302c27cd-7cb5-4111-87f4-101bc93ab1fa",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_optim = SentenceTransformer(str(save_base), model_kwargs=model_kwargs, device='cpu', backend='onnx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"id": "9627593f-f7db-46fa-ba52-dad5069b5ff7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"export_dynamic_quantized_onnx_model(model_optim, quantization_config='avx2', model_name_or_path=str(save_base))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"id": "08d20a3d-d195-4b93-848b-d9bcebfa3ab6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_kwargs = {'file_name': 'onnx/model_quint8_avx2.onnx', 'provider': 'CPUExecutionProvider', 'export': False}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"id": "bf22883a-1cdc-40b0-a1c2-f36714527d33",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"The ONNX file model_quint8_avx2.onnx is not a regular name used in optimum.onnxruntime, the ORTModel might not behave as expected.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"model_quant = SentenceTransformer(str(save_base), model_kwargs=model_kwargs, device='cpu', backend='onnx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"id": "837cfe87-e510-49b8-8abd-9bb2726f5fba",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sims, dur = benchmark_sims(model_quant, docs, batch_size=32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"id": "e750a93a-fa41-4b9f-91a1-74864327f5d4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"54.369837799997185"
|
||
]
|
||
},
|
||
"execution_count": 91,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dur"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5a3856d1-b113-4b23-99c0-58874bc5da1c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "17da253d-5483-4d29-b2cc-544304169a49",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from pathlib import Path"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "3373fa44-11d8-4b7c-9819-d26b31f3a579",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"False"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"p = Path(r'A:\\Arbeitsaufgaben\\test-download\\lang-models')\n",
|
||
"p.exists()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 129,
|
||
"id": "2f83ffca-674f-4821-91f2-1a1e40fd3bf9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t = 'test/123'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 130,
|
||
"id": "db4c538d-96c8-4e74-8545-4b12f544c796",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'123'"
|
||
]
|
||
},
|
||
"execution_count": 130,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"t.split('/')[-1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b538a84f-aeda-4994-8f6c-0cac075cf3b1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"id": "44051880-f113-4068-81a6-ed661a7c295b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sims, dur = benchmark_sims(model_optim, docs, batch_size=16)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"id": "9406098c-943a-49fa-bf07-ce06a6611c00",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"90.01054789999944"
|
||
]
|
||
},
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dur"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"id": "f1cf84aa-0a45-4eb2-a5f9-736ba1f4b284",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_ref = SentenceTransformer('danielheinz/e5-base-sts-en-de')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"id": "2d448adc-5fab-49f9-acb4-fe0357cc92f4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"74.87911759999952"
|
||
]
|
||
},
|
||
"execution_count": 71,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sims, dur = benchmark_sims(model_ref, docs)\n",
|
||
"dur"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1868f9e2-951a-41fc-98bd-6f39664cf439",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"id": "ab429620-8873-462a-b4d3-24f9da8ad8ed",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model_name_new = 'mixedbread-ai/deepset-mxbai-embed-de-large-v1'\n",
|
||
"model_name_ref = 'all-mpnet-base-v2'\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b2af2548-b5db-45a6-b832-b16431ffdf4d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"id": "ab48155f-ec33-4002-ab53-98753473b4fb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"docs = [\n",
|
||
" 'Ölleckage durch undichten Ölsumpf',\n",
|
||
" 'Überprüfung der Schwingungsdämpfer',\n",
|
||
" 'Überprüfung der Kühlmittelsysteme',\n",
|
||
" 'Blockierung der Förderschnecke',\n",
|
||
" 'Überhitzung durch mangelnde Kühlmittelzirkulation',\n",
|
||
" 'Überprüfung der Hydraulik',\n",
|
||
" 'Ich gehe spazieren',\n",
|
||
" 'Heute um zwölf war ich unterwegs',\n",
|
||
" 'Ich gehe mit dem Hund raus',\n",
|
||
" 'Ich laufe im Park',\n",
|
||
" 'Ich laufe im Pakr',\n",
|
||
"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"id": "e27c5c84-6570-43fd-a38f-c0bbcc7573a2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def calc_similarities(model_name, docs):\n",
"    \"\"\"Build a labelled similarity table for `docs` using the model `model_name`.\n",
"\n",
"    Args:\n",
"        model_name: Identifier handed to `load_models`.\n",
"        docs: List of texts; also used as row and column labels.\n",
"\n",
"    Returns:\n",
"        pandas DataFrame holding the similarity of every document pair.\n",
"    \"\"\"\n",
"    encoder = load_models(model_name)\n",
"    vectors = encoder.encode(docs)\n",
"    pairwise = encoder.similarity(vectors, vectors).numpy()\n",
"\n",
"    return pd.DataFrame(data=pairwise, index=docs, columns=docs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "c0ba1525-1d96-4b26-865a-3f25898b4d64",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.422875</td>\n",
|
||
" <td>0.393899</td>\n",
|
||
" <td>0.230690</td>\n",
|
||
" <td>0.521921</td>\n",
|
||
" <td>0.275985</td>\n",
|
||
" <td>0.286303</td>\n",
|
||
" <td>0.401496</td>\n",
|
||
" <td>0.289342</td>\n",
|
||
" <td>0.343332</td>\n",
|
||
" <td>0.322299</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.422875</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.519197</td>\n",
|
||
" <td>0.274956</td>\n",
|
||
" <td>0.489307</td>\n",
|
||
" <td>0.445541</td>\n",
|
||
" <td>0.302303</td>\n",
|
||
" <td>0.401394</td>\n",
|
||
" <td>0.216164</td>\n",
|
||
" <td>0.357596</td>\n",
|
||
" <td>0.284001</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.393899</td>\n",
|
||
" <td>0.519197</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.315556</td>\n",
|
||
" <td>0.706944</td>\n",
|
||
" <td>0.486024</td>\n",
|
||
" <td>0.298462</td>\n",
|
||
" <td>0.270384</td>\n",
|
||
" <td>0.224679</td>\n",
|
||
" <td>0.253847</td>\n",
|
||
" <td>0.260867</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.230690</td>\n",
|
||
" <td>0.274956</td>\n",
|
||
" <td>0.315556</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.302503</td>\n",
|
||
" <td>0.249371</td>\n",
|
||
" <td>0.301220</td>\n",
|
||
" <td>0.239805</td>\n",
|
||
" <td>0.181490</td>\n",
|
||
" <td>0.296925</td>\n",
|
||
" <td>0.239862</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.521921</td>\n",
|
||
" <td>0.489307</td>\n",
|
||
" <td>0.706944</td>\n",
|
||
" <td>0.302503</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.405318</td>\n",
|
||
" <td>0.315011</td>\n",
|
||
" <td>0.385622</td>\n",
|
||
" <td>0.295077</td>\n",
|
||
" <td>0.295949</td>\n",
|
||
" <td>0.316965</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.275985</td>\n",
|
||
" <td>0.445541</td>\n",
|
||
" <td>0.486024</td>\n",
|
||
" <td>0.249371</td>\n",
|
||
" <td>0.405318</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.213862</td>\n",
|
||
" <td>0.186064</td>\n",
|
||
" <td>0.199054</td>\n",
|
||
" <td>0.183982</td>\n",
|
||
" <td>0.144403</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <td>0.286303</td>\n",
|
||
" <td>0.302303</td>\n",
|
||
" <td>0.298462</td>\n",
|
||
" <td>0.301220</td>\n",
|
||
" <td>0.315011</td>\n",
|
||
" <td>0.213862</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.467547</td>\n",
|
||
" <td>0.545281</td>\n",
|
||
" <td>0.554993</td>\n",
|
||
" <td>0.480685</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.401496</td>\n",
|
||
" <td>0.401394</td>\n",
|
||
" <td>0.270384</td>\n",
|
||
" <td>0.239805</td>\n",
|
||
" <td>0.385622</td>\n",
|
||
" <td>0.186064</td>\n",
|
||
" <td>0.467547</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.489523</td>\n",
|
||
" <td>0.481612</td>\n",
|
||
" <td>0.413731</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.289342</td>\n",
|
||
" <td>0.216164</td>\n",
|
||
" <td>0.224679</td>\n",
|
||
" <td>0.181490</td>\n",
|
||
" <td>0.295077</td>\n",
|
||
" <td>0.199054</td>\n",
|
||
" <td>0.545281</td>\n",
|
||
" <td>0.489523</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.524824</td>\n",
|
||
" <td>0.517329</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <td>0.343332</td>\n",
|
||
" <td>0.357596</td>\n",
|
||
" <td>0.253847</td>\n",
|
||
" <td>0.296925</td>\n",
|
||
" <td>0.295949</td>\n",
|
||
" <td>0.183982</td>\n",
|
||
" <td>0.554993</td>\n",
|
||
" <td>0.481612</td>\n",
|
||
" <td>0.524824</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.693493</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" <td>0.322299</td>\n",
|
||
" <td>0.284001</td>\n",
|
||
" <td>0.260867</td>\n",
|
||
" <td>0.239862</td>\n",
|
||
" <td>0.316965</td>\n",
|
||
" <td>0.144403</td>\n",
|
||
" <td>0.480685</td>\n",
|
||
" <td>0.413731</td>\n",
|
||
" <td>0.517329</td>\n",
|
||
" <td>0.693493</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Ölleckage durch undichten Ölsumpf \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.422875 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.393899 \n",
|
||
"Blockierung der Förderschnecke 0.230690 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.521921 \n",
|
||
"Überprüfung der Hydraulik 0.275985 \n",
|
||
"Ich gehe spazieren 0.286303 \n",
|
||
"Heute um zwölf war ich unterwegs 0.401496 \n",
|
||
"Ich gehe mit dem Hund raus 0.289342 \n",
|
||
"Ich laufe im Park 0.343332 \n",
|
||
"Ich laufe im Pakr 0.322299 \n",
|
||
"\n",
|
||
" Überprüfung der Schwingungsdämpfer \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.422875 \n",
|
||
"Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.519197 \n",
|
||
"Blockierung der Förderschnecke 0.274956 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.489307 \n",
|
||
"Überprüfung der Hydraulik 0.445541 \n",
|
||
"Ich gehe spazieren 0.302303 \n",
|
||
"Heute um zwölf war ich unterwegs 0.401394 \n",
|
||
"Ich gehe mit dem Hund raus 0.216164 \n",
|
||
"Ich laufe im Park 0.357596 \n",
|
||
"Ich laufe im Pakr 0.284001 \n",
|
||
"\n",
|
||
" Überprüfung der Kühlmittelsysteme \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.393899 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.519197 \n",
|
||
"Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"Blockierung der Förderschnecke 0.315556 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.706944 \n",
|
||
"Überprüfung der Hydraulik 0.486024 \n",
|
||
"Ich gehe spazieren 0.298462 \n",
|
||
"Heute um zwölf war ich unterwegs 0.270384 \n",
|
||
"Ich gehe mit dem Hund raus 0.224679 \n",
|
||
"Ich laufe im Park 0.253847 \n",
|
||
"Ich laufe im Pakr 0.260867 \n",
|
||
"\n",
|
||
" Blockierung der Förderschnecke \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.230690 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.274956 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.315556 \n",
|
||
"Blockierung der Förderschnecke 1.000000 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.302503 \n",
|
||
"Überprüfung der Hydraulik 0.249371 \n",
|
||
"Ich gehe spazieren 0.301220 \n",
|
||
"Heute um zwölf war ich unterwegs 0.239805 \n",
|
||
"Ich gehe mit dem Hund raus 0.181490 \n",
|
||
"Ich laufe im Park 0.296925 \n",
|
||
"Ich laufe im Pakr 0.239862 \n",
|
||
"\n",
|
||
" Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.521921 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.489307 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.706944 \n",
|
||
"Blockierung der Förderschnecke 0.302503 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 1.000000 \n",
|
||
"Überprüfung der Hydraulik 0.405318 \n",
|
||
"Ich gehe spazieren 0.315011 \n",
|
||
"Heute um zwölf war ich unterwegs 0.385622 \n",
|
||
"Ich gehe mit dem Hund raus 0.295077 \n",
|
||
"Ich laufe im Park 0.295949 \n",
|
||
"Ich laufe im Pakr 0.316965 \n",
|
||
"\n",
|
||
" Überprüfung der Hydraulik \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.275985 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.445541 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.486024 \n",
|
||
"Blockierung der Förderschnecke 0.249371 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.405318 \n",
|
||
"Überprüfung der Hydraulik 1.000000 \n",
|
||
"Ich gehe spazieren 0.213862 \n",
|
||
"Heute um zwölf war ich unterwegs 0.186064 \n",
|
||
"Ich gehe mit dem Hund raus 0.199054 \n",
|
||
"Ich laufe im Park 0.183982 \n",
|
||
"Ich laufe im Pakr 0.144403 \n",
|
||
"\n",
|
||
" Ich gehe spazieren \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.286303 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.302303 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.298462 \n",
|
||
"Blockierung der Förderschnecke 0.301220 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.315011 \n",
|
||
"Überprüfung der Hydraulik 0.213862 \n",
|
||
"Ich gehe spazieren 1.000000 \n",
|
||
"Heute um zwölf war ich unterwegs 0.467547 \n",
|
||
"Ich gehe mit dem Hund raus 0.545281 \n",
|
||
"Ich laufe im Park 0.554993 \n",
|
||
"Ich laufe im Pakr 0.480685 \n",
|
||
"\n",
|
||
" Heute um zwölf war ich unterwegs \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.401496 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.401394 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.270384 \n",
|
||
"Blockierung der Förderschnecke 0.239805 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.385622 \n",
|
||
"Überprüfung der Hydraulik 0.186064 \n",
|
||
"Ich gehe spazieren 0.467547 \n",
|
||
"Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"Ich gehe mit dem Hund raus 0.489523 \n",
|
||
"Ich laufe im Park 0.481612 \n",
|
||
"Ich laufe im Pakr 0.413731 \n",
|
||
"\n",
|
||
" Ich gehe mit dem Hund raus \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.289342 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.216164 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.224679 \n",
|
||
"Blockierung der Förderschnecke 0.181490 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.295077 \n",
|
||
"Überprüfung der Hydraulik 0.199054 \n",
|
||
"Ich gehe spazieren 0.545281 \n",
|
||
"Heute um zwölf war ich unterwegs 0.489523 \n",
|
||
"Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"Ich laufe im Park 0.524824 \n",
|
||
"Ich laufe im Pakr 0.517329 \n",
|
||
"\n",
|
||
" Ich laufe im Park \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.343332 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.357596 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.253847 \n",
|
||
"Blockierung der Förderschnecke 0.296925 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.295949 \n",
|
||
"Überprüfung der Hydraulik 0.183982 \n",
|
||
"Ich gehe spazieren 0.554993 \n",
|
||
"Heute um zwölf war ich unterwegs 0.481612 \n",
|
||
"Ich gehe mit dem Hund raus 0.524824 \n",
|
||
"Ich laufe im Park 1.000000 \n",
|
||
"Ich laufe im Pakr 0.693493 \n",
|
||
"\n",
|
||
" Ich laufe im Pakr \n",
|
||
"Ölleckage durch undichten Ölsumpf 0.322299 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.284001 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.260867 \n",
|
||
"Blockierung der Förderschnecke 0.239862 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.316965 \n",
|
||
"Überprüfung der Hydraulik 0.144403 \n",
|
||
"Ich gehe spazieren 0.480685 \n",
|
||
"Heute um zwölf war ich unterwegs 0.413731 \n",
|
||
"Ich gehe mit dem Hund raus 0.517329 \n",
|
||
"Ich laufe im Park 0.693493 \n",
|
||
"Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"calc_similarities(model_name_ref, docs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"id": "10b98b5a-21eb-4cc6-aadb-8ba7f7bd9192",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['Ölleckage durch undichten Ölsumpf',\n",
|
||
" 'Überprüfung der Schwingungsdämpfer',\n",
|
||
" 'Überprüfung der Kühlmittelsysteme',\n",
|
||
" 'Blockierung der Förderschnecke',\n",
|
||
" 'Überhitzung durch mangelnde Kühlmittelzirkulation',\n",
|
||
" 'Überprüfung der Hydraulik',\n",
|
||
" 'Ich gehe spazieren',\n",
|
||
" 'Heute um zwölf war ich unterwegs',\n",
|
||
" 'Ich gehe mit dem Hund raus',\n",
|
||
" 'Ich laufe im Park',\n",
|
||
" 'Ich laufe im Pakr']"
|
||
]
|
||
},
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"docs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"id": "976f1c18-476a-4790-865e-d41839485575",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['passage: Ölleckage durch undichten Ölsumpf',\n",
|
||
" 'passage: Überprüfung der Schwingungsdämpfer',\n",
|
||
" 'passage: Überprüfung der Kühlmittelsysteme',\n",
|
||
" 'passage: Blockierung der Förderschnecke',\n",
|
||
" 'passage: Überhitzung durch mangelnde Kühlmittelzirkulation',\n",
|
||
" 'passage: Überprüfung der Hydraulik',\n",
|
||
" 'passage: Ich gehe spazieren',\n",
|
||
" 'passage: Heute um zwölf war ich unterwegs',\n",
|
||
" 'passage: Ich gehe mit dem Hund raus',\n",
|
||
" 'passage: Ich laufe im Park',\n",
|
||
" 'passage: Ich laufe im Pakr']"
|
||
]
|
||
},
|
||
"execution_count": 70,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# adaption to new model\n",
|
||
"query = 'query: Retrieve semantically similar text: '\n",
|
||
"new_docs = []\n",
|
||
"for doc in docs:\n",
|
||
" new_doc = 'passage: ' + doc\n",
|
||
" new_docs.append(new_doc)\n",
|
||
"\n",
|
||
"new_docs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9022b95b-ac87-4dad-af16-91cb5677c582",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "c4fa6646-816c-4bb2-bb00-f8cfa0a5a154",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sentence_transformers.SentenceTransformer import SentenceTransformer"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "c8bd3ce2-bf0b-4016-9d53-d51dd8d4d996",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dimensions = 1024\n",
|
||
"model = SentenceTransformer(STFRModelTypes.PARAPHRASE_MULTI_MPNET_BASE_V2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"id": "23304a9f-4026-4465-9049-88c97ea7bcdf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = model_quant"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "627b3c40-44b3-4d0d-88ca-6a8be2c2a266",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"id": "750946d3-61f3-487e-8e28-fe629cc1f6a4",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>passage: Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>passage: Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>passage: Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>passage: Blockierung der Förderschnecke</th>\n",
|
||
" <th>passage: Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>passage: Überprüfung der Hydraulik</th>\n",
|
||
" <th>passage: Ich gehe spazieren</th>\n",
|
||
" <th>passage: Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>passage: Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>passage: Ich laufe im Park</th>\n",
|
||
" <th>passage: Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.779681</td>\n",
|
||
" <td>0.784307</td>\n",
|
||
" <td>0.823721</td>\n",
|
||
" <td>0.830782</td>\n",
|
||
" <td>0.804758</td>\n",
|
||
" <td>0.670097</td>\n",
|
||
" <td>0.705543</td>\n",
|
||
" <td>0.686085</td>\n",
|
||
" <td>0.657670</td>\n",
|
||
" <td>0.724280</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.779681</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.896381</td>\n",
|
||
" <td>0.840342</td>\n",
|
||
" <td>0.785065</td>\n",
|
||
" <td>0.899921</td>\n",
|
||
" <td>0.703496</td>\n",
|
||
" <td>0.718232</td>\n",
|
||
" <td>0.688768</td>\n",
|
||
" <td>0.698420</td>\n",
|
||
" <td>0.731654</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.784307</td>\n",
|
||
" <td>0.896381</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.796926</td>\n",
|
||
" <td>0.833327</td>\n",
|
||
" <td>0.886805</td>\n",
|
||
" <td>0.656722</td>\n",
|
||
" <td>0.712874</td>\n",
|
||
" <td>0.667584</td>\n",
|
||
" <td>0.659513</td>\n",
|
||
" <td>0.721201</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.823721</td>\n",
|
||
" <td>0.840342</td>\n",
|
||
" <td>0.796926</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.793991</td>\n",
|
||
" <td>0.829363</td>\n",
|
||
" <td>0.668617</td>\n",
|
||
" <td>0.691422</td>\n",
|
||
" <td>0.687467</td>\n",
|
||
" <td>0.687888</td>\n",
|
||
" <td>0.734692</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.830782</td>\n",
|
||
" <td>0.785065</td>\n",
|
||
" <td>0.833327</td>\n",
|
||
" <td>0.793991</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.788585</td>\n",
|
||
" <td>0.674641</td>\n",
|
||
" <td>0.702350</td>\n",
|
||
" <td>0.690236</td>\n",
|
||
" <td>0.683493</td>\n",
|
||
" <td>0.741511</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.804758</td>\n",
|
||
" <td>0.899921</td>\n",
|
||
" <td>0.886805</td>\n",
|
||
" <td>0.829363</td>\n",
|
||
" <td>0.788585</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.690285</td>\n",
|
||
" <td>0.726948</td>\n",
|
||
" <td>0.705504</td>\n",
|
||
" <td>0.698003</td>\n",
|
||
" <td>0.739768</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich gehe spazieren</th>\n",
|
||
" <td>0.670097</td>\n",
|
||
" <td>0.703496</td>\n",
|
||
" <td>0.656722</td>\n",
|
||
" <td>0.668617</td>\n",
|
||
" <td>0.674641</td>\n",
|
||
" <td>0.690285</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.782636</td>\n",
|
||
" <td>0.839147</td>\n",
|
||
" <td>0.842675</td>\n",
|
||
" <td>0.794528</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.705543</td>\n",
|
||
" <td>0.718232</td>\n",
|
||
" <td>0.712874</td>\n",
|
||
" <td>0.691422</td>\n",
|
||
" <td>0.702350</td>\n",
|
||
" <td>0.726948</td>\n",
|
||
" <td>0.782636</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.752759</td>\n",
|
||
" <td>0.727476</td>\n",
|
||
" <td>0.754443</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.686085</td>\n",
|
||
" <td>0.688768</td>\n",
|
||
" <td>0.667584</td>\n",
|
||
" <td>0.687467</td>\n",
|
||
" <td>0.690236</td>\n",
|
||
" <td>0.705504</td>\n",
|
||
" <td>0.839147</td>\n",
|
||
" <td>0.752759</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.769349</td>\n",
|
||
" <td>0.764451</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich laufe im Park</th>\n",
|
||
" <td>0.657670</td>\n",
|
||
" <td>0.698420</td>\n",
|
||
" <td>0.659513</td>\n",
|
||
" <td>0.687888</td>\n",
|
||
" <td>0.683493</td>\n",
|
||
" <td>0.698003</td>\n",
|
||
" <td>0.842675</td>\n",
|
||
" <td>0.727476</td>\n",
|
||
" <td>0.769349</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.884060</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich laufe im Pakr</th>\n",
|
||
" <td>0.724280</td>\n",
|
||
" <td>0.731654</td>\n",
|
||
" <td>0.721201</td>\n",
|
||
" <td>0.734692</td>\n",
|
||
" <td>0.741511</td>\n",
|
||
" <td>0.739768</td>\n",
|
||
" <td>0.794528</td>\n",
|
||
" <td>0.754443</td>\n",
|
||
" <td>0.764451</td>\n",
|
||
" <td>0.884060</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" passage: Ölleckage durch undichten Ölsumpf \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.779681 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.784307 \n",
|
||
"passage: Blockierung der Förderschnecke 0.823721 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.830782 \n",
|
||
"passage: Überprüfung der Hydraulik 0.804758 \n",
|
||
"passage: Ich gehe spazieren 0.670097 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.705543 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.686085 \n",
|
||
"passage: Ich laufe im Park 0.657670 \n",
|
||
"passage: Ich laufe im Pakr 0.724280 \n",
|
||
"\n",
|
||
" passage: Überprüfung der Schwingungsdämpfer \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.779681 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.896381 \n",
|
||
"passage: Blockierung der Förderschnecke 0.840342 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.785065 \n",
|
||
"passage: Überprüfung der Hydraulik 0.899921 \n",
|
||
"passage: Ich gehe spazieren 0.703496 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.718232 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.688768 \n",
|
||
"passage: Ich laufe im Park 0.698420 \n",
|
||
"passage: Ich laufe im Pakr 0.731654 \n",
|
||
"\n",
|
||
" passage: Überprüfung der Kühlmittelsysteme \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.784307 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.896381 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"passage: Blockierung der Förderschnecke 0.796926 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.833327 \n",
|
||
"passage: Überprüfung der Hydraulik 0.886805 \n",
|
||
"passage: Ich gehe spazieren 0.656722 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.712874 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.667584 \n",
|
||
"passage: Ich laufe im Park 0.659513 \n",
|
||
"passage: Ich laufe im Pakr 0.721201 \n",
|
||
"\n",
|
||
" passage: Blockierung der Förderschnecke \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.823721 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.840342 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.796926 \n",
|
||
"passage: Blockierung der Förderschnecke 1.000000 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.793991 \n",
|
||
"passage: Überprüfung der Hydraulik 0.829363 \n",
|
||
"passage: Ich gehe spazieren 0.668617 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.691422 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.687467 \n",
|
||
"passage: Ich laufe im Park 0.687888 \n",
|
||
"passage: Ich laufe im Pakr 0.734692 \n",
|
||
"\n",
|
||
" passage: Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.830782 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.785065 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.833327 \n",
|
||
"passage: Blockierung der Förderschnecke 0.793991 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 1.000000 \n",
|
||
"passage: Überprüfung der Hydraulik 0.788585 \n",
|
||
"passage: Ich gehe spazieren 0.674641 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.702350 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.690236 \n",
|
||
"passage: Ich laufe im Park 0.683493 \n",
|
||
"passage: Ich laufe im Pakr 0.741511 \n",
|
||
"\n",
|
||
" passage: Überprüfung der Hydraulik \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.804758 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.899921 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.886805 \n",
|
||
"passage: Blockierung der Förderschnecke 0.829363 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.788585 \n",
|
||
"passage: Überprüfung der Hydraulik 1.000000 \n",
|
||
"passage: Ich gehe spazieren 0.690285 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.726948 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.705504 \n",
|
||
"passage: Ich laufe im Park 0.698003 \n",
|
||
"passage: Ich laufe im Pakr 0.739768 \n",
|
||
"\n",
|
||
" passage: Ich gehe spazieren \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.670097 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.703496 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.656722 \n",
|
||
"passage: Blockierung der Förderschnecke 0.668617 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.674641 \n",
|
||
"passage: Überprüfung der Hydraulik 0.690285 \n",
|
||
"passage: Ich gehe spazieren 1.000000 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.782636 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.839147 \n",
|
||
"passage: Ich laufe im Park 0.842675 \n",
|
||
"passage: Ich laufe im Pakr 0.794528 \n",
|
||
"\n",
|
||
" passage: Heute um zwölf war ich unterwegs \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.705543 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.718232 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.712874 \n",
|
||
"passage: Blockierung der Förderschnecke 0.691422 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.702350 \n",
|
||
"passage: Überprüfung der Hydraulik 0.726948 \n",
|
||
"passage: Ich gehe spazieren 0.782636 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.752759 \n",
|
||
"passage: Ich laufe im Park 0.727476 \n",
|
||
"passage: Ich laufe im Pakr 0.754443 \n",
|
||
"\n",
|
||
" passage: Ich gehe mit dem Hund raus \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.686085 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.688768 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.667584 \n",
|
||
"passage: Blockierung der Förderschnecke 0.687467 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.690236 \n",
|
||
"passage: Überprüfung der Hydraulik 0.705504 \n",
|
||
"passage: Ich gehe spazieren 0.839147 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.752759 \n",
|
||
"passage: Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"passage: Ich laufe im Park 0.769349 \n",
|
||
"passage: Ich laufe im Pakr 0.764451 \n",
|
||
"\n",
|
||
" passage: Ich laufe im Park \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.657670 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.698420 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.659513 \n",
|
||
"passage: Blockierung der Förderschnecke 0.687888 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.683493 \n",
|
||
"passage: Überprüfung der Hydraulik 0.698003 \n",
|
||
"passage: Ich gehe spazieren 0.842675 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.727476 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.769349 \n",
|
||
"passage: Ich laufe im Park 1.000000 \n",
|
||
"passage: Ich laufe im Pakr 0.884060 \n",
|
||
"\n",
|
||
" passage: Ich laufe im Pakr \n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.724280 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.731654 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.721201 \n",
|
||
"passage: Blockierung der Förderschnecke 0.734692 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.741511 \n",
|
||
"passage: Überprüfung der Hydraulik 0.739768 \n",
|
||
"passage: Ich gehe spazieren 0.794528 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.754443 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.764451 \n",
|
||
"passage: Ich laufe im Park 0.884060 \n",
|
||
"passage: Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#model = load_models(model_name_new)\n",
|
||
"embds = model.encode(new_docs, convert_to_numpy=False, convert_to_tensor=True)\n",
|
||
"sims = model.similarity(embds, embds).numpy()\n",
|
||
"sims.shape\n",
|
||
"df_new_model = pd.DataFrame(data=sims, index=new_docs, columns=new_docs)\n",
|
||
"df_new_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "57871f69-4478-4593-9931-717cf0f8e1ba",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([[1.0000001 , 0.77968144, 0.7843068 , 0.8237207 , 0.830782 ,\n",
|
||
" 0.8047581 , 0.6700973 , 0.7055429 , 0.6860854 , 0.6576705 ,\n",
|
||
" 0.72428024],\n",
|
||
" [0.77968144, 1. , 0.8963811 , 0.84034187, 0.7850649 ,\n",
|
||
" 0.8999206 , 0.7034964 , 0.7182317 , 0.68876797, 0.69842 ,\n",
|
||
" 0.73165405],\n",
|
||
" [0.7843068 , 0.8963811 , 1. , 0.7969257 , 0.8333273 ,\n",
|
||
" 0.8868046 , 0.6567219 , 0.71287364, 0.66758376, 0.6595131 ,\n",
|
||
" 0.7212007 ],\n",
|
||
" [0.8237207 , 0.84034187, 0.7969257 , 1.0000001 , 0.793991 ,\n",
|
||
" 0.82936305, 0.66861665, 0.6914221 , 0.68746734, 0.68788785,\n",
|
||
" 0.73469234],\n",
|
||
" [0.830782 , 0.7850649 , 0.8333273 , 0.793991 , 1.0000001 ,\n",
|
||
" 0.7885847 , 0.6746406 , 0.7023505 , 0.6902363 , 0.68349344,\n",
|
||
" 0.7415106 ],\n",
|
||
" [0.8047581 , 0.8999206 , 0.8868046 , 0.82936305, 0.7885847 ,\n",
|
||
" 1.0000002 , 0.6902847 , 0.72694767, 0.7055044 , 0.6980032 ,\n",
|
||
" 0.7397681 ],\n",
|
||
" [0.6700973 , 0.7034964 , 0.6567219 , 0.66861665, 0.6746406 ,\n",
|
||
" 0.6902847 , 1.0000002 , 0.782636 , 0.83914703, 0.842675 ,\n",
|
||
" 0.79452753],\n",
|
||
" [0.7055429 , 0.7182317 , 0.71287364, 0.6914221 , 0.7023505 ,\n",
|
||
" 0.72694767, 0.782636 , 0.9999998 , 0.752759 , 0.72747564,\n",
|
||
" 0.75444347],\n",
|
||
" [0.6860854 , 0.68876797, 0.66758376, 0.68746734, 0.6902363 ,\n",
|
||
" 0.7055044 , 0.83914703, 0.752759 , 1. , 0.76934934,\n",
|
||
" 0.76445085],\n",
|
||
" [0.6576705 , 0.69842 , 0.6595131 , 0.68788785, 0.68349344,\n",
|
||
" 0.6980032 , 0.842675 , 0.72747564, 0.76934934, 1.0000004 ,\n",
|
||
" 0.8840596 ],\n",
|
||
" [0.72428024, 0.73165405, 0.7212007 , 0.73469234, 0.7415106 ,\n",
|
||
" 0.7397681 , 0.79452753, 0.75444347, 0.76445085, 0.8840596 ,\n",
|
||
" 0.99999964]], dtype=float32)"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sims"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 139,
|
||
"id": "7877138b-26de-4c37-953e-223c859ed33e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['Ölleckage durch undichten Ölsumpf',\n",
|
||
" 'Überprüfung der Schwingungsdämpfer',\n",
|
||
" 'Überprüfung der Kühlmittelsysteme',\n",
|
||
" 'Blockierung der Förderschnecke',\n",
|
||
" 'Überhitzung durch mangelnde Kühlmittelzirkulation',\n",
|
||
" 'Überprüfung der Hydraulik',\n",
|
||
" 'Ich gehe spazieren',\n",
|
||
" 'Heute um zwölf war ich unterwegs',\n",
|
||
" 'Ich gehe mit dem Hund raus',\n",
|
||
" 'Ich laufe im Park',\n",
|
||
" 'Ich laufe im Pakr']"
|
||
]
|
||
},
|
||
"execution_count": 139,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"docs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "c50973c9-d6d0-41e7-9ed1-f89b63e27cc4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"No sentence-transformers model found with name aari1995/German_Semantic_STS_V2. Creating a new one with mean pooling.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"model_alt = load_models('aari1995/German_Semantic_STS_V2')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "c6166df4-4d04-451c-b218-49a26aada14f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def load_alt_model(model_name, docs):\n",
|
||
" model_alt = load_models(model_name)\n",
|
||
" embds = model_alt.encode(docs, convert_to_numpy=False, convert_to_tensor=True)\n",
|
||
" sims = model_alt.similarity(embds, embds).numpy()\n",
|
||
" df_alt_model = pd.DataFrame(data=sims, index=docs, columns=docs)\n",
|
||
" \n",
|
||
" return df_alt_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "12711499-ac0c-455a-a4c6-04b1efca9fbe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.422875</td>\n",
|
||
" <td>0.393899</td>\n",
|
||
" <td>0.230690</td>\n",
|
||
" <td>0.521921</td>\n",
|
||
" <td>0.275985</td>\n",
|
||
" <td>0.286303</td>\n",
|
||
" <td>0.401496</td>\n",
|
||
" <td>0.289342</td>\n",
|
||
" <td>0.343332</td>\n",
|
||
" <td>0.322299</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.422875</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.519197</td>\n",
|
||
" <td>0.274956</td>\n",
|
||
" <td>0.489307</td>\n",
|
||
" <td>0.445541</td>\n",
|
||
" <td>0.302303</td>\n",
|
||
" <td>0.401394</td>\n",
|
||
" <td>0.216164</td>\n",
|
||
" <td>0.357596</td>\n",
|
||
" <td>0.284001</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.393899</td>\n",
|
||
" <td>0.519197</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.315556</td>\n",
|
||
" <td>0.706944</td>\n",
|
||
" <td>0.486024</td>\n",
|
||
" <td>0.298462</td>\n",
|
||
" <td>0.270384</td>\n",
|
||
" <td>0.224679</td>\n",
|
||
" <td>0.253847</td>\n",
|
||
" <td>0.260867</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.230690</td>\n",
|
||
" <td>0.274956</td>\n",
|
||
" <td>0.315556</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.302503</td>\n",
|
||
" <td>0.249371</td>\n",
|
||
" <td>0.301220</td>\n",
|
||
" <td>0.239805</td>\n",
|
||
" <td>0.181490</td>\n",
|
||
" <td>0.296925</td>\n",
|
||
" <td>0.239862</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.521921</td>\n",
|
||
" <td>0.489307</td>\n",
|
||
" <td>0.706944</td>\n",
|
||
" <td>0.302503</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.405318</td>\n",
|
||
" <td>0.315011</td>\n",
|
||
" <td>0.385622</td>\n",
|
||
" <td>0.295077</td>\n",
|
||
" <td>0.295949</td>\n",
|
||
" <td>0.316965</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.275985</td>\n",
|
||
" <td>0.445541</td>\n",
|
||
" <td>0.486024</td>\n",
|
||
" <td>0.249371</td>\n",
|
||
" <td>0.405318</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.213862</td>\n",
|
||
" <td>0.186064</td>\n",
|
||
" <td>0.199054</td>\n",
|
||
" <td>0.183982</td>\n",
|
||
" <td>0.144403</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <td>0.286303</td>\n",
|
||
" <td>0.302303</td>\n",
|
||
" <td>0.298462</td>\n",
|
||
" <td>0.301220</td>\n",
|
||
" <td>0.315011</td>\n",
|
||
" <td>0.213862</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.467547</td>\n",
|
||
" <td>0.545281</td>\n",
|
||
" <td>0.554993</td>\n",
|
||
" <td>0.480685</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.401496</td>\n",
|
||
" <td>0.401394</td>\n",
|
||
" <td>0.270384</td>\n",
|
||
" <td>0.239805</td>\n",
|
||
" <td>0.385622</td>\n",
|
||
" <td>0.186064</td>\n",
|
||
" <td>0.467547</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.489523</td>\n",
|
||
" <td>0.481612</td>\n",
|
||
" <td>0.413731</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.289342</td>\n",
|
||
" <td>0.216164</td>\n",
|
||
" <td>0.224679</td>\n",
|
||
" <td>0.181490</td>\n",
|
||
" <td>0.295077</td>\n",
|
||
" <td>0.199054</td>\n",
|
||
" <td>0.545281</td>\n",
|
||
" <td>0.489523</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.524824</td>\n",
|
||
" <td>0.517329</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <td>0.343332</td>\n",
|
||
" <td>0.357596</td>\n",
|
||
" <td>0.253847</td>\n",
|
||
" <td>0.296925</td>\n",
|
||
" <td>0.295949</td>\n",
|
||
" <td>0.183982</td>\n",
|
||
" <td>0.554993</td>\n",
|
||
" <td>0.481612</td>\n",
|
||
" <td>0.524824</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.693493</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" <td>0.322299</td>\n",
|
||
" <td>0.284001</td>\n",
|
||
" <td>0.260867</td>\n",
|
||
" <td>0.239862</td>\n",
|
||
" <td>0.316965</td>\n",
|
||
" <td>0.144403</td>\n",
|
||
" <td>0.480685</td>\n",
|
||
" <td>0.413731</td>\n",
|
||
" <td>0.517329</td>\n",
|
||
" <td>0.693493</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Ölleckage durch undichten Ölsumpf \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.422875 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.393899 \n",
|
||
"Blockierung der Förderschnecke 0.230690 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.521921 \n",
|
||
"Überprüfung der Hydraulik 0.275985 \n",
|
||
"Ich gehe spazieren 0.286303 \n",
|
||
"Heute um zwölf war ich unterwegs 0.401496 \n",
|
||
"Ich gehe mit dem Hund raus 0.289342 \n",
|
||
"Ich laufe im Park 0.343332 \n",
|
||
"Ich laufe im Pakr 0.322299 \n",
|
||
"\n",
|
||
" Überprüfung der Schwingungsdämpfer \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.422875 \n",
|
||
"Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.519197 \n",
|
||
"Blockierung der Förderschnecke 0.274956 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.489307 \n",
|
||
"Überprüfung der Hydraulik 0.445541 \n",
|
||
"Ich gehe spazieren 0.302303 \n",
|
||
"Heute um zwölf war ich unterwegs 0.401394 \n",
|
||
"Ich gehe mit dem Hund raus 0.216164 \n",
|
||
"Ich laufe im Park 0.357596 \n",
|
||
"Ich laufe im Pakr 0.284001 \n",
|
||
"\n",
|
||
" Überprüfung der Kühlmittelsysteme \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.393899 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.519197 \n",
|
||
"Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"Blockierung der Förderschnecke 0.315556 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.706944 \n",
|
||
"Überprüfung der Hydraulik 0.486024 \n",
|
||
"Ich gehe spazieren 0.298462 \n",
|
||
"Heute um zwölf war ich unterwegs 0.270384 \n",
|
||
"Ich gehe mit dem Hund raus 0.224679 \n",
|
||
"Ich laufe im Park 0.253847 \n",
|
||
"Ich laufe im Pakr 0.260867 \n",
|
||
"\n",
|
||
" Blockierung der Förderschnecke \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.230690 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.274956 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.315556 \n",
|
||
"Blockierung der Förderschnecke 1.000000 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.302503 \n",
|
||
"Überprüfung der Hydraulik 0.249371 \n",
|
||
"Ich gehe spazieren 0.301220 \n",
|
||
"Heute um zwölf war ich unterwegs 0.239805 \n",
|
||
"Ich gehe mit dem Hund raus 0.181490 \n",
|
||
"Ich laufe im Park 0.296925 \n",
|
||
"Ich laufe im Pakr 0.239862 \n",
|
||
"\n",
|
||
" Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.521921 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.489307 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.706944 \n",
|
||
"Blockierung der Förderschnecke 0.302503 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 1.000000 \n",
|
||
"Überprüfung der Hydraulik 0.405318 \n",
|
||
"Ich gehe spazieren 0.315011 \n",
|
||
"Heute um zwölf war ich unterwegs 0.385622 \n",
|
||
"Ich gehe mit dem Hund raus 0.295077 \n",
|
||
"Ich laufe im Park 0.295949 \n",
|
||
"Ich laufe im Pakr 0.316965 \n",
|
||
"\n",
|
||
" Überprüfung der Hydraulik \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.275985 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.445541 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.486024 \n",
|
||
"Blockierung der Förderschnecke 0.249371 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.405318 \n",
|
||
"Überprüfung der Hydraulik 1.000000 \n",
|
||
"Ich gehe spazieren 0.213862 \n",
|
||
"Heute um zwölf war ich unterwegs 0.186064 \n",
|
||
"Ich gehe mit dem Hund raus 0.199054 \n",
|
||
"Ich laufe im Park 0.183982 \n",
|
||
"Ich laufe im Pakr 0.144403 \n",
|
||
"\n",
|
||
" Ich gehe spazieren \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.286303 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.302303 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.298462 \n",
|
||
"Blockierung der Förderschnecke 0.301220 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.315011 \n",
|
||
"Überprüfung der Hydraulik 0.213862 \n",
|
||
"Ich gehe spazieren 1.000000 \n",
|
||
"Heute um zwölf war ich unterwegs 0.467547 \n",
|
||
"Ich gehe mit dem Hund raus 0.545281 \n",
|
||
"Ich laufe im Park 0.554993 \n",
|
||
"Ich laufe im Pakr 0.480685 \n",
|
||
"\n",
|
||
" Heute um zwölf war ich unterwegs \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.401496 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.401394 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.270384 \n",
|
||
"Blockierung der Förderschnecke 0.239805 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.385622 \n",
|
||
"Überprüfung der Hydraulik 0.186064 \n",
|
||
"Ich gehe spazieren 0.467547 \n",
|
||
"Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"Ich gehe mit dem Hund raus 0.489523 \n",
|
||
"Ich laufe im Park 0.481612 \n",
|
||
"Ich laufe im Pakr 0.413731 \n",
|
||
"\n",
|
||
" Ich gehe mit dem Hund raus \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.289342 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.216164 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.224679 \n",
|
||
"Blockierung der Förderschnecke 0.181490 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.295077 \n",
|
||
"Überprüfung der Hydraulik 0.199054 \n",
|
||
"Ich gehe spazieren 0.545281 \n",
|
||
"Heute um zwölf war ich unterwegs 0.489523 \n",
|
||
"Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"Ich laufe im Park 0.524824 \n",
|
||
"Ich laufe im Pakr 0.517329 \n",
|
||
"\n",
|
||
" Ich laufe im Park \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.343332 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.357596 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.253847 \n",
|
||
"Blockierung der Förderschnecke 0.296925 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.295949 \n",
|
||
"Überprüfung der Hydraulik 0.183982 \n",
|
||
"Ich gehe spazieren 0.554993 \n",
|
||
"Heute um zwölf war ich unterwegs 0.481612 \n",
|
||
"Ich gehe mit dem Hund raus 0.524824 \n",
|
||
"Ich laufe im Park 1.000000 \n",
|
||
"Ich laufe im Pakr 0.693493 \n",
|
||
"\n",
|
||
" Ich laufe im Pakr \n",
|
||
"Ölleckage durch undichten Ölsumpf 0.322299 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.284001 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.260867 \n",
|
||
"Blockierung der Förderschnecke 0.239862 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.316965 \n",
|
||
"Überprüfung der Hydraulik 0.144403 \n",
|
||
"Ich gehe spazieren 0.480685 \n",
|
||
"Heute um zwölf war ich unterwegs 0.413731 \n",
|
||
"Ich gehe mit dem Hund raus 0.517329 \n",
|
||
"Ich laufe im Park 0.693493 \n",
|
||
"Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_ref = calc_similarities(model_name_ref, docs)\n",
|
||
"df_ref"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "8bfd0f03-b6ac-4327-8242-1e46aa1b11ce",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>passage: Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>passage: Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>passage: Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>passage: Blockierung der Förderschnecke</th>\n",
|
||
" <th>passage: Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>passage: Überprüfung der Hydraulik</th>\n",
|
||
" <th>passage: Ich gehe spazieren</th>\n",
|
||
" <th>passage: Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>passage: Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>passage: Ich laufe im Park</th>\n",
|
||
" <th>passage: Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.779681</td>\n",
|
||
" <td>0.784307</td>\n",
|
||
" <td>0.823721</td>\n",
|
||
" <td>0.830782</td>\n",
|
||
" <td>0.804758</td>\n",
|
||
" <td>0.670097</td>\n",
|
||
" <td>0.705543</td>\n",
|
||
" <td>0.686085</td>\n",
|
||
" <td>0.657670</td>\n",
|
||
" <td>0.724280</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.779681</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.896381</td>\n",
|
||
" <td>0.840342</td>\n",
|
||
" <td>0.785065</td>\n",
|
||
" <td>0.899921</td>\n",
|
||
" <td>0.703496</td>\n",
|
||
" <td>0.718232</td>\n",
|
||
" <td>0.688768</td>\n",
|
||
" <td>0.698420</td>\n",
|
||
" <td>0.731654</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.784307</td>\n",
|
||
" <td>0.896381</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.796926</td>\n",
|
||
" <td>0.833327</td>\n",
|
||
" <td>0.886805</td>\n",
|
||
" <td>0.656722</td>\n",
|
||
" <td>0.712874</td>\n",
|
||
" <td>0.667584</td>\n",
|
||
" <td>0.659513</td>\n",
|
||
" <td>0.721201</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.823721</td>\n",
|
||
" <td>0.840342</td>\n",
|
||
" <td>0.796926</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.793991</td>\n",
|
||
" <td>0.829363</td>\n",
|
||
" <td>0.668617</td>\n",
|
||
" <td>0.691422</td>\n",
|
||
" <td>0.687467</td>\n",
|
||
" <td>0.687888</td>\n",
|
||
" <td>0.734692</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.830782</td>\n",
|
||
" <td>0.785065</td>\n",
|
||
" <td>0.833327</td>\n",
|
||
" <td>0.793991</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.788585</td>\n",
|
||
" <td>0.674641</td>\n",
|
||
" <td>0.702350</td>\n",
|
||
" <td>0.690236</td>\n",
|
||
" <td>0.683493</td>\n",
|
||
" <td>0.741511</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.804758</td>\n",
|
||
" <td>0.899921</td>\n",
|
||
" <td>0.886805</td>\n",
|
||
" <td>0.829363</td>\n",
|
||
" <td>0.788585</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.690285</td>\n",
|
||
" <td>0.726948</td>\n",
|
||
" <td>0.705504</td>\n",
|
||
" <td>0.698003</td>\n",
|
||
" <td>0.739768</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich gehe spazieren</th>\n",
|
||
" <td>0.670097</td>\n",
|
||
" <td>0.703496</td>\n",
|
||
" <td>0.656722</td>\n",
|
||
" <td>0.668617</td>\n",
|
||
" <td>0.674641</td>\n",
|
||
" <td>0.690285</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.782636</td>\n",
|
||
" <td>0.839147</td>\n",
|
||
" <td>0.842675</td>\n",
|
||
" <td>0.794528</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.705543</td>\n",
|
||
" <td>0.718232</td>\n",
|
||
" <td>0.712874</td>\n",
|
||
" <td>0.691422</td>\n",
|
||
" <td>0.702350</td>\n",
|
||
" <td>0.726948</td>\n",
|
||
" <td>0.782636</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.752759</td>\n",
|
||
" <td>0.727476</td>\n",
|
||
" <td>0.754443</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.686085</td>\n",
|
||
" <td>0.688768</td>\n",
|
||
" <td>0.667584</td>\n",
|
||
" <td>0.687467</td>\n",
|
||
" <td>0.690236</td>\n",
|
||
" <td>0.705504</td>\n",
|
||
" <td>0.839147</td>\n",
|
||
" <td>0.752759</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.769349</td>\n",
|
||
" <td>0.764451</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich laufe im Park</th>\n",
|
||
" <td>0.657670</td>\n",
|
||
" <td>0.698420</td>\n",
|
||
" <td>0.659513</td>\n",
|
||
" <td>0.687888</td>\n",
|
||
" <td>0.683493</td>\n",
|
||
" <td>0.698003</td>\n",
|
||
" <td>0.842675</td>\n",
|
||
" <td>0.727476</td>\n",
|
||
" <td>0.769349</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.884060</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>passage: Ich laufe im Pakr</th>\n",
|
||
" <td>0.724280</td>\n",
|
||
" <td>0.731654</td>\n",
|
||
" <td>0.721201</td>\n",
|
||
" <td>0.734692</td>\n",
|
||
" <td>0.741511</td>\n",
|
||
" <td>0.739768</td>\n",
|
||
" <td>0.794528</td>\n",
|
||
" <td>0.754443</td>\n",
|
||
" <td>0.764451</td>\n",
|
||
" <td>0.884060</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" passage: Ölleckage durch undichten Ölsumpf \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.779681 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.784307 \n",
|
||
"passage: Blockierung der Förderschnecke 0.823721 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.830782 \n",
|
||
"passage: Überprüfung der Hydraulik 0.804758 \n",
|
||
"passage: Ich gehe spazieren 0.670097 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.705543 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.686085 \n",
|
||
"passage: Ich laufe im Park 0.657670 \n",
|
||
"passage: Ich laufe im Pakr 0.724280 \n",
|
||
"\n",
|
||
" passage: Überprüfung der Schwingungsdämpfer \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.779681 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.896381 \n",
|
||
"passage: Blockierung der Förderschnecke 0.840342 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.785065 \n",
|
||
"passage: Überprüfung der Hydraulik 0.899921 \n",
|
||
"passage: Ich gehe spazieren 0.703496 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.718232 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.688768 \n",
|
||
"passage: Ich laufe im Park 0.698420 \n",
|
||
"passage: Ich laufe im Pakr 0.731654 \n",
|
||
"\n",
|
||
" passage: Überprüfung der Kühlmittelsysteme \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.784307 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.896381 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"passage: Blockierung der Förderschnecke 0.796926 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.833327 \n",
|
||
"passage: Überprüfung der Hydraulik 0.886805 \n",
|
||
"passage: Ich gehe spazieren 0.656722 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.712874 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.667584 \n",
|
||
"passage: Ich laufe im Park 0.659513 \n",
|
||
"passage: Ich laufe im Pakr 0.721201 \n",
|
||
"\n",
|
||
" passage: Blockierung der Förderschnecke \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.823721 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.840342 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.796926 \n",
|
||
"passage: Blockierung der Förderschnecke 1.000000 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.793991 \n",
|
||
"passage: Überprüfung der Hydraulik 0.829363 \n",
|
||
"passage: Ich gehe spazieren 0.668617 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.691422 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.687467 \n",
|
||
"passage: Ich laufe im Park 0.687888 \n",
|
||
"passage: Ich laufe im Pakr 0.734692 \n",
|
||
"\n",
|
||
" passage: Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.830782 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.785065 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.833327 \n",
|
||
"passage: Blockierung der Förderschnecke 0.793991 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 1.000000 \n",
|
||
"passage: Überprüfung der Hydraulik 0.788585 \n",
|
||
"passage: Ich gehe spazieren 0.674641 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.702350 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.690236 \n",
|
||
"passage: Ich laufe im Park 0.683493 \n",
|
||
"passage: Ich laufe im Pakr 0.741511 \n",
|
||
"\n",
|
||
" passage: Überprüfung der Hydraulik \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.804758 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.899921 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.886805 \n",
|
||
"passage: Blockierung der Förderschnecke 0.829363 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.788585 \n",
|
||
"passage: Überprüfung der Hydraulik 1.000000 \n",
|
||
"passage: Ich gehe spazieren 0.690285 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.726948 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.705504 \n",
|
||
"passage: Ich laufe im Park 0.698003 \n",
|
||
"passage: Ich laufe im Pakr 0.739768 \n",
|
||
"\n",
|
||
" passage: Ich gehe spazieren \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.670097 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.703496 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.656722 \n",
|
||
"passage: Blockierung der Förderschnecke 0.668617 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.674641 \n",
|
||
"passage: Überprüfung der Hydraulik 0.690285 \n",
|
||
"passage: Ich gehe spazieren 1.000000 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.782636 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.839147 \n",
|
||
"passage: Ich laufe im Park 0.842675 \n",
|
||
"passage: Ich laufe im Pakr 0.794528 \n",
|
||
"\n",
|
||
" passage: Heute um zwölf war ich unterwegs \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.705543 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.718232 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.712874 \n",
|
||
"passage: Blockierung der Förderschnecke 0.691422 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.702350 \n",
|
||
"passage: Überprüfung der Hydraulik 0.726948 \n",
|
||
"passage: Ich gehe spazieren 0.782636 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.752759 \n",
|
||
"passage: Ich laufe im Park 0.727476 \n",
|
||
"passage: Ich laufe im Pakr 0.754443 \n",
|
||
"\n",
|
||
" passage: Ich gehe mit dem Hund raus \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.686085 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.688768 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.667584 \n",
|
||
"passage: Blockierung der Förderschnecke 0.687467 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.690236 \n",
|
||
"passage: Überprüfung der Hydraulik 0.705504 \n",
|
||
"passage: Ich gehe spazieren 0.839147 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.752759 \n",
|
||
"passage: Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"passage: Ich laufe im Park 0.769349 \n",
|
||
"passage: Ich laufe im Pakr 0.764451 \n",
|
||
"\n",
|
||
" passage: Ich laufe im Park \\\n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.657670 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.698420 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.659513 \n",
|
||
"passage: Blockierung der Förderschnecke 0.687888 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.683493 \n",
|
||
"passage: Überprüfung der Hydraulik 0.698003 \n",
|
||
"passage: Ich gehe spazieren 0.842675 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.727476 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.769349 \n",
|
||
"passage: Ich laufe im Park 1.000000 \n",
|
||
"passage: Ich laufe im Pakr 0.884060 \n",
|
||
"\n",
|
||
" passage: Ich laufe im Pakr \n",
|
||
"passage: Ölleckage durch undichten Ölsumpf 0.724280 \n",
|
||
"passage: Überprüfung der Schwingungsdämpfer 0.731654 \n",
|
||
"passage: Überprüfung der Kühlmittelsysteme 0.721201 \n",
|
||
"passage: Blockierung der Förderschnecke 0.734692 \n",
|
||
"passage: Überhitzung durch mangelnde Kühlmittel... 0.741511 \n",
|
||
"passage: Überprüfung der Hydraulik 0.739768 \n",
|
||
"passage: Ich gehe spazieren 0.794528 \n",
|
||
"passage: Heute um zwölf war ich unterwegs 0.754443 \n",
|
||
"passage: Ich gehe mit dem Hund raus 0.764451 \n",
|
||
"passage: Ich laufe im Park 0.884060 \n",
|
||
"passage: Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_new_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "d50deccd-08b8-4bdb-8411-d23c7a4d41f3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"No sentence-transformers model found with name aari1995/German_Semantic_STS_V2. Creating a new one with mean pooling.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.503683</td>\n",
|
||
" <td>0.541287</td>\n",
|
||
" <td>0.588917</td>\n",
|
||
" <td>0.611160</td>\n",
|
||
" <td>0.597156</td>\n",
|
||
" <td>0.440487</td>\n",
|
||
" <td>0.447261</td>\n",
|
||
" <td>0.434366</td>\n",
|
||
" <td>0.482150</td>\n",
|
||
" <td>0.536017</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.503683</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.687819</td>\n",
|
||
" <td>0.584369</td>\n",
|
||
" <td>0.551573</td>\n",
|
||
" <td>0.674399</td>\n",
|
||
" <td>0.474264</td>\n",
|
||
" <td>0.466305</td>\n",
|
||
" <td>0.437665</td>\n",
|
||
" <td>0.480334</td>\n",
|
||
" <td>0.504029</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.541287</td>\n",
|
||
" <td>0.687819</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.613677</td>\n",
|
||
" <td>0.766551</td>\n",
|
||
" <td>0.779090</td>\n",
|
||
" <td>0.434425</td>\n",
|
||
" <td>0.462885</td>\n",
|
||
" <td>0.410406</td>\n",
|
||
" <td>0.478038</td>\n",
|
||
" <td>0.470977</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.588917</td>\n",
|
||
" <td>0.584369</td>\n",
|
||
" <td>0.613677</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.616717</td>\n",
|
||
" <td>0.589420</td>\n",
|
||
" <td>0.460361</td>\n",
|
||
" <td>0.459320</td>\n",
|
||
" <td>0.411345</td>\n",
|
||
" <td>0.511038</td>\n",
|
||
" <td>0.511924</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.611160</td>\n",
|
||
" <td>0.551573</td>\n",
|
||
" <td>0.766551</td>\n",
|
||
" <td>0.616717</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.563489</td>\n",
|
||
" <td>0.475599</td>\n",
|
||
" <td>0.522965</td>\n",
|
||
" <td>0.406199</td>\n",
|
||
" <td>0.491175</td>\n",
|
||
" <td>0.512755</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.597156</td>\n",
|
||
" <td>0.674399</td>\n",
|
||
" <td>0.779090</td>\n",
|
||
" <td>0.589420</td>\n",
|
||
" <td>0.563489</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.448907</td>\n",
|
||
" <td>0.427661</td>\n",
|
||
" <td>0.434419</td>\n",
|
||
" <td>0.468770</td>\n",
|
||
" <td>0.490589</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <td>0.440487</td>\n",
|
||
" <td>0.474264</td>\n",
|
||
" <td>0.434425</td>\n",
|
||
" <td>0.460361</td>\n",
|
||
" <td>0.475599</td>\n",
|
||
" <td>0.448907</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.709718</td>\n",
|
||
" <td>0.708631</td>\n",
|
||
" <td>0.801886</td>\n",
|
||
" <td>0.719697</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.447261</td>\n",
|
||
" <td>0.466305</td>\n",
|
||
" <td>0.462885</td>\n",
|
||
" <td>0.459320</td>\n",
|
||
" <td>0.522965</td>\n",
|
||
" <td>0.427661</td>\n",
|
||
" <td>0.709718</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.570469</td>\n",
|
||
" <td>0.565510</td>\n",
|
||
" <td>0.590207</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.434366</td>\n",
|
||
" <td>0.437665</td>\n",
|
||
" <td>0.410406</td>\n",
|
||
" <td>0.411345</td>\n",
|
||
" <td>0.406199</td>\n",
|
||
" <td>0.434419</td>\n",
|
||
" <td>0.708631</td>\n",
|
||
" <td>0.570469</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.597758</td>\n",
|
||
" <td>0.544430</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <td>0.482150</td>\n",
|
||
" <td>0.480334</td>\n",
|
||
" <td>0.478038</td>\n",
|
||
" <td>0.511038</td>\n",
|
||
" <td>0.491175</td>\n",
|
||
" <td>0.468770</td>\n",
|
||
" <td>0.801886</td>\n",
|
||
" <td>0.565510</td>\n",
|
||
" <td>0.597758</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.840089</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" <td>0.536017</td>\n",
|
||
" <td>0.504029</td>\n",
|
||
" <td>0.470977</td>\n",
|
||
" <td>0.511924</td>\n",
|
||
" <td>0.512755</td>\n",
|
||
" <td>0.490589</td>\n",
|
||
" <td>0.719697</td>\n",
|
||
" <td>0.590207</td>\n",
|
||
" <td>0.544430</td>\n",
|
||
" <td>0.840089</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Ölleckage durch undichten Ölsumpf \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.503683 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.541287 \n",
|
||
"Blockierung der Förderschnecke 0.588917 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.611160 \n",
|
||
"Überprüfung der Hydraulik 0.597156 \n",
|
||
"Ich gehe spazieren 0.440487 \n",
|
||
"Heute um zwölf war ich unterwegs 0.447261 \n",
|
||
"Ich gehe mit dem Hund raus 0.434366 \n",
|
||
"Ich laufe im Park 0.482150 \n",
|
||
"Ich laufe im Pakr 0.536017 \n",
|
||
"\n",
|
||
" Überprüfung der Schwingungsdämpfer \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.503683 \n",
|
||
"Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.687819 \n",
|
||
"Blockierung der Förderschnecke 0.584369 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.551573 \n",
|
||
"Überprüfung der Hydraulik 0.674399 \n",
|
||
"Ich gehe spazieren 0.474264 \n",
|
||
"Heute um zwölf war ich unterwegs 0.466305 \n",
|
||
"Ich gehe mit dem Hund raus 0.437665 \n",
|
||
"Ich laufe im Park 0.480334 \n",
|
||
"Ich laufe im Pakr 0.504029 \n",
|
||
"\n",
|
||
" Überprüfung der Kühlmittelsysteme \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.541287 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.687819 \n",
|
||
"Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"Blockierung der Förderschnecke 0.613677 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.766551 \n",
|
||
"Überprüfung der Hydraulik 0.779090 \n",
|
||
"Ich gehe spazieren 0.434425 \n",
|
||
"Heute um zwölf war ich unterwegs 0.462885 \n",
|
||
"Ich gehe mit dem Hund raus 0.410406 \n",
|
||
"Ich laufe im Park 0.478038 \n",
|
||
"Ich laufe im Pakr 0.470977 \n",
|
||
"\n",
|
||
" Blockierung der Förderschnecke \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.588917 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.584369 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.613677 \n",
|
||
"Blockierung der Förderschnecke 1.000000 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.616717 \n",
|
||
"Überprüfung der Hydraulik 0.589420 \n",
|
||
"Ich gehe spazieren 0.460361 \n",
|
||
"Heute um zwölf war ich unterwegs 0.459320 \n",
|
||
"Ich gehe mit dem Hund raus 0.411345 \n",
|
||
"Ich laufe im Park 0.511038 \n",
|
||
"Ich laufe im Pakr 0.511924 \n",
|
||
"\n",
|
||
" Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.611160 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.551573 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.766551 \n",
|
||
"Blockierung der Förderschnecke 0.616717 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 1.000000 \n",
|
||
"Überprüfung der Hydraulik 0.563489 \n",
|
||
"Ich gehe spazieren 0.475599 \n",
|
||
"Heute um zwölf war ich unterwegs 0.522965 \n",
|
||
"Ich gehe mit dem Hund raus 0.406199 \n",
|
||
"Ich laufe im Park 0.491175 \n",
|
||
"Ich laufe im Pakr 0.512755 \n",
|
||
"\n",
|
||
" Überprüfung der Hydraulik \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.597156 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.674399 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.779090 \n",
|
||
"Blockierung der Förderschnecke 0.589420 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.563489 \n",
|
||
"Überprüfung der Hydraulik 1.000000 \n",
|
||
"Ich gehe spazieren 0.448907 \n",
|
||
"Heute um zwölf war ich unterwegs 0.427661 \n",
|
||
"Ich gehe mit dem Hund raus 0.434419 \n",
|
||
"Ich laufe im Park 0.468770 \n",
|
||
"Ich laufe im Pakr 0.490589 \n",
|
||
"\n",
|
||
" Ich gehe spazieren \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.440487 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.474264 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.434425 \n",
|
||
"Blockierung der Förderschnecke 0.460361 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.475599 \n",
|
||
"Überprüfung der Hydraulik 0.448907 \n",
|
||
"Ich gehe spazieren 1.000000 \n",
|
||
"Heute um zwölf war ich unterwegs 0.709718 \n",
|
||
"Ich gehe mit dem Hund raus 0.708631 \n",
|
||
"Ich laufe im Park 0.801886 \n",
|
||
"Ich laufe im Pakr 0.719697 \n",
|
||
"\n",
|
||
" Heute um zwölf war ich unterwegs \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.447261 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.466305 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.462885 \n",
|
||
"Blockierung der Förderschnecke 0.459320 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.522965 \n",
|
||
"Überprüfung der Hydraulik 0.427661 \n",
|
||
"Ich gehe spazieren 0.709718 \n",
|
||
"Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"Ich gehe mit dem Hund raus 0.570469 \n",
|
||
"Ich laufe im Park 0.565510 \n",
|
||
"Ich laufe im Pakr 0.590207 \n",
|
||
"\n",
|
||
" Ich gehe mit dem Hund raus \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.434366 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.437665 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.410406 \n",
|
||
"Blockierung der Förderschnecke 0.411345 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.406199 \n",
|
||
"Überprüfung der Hydraulik 0.434419 \n",
|
||
"Ich gehe spazieren 0.708631 \n",
|
||
"Heute um zwölf war ich unterwegs 0.570469 \n",
|
||
"Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"Ich laufe im Park 0.597758 \n",
|
||
"Ich laufe im Pakr 0.544430 \n",
|
||
"\n",
|
||
" Ich laufe im Park \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.482150 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.480334 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.478038 \n",
|
||
"Blockierung der Förderschnecke 0.511038 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.491175 \n",
|
||
"Überprüfung der Hydraulik 0.468770 \n",
|
||
"Ich gehe spazieren 0.801886 \n",
|
||
"Heute um zwölf war ich unterwegs 0.565510 \n",
|
||
"Ich gehe mit dem Hund raus 0.597758 \n",
|
||
"Ich laufe im Park 1.000000 \n",
|
||
"Ich laufe im Pakr 0.840089 \n",
|
||
"\n",
|
||
" Ich laufe im Pakr \n",
|
||
"Ölleckage durch undichten Ölsumpf 0.536017 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.504029 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.470977 \n",
|
||
"Blockierung der Förderschnecke 0.511924 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.512755 \n",
|
||
"Überprüfung der Hydraulik 0.490589 \n",
|
||
"Ich gehe spazieren 0.719697 \n",
|
||
"Heute um zwölf war ich unterwegs 0.590207 \n",
|
||
"Ich gehe mit dem Hund raus 0.544430 \n",
|
||
"Ich laufe im Park 0.840089 \n",
|
||
"Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"MODEL = 'aari1995/German_Semantic_STS_V2'\n",
|
||
"df_alt_model = load_alt_model(MODEL, docs)\n",
|
||
"df_alt_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "fd2e618c-2f83-4946-9c84-8852a3daebb7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.536319</td>\n",
|
||
" <td>0.429156</td>\n",
|
||
" <td>0.552233</td>\n",
|
||
" <td>0.434099</td>\n",
|
||
" <td>0.650479</td>\n",
|
||
" <td>0.162975</td>\n",
|
||
" <td>0.234531</td>\n",
|
||
" <td>0.152853</td>\n",
|
||
" <td>0.143636</td>\n",
|
||
" <td>0.307780</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.536319</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.688000</td>\n",
|
||
" <td>0.573079</td>\n",
|
||
" <td>0.453239</td>\n",
|
||
" <td>0.740294</td>\n",
|
||
" <td>0.125701</td>\n",
|
||
" <td>0.161781</td>\n",
|
||
" <td>0.121361</td>\n",
|
||
" <td>0.125878</td>\n",
|
||
" <td>0.239996</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.429156</td>\n",
|
||
" <td>0.688000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.387116</td>\n",
|
||
" <td>0.770696</td>\n",
|
||
" <td>0.642362</td>\n",
|
||
" <td>0.123575</td>\n",
|
||
" <td>0.132245</td>\n",
|
||
" <td>0.078901</td>\n",
|
||
" <td>0.103046</td>\n",
|
||
" <td>0.202816</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.552233</td>\n",
|
||
" <td>0.573079</td>\n",
|
||
" <td>0.387116</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.392658</td>\n",
|
||
" <td>0.481602</td>\n",
|
||
" <td>0.165187</td>\n",
|
||
" <td>0.190671</td>\n",
|
||
" <td>0.102584</td>\n",
|
||
" <td>0.165351</td>\n",
|
||
" <td>0.357348</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.434099</td>\n",
|
||
" <td>0.453239</td>\n",
|
||
" <td>0.770696</td>\n",
|
||
" <td>0.392658</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.400166</td>\n",
|
||
" <td>0.064790</td>\n",
|
||
" <td>0.122777</td>\n",
|
||
" <td>0.057485</td>\n",
|
||
" <td>0.093556</td>\n",
|
||
" <td>0.185604</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.650479</td>\n",
|
||
" <td>0.740294</td>\n",
|
||
" <td>0.642362</td>\n",
|
||
" <td>0.481602</td>\n",
|
||
" <td>0.400166</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.148099</td>\n",
|
||
" <td>0.194394</td>\n",
|
||
" <td>0.142136</td>\n",
|
||
" <td>0.129403</td>\n",
|
||
" <td>0.241858</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <td>0.162975</td>\n",
|
||
" <td>0.125701</td>\n",
|
||
" <td>0.123575</td>\n",
|
||
" <td>0.165187</td>\n",
|
||
" <td>0.064790</td>\n",
|
||
" <td>0.148099</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.604538</td>\n",
|
||
" <td>0.693227</td>\n",
|
||
" <td>0.605779</td>\n",
|
||
" <td>0.577280</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.234531</td>\n",
|
||
" <td>0.161781</td>\n",
|
||
" <td>0.132245</td>\n",
|
||
" <td>0.190671</td>\n",
|
||
" <td>0.122777</td>\n",
|
||
" <td>0.194394</td>\n",
|
||
" <td>0.604538</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.517527</td>\n",
|
||
" <td>0.488817</td>\n",
|
||
" <td>0.614568</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.152853</td>\n",
|
||
" <td>0.121361</td>\n",
|
||
" <td>0.078901</td>\n",
|
||
" <td>0.102584</td>\n",
|
||
" <td>0.057485</td>\n",
|
||
" <td>0.142136</td>\n",
|
||
" <td>0.693227</td>\n",
|
||
" <td>0.517527</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.481252</td>\n",
|
||
" <td>0.606504</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <td>0.143636</td>\n",
|
||
" <td>0.125878</td>\n",
|
||
" <td>0.103046</td>\n",
|
||
" <td>0.165351</td>\n",
|
||
" <td>0.093556</td>\n",
|
||
" <td>0.129403</td>\n",
|
||
" <td>0.605779</td>\n",
|
||
" <td>0.488817</td>\n",
|
||
" <td>0.481252</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.638209</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" <td>0.307780</td>\n",
|
||
" <td>0.239996</td>\n",
|
||
" <td>0.202816</td>\n",
|
||
" <td>0.357348</td>\n",
|
||
" <td>0.185604</td>\n",
|
||
" <td>0.241858</td>\n",
|
||
" <td>0.577280</td>\n",
|
||
" <td>0.614568</td>\n",
|
||
" <td>0.606504</td>\n",
|
||
" <td>0.638209</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Ölleckage durch undichten Ölsumpf \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.536319 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.429156 \n",
|
||
"Blockierung der Förderschnecke 0.552233 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.434099 \n",
|
||
"Überprüfung der Hydraulik 0.650479 \n",
|
||
"Ich gehe spazieren 0.162975 \n",
|
||
"Heute um zwölf war ich unterwegs 0.234531 \n",
|
||
"Ich gehe mit dem Hund raus 0.152853 \n",
|
||
"Ich laufe im Park 0.143636 \n",
|
||
"Ich laufe im Pakr 0.307780 \n",
|
||
"\n",
|
||
" Überprüfung der Schwingungsdämpfer \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.536319 \n",
|
||
"Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.688000 \n",
|
||
"Blockierung der Förderschnecke 0.573079 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.453239 \n",
|
||
"Überprüfung der Hydraulik 0.740294 \n",
|
||
"Ich gehe spazieren 0.125701 \n",
|
||
"Heute um zwölf war ich unterwegs 0.161781 \n",
|
||
"Ich gehe mit dem Hund raus 0.121361 \n",
|
||
"Ich laufe im Park 0.125878 \n",
|
||
"Ich laufe im Pakr 0.239996 \n",
|
||
"\n",
|
||
" Überprüfung der Kühlmittelsysteme \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.429156 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.688000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"Blockierung der Förderschnecke 0.387116 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.770696 \n",
|
||
"Überprüfung der Hydraulik 0.642362 \n",
|
||
"Ich gehe spazieren 0.123575 \n",
|
||
"Heute um zwölf war ich unterwegs 0.132245 \n",
|
||
"Ich gehe mit dem Hund raus 0.078901 \n",
|
||
"Ich laufe im Park 0.103046 \n",
|
||
"Ich laufe im Pakr 0.202816 \n",
|
||
"\n",
|
||
" Blockierung der Förderschnecke \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.552233 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.573079 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.387116 \n",
|
||
"Blockierung der Förderschnecke 1.000000 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.392658 \n",
|
||
"Überprüfung der Hydraulik 0.481602 \n",
|
||
"Ich gehe spazieren 0.165187 \n",
|
||
"Heute um zwölf war ich unterwegs 0.190671 \n",
|
||
"Ich gehe mit dem Hund raus 0.102584 \n",
|
||
"Ich laufe im Park 0.165351 \n",
|
||
"Ich laufe im Pakr 0.357348 \n",
|
||
"\n",
|
||
" Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.434099 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.453239 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.770696 \n",
|
||
"Blockierung der Förderschnecke 0.392658 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 1.000000 \n",
|
||
"Überprüfung der Hydraulik 0.400166 \n",
|
||
"Ich gehe spazieren 0.064790 \n",
|
||
"Heute um zwölf war ich unterwegs 0.122777 \n",
|
||
"Ich gehe mit dem Hund raus 0.057485 \n",
|
||
"Ich laufe im Park 0.093556 \n",
|
||
"Ich laufe im Pakr 0.185604 \n",
|
||
"\n",
|
||
" Überprüfung der Hydraulik \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.650479 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.740294 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.642362 \n",
|
||
"Blockierung der Förderschnecke 0.481602 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.400166 \n",
|
||
"Überprüfung der Hydraulik 1.000000 \n",
|
||
"Ich gehe spazieren 0.148099 \n",
|
||
"Heute um zwölf war ich unterwegs 0.194394 \n",
|
||
"Ich gehe mit dem Hund raus 0.142136 \n",
|
||
"Ich laufe im Park 0.129403 \n",
|
||
"Ich laufe im Pakr 0.241858 \n",
|
||
"\n",
|
||
" Ich gehe spazieren \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.162975 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.125701 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.123575 \n",
|
||
"Blockierung der Förderschnecke 0.165187 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.064790 \n",
|
||
"Überprüfung der Hydraulik 0.148099 \n",
|
||
"Ich gehe spazieren 1.000000 \n",
|
||
"Heute um zwölf war ich unterwegs 0.604538 \n",
|
||
"Ich gehe mit dem Hund raus 0.693227 \n",
|
||
"Ich laufe im Park 0.605779 \n",
|
||
"Ich laufe im Pakr 0.577280 \n",
|
||
"\n",
|
||
" Heute um zwölf war ich unterwegs \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.234531 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.161781 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.132245 \n",
|
||
"Blockierung der Förderschnecke 0.190671 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.122777 \n",
|
||
"Überprüfung der Hydraulik 0.194394 \n",
|
||
"Ich gehe spazieren 0.604538 \n",
|
||
"Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"Ich gehe mit dem Hund raus 0.517527 \n",
|
||
"Ich laufe im Park 0.488817 \n",
|
||
"Ich laufe im Pakr 0.614568 \n",
|
||
"\n",
|
||
" Ich gehe mit dem Hund raus \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.152853 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.121361 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.078901 \n",
|
||
"Blockierung der Förderschnecke 0.102584 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.057485 \n",
|
||
"Überprüfung der Hydraulik 0.142136 \n",
|
||
"Ich gehe spazieren 0.693227 \n",
|
||
"Heute um zwölf war ich unterwegs 0.517527 \n",
|
||
"Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"Ich laufe im Park 0.481252 \n",
|
||
"Ich laufe im Pakr 0.606504 \n",
|
||
"\n",
|
||
" Ich laufe im Park \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.143636 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.125878 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.103046 \n",
|
||
"Blockierung der Förderschnecke 0.165351 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.093556 \n",
|
||
"Überprüfung der Hydraulik 0.129403 \n",
|
||
"Ich gehe spazieren 0.605779 \n",
|
||
"Heute um zwölf war ich unterwegs 0.488817 \n",
|
||
"Ich gehe mit dem Hund raus 0.481252 \n",
|
||
"Ich laufe im Park 1.000000 \n",
|
||
"Ich laufe im Pakr 0.638209 \n",
|
||
"\n",
|
||
" Ich laufe im Pakr \n",
|
||
"Ölleckage durch undichten Ölsumpf 0.307780 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.239996 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.202816 \n",
|
||
"Blockierung der Förderschnecke 0.357348 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.185604 \n",
|
||
"Überprüfung der Hydraulik 0.241858 \n",
|
||
"Ich gehe spazieren 0.577280 \n",
|
||
"Heute um zwölf war ich unterwegs 0.614568 \n",
|
||
"Ich gehe mit dem Hund raus 0.606504 \n",
|
||
"Ich laufe im Park 0.638209 \n",
|
||
"Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"MODEL = 'paraphrase-multilingual-mpnet-base-v2'\n",
|
||
"df_alt_model2 = load_alt_model(MODEL, docs)\n",
|
||
"df_alt_model2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e6d41e31-f31e-46a0-bb54-769c6c5ffbd9",
|
||
"metadata": {},
|
||
"source": [
|
||
"---"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "4f3c063c-713c-4163-93fc-d9eca7817d23",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b83e5bf6-e3ed-46bc-aab5-df1545086edf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "526c4e99-cb51-4a79-a728-cbcd45d2e012",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"id": "6d8d0732-4eac-40e6-8d5e-e8c588e22417",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sentence_transformers.SentenceTransformer import SentenceTransformer"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"id": "eaef7f12-60f6-4705-bf20-6b62a937045b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "dc5cb3bab92044cfa2d71561e3edf996",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"configuration_bert.py: 0%| | 0.00/8.24k [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-bert-implementation:\n",
|
||
"- configuration_bert.py\n",
|
||
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"#model = SentenceTransformer('jinaai/jina-embeddings-v2-base-de', trust_remote_code=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "bcbd2b6a-7398-49b0-a7c7-51176e2e4a69",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = load_models(STFRModelTypes.JINAAI_BASE_DE_V2, trust_remote=True, use_onnx=True)\n",
|
||
"if model.max_seq_length > 1024:\n",
|
||
" model.max_seq_length = 1024"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"id": "d8aebacc-5048-49c5-b225-0b77ad621ba8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['Ölleckage durch undichten Ölsumpf',\n",
|
||
" 'Überprüfung der Schwingungsdämpfer',\n",
|
||
" 'Überprüfung der Kühlmittelsysteme',\n",
|
||
" 'Blockierung der Förderschnecke',\n",
|
||
" 'Überhitzung durch mangelnde Kühlmittelzirkulation',\n",
|
||
" 'Überprüfung der Hydraulik',\n",
|
||
" 'Ich gehe spazieren',\n",
|
||
" 'Heute um zwölf war ich unterwegs',\n",
|
||
" 'Ich gehe mit dem Hund raus',\n",
|
||
" 'Ich laufe im Park',\n",
|
||
" 'Ich laufe im Pakr']"
|
||
]
|
||
},
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"docs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "dd2c5f0a-8e59-40a0-bdf4-c8361647408f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"embeddings = model.encode(docs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"id": "150d1810-1615-4171-ba65-05499c915958",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sims = model.similarity(embeddings, embeddings).numpy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "6b50f5a9-8b94-4b8a-9b39-35e8c6117010",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import torch\n",
|
||
"from transformers import AutoModel\n",
|
||
"from numpy.linalg import norm\n",
|
||
"\n",
|
||
"from sentence_transformers.util import cos_sim"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "2a0064f9-ba55-458d-aaba-aff8fcab42f0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#cos_sim = lambda a,b: (a @ b.T) / (norm(a)*norm(b))\n",
|
||
"model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-de', trust_remote_code=True, torch_dtype=torch.float32)\n",
|
||
"embeddings = model.encode(docs, max_length=1024)\n",
|
||
"#print(cos_sim(embeddings, embeddings))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "af0ee7f7-2c29-4852-9ef3-b6177474d5b3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#sims = cos_sim(embeddings, embeddings).numpy()\n",
|
||
"df_jinaai_model = pd.DataFrame(data=sims, index=docs, columns=docs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "275fba08-52a9-48f9-ab7a-965eec84ad70",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.300847</td>\n",
|
||
" <td>0.219643</td>\n",
|
||
" <td>0.418877</td>\n",
|
||
" <td>0.303103</td>\n",
|
||
" <td>0.335572</td>\n",
|
||
" <td>0.291617</td>\n",
|
||
" <td>0.269322</td>\n",
|
||
" <td>0.249258</td>\n",
|
||
" <td>0.248138</td>\n",
|
||
" <td>0.299106</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.300847</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.690428</td>\n",
|
||
" <td>0.395861</td>\n",
|
||
" <td>0.311964</td>\n",
|
||
" <td>0.664567</td>\n",
|
||
" <td>0.104605</td>\n",
|
||
" <td>0.123543</td>\n",
|
||
" <td>0.098958</td>\n",
|
||
" <td>0.068270</td>\n",
|
||
" <td>0.231832</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.219643</td>\n",
|
||
" <td>0.690428</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.216548</td>\n",
|
||
" <td>0.406239</td>\n",
|
||
" <td>0.674355</td>\n",
|
||
" <td>0.118593</td>\n",
|
||
" <td>0.100127</td>\n",
|
||
" <td>0.045652</td>\n",
|
||
" <td>0.112418</td>\n",
|
||
" <td>0.174270</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.418877</td>\n",
|
||
" <td>0.395861</td>\n",
|
||
" <td>0.216548</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.372174</td>\n",
|
||
" <td>0.256570</td>\n",
|
||
" <td>0.161816</td>\n",
|
||
" <td>0.111119</td>\n",
|
||
" <td>0.134641</td>\n",
|
||
" <td>0.244183</td>\n",
|
||
" <td>0.238573</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.303103</td>\n",
|
||
" <td>0.311964</td>\n",
|
||
" <td>0.406239</td>\n",
|
||
" <td>0.372174</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.287111</td>\n",
|
||
" <td>0.118686</td>\n",
|
||
" <td>0.140934</td>\n",
|
||
" <td>0.104992</td>\n",
|
||
" <td>0.207168</td>\n",
|
||
" <td>0.147596</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.335572</td>\n",
|
||
" <td>0.664567</td>\n",
|
||
" <td>0.674355</td>\n",
|
||
" <td>0.256570</td>\n",
|
||
" <td>0.287111</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.142494</td>\n",
|
||
" <td>0.028539</td>\n",
|
||
" <td>0.014706</td>\n",
|
||
" <td>0.132489</td>\n",
|
||
" <td>0.233765</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <td>0.291617</td>\n",
|
||
" <td>0.104605</td>\n",
|
||
" <td>0.118593</td>\n",
|
||
" <td>0.161816</td>\n",
|
||
" <td>0.118686</td>\n",
|
||
" <td>0.142494</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.429125</td>\n",
|
||
" <td>0.552410</td>\n",
|
||
" <td>0.713886</td>\n",
|
||
" <td>0.528205</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.269322</td>\n",
|
||
" <td>0.123543</td>\n",
|
||
" <td>0.100127</td>\n",
|
||
" <td>0.111119</td>\n",
|
||
" <td>0.140934</td>\n",
|
||
" <td>0.028539</td>\n",
|
||
" <td>0.429125</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.388809</td>\n",
|
||
" <td>0.457369</td>\n",
|
||
" <td>0.408289</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.249258</td>\n",
|
||
" <td>0.098958</td>\n",
|
||
" <td>0.045652</td>\n",
|
||
" <td>0.134641</td>\n",
|
||
" <td>0.104992</td>\n",
|
||
" <td>0.014706</td>\n",
|
||
" <td>0.552410</td>\n",
|
||
" <td>0.388809</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.533601</td>\n",
|
||
" <td>0.460647</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <td>0.248138</td>\n",
|
||
" <td>0.068270</td>\n",
|
||
" <td>0.112418</td>\n",
|
||
" <td>0.244183</td>\n",
|
||
" <td>0.207168</td>\n",
|
||
" <td>0.132489</td>\n",
|
||
" <td>0.713886</td>\n",
|
||
" <td>0.457369</td>\n",
|
||
" <td>0.533601</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.632991</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" <td>0.299106</td>\n",
|
||
" <td>0.231832</td>\n",
|
||
" <td>0.174270</td>\n",
|
||
" <td>0.238573</td>\n",
|
||
" <td>0.147596</td>\n",
|
||
" <td>0.233765</td>\n",
|
||
" <td>0.528205</td>\n",
|
||
" <td>0.408289</td>\n",
|
||
" <td>0.460647</td>\n",
|
||
" <td>0.632991</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Ölleckage durch undichten Ölsumpf \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.300847 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.219643 \n",
|
||
"Blockierung der Förderschnecke 0.418877 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.303103 \n",
|
||
"Überprüfung der Hydraulik 0.335572 \n",
|
||
"Ich gehe spazieren 0.291617 \n",
|
||
"Heute um zwölf war ich unterwegs 0.269322 \n",
|
||
"Ich gehe mit dem Hund raus 0.249258 \n",
|
||
"Ich laufe im Park 0.248138 \n",
|
||
"Ich laufe im Pakr 0.299106 \n",
|
||
"\n",
|
||
" Überprüfung der Schwingungsdämpfer \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.300847 \n",
|
||
"Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.690428 \n",
|
||
"Blockierung der Förderschnecke 0.395861 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.311964 \n",
|
||
"Überprüfung der Hydraulik 0.664567 \n",
|
||
"Ich gehe spazieren 0.104605 \n",
|
||
"Heute um zwölf war ich unterwegs 0.123543 \n",
|
||
"Ich gehe mit dem Hund raus 0.098958 \n",
|
||
"Ich laufe im Park 0.068270 \n",
|
||
"Ich laufe im Pakr 0.231832 \n",
|
||
"\n",
|
||
" Überprüfung der Kühlmittelsysteme \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.219643 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.690428 \n",
|
||
"Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"Blockierung der Förderschnecke 0.216548 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.406239 \n",
|
||
"Überprüfung der Hydraulik 0.674355 \n",
|
||
"Ich gehe spazieren 0.118593 \n",
|
||
"Heute um zwölf war ich unterwegs 0.100127 \n",
|
||
"Ich gehe mit dem Hund raus 0.045652 \n",
|
||
"Ich laufe im Park 0.112418 \n",
|
||
"Ich laufe im Pakr 0.174270 \n",
|
||
"\n",
|
||
" Blockierung der Förderschnecke \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.418877 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.395861 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.216548 \n",
|
||
"Blockierung der Förderschnecke 1.000000 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.372174 \n",
|
||
"Überprüfung der Hydraulik 0.256570 \n",
|
||
"Ich gehe spazieren 0.161816 \n",
|
||
"Heute um zwölf war ich unterwegs 0.111119 \n",
|
||
"Ich gehe mit dem Hund raus 0.134641 \n",
|
||
"Ich laufe im Park 0.244183 \n",
|
||
"Ich laufe im Pakr 0.238573 \n",
|
||
"\n",
|
||
" Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.303103 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.311964 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.406239 \n",
|
||
"Blockierung der Förderschnecke 0.372174 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 1.000000 \n",
|
||
"Überprüfung der Hydraulik 0.287111 \n",
|
||
"Ich gehe spazieren 0.118686 \n",
|
||
"Heute um zwölf war ich unterwegs 0.140934 \n",
|
||
"Ich gehe mit dem Hund raus 0.104992 \n",
|
||
"Ich laufe im Park 0.207168 \n",
|
||
"Ich laufe im Pakr 0.147596 \n",
|
||
"\n",
|
||
" Überprüfung der Hydraulik \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.335572 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.664567 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.674355 \n",
|
||
"Blockierung der Förderschnecke 0.256570 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.287111 \n",
|
||
"Überprüfung der Hydraulik 1.000000 \n",
|
||
"Ich gehe spazieren 0.142494 \n",
|
||
"Heute um zwölf war ich unterwegs 0.028539 \n",
|
||
"Ich gehe mit dem Hund raus 0.014706 \n",
|
||
"Ich laufe im Park 0.132489 \n",
|
||
"Ich laufe im Pakr 0.233765 \n",
|
||
"\n",
|
||
" Ich gehe spazieren \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.291617 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.104605 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.118593 \n",
|
||
"Blockierung der Förderschnecke 0.161816 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.118686 \n",
|
||
"Überprüfung der Hydraulik 0.142494 \n",
|
||
"Ich gehe spazieren 1.000000 \n",
|
||
"Heute um zwölf war ich unterwegs 0.429125 \n",
|
||
"Ich gehe mit dem Hund raus 0.552410 \n",
|
||
"Ich laufe im Park 0.713886 \n",
|
||
"Ich laufe im Pakr 0.528205 \n",
|
||
"\n",
|
||
" Heute um zwölf war ich unterwegs \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.269322 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.123543 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.100127 \n",
|
||
"Blockierung der Förderschnecke 0.111119 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.140934 \n",
|
||
"Überprüfung der Hydraulik 0.028539 \n",
|
||
"Ich gehe spazieren 0.429125 \n",
|
||
"Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"Ich gehe mit dem Hund raus 0.388809 \n",
|
||
"Ich laufe im Park 0.457369 \n",
|
||
"Ich laufe im Pakr 0.408289 \n",
|
||
"\n",
|
||
" Ich gehe mit dem Hund raus \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.249258 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.098958 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.045652 \n",
|
||
"Blockierung der Förderschnecke 0.134641 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.104992 \n",
|
||
"Überprüfung der Hydraulik 0.014706 \n",
|
||
"Ich gehe spazieren 0.552410 \n",
|
||
"Heute um zwölf war ich unterwegs 0.388809 \n",
|
||
"Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"Ich laufe im Park 0.533601 \n",
|
||
"Ich laufe im Pakr 0.460647 \n",
|
||
"\n",
|
||
" Ich laufe im Park \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.248138 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.068270 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.112418 \n",
|
||
"Blockierung der Förderschnecke 0.244183 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.207168 \n",
|
||
"Überprüfung der Hydraulik 0.132489 \n",
|
||
"Ich gehe spazieren 0.713886 \n",
|
||
"Heute um zwölf war ich unterwegs 0.457369 \n",
|
||
"Ich gehe mit dem Hund raus 0.533601 \n",
|
||
"Ich laufe im Park 1.000000 \n",
|
||
"Ich laufe im Pakr 0.632991 \n",
|
||
"\n",
|
||
" Ich laufe im Pakr \n",
|
||
"Ölleckage durch undichten Ölsumpf 0.299106 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.231832 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.174270 \n",
|
||
"Blockierung der Förderschnecke 0.238573 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.147596 \n",
|
||
"Überprüfung der Hydraulik 0.233765 \n",
|
||
"Ich gehe spazieren 0.528205 \n",
|
||
"Heute um zwölf war ich unterwegs 0.408289 \n",
|
||
"Ich gehe mit dem Hund raus 0.460647 \n",
|
||
"Ich laufe im Park 0.632991 \n",
|
||
"Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_jinaai_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "6439531e-657f-49e0-bc2e-5e108b4f54b3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Ölleckage durch undichten Ölsumpf</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.067791</td>\n",
|
||
" <td>0.160749</td>\n",
|
||
" <td>0.179933</td>\n",
|
||
" <td>0.210402</td>\n",
|
||
" <td>0.298898</td>\n",
|
||
" <td>0.114172</td>\n",
|
||
" <td>0.028209</td>\n",
|
||
" <td>0.024582</td>\n",
|
||
" <td>0.013934</td>\n",
|
||
" <td>0.107545</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Schwingungsdämpfer</th>\n",
|
||
" <td>0.067791</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.536279</td>\n",
|
||
" <td>0.220822</td>\n",
|
||
" <td>0.143585</td>\n",
|
||
" <td>0.591945</td>\n",
|
||
" <td>-0.103104</td>\n",
|
||
" <td>-0.096927</td>\n",
|
||
" <td>-0.102214</td>\n",
|
||
" <td>-0.088568</td>\n",
|
||
" <td>-0.005696</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Kühlmittelsysteme</th>\n",
|
||
" <td>0.160749</td>\n",
|
||
" <td>0.536279</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.058088</td>\n",
|
||
" <td>0.477660</td>\n",
|
||
" <td>0.518238</td>\n",
|
||
" <td>-0.054177</td>\n",
|
||
" <td>-0.041462</td>\n",
|
||
" <td>-0.132903</td>\n",
|
||
" <td>-0.010588</td>\n",
|
||
" <td>-0.007899</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Blockierung der Förderschnecke</th>\n",
|
||
" <td>0.179933</td>\n",
|
||
" <td>0.220822</td>\n",
|
||
" <td>0.058088</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.126646</td>\n",
|
||
" <td>0.076466</td>\n",
|
||
" <td>-0.021669</td>\n",
|
||
" <td>-0.005974</td>\n",
|
||
" <td>-0.007590</td>\n",
|
||
" <td>0.028118</td>\n",
|
||
" <td>0.035968</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überhitzung durch mangelnde Kühlmittelzirkulation</th>\n",
|
||
" <td>0.210402</td>\n",
|
||
" <td>0.143585</td>\n",
|
||
" <td>0.477660</td>\n",
|
||
" <td>0.126646</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.148309</td>\n",
|
||
" <td>-0.052826</td>\n",
|
||
" <td>-0.079253</td>\n",
|
||
" <td>-0.090977</td>\n",
|
||
" <td>0.016930</td>\n",
|
||
" <td>-0.007476</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Überprüfung der Hydraulik</th>\n",
|
||
" <td>0.298898</td>\n",
|
||
" <td>0.591945</td>\n",
|
||
" <td>0.518238</td>\n",
|
||
" <td>0.076466</td>\n",
|
||
" <td>0.148309</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>-0.066718</td>\n",
|
||
" <td>-0.098302</td>\n",
|
||
" <td>-0.108190</td>\n",
|
||
" <td>-0.046841</td>\n",
|
||
" <td>0.014736</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe spazieren</th>\n",
|
||
" <td>0.114172</td>\n",
|
||
" <td>-0.103104</td>\n",
|
||
" <td>-0.054177</td>\n",
|
||
" <td>-0.021669</td>\n",
|
||
" <td>-0.052826</td>\n",
|
||
" <td>-0.066718</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.313149</td>\n",
|
||
" <td>0.522301</td>\n",
|
||
" <td>0.511742</td>\n",
|
||
" <td>0.573060</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Heute um zwölf war ich unterwegs</th>\n",
|
||
" <td>0.028209</td>\n",
|
||
" <td>-0.096927</td>\n",
|
||
" <td>-0.041462</td>\n",
|
||
" <td>-0.005974</td>\n",
|
||
" <td>-0.079253</td>\n",
|
||
" <td>-0.098302</td>\n",
|
||
" <td>0.313149</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.265599</td>\n",
|
||
" <td>0.238650</td>\n",
|
||
" <td>0.338099</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich gehe mit dem Hund raus</th>\n",
|
||
" <td>0.024582</td>\n",
|
||
" <td>-0.102214</td>\n",
|
||
" <td>-0.132903</td>\n",
|
||
" <td>-0.007590</td>\n",
|
||
" <td>-0.090977</td>\n",
|
||
" <td>-0.108190</td>\n",
|
||
" <td>0.522301</td>\n",
|
||
" <td>0.265599</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.313890</td>\n",
|
||
" <td>0.369566</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Park</th>\n",
|
||
" <td>0.013934</td>\n",
|
||
" <td>-0.088568</td>\n",
|
||
" <td>-0.010588</td>\n",
|
||
" <td>0.028118</td>\n",
|
||
" <td>0.016930</td>\n",
|
||
" <td>-0.046841</td>\n",
|
||
" <td>0.511742</td>\n",
|
||
" <td>0.238650</td>\n",
|
||
" <td>0.313890</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.543645</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ich laufe im Pakr</th>\n",
|
||
" <td>0.107545</td>\n",
|
||
" <td>-0.005696</td>\n",
|
||
" <td>-0.007899</td>\n",
|
||
" <td>0.035968</td>\n",
|
||
" <td>-0.007476</td>\n",
|
||
" <td>0.014736</td>\n",
|
||
" <td>0.573060</td>\n",
|
||
" <td>0.338099</td>\n",
|
||
" <td>0.369566</td>\n",
|
||
" <td>0.543645</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Ölleckage durch undichten Ölsumpf \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 1.000000 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.067791 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.160749 \n",
|
||
"Blockierung der Förderschnecke 0.179933 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.210402 \n",
|
||
"Überprüfung der Hydraulik 0.298898 \n",
|
||
"Ich gehe spazieren 0.114172 \n",
|
||
"Heute um zwölf war ich unterwegs 0.028209 \n",
|
||
"Ich gehe mit dem Hund raus 0.024582 \n",
|
||
"Ich laufe im Park 0.013934 \n",
|
||
"Ich laufe im Pakr 0.107545 \n",
|
||
"\n",
|
||
" Überprüfung der Schwingungsdämpfer \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.067791 \n",
|
||
"Überprüfung der Schwingungsdämpfer 1.000000 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.536279 \n",
|
||
"Blockierung der Förderschnecke 0.220822 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.143585 \n",
|
||
"Überprüfung der Hydraulik 0.591945 \n",
|
||
"Ich gehe spazieren -0.103104 \n",
|
||
"Heute um zwölf war ich unterwegs -0.096927 \n",
|
||
"Ich gehe mit dem Hund raus -0.102214 \n",
|
||
"Ich laufe im Park -0.088568 \n",
|
||
"Ich laufe im Pakr -0.005696 \n",
|
||
"\n",
|
||
" Überprüfung der Kühlmittelsysteme \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.160749 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.536279 \n",
|
||
"Überprüfung der Kühlmittelsysteme 1.000000 \n",
|
||
"Blockierung der Förderschnecke 0.058088 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.477660 \n",
|
||
"Überprüfung der Hydraulik 0.518238 \n",
|
||
"Ich gehe spazieren -0.054177 \n",
|
||
"Heute um zwölf war ich unterwegs -0.041462 \n",
|
||
"Ich gehe mit dem Hund raus -0.132903 \n",
|
||
"Ich laufe im Park -0.010588 \n",
|
||
"Ich laufe im Pakr -0.007899 \n",
|
||
"\n",
|
||
" Blockierung der Förderschnecke \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.179933 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.220822 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.058088 \n",
|
||
"Blockierung der Förderschnecke 1.000000 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.126646 \n",
|
||
"Überprüfung der Hydraulik 0.076466 \n",
|
||
"Ich gehe spazieren -0.021669 \n",
|
||
"Heute um zwölf war ich unterwegs -0.005974 \n",
|
||
"Ich gehe mit dem Hund raus -0.007590 \n",
|
||
"Ich laufe im Park 0.028118 \n",
|
||
"Ich laufe im Pakr 0.035968 \n",
|
||
"\n",
|
||
" Überhitzung durch mangelnde Kühlmittelzirkulation \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.210402 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.143585 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.477660 \n",
|
||
"Blockierung der Förderschnecke 0.126646 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 1.000000 \n",
|
||
"Überprüfung der Hydraulik 0.148309 \n",
|
||
"Ich gehe spazieren -0.052826 \n",
|
||
"Heute um zwölf war ich unterwegs -0.079253 \n",
|
||
"Ich gehe mit dem Hund raus -0.090977 \n",
|
||
"Ich laufe im Park 0.016930 \n",
|
||
"Ich laufe im Pakr -0.007476 \n",
|
||
"\n",
|
||
" Überprüfung der Hydraulik \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.298898 \n",
|
||
"Überprüfung der Schwingungsdämpfer 0.591945 \n",
|
||
"Überprüfung der Kühlmittelsysteme 0.518238 \n",
|
||
"Blockierung der Förderschnecke 0.076466 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.148309 \n",
|
||
"Überprüfung der Hydraulik 1.000000 \n",
|
||
"Ich gehe spazieren -0.066718 \n",
|
||
"Heute um zwölf war ich unterwegs -0.098302 \n",
|
||
"Ich gehe mit dem Hund raus -0.108190 \n",
|
||
"Ich laufe im Park -0.046841 \n",
|
||
"Ich laufe im Pakr 0.014736 \n",
|
||
"\n",
|
||
" Ich gehe spazieren \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.114172 \n",
|
||
"Überprüfung der Schwingungsdämpfer -0.103104 \n",
|
||
"Überprüfung der Kühlmittelsysteme -0.054177 \n",
|
||
"Blockierung der Förderschnecke -0.021669 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation -0.052826 \n",
|
||
"Überprüfung der Hydraulik -0.066718 \n",
|
||
"Ich gehe spazieren 1.000000 \n",
|
||
"Heute um zwölf war ich unterwegs 0.313149 \n",
|
||
"Ich gehe mit dem Hund raus 0.522301 \n",
|
||
"Ich laufe im Park 0.511742 \n",
|
||
"Ich laufe im Pakr 0.573060 \n",
|
||
"\n",
|
||
" Heute um zwölf war ich unterwegs \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.028209 \n",
|
||
"Überprüfung der Schwingungsdämpfer -0.096927 \n",
|
||
"Überprüfung der Kühlmittelsysteme -0.041462 \n",
|
||
"Blockierung der Förderschnecke -0.005974 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation -0.079253 \n",
|
||
"Überprüfung der Hydraulik -0.098302 \n",
|
||
"Ich gehe spazieren 0.313149 \n",
|
||
"Heute um zwölf war ich unterwegs 1.000000 \n",
|
||
"Ich gehe mit dem Hund raus 0.265599 \n",
|
||
"Ich laufe im Park 0.238650 \n",
|
||
"Ich laufe im Pakr 0.338099 \n",
|
||
"\n",
|
||
" Ich gehe mit dem Hund raus \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.024582 \n",
|
||
"Überprüfung der Schwingungsdämpfer -0.102214 \n",
|
||
"Überprüfung der Kühlmittelsysteme -0.132903 \n",
|
||
"Blockierung der Förderschnecke -0.007590 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation -0.090977 \n",
|
||
"Überprüfung der Hydraulik -0.108190 \n",
|
||
"Ich gehe spazieren 0.522301 \n",
|
||
"Heute um zwölf war ich unterwegs 0.265599 \n",
|
||
"Ich gehe mit dem Hund raus 1.000000 \n",
|
||
"Ich laufe im Park 0.313890 \n",
|
||
"Ich laufe im Pakr 0.369566 \n",
|
||
"\n",
|
||
" Ich laufe im Park \\\n",
|
||
"Ölleckage durch undichten Ölsumpf 0.013934 \n",
|
||
"Überprüfung der Schwingungsdämpfer -0.088568 \n",
|
||
"Überprüfung der Kühlmittelsysteme -0.010588 \n",
|
||
"Blockierung der Förderschnecke 0.028118 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation 0.016930 \n",
|
||
"Überprüfung der Hydraulik -0.046841 \n",
|
||
"Ich gehe spazieren 0.511742 \n",
|
||
"Heute um zwölf war ich unterwegs 0.238650 \n",
|
||
"Ich gehe mit dem Hund raus 0.313890 \n",
|
||
"Ich laufe im Park 1.000000 \n",
|
||
"Ich laufe im Pakr 0.543645 \n",
|
||
"\n",
|
||
" Ich laufe im Pakr \n",
|
||
"Ölleckage durch undichten Ölsumpf 0.107545 \n",
|
||
"Überprüfung der Schwingungsdämpfer -0.005696 \n",
|
||
"Überprüfung der Kühlmittelsysteme -0.007899 \n",
|
||
"Blockierung der Förderschnecke 0.035968 \n",
|
||
"Überhitzung durch mangelnde Kühlmittelzirkulation -0.007476 \n",
|
||
"Überprüfung der Hydraulik 0.014736 \n",
|
||
"Ich gehe spazieren 0.573060 \n",
|
||
"Heute um zwölf war ich unterwegs 0.338099 \n",
|
||
"Ich gehe mit dem Hund raus 0.369566 \n",
|
||
"Ich laufe im Park 0.543645 \n",
|
||
"Ich laufe im Pakr 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# without ONNX\n",
|
||
"df_jinaai_model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6fa5e5e1-d240-4c03-9476-ea8481b775dc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "493ee59a-3b77-4874-aab1-38a06f46d626",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"id": "32881e82-4c14-4cbe-9465-a33adeba3e58",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from pathlib import Path"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"id": "3299252d-6c3b-46d4-a059-00377ff8b174",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_pth = Path(r'A:\\Arbeitsaufgaben\\lang-data\\in\\02_202307\\Export4.csv')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"id": "d05a6ad9-83a3-4868-b9fc-62e6fc58f37a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"assert data_pth.exists()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"id": "f47bbcea-1097-403f-a0b6-07c46ab972a2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from lang_main.pipelines import predefined\n",
|
||
"from lang_main.analysis import preprocessing as preproc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"id": "372dfded-8c6b-4a52-905d-904bb5d2c6e5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pipe_target_feat = predefined.build_base_target_feature_pipe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "696a5c03-92b1-4247-82a2-010cd9d2c1f9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2025-01-15 15:29:13 +0000 | lang_main:base:INFO | Starting pipeline >>Target_Feature<<...\n",
|
||
"INFO:lang_main.pipelines:Starting pipeline >>Target_Feature<<...\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | Loaded dataset successfully.\n",
|
||
"INFO:lang_main.analysis.preprocessing:Loaded dataset successfully.\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | Dataset properties: number of entries: 129020, number of features 20\n",
|
||
"INFO:lang_main.analysis.preprocessing:Dataset properties: number of entries: 129020, number of features 20\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | Number of duplicates over all features: 84\n",
|
||
"INFO:lang_main.analysis.preprocessing:Number of duplicates over all features: 84\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | Number of duplicates over subset >>['VorgangsID', 'ObjektID']<<: 725\n",
|
||
"INFO:lang_main.analysis.preprocessing:Number of duplicates over subset >>['VorgangsID', 'ObjektID']<<: 725\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | Removed all duplicates from dataset successfully.\n",
|
||
"INFO:lang_main.analysis.preprocessing:Removed all duplicates from dataset successfully.\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | New Dataset properties: number of entries: 128211, number of features 20\n",
|
||
"INFO:lang_main.analysis.preprocessing:New Dataset properties: number of entries: 128211, number of features 20\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:preprocessing:INFO | Removed NA entries for features >>['VorgangsBeschreibung']<< from dataset successfully.\n",
|
||
"INFO:lang_main.analysis.preprocessing:Removed NA entries for features >>['VorgangsBeschreibung']<< from dataset successfully.\n",
|
||
"2025-01-15 15:29:14 +0000 | lang_main:io:INFO | Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\Pipe-Target_Feature_Step-3_remove_NA.pkl\n",
|
||
"INFO:lang_main.shared:Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\Pipe-Target_Feature_Step-3_remove_NA.pkl\n",
|
||
"2025-01-15 15:29:15 +0000 | lang_main:shared:INFO | Successfully applied entry-wise cleansing procedure >>clean_string_slim<< for features >>['VorgangsBeschreibung']<<\n",
|
||
"INFO:lang_main.analysis.preprocessing:Successfully applied entry-wise cleansing procedure >>clean_string_slim<< for features >>['VorgangsBeschreibung']<<\n",
|
||
"2025-01-15 15:29:15 +0000 | lang_main:io:INFO | Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\TIMELINE.pkl\n",
|
||
"INFO:lang_main.shared:Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\TIMELINE.pkl\n",
|
||
"2025-01-15 15:29:15 +0000 | lang_main:preprocessing:INFO | Number of entries for feature >>VorgangsBeschreibung<<: 123457\n",
|
||
"INFO:lang_main.analysis.preprocessing:Number of entries for feature >>VorgangsBeschreibung<<: 123457\n",
|
||
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6772/6772 [00:35<00:00, 192.48it/s]\n",
|
||
"2025-01-15 15:29:50 +0000 | lang_main:io:INFO | Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\Pipe-Target_Feature_Step-5_analyse_feature.pkl\n",
|
||
"INFO:lang_main.shared:Saved file successfully under A:\\Arbeitsaufgaben\\lang-data\\out\\Pipe-Target_Feature_Step-5_analyse_feature.pkl\n",
|
||
"2025-01-15 15:29:50 +0000 | lang_main:base:INFO | Processing pipeline >>Target_Feature<< successfully ended after 5 steps.\n",
|
||
"INFO:lang_main.pipelines:Processing pipeline >>Target_Feature<< successfully ended after 5 steps.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"ret = pipe_target_feat.run(starting_values=(data_pth,))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"id": "c8b9c5af-5397-49ec-8cf6-b10745909e85",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df = ret[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"id": "4953b8af-fba7-471a-8c4a-1a3ca6c9d3ab",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>batched_idxs</th>\n",
|
||
" <th>entry</th>\n",
|
||
" <th>len</th>\n",
|
||
" <th>num_occur</th>\n",
|
||
" <th>assoc_obj_ids</th>\n",
|
||
" <th>num_assoc_obj_ids</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>[232, 241, 242, 244, 247, 249, 268, 269, 289, ...</td>\n",
|
||
" <td>Tägliche Wartungstätigkeiten nach Vorgabe des ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>92592</td>\n",
|
||
" <td>[0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53...</td>\n",
|
||
" <td>206</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>[37, 50, 57, 61, 129, 245, 246, 266, 353, 378,...</td>\n",
|
||
" <td>Wöchentliche Sichtkontrolle / Reinigung</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>1654</td>\n",
|
||
" <td>[301, 304, 305, 313, 314, 331, 332, 510, 511, ...</td>\n",
|
||
" <td>18</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>131</th>\n",
|
||
" <td>[179, 196, 216, 350, 355, 408, 426, 427, 428, ...</td>\n",
|
||
" <td>Tägliche Überprüfung der Ölabscheider</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1616</td>\n",
|
||
" <td>[0, 970, 2134, 2137]</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>[224, 276, 277, 278, 279, 280, 281, 282, 283, ...</td>\n",
|
||
" <td>Wöchentliche Kontrolle der WC-Anlagen</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1265</td>\n",
|
||
" <td>[1352, 1353, 1354, 1684, 1685, 1686, 1687, 168...</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>[191, 192, 194, 243, 248, 254, 296, 300, 302, ...</td>\n",
|
||
" <td>Halbjährliche Kontrolle des Stabbreithalters</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>687</td>\n",
|
||
" <td>[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6...</td>\n",
|
||
" <td>166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3721</th>\n",
|
||
" <td>[48008]</td>\n",
|
||
" <td>AS 64</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1139]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5085</th>\n",
|
||
" <td>[79568]</td>\n",
|
||
" <td>Triax</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[250]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6280</th>\n",
|
||
" <td>[116518]</td>\n",
|
||
" <td>fehlt</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1662]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3733</th>\n",
|
||
" <td>[48167]</td>\n",
|
||
" <td>95</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1139]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3736</th>\n",
|
||
" <td>[48170]</td>\n",
|
||
" <td>x</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>[1139]</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6772 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" batched_idxs \\\n",
|
||
"162 [232, 241, 242, 244, 247, 249, 268, 269, 289, ... \n",
|
||
"33 [37, 50, 57, 61, 129, 245, 246, 266, 353, 378,... \n",
|
||
"131 [179, 196, 216, 350, 355, 408, 426, 427, 428, ... \n",
|
||
"160 [224, 276, 277, 278, 279, 280, 281, 282, 283, ... \n",
|
||
"140 [191, 192, 194, 243, 248, 254, 296, 300, 302, ... \n",
|
||
"... ... \n",
|
||
"3721 [48008] \n",
|
||
"5085 [79568] \n",
|
||
"6280 [116518] \n",
|
||
"3733 [48167] \n",
|
||
"3736 [48170] \n",
|
||
"\n",
|
||
" entry len num_occur \\\n",
|
||
"162 Tägliche Wartungstätigkeiten nach Vorgabe des ... 66 92592 \n",
|
||
"33 Wöchentliche Sichtkontrolle / Reinigung 39 1654 \n",
|
||
"131 Tägliche Überprüfung der Ölabscheider 37 1616 \n",
|
||
"160 Wöchentliche Kontrolle der WC-Anlagen 37 1265 \n",
|
||
"140 Halbjährliche Kontrolle des Stabbreithalters 44 687 \n",
|
||
"... ... .. ... \n",
|
||
"3721 AS 64 5 1 \n",
|
||
"5085 Triax 5 1 \n",
|
||
"6280 fehlt 5 1 \n",
|
||
"3733 95 2 1 \n",
|
||
"3736 x 1 1 \n",
|
||
"\n",
|
||
" assoc_obj_ids num_assoc_obj_ids \n",
|
||
"162 [0, 17, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53... 206 \n",
|
||
"33 [301, 304, 305, 313, 314, 331, 332, 510, 511, ... 18 \n",
|
||
"131 [0, 970, 2134, 2137] 4 \n",
|
||
"160 [1352, 1353, 1354, 1684, 1685, 1686, 1687, 168... 11 \n",
|
||
"140 [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 6... 166 \n",
|
||
"... ... ... \n",
|
||
"3721 [1139] 1 \n",
|
||
"5085 [250] 1 \n",
|
||
"6280 [1662] 1 \n",
|
||
"3733 [1139] 1 \n",
|
||
"3736 [1139] 1 \n",
|
||
"\n",
|
||
"[6772 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 100,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5315fdc7-79b9-40d7-b06b-60797b389f3c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"id": "aec149fc-1064-4fb9-9e50-37ca7e8148bc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cropped = df.iloc[:1000]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"id": "88f02b12-82de-423a-aaeb-1d7cfdf6d15e",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"entries = tuple(cropped['entry'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 103,
|
||
"id": "4283f2d4-1e69-4363-8aef-bfd14b88eb94",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'query: Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers'"
|
||
]
|
||
},
|
||
"execution_count": 103,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# adaption to new model\n",
|
||
"query = 'query: Retrieve semantically similar text: '\n",
|
||
"new_entries = []\n",
|
||
"for doc in entries:\n",
|
||
" new_doc = 'query: ' + doc\n",
|
||
" new_entries.append(new_doc)\n",
|
||
"new_entries[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 121,
|
||
"id": "1693c388-a1b5-4f9f-bcf0-86433c0b225f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dimensions = 1024\n",
|
||
"model = SentenceTransformer(STFRModelTypes.E5_BASE_STS_EN_DE)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 119,
|
||
"id": "0121f989-8c31-40e1-8c78-7c05869f7d08",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = model_quant"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 122,
|
||
"id": "a06970d6-728d-4fa6-99be-df07db37e271",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"52.8 s ± 1.09 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%%timeit\n",
|
||
"#model = load_models(model_name_new)\n",
|
||
"embds = model.encode(entries, convert_to_numpy=False, convert_to_tensor=True)\n",
|
||
"sims = model.similarity(embds, embds).numpy()\n",
|
||
"sims.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 120,
|
||
"id": "24617ed1-e48e-448a-a064-3e043bd46fac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"44.3 s ± 490 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%%timeit\n",
|
||
"#model = load_models(model_name_new)\n",
|
||
"embds = model.encode(entries, convert_to_numpy=False, convert_to_tensor=True)\n",
|
||
"sims = model.similarity(embds, embds).numpy()\n",
|
||
"sims.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8834ba82-9a4c-40c9-aeeb-cc3c4fbc7660",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3039073a-8920-4098-8a94-6039da1fdab5",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Test base model and alternative"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 157,
|
||
"id": "104df32f-a716-48cc-8af5-f6c60ae735dd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2025-01-09 12:33:23 +0000 | lang_main:preprocessing:INFO | Start merging of similarity candidates...\n",
|
||
"2025-01-09 12:34:04 +0000 | lang_main:preprocessing:INFO | Similarity candidates merged successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"base_model_res = preproc.merge_similarity_duplicates(cropped, model_ref, cos_sim_threshold=0.8)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 159,
|
||
"id": "dc0c1a27-b379-4aa1-bbe5-92305a182f8a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"base_model_res = base_model_res[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 160,
|
||
"id": "dd6b1e4b-c368-436c-9421-c6be185ed32e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"707"
|
||
]
|
||
},
|
||
"execution_count": 160,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(base_model_res)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "30baecfb-59ae-4e83-b74c-1f9390218561",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"THRESHOLD = 0.88"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 164,
|
||
"id": "80807637-d1d1-442c-a11e-6ad8300ed21b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2025-01-09 12:40:55 +0000 | lang_main:preprocessing:INFO | Start merging of similarity candidates...\n",
|
||
"2025-01-09 12:43:22 +0000 | lang_main:preprocessing:INFO | Similarity candidates merged successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"base_model_alt = preproc.merge_similarity_duplicates(cropped, model_alt, cos_sim_threshold=THRESHOLD)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 165,
|
||
"id": "73e557ee-fe47-4dfd-9001-3a280b510440",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"base_model_alt = base_model_alt[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 166,
|
||
"id": "c7399c19-a6eb-48fa-810b-cb9a8842f2a4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"425"
|
||
]
|
||
},
|
||
"execution_count": 166,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(base_model_alt)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d0f20e31-a471-491a-8ece-b85b9d297b78",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "64901ecf-17f1-4dc4-a03f-46920334758f",
|
||
"metadata": {},
|
||
"source": [
|
||
"- paraphrase-multilingual-mpnet-base-v2\n",
|
||
"- paraphrase-multilingual-MiniLM-L12-v2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "70286ffa-e1b1-44a5-89bd-6f9152b6a535",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fa1e11c7-ab37-40d5-8226-7be6ee308601",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "8740ae6e-2d3c-4d95-86ef-87d4b04f3157",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sentence_transformers.util import cos_sim"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"id": "d0c3a950-2102-40a7-975c-2a98d381a586",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import torch\n",
|
||
"from transformers import AutoModel\n",
|
||
"from numpy.linalg import norm\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "ebe337c7-8b21-421f-8095-f68ae37ce8d6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = load_models(STFRModelTypes.JINAAI_BASE_DE_V2, trust_remote=True, use_onnx=True)\n",
|
||
"if model.max_seq_length > 1024:\n",
|
||
" model.max_seq_length = 1024"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "29840a14-bf17-4cc4-bb4f-c3b00f2422ec",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "6fcbf4af-a678-4b0c-920c-27f3f3e2dee0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"embeddings = model.encode(entries)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"id": "9971f273-c4df-43c8-8abd-f07be4c8a3d4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sims, duration = benchmark_sims(model, entries)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"id": "2bdd781f-4beb-4d3a-8a77-61e56be0baae",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"54.76219579999997"
|
||
]
|
||
},
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"duration"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"id": "300c9bfe-62ce-4eb9-b707-23dee83d8f29",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 112,
|
||
"id": "6817f87c-7ce4-4300-9aac-667f678b7e5d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"THRESHOLD = 0.85\n",
|
||
"sim = sims.copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 113,
|
||
"id": "f773f222-b60b-458f-8b98-47ac969e3afc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.fill_diagonal(sim, 0)\n",
|
||
"sim = np.triu(sim)\n",
|
||
"#arr = np.where(sim > THRESHOLD, sim, 0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 114,
|
||
"id": "78b24c4e-0d9d-4496-ad99-5084a06ef441",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"499500"
|
||
]
|
||
},
|
||
"execution_count": 114,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"np.count_nonzero(sim)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 115,
|
||
"id": "b3475c04-a4cf-48a8-92f5-0762ba8d16a0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"idx = np.argwhere(sim >= THRESHOLD)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 116,
|
||
"id": "a0861792-2d52-4b2f-bd7f-6d5db85ce78d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([ 1, 15])"
|
||
]
|
||
},
|
||
"execution_count": 116,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"idx[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 117,
|
||
"id": "c97564aa-c75c-4428-8137-6c0aa94c3679",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Text pair with >>0.9210827350616455<<:\n",
|
||
"Wöchentliche Sichtkontrolle / Reinigung\n",
|
||
"---\n",
|
||
"Wöchentliche Sichtprüfung / Reinigung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8897931575775146<<:\n",
|
||
"Wöchentliche Sichtkontrolle / Reinigung\n",
|
||
"---\n",
|
||
"Monatliche Sichtkontrolle / Reinigung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8792937397956848<<:\n",
|
||
"Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"---\n",
|
||
"Monatliche Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9156877994537354<<:\n",
|
||
"Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"---\n",
|
||
"Prüfung von: Hr. Förster - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9321640133857727<<:\n",
|
||
"Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"---\n",
|
||
"Prüfung von: - Scharnier - Dichtung - Schließvorrichtung - Schloß - Türgriff - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8715114593505859<<:\n",
|
||
"Wöchentliche Sichtprüfung / Reinigung\n",
|
||
"---\n",
|
||
"Monatliche Sichtprüfung / Reinigung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8931483030319214<<:\n",
|
||
"Analyse von: Kesselwasser - Speisewasser - Kondensat Wasserverbrauch überprüfen\n",
|
||
"---\n",
|
||
"Tägliche Kontrolle: Analyse von: Kesselwasser - Speisewasser - Kondensat - Wasserverbrauch überprüfen\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9389513731002808<<:\n",
|
||
"Tägliche Interne Wartungstätigkeiten durch die Maschinenbediener der Laserabteilung (Arbeitspläne müssen abgearbeitet werden)\n",
|
||
"---\n",
|
||
"Wöchentliche Interne Wartungstätigkeiten durch die Maschinenbediener der Laserabteilung (Arbeitspläne müssen abgearbeitet werden)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8958828449249268<<:\n",
|
||
"Tägliche Interne Wartungstätigkeiten durch die Maschinenbediener der Laserabteilung (Arbeitspläne müssen abgearbeitet werden)\n",
|
||
"---\n",
|
||
"Monatliche Interne Wartungstätigkeiten durch die Maschinenbediener der Laserabteilung (Arbeitspläne müssen abgearbeitet werden)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.956008791923523<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Rollenkette zweifach - kontrollieren gegebenenfalls reinigen und schmieren\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Rollenkette zweifach - kontrollieren gegebenfalls reinigen und schmieren\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9408925771713257<<:\n",
|
||
"Vorgaben aus Brückner Wartungsplan (siehe Extradaten)\n",
|
||
"---\n",
|
||
"Vorgaben aus Brückner Wartungsplan siehe Extradaten\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8816083669662476<<:\n",
|
||
"Monatliche Kontrolle des Pflasterschrank Bei Bedarf an Verbandsmaterial bitte Ticket an das Magazin. (Auflistung des Verbandsmaterial findet man unter den Extradaten (UTT intern) des Objektes)\n",
|
||
"---\n",
|
||
"Monatliche Kontrolle des Pflasterschrank Bei Bedarf an Verbandsmaterial bitte Ticket an das Magazin.\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9160060882568359<<:\n",
|
||
"Wöchentliche Kontrolle der digitalen Feuchte und Temperatursensoren mit Hilfe des Efficio-System. (Messwerte abgleichen - Vorgabe Weberei 65% / 22°C)\n",
|
||
"---\n",
|
||
"Wöchentliche Kontrolle der digitalen Feuchte und Temperatursensoren mit Hilfe des Efficio-System. (Messwerte abgleichen - Vorgabe Weberei 65% / 22°C) Ahmetaj: Hinweis: derzeit nicht abgleichbar da Erneuerung der Steuerung durch Elektriker Anlage wird aber beobachtet\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9480097889900208<<:\n",
|
||
"2-wöchige Reinigung & Sichtkontrolle (Technische Einrichtungen / Luftdruckkontrolle)\n",
|
||
"---\n",
|
||
"2-Wöchentliche Reinigung & Sichtkontrolle (Technische Einrichtungen / Luftdruckkontrolle)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8540077209472656<<:\n",
|
||
"2-wöchige Reinigung & Sichtkontrolle (Technische Einrichtungen / Luftdruckkontrolle)\n",
|
||
"---\n",
|
||
"2-wöchige Sichtkontrolle (Technische Einrichtungen / Luftdruckkontrolle)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9614907503128052<<:\n",
|
||
"Wöchentliche Kontrolle Sprinkleranlage\n",
|
||
"---\n",
|
||
"Wöchentliche Kontrolle der Sprinkleranlage\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9066085815429688<<:\n",
|
||
"Monatliche Sichtkontrolle / Reinigung\n",
|
||
"---\n",
|
||
"Monatliche Sichtprüfung / Reinigung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9329557418823242<<:\n",
|
||
"Monatliche Kontrolle / Reinigung / Abschmierung von: Ventilatoren und Motoren -wird alle 3 Monate durchgeführt - Erledigungsdatum wird am Motor angeschrieben - ansonsten nach Bedarf / Wechseln der UV-Röhren (nach Betriebsstunden) / Wäscherkontrolle (Reinigung bei Bedarf) / Trommeln\n",
|
||
"---\n",
|
||
"Monatliche Kontrolle / Reinigung / Abschmierung von: Ventilatoren und Motoren -wird alle 3 Monate durchgeführt - Erledigungsdatum wird am Motor angeschrieben - ansonsten nach Bedarf / Wäscherkontrolle (Reinigung bei Bedarf) / Trommeln\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9679458141326904<<:\n",
|
||
"- Reinigen des Gerätes von Außen mit einem feuchten Reinigungstuch - keine Lösungsmittel oder \"scharfe\" Reiniger verwenden. - Sichtprüfung des Luftreinigers (Filter und Geräteinneres) auf Verschmutzung und/oder Beschädigungen - Prüfen, dass alle Schrauben fest sitzen. - Vorfilter auf Verunreinigungen prüfen (evtl. tauschen) - Sicherstellung, dass keine Ansaug- und Ausblasöffnungen bedeckt sind und keine Verschmutzungen/lose Fremdkörper auf der Ausblasöffnung liegen.\n",
|
||
"---\n",
|
||
"- Reinigen des Gerätes von Außen mit einem feuchten Reinigungstuch - keine Lösungsmittel oder \"scharfe\" Reiniger verwenden. - Sichtprüfung des Luftreinigers (Filter und Geräteinneres) auf Verschmutzung und/oder Beschädigungen - Prüfen, dass alle Schrauben fest sitzen. - Vorfilter auf Verunreinigungen prüfen (evtl. tauschen) - Sicherstellung, dass keine Ansaug- und Ausblasöffnungen bedeckt sind und keine Verschmutzungen/lose Fremdkörper auf dem Ausblas liegen.\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8935885429382324<<:\n",
|
||
"Überprüfung von: - Indikator Testomat - Wasserhärte - Filter - Vordruck Enthärtung\n",
|
||
"---\n",
|
||
"Tägliche Überprüfung von: Indikator Testomat / Wasserhärte / Filter / Vordruck Enthärtung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9557639956474304<<:\n",
|
||
"Wöchentliche Kontrolle Klimagerät Inneneinheit: Kontrollieren der Filert auf Verschmutzung / Kontrollieren der Temperatur Außeneinheit: Luftansaugseite: Sauberkeit der Filter kontrollieren / Auf Laufgeräusche achten Befeuchter: Kontrollieren der Feuchte / Optische Kontrolle von Ein, Auslassventil und Dampfzylinder (Verkalkung)\n",
|
||
"---\n",
|
||
"Wöchentliche Kontrolle Klimagerät Inneneinheit: Kontrollieren der Filter auf Verschmutzung / Kontrollieren der Temperatur Außeneinheit: Luftansaugseite: Sauberkeit der Filter kontrollieren / Auf Laufgeräusche achten Befeuchter: Kontrollieren der Feuchte / Optische Kontrolle von Ein, Auslassventil und Dampfzylinder (Verkalkung)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8623278141021729<<:\n",
|
||
"Prüfung von: Hr. Förster - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"---\n",
|
||
"Prüfung von: - Scharnier - Dichtung - Schließvorrichtung - Schloß - Türgriff - allgemeine Funktion - Schmierung - Festhaltevorrichtung\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9198158979415894<<:\n",
|
||
"Jährliche Wartung der RWA-Klappen\n",
|
||
"---\n",
|
||
"Jährliche Wartung/Prüfung der RWA-Klappen\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9503133296966553<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Mahlo (siehe Extradaten)\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Mahlo (siehe Vorbelegung Extradaten)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9231461882591248<<:\n",
|
||
"Wöchentliche Interne Wartungstätigkeiten durch die Maschinenbediener der Laserabteilung (Arbeitspläne müssen abgearbeitet werden)\n",
|
||
"---\n",
|
||
"Monatliche Interne Wartungstätigkeiten durch die Maschinenbediener der Laserabteilung (Arbeitspläne müssen abgearbeitet werden)\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9229674339294434<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Stehlager SI-40, mit Hochtemperatureinsatz - Gewindestiftbefestigung - nachschmieren Stehlager SI-40, mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FUU-80 mit Hochtemperatureinsatz - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz - Gewindebefestigung - nachschmieren\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Stehlager SI-40, Gewindestiftbefestigung - nachschmieren Stehlager SI-40, Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz -Gewindestiftbefestigung - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.894595205783844<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Stehlager SI-40, mit Hochtemperatureinsatz - Gewindestiftbefestigung - nachschmieren Stehlager SI-40, mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FUU-80 mit Hochtemperatureinsatz - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz - Gewindebefestigung - nachschmieren\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Flanschlager-quadrat Lagereinsatz Hochtemperatur - nachschmieren Stehlager SI-40 Gewindestiftbefestigung - nachschmieren Stehlager SI-40 Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 Gewindestiftbefestigung - nachschmieren Flanschlager-quadrat FI-40 Exzenterringbefestigung - nachschmieren\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.896432638168335<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Stehlager SI-40, mit Hochtemperatureinsatz - Gewindestiftbefestigung - nachschmieren Stehlager SI-40, mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FUU-80 mit Hochtemperatureinsatz - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz - Gewindebefestigung - nachschmieren\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Flanschlager-quadrat FUU-80 mit Hochtemperatureinsatz - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz - Gewindebefestigung - nachschmieren\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.9396674036979675<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Stehlager SI-40, Gewindestiftbefestigung - nachschmieren Stehlager SI-40, Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz -Gewindestiftbefestigung - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Flanschlager-quadrat Lagereinsatz Hochtemperatur - nachschmieren Stehlager SI-40 Gewindestiftbefestigung - nachschmieren Stehlager SI-40 Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 Gewindestiftbefestigung - nachschmieren Flanschlager-quadrat FI-40 Exzenterringbefestigung - nachschmieren\n",
|
||
"\n",
|
||
"\n",
|
||
"Text pair with >>0.8566708564758301<<:\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Stehlager SI-40, Gewindestiftbefestigung - nachschmieren Stehlager SI-40, Exzenterringbefestigung - nachschmieren Flanschlager-quadrat FIS-40 mit Hochtemperatureinsatz -Gewindestiftbefestigung - nachschmieren Flanschlager-quadrat FI-40 mit Hochtemperatureinsatz - Exzenterringbefestigung - nachschmieren\n",
|
||
"---\n",
|
||
"Vorgabe aus Wartungsplan Firma Menzel (siehe Vorbelegung Extradaten) Flanschlager-quadrat FIS-40, Gewindestiftbefestigung - nachschmieren Flanschlager-quadrat FIS-40, Exzenterringbefestigung - nachschmieren\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"count = 0\n",
|
||
"for entry in idx:\n",
|
||
" if count == 30:\n",
|
||
" break\n",
|
||
" txt1 = entries[entry[0]]\n",
|
||
" txt2 = entries[entry[1]]\n",
|
||
" value = sim[entry[0],entry[1]]\n",
|
||
" print(f'Text pair with >>{value}<<:\\n{txt1}\\n---\\n{txt2}\\n\\n')\n",
|
||
"\n",
|
||
" count += 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5235664b-0903-4f6f-b266-6cf95f2a8879",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"docs = [\n",
|
||
" 'Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers',\n",
|
||
" 'Monatliche Prüfung von: - Scharniere - Dichtung - Schließvorrichtung - Schloß - Beschlag - allgemeine Funktion - Schmierung - Festhaltevorrichtung',\n",
|
||
"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 293,
|
||
"id": "fc275740-6399-4cc9-bb17-6f597b096b5f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(2, 2)"
|
||
]
|
||
},
|
||
"execution_count": 293,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"embds = model_alt.encode(docs, normalize_embeddings=True)\n",
|
||
"sims_t = model_alt.similarity(embds, embds).numpy()\n",
|
||
"sims_t.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 294,
|
||
"id": "a553ff5a-bc07-4ddf-bd03-8f557d3c738d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([[0.99999976, 0.7587665 ],\n",
|
||
" [0.75876653, 1.0000002 ]], dtype=float32)"
|
||
]
|
||
},
|
||
"execution_count": 294,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sims_t"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7d5b165e-297e-46ad-a16d-ad2b2c6a5802",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "06e89222-cc74-4cce-9af0-474e9c26d224",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d0baa781-3a60-45da-ae29-45cd76efec97",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.10"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|