{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sentence-similarity experiments on German maintenance-task texts\n",
    "\n",
    "This notebook embeds short German maintenance descriptions with [sentence-transformers](https://www.sbert.net/) and compares them by cosine similarity. The sentence sets probe how the score reacts to\n",
    "\n",
    "- extra context around a long compound word,\n",
    "- a truncated word,\n",
    "- a changed interval (weekly vs. daily) and typos,\n",
    "- a trailing newline.\n",
    "\n",
    "**Results recorded from the previous run** (set `interval_and_typos`, model `sentence-transformers/all-mpnet-base-v2`):\n",
    "\n",
    "| pair | cosine similarity |\n",
    "|---|---|\n",
    "| t1+2 | 0.4740 |\n",
    "| t1+3 | 0.4360 |\n",
    "| t2+3 | 0.9494 |\n",
    "| t2+4 | 0.7007 |\n",
    "\n",
    "Texts 2 and 3 differ only in the interval word (\"Wöchentliche\" vs. \"Tägliche\") and still score 0.9494; text 4 is text 3 with typos and scores 0.7007 against text 2.\n",
    "\n",
    "English sanity check with `all-MiniLM-L6-v2`: 0.6153.\n",
    "\n",
    "**TODO:** the `all-*` checkpoints are described as English models in the sentence-transformers model overview. Evaluate a multilingual checkpoint (e.g. `paraphrase-multilingual-mpnet-base-v2`) for these German texts before drawing conclusions from the scores above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sentence_transformers import SentenceTransformer, util"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "All sentence sets live side by side under their own name, so selecting a different experiment never depends on which cell happened to run last. Misspellings and truncated words in the sets are deliberate test input."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Checkpoint used for the German comparison.\n",
    "# Alternatives: \"all-MiniLM-L6-v2\", \"all-mpnet-base-v2\".\n",
    "GERMAN_RUN_MODEL_NAME = \"sentence-transformers/all-mpnet-base-v2\"\n",
    "# Checkpoint used for the English sanity check at the end.\n",
    "ENGLISH_CHECK_MODEL_NAME = \"all-MiniLM-L6-v2\"\n",
    "\n",
    "# Each set maps the text number used in the printed labels (t1, t2, ...)\n",
    "# to its sentence.\n",
    "SENTENCE_SETS = {\n",
    "    \"compound_word_in_context\": {\n",
    "        1: \"Betriebssicherheitsüberprüfung\",\n",
    "        2: \"die Betriebssicherheitsüberprüfung durchgeführt\",\n",
    "        3: \"Ich habe die Betriebssicherheitsüberprüfung durchgeführt.\",\n",
    "    },\n",
    "    \"truncated_word\": {\n",
    "        1: \"Wöchentliche Sichtkontrolle / Reinigung\",\n",
    "        2: \"Wöchentliche Sichtkontrolle / Reinigun\",\n",
    "        3: \"3-monatliche Sichtkontrolle / Reinigung\",\n",
    "    },\n",
    "    \"interval_and_typos\": {\n",
    "        1: \"Wöchentliche Sichtkontrolle / Reinigung\",\n",
    "        2: \"Wöchentliche Kontrolle der Wasseraufbereitungsanlagen\",\n",
    "        3: \"Tägliche Kontrolle der Wasseraufbereitungsanlagen\",\n",
    "        4: \"Täglihce Kontolle der Wasseraufberitungsanlagen\",\n",
    "    },\n",
    "    \"trailing_newline\": {\n",
    "        1: \"Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers\\n\",\n",
    "        3: \"Tägliche Wartungstätigkeiten nach Vorgabe des Maschinenherstellers\",\n",
    "    },\n",
    "}\n",
    "\n",
    "# Unrelated sentence that can be swapped in as text 2 of any set.\n",
    "UNRELATED_SENTENCE = \"Nach dem Batterie-Wechsel gingen alle Lichter aus\"\n",
    "\n",
    "# Set evaluated in the German comparison below.\n",
    "ACTIVE_SET = \"interval_and_typos\"\n",
    "\n",
    "# Pairs of text numbers to compare. Pairs that reference a text missing\n",
    "# from the active set are filtered out before the comparison runs.\n",
    "COMPARISON_PAIRS = [(1, 2), (1, 3), (2, 3), (2, 4)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pairwise_cosine_similarities(model, texts, pairs):\n",
    "    \"\"\"Return the cosine similarity for each requested pair of texts.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model\n",
    "        Object whose ``encode`` method accepts a list of strings and\n",
    "        returns one embedding per string (e.g. ``SentenceTransformer``).\n",
    "    texts : dict\n",
    "        Maps a sortable key (here: the text number) to the sentence.\n",
    "    pairs : iterable of 2-tuples\n",
    "        Keys of ``texts`` to compare with each other.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        ``{(key_a, key_b): similarity}`` with the similarity as a float,\n",
    "        in the order given by ``pairs``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If a pair references a key that is not in ``texts``.\n",
    "    \"\"\"\n",
    "    keys = sorted(texts)\n",
    "    # Encode every sentence once in a single batch instead of once per pair.\n",
    "    vectors = model.encode([texts[key] for key in keys])\n",
    "    embedding_by_key = dict(zip(keys, vectors))\n",
    "    return {\n",
    "        (key_a, key_b): util.cos_sim(\n",
    "            embedding_by_key[key_a], embedding_by_key[key_b]\n",
    "        ).item()  # cos_sim yields a 1x1 tensor; unwrap it to a float\n",
    "        for key_a, key_b in pairs\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## German comparison\n",
    "\n",
    "Loading the checkpoint is kept in its own cell so the comparison can be re-run without reloading the model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = SentenceTransformer(GERMAN_RUN_MODEL_NAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "texts = SENTENCE_SETS[ACTIVE_SET]\n",
    "pairs = [(a, b) for a, b in COMPARISON_PAIRS if a in texts and b in texts]\n",
    "similarities = pairwise_cosine_similarities(model, texts, pairs)\n",
    "\n",
    "for (a, b), score in similarities.items():\n",
    "    print(f\"Cosine-Similarity t{a}+{b}: {score:.4f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## English sanity check\n",
    "\n",
    "Same comparison on an English sentence pair with the smaller checkpoint. It is bound to its own name so the model used for the German run above is not replaced."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_en = SentenceTransformer(ENGLISH_CHECK_MODEL_NAME)\n",
    "\n",
    "english_texts = {\n",
    "    1: \"This is a red cat with a hat.\",\n",
    "    2: \"Have you seen my red cat?\",\n",
    "}\n",
    "pairwise_cosine_similarities(model_en, english_texts, [(1, 2)])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}