78 lines
2.3 KiB
Python
78 lines
2.3 KiB
Python
import pytest
|
|
|
|
from lang_main import model_loader
|
|
from lang_main.analysis import graphs, tokens
|
|
from lang_main.types import SpacyModelTypes
|
|
|
|
# Shared German sample text for the spaCy-based tests below. It is made up of
# two sentences and contains a date-like token ('22.05.') as well as a purely
# numeric ID ('0912393').
SENTENCE = (
    'Ich ging am 22.05. mit ID 0912393 schnell über die Wiese zu einem Menschen, '
    'um ihm zu helfen. Ich konnte nicht mit ansehen, wie er Probleme beim Tragen '
    'seiner Tasche hatte.'
)
|
|
|
|
|
|
@pytest.fixture(scope='module')
def spacy_model():
    """Provide the spaCy model ``SpacyModelTypes.DE_CORE_NEWS_SM``.

    The fixture is module-scoped, so the model is loaded via
    ``model_loader.load_spacy`` once and reused by every test in this module.
    """
    return model_loader.load_spacy(model_name=SpacyModelTypes.DE_CORE_NEWS_SM)
|
|
|
|
|
|
def test_pre_clean_word():
    """``tokens.pre_clean_word`` must map the sample word to its digit-free form."""
    raw_word = 'Öl3bad2024prüfung'
    expected = 'Ölbadprüfung'
    assert tokens.pre_clean_word(raw_word) == expected
|
|
|
|
|
|
def test_is_str_date():
    """Check ``tokens.is_str_date`` against date-like and non-date strings."""
    # day and month only; this case is checked with ``fuzzy=True``
    assert tokens.is_str_date('22.05.', fuzzy=True)

    # complete dates written with different separators
    assert tokens.is_str_date('22.05.2024')
    assert tokens.is_str_date('22-05-2024')

    # a long digit sequence and an alphanumeric word must not count as dates
    assert not tokens.is_str_date('9009090909')
    assert not tokens.is_str_date('hello347')
|
|
|
|
|
|
# TODO: depends on fixed Constants
|
|
def test_obtain_relevant_descendants(spacy_model):
    """Check the relevant descendants found for one word in each sentence.

    ``SENTENCE`` is parsed with the ``spacy_model`` fixture; for the second
    token of each of the two sentences, the texts of the tokens returned by
    ``tokens.obtain_relevant_descendants`` are compared with a fixed tuple.
    """
    doc = spacy_model(SENTENCE)
    # materialise the sentence spans once and index into the result
    sentences = tuple(doc.sents)

    sent1 = sentences[0]  # first sentence
    word1 = sent1[1]  # word "ging" (POS:VERB)
    descendants1 = ('ID', '0912393', 'schnell', 'Wiese', 'Menschen')
    rel_descs = tuple(
        token.text for token in tokens.obtain_relevant_descendants(word1)
    )
    assert descendants1 == rel_descs

    sent2 = sentences[1]  # second sentence
    word2 = sent2[1]  # word "konnte" (POS:AUX)
    descendants2 = ('Probleme', 'Tragen', 'Tasche')
    rel_descs = tuple(
        token.text for token in tokens.obtain_relevant_descendants(word2)
    )
    assert descendants2 == rel_descs
|
|
|
|
|
|
def test_add_doc_info_to_graph(spacy_model):
    """Fill an empty ``TokenGraph`` from the parsed sample text and check it.

    After ``tokens.add_doc_info_to_graph`` has been called with ``weight=2``,
    the graph is expected to hold 11 nodes and 16 edges, and the numeric ID
    from the sample text must be one of the nodes.
    """
    parsed_doc = spacy_model(SENTENCE)
    graph = graphs.TokenGraph()

    tokens.add_doc_info_to_graph(graph, parsed_doc, weight=2)

    assert len(graph.nodes) == 11
    assert len(graph.edges) == 16
    assert '0912393' in graph.nodes
|
|
|
|
|
|
def test_build_token_graph(
    data_merge_similarity_duplicates,
    spacy_model,
    data_tk_graph_built,
):
    """Compare a freshly built token graph with the reference graph fixture.

    Only the node and edge counts of the graph returned by
    ``tokens.build_token_graph`` are compared with ``data_tk_graph_built``;
    the second element of the returned pair is not inspected.
    """
    built_graph, _ = tokens.build_token_graph(
        data=data_merge_similarity_duplicates,
        model=spacy_model,
    )

    reference_graph = data_tk_graph_built
    assert len(built_graph.nodes) == len(reference_graph.nodes)
    assert len(built_graph.edges) == len(reference_graph.edges)
|