STRF for similarity duplicates, time analysis pipeline, enhanced config
This commit is contained in:
BIN
test-notebooks/dashboard/Pipe-TargetFeature_Step-3_remove_NA.pkl
Normal file
BIN
test-notebooks/dashboard/Pipe-TargetFeature_Step-3_remove_NA.pkl
Normal file
Binary file not shown.
Binary file not shown.
@@ -1,28 +1,42 @@
|
||||
from typing import cast
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
from dash import (
|
||||
Dash,
|
||||
html,
|
||||
dcc,
|
||||
callback,
|
||||
Output,
|
||||
Input,
|
||||
Output,
|
||||
State,
|
||||
callback,
|
||||
dash_table,
|
||||
dcc,
|
||||
html,
|
||||
)
|
||||
import plotly.express as px
|
||||
import pandas as pd
|
||||
from lang_main import load_pickle
|
||||
from lang_main.types import ObjectID, TimelineCandidates
|
||||
from pandas import DataFrame
|
||||
|
||||
from lang_main import load_pickle
|
||||
from lang_main.types import TimelineCandidates, ObjectID
|
||||
|
||||
#df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
|
||||
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
|
||||
|
||||
# ** data
|
||||
data = cast(DataFrame, load_pickle('./data.pkl'))
|
||||
cands = cast(TimelineCandidates, load_pickle('./map_candidates.pkl'))
|
||||
texts = cast(dict[ObjectID, str], load_pickle('./map_texts.pkl'))
|
||||
p_df = Path(r'.\test-notebooks\dashboard\Pipe-TargetFeature_Step-3_remove_NA.pkl')
|
||||
p_tl = Path(
|
||||
r'.\test-notebooks\dashboard\Pipe-Timeline_Analysis_Step-4_get_timeline_candidates.pkl'
|
||||
)
|
||||
ret = cast(DataFrame, load_pickle(p_df))
|
||||
data = ret[0]
|
||||
ret = cast(tuple[TimelineCandidates, dict[ObjectID, str]], load_pickle(p_tl))
|
||||
cands = ret[0]
|
||||
texts = ret[1]
|
||||
|
||||
# p_df = Path(r'.\test-notebooks\dashboard\data.pkl')
|
||||
# p_cands = Path(r'.\test-notebooks\dashboard\map_candidates.pkl')
|
||||
# p_map = Path(r'.\test-notebooks\dashboard\map_texts.pkl')
|
||||
# data = cast(DataFrame, load_pickle(p_df))
|
||||
# cands = cast(TimelineCandidates, load_pickle(p_cands))
|
||||
# texts = cast(dict[ObjectID, str], load_pickle(p_map))
|
||||
|
||||
table_feats = [
|
||||
'ErstellungsDatum',
|
||||
'ErledigungsDatum',
|
||||
@@ -52,25 +66,28 @@ hover_data = {
|
||||
app = Dash(prevent_initial_callbacks=True)
|
||||
|
||||
app.layout = [
|
||||
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign':'center'}),
|
||||
html.Div(children=[
|
||||
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
|
||||
dcc.Dropdown(
|
||||
list(cands.keys()),
|
||||
id='dropdown-selection',
|
||||
placeholder="ObjektID auswählen...",
|
||||
)
|
||||
]),
|
||||
html.Div(children=[
|
||||
html.H3(id='object_text'),
|
||||
dcc.Dropdown(id='choice-candidates'),
|
||||
dcc.Graph(id='graph-output'),
|
||||
]),
|
||||
html.Div(children=[
|
||||
dash_table.DataTable(id='table-candidates')
|
||||
]),
|
||||
html.H1(children='Demo Zeitreihenanalyse', style={'textAlign': 'center'}),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H2('Wählen Sie ein Objekt aus (ObjektID):'),
|
||||
dcc.Dropdown(
|
||||
list(cands.keys()),
|
||||
id='dropdown-selection',
|
||||
placeholder='ObjektID auswählen...',
|
||||
),
|
||||
]
|
||||
),
|
||||
html.Div(
|
||||
children=[
|
||||
html.H3(id='object_text'),
|
||||
dcc.Dropdown(id='choice-candidates'),
|
||||
dcc.Graph(id='graph-output'),
|
||||
]
|
||||
),
|
||||
html.Div(children=[dash_table.DataTable(id='table-candidates')]),
|
||||
]
|
||||
|
||||
|
||||
@callback(
|
||||
Output('object_text', 'children'),
|
||||
Input('dropdown-selection', 'value'),
|
||||
@@ -82,6 +99,7 @@ def update_obj_text(obj_id):
|
||||
headline = f'HObjektText: {obj_text}'
|
||||
return headline
|
||||
|
||||
|
||||
@callback(
|
||||
Output('choice-candidates', 'options'),
|
||||
Input('dropdown-selection', 'value'),
|
||||
@@ -90,9 +108,10 @@ def update_obj_text(obj_id):
|
||||
def update_choice_candidates(obj_id):
|
||||
obj_id = int(obj_id)
|
||||
cands_obj_id = cands[obj_id]
|
||||
choices = list(range(1, len(cands_obj_id)+1))
|
||||
choices = list(range(1, len(cands_obj_id) + 1))
|
||||
return choices
|
||||
|
||||
|
||||
@callback(
|
||||
Output('graph-output', 'figure'),
|
||||
Input('choice-candidates', 'value'),
|
||||
@@ -106,7 +125,7 @@ def update_timeline(index, obj_id):
|
||||
title = f'HObjektText: {obj_text}'
|
||||
# cands
|
||||
cands_obj_id = cands[obj_id]
|
||||
cands_choice = cands_obj_id[int(index)-1]
|
||||
cands_choice = cands_obj_id[int(index) - 1]
|
||||
# data
|
||||
df = data.loc[list(cands_choice)].sort_index()
|
||||
# figure
|
||||
@@ -117,22 +136,18 @@ def update_timeline(index, obj_id):
|
||||
title=title,
|
||||
hover_data=hover_data,
|
||||
)
|
||||
fig.update_traces(
|
||||
mode='markers+lines',
|
||||
marker=markers,
|
||||
marker_symbol='diamond'
|
||||
)
|
||||
fig.update_traces(mode='markers+lines', marker=markers, marker_symbol='diamond')
|
||||
fig.update_xaxes(
|
||||
tickformat="%B\n%Y",
|
||||
tickformat='%B\n%Y',
|
||||
rangeslider_visible=True,
|
||||
)
|
||||
fig.update_yaxes(type='category')
|
||||
fig.update_layout(hovermode="x unified")
|
||||
fig.update_layout(hovermode='x unified')
|
||||
return fig
|
||||
|
||||
|
||||
@callback(
|
||||
[Output('table-candidates', 'data'),
|
||||
Output('table-candidates', 'columns')],
|
||||
[Output('table-candidates', 'data'), Output('table-candidates', 'columns')],
|
||||
Input('choice-candidates', 'value'),
|
||||
State('dropdown-selection', 'value'),
|
||||
prevent_initial_call=True,
|
||||
@@ -141,19 +156,20 @@ def update_table_candidates(index, obj_id):
|
||||
obj_id = int(obj_id)
|
||||
# cands
|
||||
cands_obj_id = cands[obj_id]
|
||||
cands_choice = cands_obj_id[int(index)-1]
|
||||
cands_choice = cands_obj_id[int(index) - 1]
|
||||
# data
|
||||
df = data.loc[list(cands_choice)].sort_index()
|
||||
df = (df
|
||||
.filter(items=table_feats, axis=1)
|
||||
.sort_values(by='ErstellungsDatum', ascending=True))
|
||||
cols = [{"name": i, "id": i} for i in df.columns]
|
||||
df = df.filter(items=table_feats, axis=1).sort_values(
|
||||
by='ErstellungsDatum', ascending=True
|
||||
)
|
||||
cols = [{'name': i, 'id': i} for i in df.columns]
|
||||
# convert dates to strings
|
||||
for col in table_feats_dates:
|
||||
df[col] = df[col].dt.strftime(r'%Y-%m-%d')
|
||||
|
||||
|
||||
table_data = df.to_dict('records')
|
||||
return table_data, cols
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(debug=True)
|
||||
app.run(debug=True)
|
||||
|
||||
56
test-notebooks/dashboard/lang_main_config.toml
Normal file
56
test-notebooks/dashboard/lang_main_config.toml
Normal file
@@ -0,0 +1,56 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
results = './results/test_new2/'
|
||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
[control]
|
||||
preprocessing = true
|
||||
preprocessing_skip = false
|
||||
token_analysis = false
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing = false
|
||||
graph_postprocessing_skip = false
|
||||
time_analysis = false
|
||||
time_analysis_skip = false
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
[preprocess]
|
||||
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.model_input]
|
||||
input_features = [
|
||||
'VorgangsTypName',
|
||||
'VorgangsArtText',
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user