prepare timeline postprocessing

This commit is contained in:
Florian Förster 2024-05-08 16:37:47 +02:00
parent 5ed9435c66
commit df16b29191
2 changed files with 123 additions and 37 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@ results/
data/
datasets/
**/spacy*/output/
**/iframe_figures/
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@ -242,7 +242,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@ -257,7 +257,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 24,
"metadata": {},
"outputs": [
{
@ -306,7 +306,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 25,
"metadata": {},
"outputs": [
{
@ -316,7 +316,7 @@
" dtype=object)"
]
},
"execution_count": 10,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@ -327,7 +327,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 26,
"metadata": {},
"outputs": [
{
@ -348,7 +348,7 @@
"Name: count, Length: 366, dtype: int64"
]
},
"execution_count": 11,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@ -366,7 +366,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 27,
"metadata": {},
"outputs": [
{
@ -409,7 +409,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@ -423,7 +423,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@ -432,7 +432,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 30,
"metadata": {},
"outputs": [
{
@ -483,7 +483,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 31,
"metadata": {},
"outputs": [
{
@ -585,7 +585,7 @@
"[1 rows x 21 columns]"
]
},
"execution_count": 17,
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@ -596,7 +596,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 32,
"metadata": {},
"outputs": [
{
@ -1501,7 +1501,7 @@
"[26 rows x 21 columns]"
]
},
"execution_count": 19,
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@ -1526,7 +1526,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 33,
"metadata": {},
"outputs": [
{
@ -1538,7 +1538,7 @@
"Name: count, dtype: int64"
]
},
"execution_count": 20,
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@ -1550,7 +1550,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@ -1565,7 +1565,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 35,
"metadata": {},
"outputs": [
{
@ -1592,7 +1592,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 36,
"metadata": {},
"outputs": [
{
@ -1761,7 +1761,7 @@
"[3 rows x 21 columns]"
]
},
"execution_count": 22,
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@ -1773,38 +1773,123 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"import plotly.express as px\n",
"import plotly.io as pio\n",
"pio.renderers.default = 'iframe'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"def filter_df(data, obj_id, cands):\n",
"    \"\"\"Return the cleaned object text for a set of candidate rows.\n",
"\n",
"    Selects the rows labelled ``cands`` from ``data``, takes the first\n",
"    non-null entry of the 'HObjektText' column (in ``cands`` order) and\n",
"    strips leading/trailing spaces, commas, periods and colons from it.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : DataFrame indexable via ``.loc`` with a 'HObjektText' column.\n",
"    obj_id : identifier of the object; only used in the error message.\n",
"    cands : index labels of the candidate rows.\n",
"\n",
"    Raises\n",
"    ------\n",
"    IndexError\n",
"        If none of the candidate rows has a non-null 'HObjektText'.\n",
"    \"\"\"\n",
"    texts = data.loc[cands, 'HObjektText'].dropna()\n",
"    if texts.empty:\n",
"        # Same exception type that .iat[0] raised before, now with context.\n",
"        raise IndexError(\n",
"            f'no non-null HObjektText for object {obj_id} among rows {cands}'\n",
"        )\n",
"    return texts.iat[0].strip(' ,.:')"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"text = filter_df(anlys_data, 640, [14720, 27393, 3878])"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'plotly'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[23], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mplotly\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexpress\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpx\u001b[39;00m\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'plotly'"
"data": {
"text/plain": [
"'R35, Schnelllauftor'"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.express as px"
"text"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": []
"source": [
"t = res['ErstellungsDatum'].sort_values(ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"14720 2019-07-03\n",
"27393 2019-09-11\n",
"3878 2021-11-24\n",
"Name: ErstellungsDatum, dtype: datetime64[ns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<iframe\n",
" scrolling=\"no\"\n",
" width=\"100%\"\n",
" height=\"545px\"\n",
" src=\"iframe_figures/figure_57.html\"\n",
" frameborder=\"0\"\n",
" allowfullscreen\n",
"></iframe>\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = px.scatter(data_frame=res, x='ErstellungsDatum')\n",
"fig.update_yaxes(type='category')"
]
},
{
"cell_type": "code",