Fix notebooks and add join notebook

This commit is contained in:
Michał Kozłowski 2023-01-07 15:21:57 +01:00
parent 1f08432fbf
commit 9088a3dc10
3 changed files with 88 additions and 13 deletions

View File

@ -343,7 +343,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.15" "version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:41:22) [MSC v.1929 64 bit (AMD64)]"
}, },
"orig_nbformat": 4, "orig_nbformat": 4,
"vscode": { "vscode": {

View File

@ -262,24 +262,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 95, "execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"b = Image.open(requests.get(f\"https:{a['image_url'][0]}\", stream=True).raw)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "UnidentifiedImageError", "data": {
"evalue": "cannot identify image file <_io.BytesIO object at 0x0000021DA1EA14F0>", "text/plain": [
"output_type": "error", "(1024, 1486)"
"traceback": [ ]
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", },
"\u001b[1;31mUnidentifiedImageError\u001b[0m Traceback (most recent call last)", "execution_count": 108,
"Cell \u001b[1;32mIn[95], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m b \u001b[39m=\u001b[39m Image\u001b[39m.\u001b[39;49mopen(requests\u001b[39m.\u001b[39;49mget(\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mhttps:\u001b[39;49m\u001b[39m{\u001b[39;49;00ma[\u001b[39m'\u001b[39;49m\u001b[39mimage_url\u001b[39;49m\u001b[39m'\u001b[39;49m][\u001b[39m4\u001b[39;49m]\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m, stream\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39;49mraw)\n", "metadata": {},
"File \u001b[1;32mc:\\Users\\PC\\anaconda3\\envs\\um\\lib\\site-packages\\PIL\\Image.py:3147\u001b[0m, in \u001b[0;36mopen\u001b[1;34m(fp, mode, formats)\u001b[0m\n\u001b[0;32m 3145\u001b[0m \u001b[39mfor\u001b[39;00m message \u001b[39min\u001b[39;00m accept_warnings:\n\u001b[0;32m 3146\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message)\n\u001b[1;32m-> 3147\u001b[0m \u001b[39mraise\u001b[39;00m UnidentifiedImageError(\n\u001b[0;32m 3148\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mcannot identify image file \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m (filename \u001b[39mif\u001b[39;00m filename \u001b[39melse\u001b[39;00m fp)\n\u001b[0;32m 3149\u001b[0m )\n", "output_type": "execute_result"
"\u001b[1;31mUnidentifiedImageError\u001b[0m: cannot identify image file <_io.BytesIO object at 0x0000021DA1EA14F0>"
]
} }
], ],
"source": [ "source": [
"b = Image.open(requests.get(f\"https:{a['image_url'][4]}\", stream=True).raw)" "b."
] ]
}, },
{ {

68
join.ipynb Normal file
View File

@ -0,0 +1,68 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Join the yellow TSV exports\n",
    "\n",
    "Reads `yellow.tsv` and its continuation file from `../wikisource-data/`, stacks them row-wise, and writes the combined table to `./yellow-full.tsv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Both inputs are tab-separated files under ../wikisource-data/.\n",
    "# NOTE(review): the doubled .tsv.tsv extension is kept exactly as it was - confirm it matches the file on disk.\n",
    "yellow = pd.read_csv(\"../wikisource-data/yellow.tsv\", sep=\"\\t\")\n",
    "yellow_c = pd.read_csv(\"../wikisource-data/yellow-continue-yellow.tsv.tsv\", sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the two frames row-wise. ignore_index=True renumbers the rows 0..n-1;\n",
    "# without it each frame keeps its own 0-based labels and the combined index repeats.\n",
    "whole = pd.concat([yellow, yellow_c], axis=0, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The row index is still written as the leading unnamed column, as before.\n",
    "# NOTE(review): pass index=False if downstream readers do not need that column.\n",
    "whole.to_csv(\"./yellow-full.tsv\", sep=\"\\t\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "um",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "876e189cbbe99a9a838ece62aae1013186c4bb7e0254a10cfa2f9b2381853efb"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}