Usuń 'run.ipynb'
This commit is contained in:
parent
03f141983c
commit
b0478d6de5
118
run.ipynb
118
run.ipynb
@ -1,118 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 30,
|
|
||||||
"id": "52034f8c",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import re\n",
|
|
||||||
"import csv"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"id": "f8526769",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"states = [\"Alaska\", \"Alabama\", \"Arkansas\", \"American Samoa\", \"Arizona\", \"California\", \"Colorado\", \"Connecticut\", \"District \", \"of Columbia\", \"Delaware\", \"Florida\", \"Georgia\", \"Guam\", \"Hawaii\", \"Iowa\", \"Idaho\", \"Illinois\", \"Indiana\", \"Kansas\", \"Kentucky\", \"Louisiana\", \"Massachusetts\", \"Maryland\", \"Maine\", \"Michigan\", \"Minnesota\", \"Missouri\", \"Mississippi\", \"Montana\", \"North Carolina\", \"North Dakota\", \"Nebraska\", \"New Hampshire\", \"New Jersey\", \"New Mexico\", \"Nevada\", \"New York\", \"Ohio\", \"Oklahoma\", \"Oregon\", \"Pennsylvania\", \"Puerto Rico\", \"Rhode Island\", \"South Carolina\", \"South Dakota\", \"Tennessee\", \"Texas\", \"Utah\", \"Virginia\", \"Virgin Islands\", \"Vermont\", \"Washington\", \"Wisconsin\", \"West Virginia\", \"Wyoming\"]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"id": "e2195a35",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"rgx = re.compile(r'\\b(' + '|'.join(states) + r')\\b')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 44,
|
|
||||||
"id": "edee6c83",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def nda(path_in, path_out):\n",
|
|
||||||
" results = []\n",
|
|
||||||
" with open(path_in, 'r', encoding='utf-8') as in_file,\\\n",
|
|
||||||
" open(path_out, 'w') as out_file:\n",
|
|
||||||
" for line in in_file.readlines():\n",
|
|
||||||
" line = line.replace('.', ' ').replace(',', ' ').lower()\n",
|
|
||||||
" words = line.split()\n",
|
|
||||||
" jur = rgx.search(line)\n",
|
|
||||||
" if jur:\n",
|
|
||||||
" results.append('jurisdiction=' + jur.group().replace(' ', '_'))\n",
|
|
||||||
" #else:\n",
|
|
||||||
" # results.append('\\n')\n",
|
|
||||||
" date = re.findall(r'(\\d+-\\d+-\\d+)',line)\n",
|
|
||||||
" if date:\n",
|
|
||||||
" results.append('effective_date=' + jur.group().replace(' ', '_'))\n",
|
|
||||||
" results.append('\\n')\n",
|
|
||||||
"\n",
|
|
||||||
" for r in results:\n",
|
|
||||||
" out_file.write(r + '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 46,
|
|
||||||
"id": "319f7898",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"ename": "FileNotFoundError",
|
|
||||||
"evalue": "[Errno 2] No such file or directory: 'train/in.tsv'",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
||||||
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
||||||
"Input \u001b[1;32mIn [46]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m#pliki\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[43mnda\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtrain/in.tsv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtrain/out.tsv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m nda(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdev-0/in.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdev-0/out.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 4\u001b[0m nda(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest-A/in.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest-A/out.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
|
||||||
"Input \u001b[1;32mIn [44]\u001b[0m, in \u001b[0;36mnda\u001b[1;34m(path_in, path_out)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnda\u001b[39m(path_in, path_out):\n\u001b[0;32m 2\u001b[0m results \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpath_in\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m in_file,\\\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mopen\u001b[39m(path_out, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m out_file:\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m line \u001b[38;5;129;01min\u001b[39;00m in_file\u001b[38;5;241m.\u001b[39mreadlines():\n\u001b[0;32m 6\u001b[0m line \u001b[38;5;241m=\u001b[39m line\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mlower()\n",
|
|
||||||
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'train/in.tsv'"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"#pliki\n",
|
|
||||||
"nda('train/in.tsv', 'train/out.tsv')\n",
|
|
||||||
"nda('dev-0/in.tsv', 'dev-0/out.tsv')\n",
|
|
||||||
"nda('test-A/in.tsv', 'test-A/out.tsv')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "056e463f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.9.12"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user