Usuń 'run.py'

2022-05-03 20:58:10 +02:00 · 2022-05-03 20:58:10 +02:00 · e5dd8a1bd8
commit e5dd8a1bd8
parent 72a63348bd
1 changed files with 0 additions and 97 deletions
--- a/run.py
+++ b/run.py
@@ -1,97 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "id": "405da850",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "id": "616f3992",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "states = [\"Alaska\", \"Alabama\", \"Arkansas\", \"American Samoa\", \"Arizona\", \"California\", \"Colorado\", \"Connecticut\", \"District of Columbia\", \"Delaware\", \"Florida\", \"Georgia\", \"Guam\", \"Hawaii\", \"Iowa\", \"Idaho\", \"Illinois\", \"Indiana\", \"Kansas\", \"Kentucky\", \"Louisiana\", \"Massachusetts\", \"Maryland\", \"Maine\", \"Michigan\", \"Minnesota\", \"Missouri\", \"Mississippi\", \"Montana\", \"North Carolina\", \"North Dakota\", \"Nebraska\", \"New Hampshire\", \"New Jersey\", \"New Mexico\", \"Nevada\", \"New York\", \"Ohio\", \"Oklahoma\", \"Oregon\", \"Pennsylvania\", \"Puerto Rico\", \"Rhode Island\", \"South Carolina\", \"South Dakota\", \"Tennessee\", \"Texas\", \"Utah\", \"Virginia\", \"Virgin Islands\", \"Vermont\", \"Washington\", \"Wisconsin\", \"West Virginia\", \"Wyoming\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "id": "dfa9b7c2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "rgx = re.compile(r'\\b(' + '|'.join(states) + r')\\b')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "id": "05c88c78",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def nda(path_in, path_out):\n",
-    "    #path_in = lzma.open(path_in).read().decode()\n",
-    "    results = []\n",
-    "    with open(path_in, 'r', encoding='utf-8') as file:\n",
-    "        #removeAccents(path_in)\n",
-    "        for line in file.readlines():\n",
-    "            line = line.replace('.', ' ').replace(',', ' ')\n",
-    "            words = line.split()\n",
-    "            jur = rgx.search(line)\n",
-    "            if jur:\n",
-    "                results.append('jurisdiction=' + jur.group().replace(' ', '_'))\n",
-    "            #else:\n",
-    "            #    results.append('\\n')\n",
-    "            date = re.findall(r'(\\d+-\\d+-\\d+)',line)\n",
-    "            if date:\n",
-    "                results.append('effective_date=' + date[0])\n",
-    "            results.append('\\n')\n",
-    "    with open(path_out, 'w') as file:\n",
-    "        for r in results:\n",
-    "            file.write(r + '\\n')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "71adc3b1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#pliki\n",
-    "nda('dev-0/in.tsv', 'dev-0/out.tsv')\n",
-    "nda('train/in.tsv', 'train/out.tsv')\n",
-    "nda('test-A/in.tsv', 'test-A/out.tsv')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}