forked from kubapok/retroc2
1106 lines
36 KiB
Plaintext
1106 lines
36 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 59,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import gensim\n",
|
|||
|
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
|||
|
"from sklearn.linear_model import LinearRegression"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1985.493151</td>\n",
|
|||
|
" <td>1985.495890</td>\n",
|
|||
|
" <td>PRZEKRÓJ</td>\n",
|
|||
|
" <td>MBC</td>\n",
|
|||
|
" <td>nowią część kultury. U nas już nikt ich nie ch...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1926.473973</td>\n",
|
|||
|
" <td>1926.476712</td>\n",
|
|||
|
" <td>NADWIŚLANIN</td>\n",
|
|||
|
" <td>KPBC</td>\n",
|
|||
|
" <td>hlstorja znana w okresie piramid, jak wlaśclcl...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2013.961644</td>\n",
|
|||
|
" <td>2013.964384</td>\n",
|
|||
|
" <td>SĄD APELACYJNY W ŁODZI I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>działek. Idąc dalej w swych hipotetycznych roz...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1925.000000</td>\n",
|
|||
|
" <td>1926.000000</td>\n",
|
|||
|
" <td>GAZETA BANKOWA</td>\n",
|
|||
|
" <td>eBUW</td>\n",
|
|||
|
" <td>w Warszawie o stosunkach domowych dziatwy szko...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>1981.000000</td>\n",
|
|||
|
" <td>1982.000000</td>\n",
|
|||
|
" <td>CIA</td>\n",
|
|||
|
" <td>ZBC</td>\n",
|
|||
|
" <td>\\\\'iykład: \"Cywilizacyjna Koncepcja dziejów ¥e...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107458</td>\n",
|
|||
|
" <td>2013.057534</td>\n",
|
|||
|
" <td>2013.060274</td>\n",
|
|||
|
" <td>SĄD REJONOWY DLA WROCŁAWIA-ŚRÓDMIEŚCIA WE WROC...</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>M. (2) na rzecz powoda M. S. kwotę 5003,66 zł ...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107459</td>\n",
|
|||
|
" <td>2013.021918</td>\n",
|
|||
|
" <td>2013.024658</td>\n",
|
|||
|
" <td>WYROK W SPRAWIE KIO</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>Zintegrowanego Systemu Informatycznego (ZSI), ...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107460</td>\n",
|
|||
|
" <td>2013.920548</td>\n",
|
|||
|
" <td>2013.923288</td>\n",
|
|||
|
" <td>SĄD OKRĘGOWY W PIOTRKOWIE TRYBUNALSKIM IV WYDZ...</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>prokurator. Wyrokowi temu powołując się na prz...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107461</td>\n",
|
|||
|
" <td>2013.082192</td>\n",
|
|||
|
" <td>2013.084931</td>\n",
|
|||
|
" <td>SĄD REJONOWY W JELENIEJ GÓRZE I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>07 lipca 2010 r. świadczą o tym, że nie wszyst...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107462</td>\n",
|
|||
|
" <td>2013.098630</td>\n",
|
|||
|
" <td>2013.101370</td>\n",
|
|||
|
" <td>SĄD OKRĘGOWY W ELBLĄGU I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>zatem niezdolności do pracy było schorzenie sa...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>107463 rows × 5 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" 0 1 \\\n",
|
|||
|
"0 1985.493151 1985.495890 \n",
|
|||
|
"1 1926.473973 1926.476712 \n",
|
|||
|
"2 2013.961644 2013.964384 \n",
|
|||
|
"3 1925.000000 1926.000000 \n",
|
|||
|
"4 1981.000000 1982.000000 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 2013.057534 2013.060274 \n",
|
|||
|
"107459 2013.021918 2013.024658 \n",
|
|||
|
"107460 2013.920548 2013.923288 \n",
|
|||
|
"107461 2013.082192 2013.084931 \n",
|
|||
|
"107462 2013.098630 2013.101370 \n",
|
|||
|
"\n",
|
|||
|
" 2 3 \\\n",
|
|||
|
"0 PRZEKRÓJ MBC \n",
|
|||
|
"1 NADWIŚLANIN KPBC \n",
|
|||
|
"2 SĄD APELACYJNY W ŁODZI I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"3 GAZETA BANKOWA eBUW \n",
|
|||
|
"4 CIA ZBC \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 SĄD REJONOWY DLA WROCŁAWIA-ŚRÓDMIEŚCIA WE WROC... SAOS \n",
|
|||
|
"107459 WYROK W SPRAWIE KIO SAOS \n",
|
|||
|
"107460 SĄD OKRĘGOWY W PIOTRKOWIE TRYBUNALSKIM IV WYDZ... SAOS \n",
|
|||
|
"107461 SĄD REJONOWY W JELENIEJ GÓRZE I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"107462 SĄD OKRĘGOWY W ELBLĄGU I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"\n",
|
|||
|
" 4 \n",
|
|||
|
"0 nowią część kultury. U nas już nikt ich nie ch... \n",
|
|||
|
"1 hlstorja znana w okresie piramid, jak wlaśclcl... \n",
|
|||
|
"2 działek. Idąc dalej w swych hipotetycznych roz... \n",
|
|||
|
"3 w Warszawie o stosunkach domowych dziatwy szko... \n",
|
|||
|
"4 \\\\'iykład: \"Cywilizacyjna Koncepcja dziejów ¥e... \n",
|
|||
|
"... ... \n",
|
|||
|
"107458 M. (2) na rzecz powoda M. S. kwotę 5003,66 zł ... \n",
|
|||
|
"107459 Zintegrowanego Systemu Informatycznego (ZSI), ... \n",
|
|||
|
"107460 prokurator. Wyrokowi temu powołując się na prz... \n",
|
|||
|
"107461 07 lipca 2010 r. świadczą o tym, że nie wszyst... \n",
|
|||
|
"107462 zatem niezdolności do pracy było schorzenie sa... \n",
|
|||
|
"\n",
|
|||
|
"[107463 rows x 5 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# quoting=3 is csv.QUOTE_NONE: double quotes in the noisy text column stay literal, so they cannot glue rows together.\n",
|
|||
|
"df = pd.read_csv('./train/train.tsv', header=None, sep='\\t', quoting=3)\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1985.493151</td>\n",
|
|||
|
" <td>1985.495890</td>\n",
|
|||
|
" <td>PRZEKRÓJ</td>\n",
|
|||
|
" <td>MBC</td>\n",
|
|||
|
" <td>nowią część kultury. U nas już nikt ich nie ch...</td>\n",
|
|||
|
" <td>1985.494521</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1926.473973</td>\n",
|
|||
|
" <td>1926.476712</td>\n",
|
|||
|
" <td>NADWIŚLANIN</td>\n",
|
|||
|
" <td>KPBC</td>\n",
|
|||
|
" <td>hlstorja znana w okresie piramid, jak wlaśclcl...</td>\n",
|
|||
|
" <td>1926.475342</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2013.961644</td>\n",
|
|||
|
" <td>2013.964384</td>\n",
|
|||
|
" <td>SĄD APELACYJNY W ŁODZI I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>działek. Idąc dalej w swych hipotetycznych roz...</td>\n",
|
|||
|
" <td>2013.963014</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1925.000000</td>\n",
|
|||
|
" <td>1926.000000</td>\n",
|
|||
|
" <td>GAZETA BANKOWA</td>\n",
|
|||
|
" <td>eBUW</td>\n",
|
|||
|
" <td>w Warszawie o stosunkach domowych dziatwy szko...</td>\n",
|
|||
|
" <td>1925.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>1981.000000</td>\n",
|
|||
|
" <td>1982.000000</td>\n",
|
|||
|
" <td>CIA</td>\n",
|
|||
|
" <td>ZBC</td>\n",
|
|||
|
" <td>\\\\'iykład: \"Cywilizacyjna Koncepcja dziejów ¥e...</td>\n",
|
|||
|
" <td>1981.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107458</td>\n",
|
|||
|
" <td>2013.057534</td>\n",
|
|||
|
" <td>2013.060274</td>\n",
|
|||
|
" <td>SĄD REJONOWY DLA WROCŁAWIA-ŚRÓDMIEŚCIA WE WROC...</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>M. (2) na rzecz powoda M. S. kwotę 5003,66 zł ...</td>\n",
|
|||
|
" <td>2013.058904</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107459</td>\n",
|
|||
|
" <td>2013.021918</td>\n",
|
|||
|
" <td>2013.024658</td>\n",
|
|||
|
" <td>WYROK W SPRAWIE KIO</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>Zintegrowanego Systemu Informatycznego (ZSI), ...</td>\n",
|
|||
|
" <td>2013.023288</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107460</td>\n",
|
|||
|
" <td>2013.920548</td>\n",
|
|||
|
" <td>2013.923288</td>\n",
|
|||
|
" <td>SĄD OKRĘGOWY W PIOTRKOWIE TRYBUNALSKIM IV WYDZ...</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>prokurator. Wyrokowi temu powołując się na prz...</td>\n",
|
|||
|
" <td>2013.921918</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107461</td>\n",
|
|||
|
" <td>2013.082192</td>\n",
|
|||
|
" <td>2013.084931</td>\n",
|
|||
|
" <td>SĄD REJONOWY W JELENIEJ GÓRZE I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>07 lipca 2010 r. świadczą o tym, że nie wszyst...</td>\n",
|
|||
|
" <td>2013.083562</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107462</td>\n",
|
|||
|
" <td>2013.098630</td>\n",
|
|||
|
" <td>2013.101370</td>\n",
|
|||
|
" <td>SĄD OKRĘGOWY W ELBLĄGU I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>zatem niezdolności do pracy było schorzenie sa...</td>\n",
|
|||
|
" <td>2013.100000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>107463 rows × 6 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" 0 1 \\\n",
|
|||
|
"0 1985.493151 1985.495890 \n",
|
|||
|
"1 1926.473973 1926.476712 \n",
|
|||
|
"2 2013.961644 2013.964384 \n",
|
|||
|
"3 1925.000000 1926.000000 \n",
|
|||
|
"4 1981.000000 1982.000000 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 2013.057534 2013.060274 \n",
|
|||
|
"107459 2013.021918 2013.024658 \n",
|
|||
|
"107460 2013.920548 2013.923288 \n",
|
|||
|
"107461 2013.082192 2013.084931 \n",
|
|||
|
"107462 2013.098630 2013.101370 \n",
|
|||
|
"\n",
|
|||
|
" 2 3 \\\n",
|
|||
|
"0 PRZEKRÓJ MBC \n",
|
|||
|
"1 NADWIŚLANIN KPBC \n",
|
|||
|
"2 SĄD APELACYJNY W ŁODZI I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"3 GAZETA BANKOWA eBUW \n",
|
|||
|
"4 CIA ZBC \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 SĄD REJONOWY DLA WROCŁAWIA-ŚRÓDMIEŚCIA WE WROC... SAOS \n",
|
|||
|
"107459 WYROK W SPRAWIE KIO SAOS \n",
|
|||
|
"107460 SĄD OKRĘGOWY W PIOTRKOWIE TRYBUNALSKIM IV WYDZ... SAOS \n",
|
|||
|
"107461 SĄD REJONOWY W JELENIEJ GÓRZE I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"107462 SĄD OKRĘGOWY W ELBLĄGU I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"\n",
|
|||
|
" 4 mean \n",
|
|||
|
"0 nowią część kultury. U nas już nikt ich nie ch... 1985.494521 \n",
|
|||
|
"1 hlstorja znana w okresie piramid, jak wlaśclcl... 1926.475342 \n",
|
|||
|
"2 działek. Idąc dalej w swych hipotetycznych roz... 2013.963014 \n",
|
|||
|
"3 w Warszawie o stosunkach domowych dziatwy szko... 1925.500000 \n",
|
|||
|
"4 \\\\'iykład: \"Cywilizacyjna Koncepcja dziejów ¥e... 1981.500000 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 M. (2) na rzecz powoda M. S. kwotę 5003,66 zł ... 2013.058904 \n",
|
|||
|
"107459 Zintegrowanego Systemu Informatycznego (ZSI), ... 2013.023288 \n",
|
|||
|
"107460 prokurator. Wyrokowi temu powołując się na prz... 2013.921918 \n",
|
|||
|
"107461 07 lipca 2010 r. świadczą o tym, że nie wszyst... 2013.083562 \n",
|
|||
|
"107462 zatem niezdolności do pracy było schorzenie sa... 2013.100000 \n",
|
|||
|
"\n",
|
|||
|
"[107463 rows x 6 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df['mean'] = (df.iloc[:, 0] + df.iloc[:, 1])/2\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# One list of distinct lower-cased tokens per document in column 4 (set() makes the order arbitrary).\n",
|
|||
|
"df['tokenized'] = df[4].map(lambda text: list(set(gensim.utils.tokenize(text, lowercase=True))))\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <th>tokenized</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1985.493151</td>\n",
|
|||
|
" <td>1985.495890</td>\n",
|
|||
|
" <td>PRZEKRÓJ</td>\n",
|
|||
|
" <td>MBC</td>\n",
|
|||
|
" <td>nowią część kultury. U nas już nikt ich nie ch...</td>\n",
|
|||
|
" <td>1985.494521</td>\n",
|
|||
|
" <td>[o, poparzonego, pa, prawda, zdecydowała, btll...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1926.473973</td>\n",
|
|||
|
" <td>1926.476712</td>\n",
|
|||
|
" <td>NADWIŚLANIN</td>\n",
|
|||
|
" <td>KPBC</td>\n",
|
|||
|
" <td>hlstorja znana w okresie piramid, jak wlaśclcl...</td>\n",
|
|||
|
" <td>1926.475342</td>\n",
|
|||
|
" <td>[o, szynki, got, nie, rowy, wynikuwymierzonego...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2013.961644</td>\n",
|
|||
|
" <td>2013.964384</td>\n",
|
|||
|
" <td>SĄD APELACYJNY W ŁODZI I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>działek. Idąc dalej w swych hipotetycznych roz...</td>\n",
|
|||
|
" <td>2013.963014</td>\n",
|
|||
|
" <td>[o, zrealizowania, a, życiowego, uwagę, wiadom...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1925.000000</td>\n",
|
|||
|
" <td>1926.000000</td>\n",
|
|||
|
" <td>GAZETA BANKOWA</td>\n",
|
|||
|
" <td>eBUW</td>\n",
|
|||
|
" <td>w Warszawie o stosunkach domowych dziatwy szko...</td>\n",
|
|||
|
" <td>1925.500000</td>\n",
|
|||
|
" <td>[o, obszerna, uwzględnia, handel, idinia, praw...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>1981.000000</td>\n",
|
|||
|
" <td>1982.000000</td>\n",
|
|||
|
" <td>CIA</td>\n",
|
|||
|
" <td>ZBC</td>\n",
|
|||
|
" <td>\\\\'iykład: \"Cywilizacyjna Koncepcja dziejów ¥e...</td>\n",
|
|||
|
" <td>1981.500000</td>\n",
|
|||
|
" <td>[o, marazynu, ouiedlowym, nie, dijjliot, ti, w...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107458</td>\n",
|
|||
|
" <td>2013.057534</td>\n",
|
|||
|
" <td>2013.060274</td>\n",
|
|||
|
" <td>SĄD REJONOWY DLA WROCŁAWIA-ŚRÓDMIEŚCIA WE WROC...</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>M. (2) na rzecz powoda M. S. kwotę 5003,66 zł ...</td>\n",
|
|||
|
" <td>2013.058904</td>\n",
|
|||
|
" <td>[uzasadnieniu, o, a, gdyby, kwotę, skład, zast...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107459</td>\n",
|
|||
|
" <td>2013.021918</td>\n",
|
|||
|
" <td>2013.024658</td>\n",
|
|||
|
" <td>WYROK W SPRAWIE KIO</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>Zintegrowanego Systemu Informatycznego (ZSI), ...</td>\n",
|
|||
|
" <td>2013.023288</td>\n",
|
|||
|
" <td>[o, lit, a, ingerencji, uwagę, zastrzeżeniem, ...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107460</td>\n",
|
|||
|
" <td>2013.920548</td>\n",
|
|||
|
" <td>2013.923288</td>\n",
|
|||
|
" <td>SĄD OKRĘGOWY W PIOTRKOWIE TRYBUNALSKIM IV WYDZ...</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>prokurator. Wyrokowi temu powołując się na prz...</td>\n",
|
|||
|
" <td>2013.921918</td>\n",
|
|||
|
" <td>[o, uzasadnieniu, odmiennego, zarówno, a, życi...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107461</td>\n",
|
|||
|
" <td>2013.082192</td>\n",
|
|||
|
" <td>2013.084931</td>\n",
|
|||
|
" <td>SĄD REJONOWY W JELENIEJ GÓRZE I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>07 lipca 2010 r. świadczą o tym, że nie wszyst...</td>\n",
|
|||
|
" <td>2013.083562</td>\n",
|
|||
|
" <td>[o, który, roboty, wszystkie, a, wspólnotę, bu...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>107462</td>\n",
|
|||
|
" <td>2013.098630</td>\n",
|
|||
|
" <td>2013.101370</td>\n",
|
|||
|
" <td>SĄD OKRĘGOWY W ELBLĄGU I WYDZIAŁ CYWILNY</td>\n",
|
|||
|
" <td>SAOS</td>\n",
|
|||
|
" <td>zatem niezdolności do pracy było schorzenie sa...</td>\n",
|
|||
|
" <td>2013.100000</td>\n",
|
|||
|
" <td>[o, który, nieszczęśliwy, uzasadnieniu, a, mia...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>107463 rows × 7 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" 0 1 \\\n",
|
|||
|
"0 1985.493151 1985.495890 \n",
|
|||
|
"1 1926.473973 1926.476712 \n",
|
|||
|
"2 2013.961644 2013.964384 \n",
|
|||
|
"3 1925.000000 1926.000000 \n",
|
|||
|
"4 1981.000000 1982.000000 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 2013.057534 2013.060274 \n",
|
|||
|
"107459 2013.021918 2013.024658 \n",
|
|||
|
"107460 2013.920548 2013.923288 \n",
|
|||
|
"107461 2013.082192 2013.084931 \n",
|
|||
|
"107462 2013.098630 2013.101370 \n",
|
|||
|
"\n",
|
|||
|
" 2 3 \\\n",
|
|||
|
"0 PRZEKRÓJ MBC \n",
|
|||
|
"1 NADWIŚLANIN KPBC \n",
|
|||
|
"2 SĄD APELACYJNY W ŁODZI I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"3 GAZETA BANKOWA eBUW \n",
|
|||
|
"4 CIA ZBC \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 SĄD REJONOWY DLA WROCŁAWIA-ŚRÓDMIEŚCIA WE WROC... SAOS \n",
|
|||
|
"107459 WYROK W SPRAWIE KIO SAOS \n",
|
|||
|
"107460 SĄD OKRĘGOWY W PIOTRKOWIE TRYBUNALSKIM IV WYDZ... SAOS \n",
|
|||
|
"107461 SĄD REJONOWY W JELENIEJ GÓRZE I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"107462 SĄD OKRĘGOWY W ELBLĄGU I WYDZIAŁ CYWILNY SAOS \n",
|
|||
|
"\n",
|
|||
|
" 4 mean \\\n",
|
|||
|
"0 nowią część kultury. U nas już nikt ich nie ch... 1985.494521 \n",
|
|||
|
"1 hlstorja znana w okresie piramid, jak wlaśclcl... 1926.475342 \n",
|
|||
|
"2 działek. Idąc dalej w swych hipotetycznych roz... 2013.963014 \n",
|
|||
|
"3 w Warszawie o stosunkach domowych dziatwy szko... 1925.500000 \n",
|
|||
|
"4 \\\\'iykład: \"Cywilizacyjna Koncepcja dziejów ¥e... 1981.500000 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"107458 M. (2) na rzecz powoda M. S. kwotę 5003,66 zł ... 2013.058904 \n",
|
|||
|
"107459 Zintegrowanego Systemu Informatycznego (ZSI), ... 2013.023288 \n",
|
|||
|
"107460 prokurator. Wyrokowi temu powołując się na prz... 2013.921918 \n",
|
|||
|
"107461 07 lipca 2010 r. świadczą o tym, że nie wszyst... 2013.083562 \n",
|
|||
|
"107462 zatem niezdolności do pracy było schorzenie sa... 2013.100000 \n",
|
|||
|
"\n",
|
|||
|
" tokenized \n",
|
|||
|
"0 [o, poparzonego, pa, prawda, zdecydowała, btll... \n",
|
|||
|
"1 [o, szynki, got, nie, rowy, wynikuwymierzonego... \n",
|
|||
|
"2 [o, zrealizowania, a, życiowego, uwagę, wiadom... \n",
|
|||
|
"3 [o, obszerna, uwzględnia, handel, idinia, praw... \n",
|
|||
|
"4 [o, marazynu, ouiedlowym, nie, dijjliot, ti, w... \n",
|
|||
|
"... ... \n",
|
|||
|
"107458 [uzasadnieniu, o, a, gdyby, kwotę, skład, zast... \n",
|
|||
|
"107459 [o, lit, a, ingerencji, uwagę, zastrzeżeniem, ... \n",
|
|||
|
"107460 [o, uzasadnieniu, odmiennego, zarówno, a, życi... \n",
|
|||
|
"107461 [o, który, roboty, wszystkie, a, wspólnotę, bu... \n",
|
|||
|
"107462 [o, który, nieszczęśliwy, uzasadnieniu, a, mia... \n",
|
|||
|
"\n",
|
|||
|
"[107463 rows x 7 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 54,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"tokenized = df['tokenized'].values"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 55,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"vect = TfidfVectorizer()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 57,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x_train_vect = vect.fit_transform(df[4])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 60,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"reg = LinearRegression().fit(x_train_vect, df['mean'])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 61,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pickle\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 62,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"filename = 'finalized_model.sav'\n",
|
|||
|
"# Only reg is stored; anyone loading this file also needs the fitted vect to build features.\n",
|
|||
|
"# The context manager closes the handle, so the pickle is flushed to disk before the cell ends.\n",
|
|||
|
"with open(filename, 'wb') as model_file:\n",
|
|||
|
"    pickle.dump(reg, model_file)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 63,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>potoku Brodawka swe- o an13.gonistt:, Zd7islaw...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Zll zgrazę .wi.la Okropne d.,.,je przyniósł na...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>działalność wielu placówek kul nie naj istotni...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>po przeglosie warianty s' z', stawszy sit) fon...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td><a i naturaL n ll c h warunk6w, jest gł6umJ(m ...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11557</td>\n",
|
|||
|
" <td>mOlna było nie tylko zobaczyc sylwetkę auta, a...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11558</td>\n",
|
|||
|
" <td>musi bye i bt'd7ie naszI!! Krolestwo Polskie. ...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11559</td>\n",
|
|||
|
" <td>Rzeszy a nawet z P()II- moina %Q,dac jcdunie p...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11560</td>\n",
|
|||
|
" <td>zatopionych okret6w podejrnowano juz dawnlej, ...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11561</td>\n",
|
|||
|
" <td>lista szczęśliwców: Nagrodę główną superzestaw...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>11562 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" 0\n",
|
|||
|
"0 potoku Brodawka swe- o an13.gonistt:, Zd7islaw...\n",
|
|||
|
"1 Zll zgrazę .wi.la Okropne d.,.,je przyniósł na...\n",
|
|||
|
"2 działalność wielu placówek kul nie naj istotni...\n",
|
|||
|
"3 po przeglosie warianty s' z', stawszy sit) fon...\n",
|
|||
|
"4 <a i naturaL n ll c h warunk6w, jest gł6umJ(m ...\n",
|
|||
|
"... ...\n",
|
|||
|
"11557 mOlna było nie tylko zobaczyc sylwetkę auta, a...\n",
|
|||
|
"11558 musi bye i bt'd7ie naszI!! Krolestwo Polskie. ...\n",
|
|||
|
"11559 Rzeszy a nawet z P()II- moina %Q,dac jcdunie p...\n",
|
|||
|
"11560 zatopionych okret6w podejrnowano juz dawnlej, ...\n",
|
|||
|
"11561 lista szczęśliwców: Nagrodę główną superzestaw...\n",
|
|||
|
"\n",
|
|||
|
"[11562 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 63,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"x_test = pd.read_csv('./dev-1/in.tsv', header=None, sep='\\t')\n",
|
|||
|
"x_test"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 66,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1983.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1956.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>1958.105479</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1973.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>1970.883562</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11558</td>\n",
|
|||
|
" <td>2000.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11559</td>\n",
|
|||
|
" <td>1906.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11560</td>\n",
|
|||
|
" <td>1937.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11561</td>\n",
|
|||
|
" <td>1928.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <td>11562</td>\n",
|
|||
|
" <td>1997.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>11563 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" 0\n",
|
|||
|
"0 1983.500000\n",
|
|||
|
"1 1956.500000\n",
|
|||
|
"2 1958.105479\n",
|
|||
|
"3 1973.500000\n",
|
|||
|
"4 1970.883562\n",
|
|||
|
"... ...\n",
|
|||
|
"11558 2000.500000\n",
|
|||
|
"11559 1906.500000\n",
|
|||
|
"11560 1937.500000\n",
|
|||
|
"11561 1928.500000\n",
|
|||
|
"11562 1997.500000\n",
|
|||
|
"\n",
|
|||
|
"[11563 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 66,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"y_test = pd.read_csv('./dev-1/expected.tsv', header=None, sep='\\t')\n",
|
|||
|
"y_test"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 68,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x_test_vect = vect.transform(x_test[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 78,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"(11562, 4401862)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 78,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"x_test_vect.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 70,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred = reg.predict(x_test_vect)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 76,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"11562"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 76,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"len(y_pred)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 73,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from sklearn.metrics import mean_squared_error"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 90,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"11562"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 90,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"len(y_test[0:11562])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 82,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"42.68002861698133"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 82,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# x_test came back with 11562 rows while expected.tsv has 11563 - presumably stray double quotes\n",
|
|||
|
"# in the text merged lines under pandas' default quoting. Truncating y_test hides the mismatch but\n",
|
|||
|
"# scores shifted pairs, so re-read the inputs verbatim and predict again before scoring.\n",
|
|||
|
"x_test = pd.read_csv('./dev-1/in.tsv', header=None, sep='\\t', quoting=3)  # 3 == csv.QUOTE_NONE\n",
|
|||
|
"y_pred = reg.predict(vect.transform(x_test[0]))\n",
|
|||
|
"# Fail loudly instead of silently scoring misaligned rows.\n",
|
|||
|
"assert len(y_pred) == len(y_test), (len(y_pred), len(y_test))\n",
|
|||
|
"# RMSE as sqrt(MSE): the squared= keyword is deprecated in newer scikit-learn releases.\n",
|
|||
|
"mean_squared_error(y_test[0], y_pred) ** 0.5"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 91,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x_test_dev0 = pd.read_csv('./dev-0/in.tsv', header=None, sep='\\t')\n",
|
|||
|
"y_test_dev0 = pd.read_csv('./dev-0/expected.tsv', header=None, sep='\\t')\n",
|
|||
|
"x_test_dev0_vect = vect.transform(x_test_dev0[0])\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 92,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred_dev_0 = reg.predict(x_test_dev0_vect)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 93,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"19998"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 93,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"len(y_pred_dev_0)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"20000"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"len(y_test_dev0)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 97,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"66.03561497032095"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 97,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# x_test_dev0 produced 19998 predictions while expected.tsv has 20000 rows - presumably stray double\n",
|
|||
|
"# quotes in the text merged lines under pandas' default quoting. Truncating the labels hides the\n",
|
|||
|
"# mismatch but scores shifted pairs, so re-read the inputs verbatim and predict again before scoring.\n",
|
|||
|
"x_test_dev0 = pd.read_csv('./dev-0/in.tsv', header=None, sep='\\t', quoting=3)  # 3 == csv.QUOTE_NONE\n",
|
|||
|
"y_pred_dev_0 = reg.predict(vect.transform(x_test_dev0[0]))\n",
|
|||
|
"# Fail loudly instead of silently scoring misaligned rows.\n",
|
|||
|
"assert len(y_pred_dev_0) == len(y_test_dev0), (len(y_pred_dev_0), len(y_test_dev0))\n",
|
|||
|
"# RMSE as sqrt(MSE): the squared= keyword is deprecated in newer scikit-learn releases.\n",
|
|||
|
"mean_squared_error(y_test_dev0[0], y_pred_dev_0) ** 0.5"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 101,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"pd.DataFrame(y_pred_dev_0).to_csv('./dev-0/out.tsv', header=None, sep='\\t', index=False)\n",
|
|||
|
"pd.DataFrame(y_pred).to_csv('./dev-1/out.tsv', header=None, sep='\\t', index=False)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.7.4"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|