ium_s437622/IUM.ipynb

1091 lines
38 KiB
Plaintext
Raw Permalink Normal View History

2021-03-21 18:41:56 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
2021-03-21 19:12:28 +01:00
"id": "stunning-architecture",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/pogoda/.kaggle/kaggle.json'\n",
"Downloading chess.zip to /home/pogoda/dev/IUM\n",
"100%|██████████████████████████████████████| 2.77M/2.77M [00:00<00:00, 9.89MB/s]\n",
"100%|██████████████████████████████████████| 2.77M/2.77M [00:00<00:00, 9.05MB/s]\n"
]
}
],
"source": [
"!kaggle datasets download -d datasnaek/chess"
]
},
{
"cell_type": "code",
"execution_count": 2,
2021-03-21 19:12:28 +01:00
"id": "sharp-burton",
2021-03-21 18:41:56 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: chess.zip\n",
" inflating: games.csv \n"
]
}
],
"source": [
"!unzip -o chess.zip "
]
},
{
"cell_type": "code",
"execution_count": 3,
2021-03-21 19:12:28 +01:00
"id": "multiple-elder",
"metadata": {
"scrolled": true
},
2021-03-21 18:41:56 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply\r",
"\r\n",
"TZJHLljE,FALSE,1.50421E+12,1.50421E+12,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4,D10,Slav Defense: Exchange Variation,5\r",
"\r\n",
"l1NXvwaE,TRUE,1.50413E+12,1.50413E+12,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6 Qe5+ Nxe5 c4 Bb4+,B00,Nimzowitsch Defense: Kennedy Variation,4\r",
"\r\n",
"mIICvQHh,TRUE,1.50413E+12,1.50413E+12,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc6 bxc6 Ra6 Nc4 a4 c3 a3 Nxa3 Rxa3 Rxa3 c4 dxc4 d5 cxd5 Qxd5 exd5 Be6 Ra8+ Ke7 Bc5+ Kf6 Bxf8 Kg6 Bxg7 Kxg7 dxe6 Kh6 exf7 Nf6 Rxh8 Nh5 Bxh5 Kg5 Rxh7 Kf5 Qf3+ Ke6 Bg4+ Kd6 Rh6+ Kc5 Qe3+ Kb5 c4+ Kb4 Qc3+ Ka4 Bd1#,C20,King's Pawn Game: Leonardis Variation,3\r",
"\r\n",
"kWKvrqYL,TRUE,1.50411E+12,1.50411E+12,61,mate,white,20+0,daniamurashov,1439,adivanov2009,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O-O O-O-O Nb5 Nb4 Rc1 Nxa2 Ra1 Nb4 Nxa7+ Kb8 Nb5 Bxc2 Bxc7+ Kc8 Qd2 Qc6 Na7+ Kd7 Nxc6 bxc6 Bxd8 Kxd8 Qxb4 e5 Qb8+ Ke7 dxe5 Be4 Ra7+ Ke6 Qe8+ Kf5 Qxf7+ Nf6 Nh4+ Kg5 g3 Ng4 Qf4+ Kh5 Qxg4+ Kh6 Qf4+ g5 Qf6+ Bg6 Nxg6 Bg7 Qxg7#,D02,Queen's Pawn Game: Zukertort Variation,3\r",
"\r\n"
]
}
],
"source": [
"!head -n 5 games.csv "
]
},
2021-03-21 19:12:28 +01:00
{
"cell_type": "raw",
"id": "pressed-vancouver",
"metadata": {},
"source": [
"id - identyfikator gry\n",
"rated - czy gra rankingowa\n",
"created_at - timestamp utworzenia gry\n",
"last_move_at - timestamp ostatniego ruchu\n",
"turns - liczba posunięć\n",
"victory_status - sposób zakończenia rozgrywki\n",
"winner - wygrany gracz\n",
"increment_code - czas na partię i dodawany czas\n",
"white_id - id grającego białymi\n",
"white_rating - ranking grającego białymi\n",
"black_id - id grającego czarnymi\n",
"black_rating - ranking grającego czarnymi\n",
"moves - lista wykonanych posunięć\n",
"opening_eco - kod otwarcia\n",
"opening_name - nazwa otwarcia\n",
"opening_ply - liczba książkowych posunięć"
]
},
2021-03-21 18:41:56 +01:00
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 50,
2021-03-21 19:12:28 +01:00
"id": "talented-beatles",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>rated</th>\n",
" <th>created_at</th>\n",
" <th>last_move_at</th>\n",
" <th>turns</th>\n",
" <th>victory_status</th>\n",
" <th>winner</th>\n",
" <th>increment_code</th>\n",
" <th>white_id</th>\n",
" <th>white_rating</th>\n",
" <th>black_id</th>\n",
" <th>black_rating</th>\n",
" <th>moves</th>\n",
" <th>opening_eco</th>\n",
" <th>opening_name</th>\n",
" <th>opening_ply</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>TZJHLljE</td>\n",
" <td>False</td>\n",
" <td>1.504210e+12</td>\n",
" <td>1.504210e+12</td>\n",
" <td>13</td>\n",
" <td>outoftime</td>\n",
" <td>white</td>\n",
" <td>15+2</td>\n",
" <td>bourgris</td>\n",
" <td>1500</td>\n",
" <td>a-00</td>\n",
" <td>1191</td>\n",
" <td>d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...</td>\n",
" <td>D10</td>\n",
" <td>Slav Defense: Exchange Variation</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>l1NXvwaE</td>\n",
" <td>True</td>\n",
" <td>1.504130e+12</td>\n",
" <td>1.504130e+12</td>\n",
" <td>16</td>\n",
" <td>resign</td>\n",
" <td>black</td>\n",
" <td>5+10</td>\n",
" <td>a-00</td>\n",
" <td>1322</td>\n",
" <td>skinnerua</td>\n",
" <td>1261</td>\n",
" <td>d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...</td>\n",
" <td>B00</td>\n",
" <td>Nimzowitsch Defense: Kennedy Variation</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>mIICvQHh</td>\n",
" <td>True</td>\n",
" <td>1.504130e+12</td>\n",
" <td>1.504130e+12</td>\n",
" <td>61</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>5+10</td>\n",
" <td>ischia</td>\n",
" <td>1496</td>\n",
" <td>a-00</td>\n",
" <td>1500</td>\n",
" <td>e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...</td>\n",
" <td>C20</td>\n",
" <td>King's Pawn Game: Leonardis Variation</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>kWKvrqYL</td>\n",
" <td>True</td>\n",
" <td>1.504110e+12</td>\n",
" <td>1.504110e+12</td>\n",
" <td>61</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>20+0</td>\n",
" <td>daniamurashov</td>\n",
" <td>1439</td>\n",
" <td>adivanov2009</td>\n",
" <td>1454</td>\n",
" <td>d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...</td>\n",
" <td>D02</td>\n",
" <td>Queen's Pawn Game: Zukertort Variation</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9tXo1AUZ</td>\n",
" <td>True</td>\n",
" <td>1.504030e+12</td>\n",
" <td>1.504030e+12</td>\n",
" <td>95</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>30+3</td>\n",
" <td>nik221107</td>\n",
" <td>1523</td>\n",
" <td>adivanov2009</td>\n",
" <td>1469</td>\n",
" <td>e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...</td>\n",
" <td>C41</td>\n",
" <td>Philidor Defense</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20053</th>\n",
" <td>EfqH7VVH</td>\n",
" <td>True</td>\n",
" <td>1.499791e+12</td>\n",
" <td>1.499791e+12</td>\n",
" <td>24</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+10</td>\n",
" <td>belcolt</td>\n",
" <td>1691</td>\n",
" <td>jamboger</td>\n",
" <td>1220</td>\n",
" <td>d4 f5 e3 e6 Nf3 Nf6 Nc3 b6 Be2 Bb7 O-O Be7 Ne5...</td>\n",
" <td>A80</td>\n",
" <td>Dutch Defense</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20054</th>\n",
" <td>WSJDhbPl</td>\n",
" <td>True</td>\n",
" <td>1.499698e+12</td>\n",
" <td>1.499699e+12</td>\n",
" <td>82</td>\n",
" <td>mate</td>\n",
" <td>black</td>\n",
" <td>10+0</td>\n",
" <td>jamboger</td>\n",
" <td>1233</td>\n",
" <td>farrukhasomiddinov</td>\n",
" <td>1196</td>\n",
" <td>d4 d6 Bf4 e5 Bg3 Nf6 e3 exd4 exd4 d5 c3 Bd6 Bd...</td>\n",
" <td>A41</td>\n",
" <td>Queen's Pawn</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20055</th>\n",
" <td>yrAas0Kj</td>\n",
" <td>True</td>\n",
" <td>1.499698e+12</td>\n",
" <td>1.499698e+12</td>\n",
" <td>35</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>jamboger</td>\n",
" <td>1219</td>\n",
" <td>schaaksmurf3</td>\n",
" <td>1286</td>\n",
" <td>d4 d5 Bf4 Nc6 e3 Nf6 c3 e6 Nf3 Be7 Bd3 O-O Nbd...</td>\n",
" <td>D00</td>\n",
" <td>Queen's Pawn Game: Mason Attack</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20056</th>\n",
" <td>b0v4tRyF</td>\n",
" <td>True</td>\n",
" <td>1.499696e+12</td>\n",
" <td>1.499697e+12</td>\n",
" <td>109</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>marcodisogno</td>\n",
" <td>1360</td>\n",
" <td>jamboger</td>\n",
" <td>1227</td>\n",
" <td>e4 d6 d4 Nf6 e5 dxe5 dxe5 Qxd1+ Kxd1 Nd5 c4 Nb...</td>\n",
" <td>B07</td>\n",
" <td>Pirc Defense</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20057</th>\n",
" <td>N8G2JHGG</td>\n",
" <td>True</td>\n",
" <td>1.499643e+12</td>\n",
" <td>1.499644e+12</td>\n",
" <td>78</td>\n",
" <td>mate</td>\n",
" <td>black</td>\n",
" <td>10+0</td>\n",
" <td>jamboger</td>\n",
" <td>1235</td>\n",
" <td>ffbob</td>\n",
" <td>1339</td>\n",
" <td>d4 d5 Bf4 Na6 e3 e6 c3 Nf6 Nf3 Bd7 Nbd2 b5 Bd3...</td>\n",
" <td>D00</td>\n",
" <td>Queen's Pawn Game: Mason Attack</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20058 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" id rated created_at last_move_at turns victory_status \\\n",
"0 TZJHLljE False 1.504210e+12 1.504210e+12 13 outoftime \n",
"1 l1NXvwaE True 1.504130e+12 1.504130e+12 16 resign \n",
"2 mIICvQHh True 1.504130e+12 1.504130e+12 61 mate \n",
"3 kWKvrqYL True 1.504110e+12 1.504110e+12 61 mate \n",
"4 9tXo1AUZ True 1.504030e+12 1.504030e+12 95 mate \n",
"... ... ... ... ... ... ... \n",
"20053 EfqH7VVH True 1.499791e+12 1.499791e+12 24 resign \n",
"20054 WSJDhbPl True 1.499698e+12 1.499699e+12 82 mate \n",
"20055 yrAas0Kj True 1.499698e+12 1.499698e+12 35 mate \n",
"20056 b0v4tRyF True 1.499696e+12 1.499697e+12 109 resign \n",
"20057 N8G2JHGG True 1.499643e+12 1.499644e+12 78 mate \n",
"\n",
" winner increment_code white_id white_rating black_id \\\n",
"0 white 15+2 bourgris 1500 a-00 \n",
"1 black 5+10 a-00 1322 skinnerua \n",
"2 white 5+10 ischia 1496 a-00 \n",
"3 white 20+0 daniamurashov 1439 adivanov2009 \n",
"4 white 30+3 nik221107 1523 adivanov2009 \n",
"... ... ... ... ... ... \n",
"20053 white 10+10 belcolt 1691 jamboger \n",
"20054 black 10+0 jamboger 1233 farrukhasomiddinov \n",
"20055 white 10+0 jamboger 1219 schaaksmurf3 \n",
"20056 white 10+0 marcodisogno 1360 jamboger \n",
"20057 black 10+0 jamboger 1235 ffbob \n",
"\n",
" black_rating moves \\\n",
"0 1191 d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5... \n",
"1 1261 d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6... \n",
"2 1500 e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc... \n",
"3 1454 d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O... \n",
"4 1469 e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N... \n",
"... ... ... \n",
"20053 1220 d4 f5 e3 e6 Nf3 Nf6 Nc3 b6 Be2 Bb7 O-O Be7 Ne5... \n",
"20054 1196 d4 d6 Bf4 e5 Bg3 Nf6 e3 exd4 exd4 d5 c3 Bd6 Bd... \n",
"20055 1286 d4 d5 Bf4 Nc6 e3 Nf6 c3 e6 Nf3 Be7 Bd3 O-O Nbd... \n",
"20056 1227 e4 d6 d4 Nf6 e5 dxe5 dxe5 Qxd1+ Kxd1 Nd5 c4 Nb... \n",
"20057 1339 d4 d5 Bf4 Na6 e3 e6 c3 Nf6 Nf3 Bd7 Nbd2 b5 Bd3... \n",
"\n",
" opening_eco opening_name opening_ply \n",
"0 D10 Slav Defense: Exchange Variation 5 \n",
"1 B00 Nimzowitsch Defense: Kennedy Variation 4 \n",
"2 C20 King's Pawn Game: Leonardis Variation 3 \n",
"3 D02 Queen's Pawn Game: Zukertort Variation 3 \n",
"4 C41 Philidor Defense 5 \n",
"... ... ... ... \n",
"20053 A80 Dutch Defense 2 \n",
"20054 A41 Queen's Pawn 2 \n",
"20055 D00 Queen's Pawn Game: Mason Attack 3 \n",
"20056 B07 Pirc Defense 4 \n",
"20057 D00 Queen's Pawn Game: Mason Attack 3 \n",
"\n",
"[20058 rows x 16 columns]"
]
},
2021-05-15 18:18:35 +02:00
"execution_count": 50,
2021-03-21 18:41:56 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"chess=pd.read_csv('games.csv')\n",
"chess"
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 51,
2021-03-21 19:12:28 +01:00
"id": "pointed-grass",
2021-03-21 18:41:56 +01:00
"metadata": {
2021-03-21 19:12:28 +01:00
"scrolled": false
2021-03-21 18:41:56 +01:00
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>rated</th>\n",
" <th>created_at</th>\n",
" <th>last_move_at</th>\n",
" <th>turns</th>\n",
" <th>victory_status</th>\n",
" <th>winner</th>\n",
" <th>increment_code</th>\n",
" <th>white_id</th>\n",
" <th>white_rating</th>\n",
" <th>black_id</th>\n",
" <th>black_rating</th>\n",
" <th>moves</th>\n",
" <th>opening_eco</th>\n",
" <th>opening_name</th>\n",
" <th>opening_ply</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>2.005800e+04</td>\n",
" <td>2.005800e+04</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>19113</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>400</td>\n",
" <td>9438</td>\n",
" <td>NaN</td>\n",
" <td>9331</td>\n",
" <td>NaN</td>\n",
" <td>18920</td>\n",
" <td>365</td>\n",
" <td>1477</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>XRuQPSzH</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>taranga</td>\n",
" <td>NaN</td>\n",
" <td>taranga</td>\n",
" <td>NaN</td>\n",
" <td>e4 e5</td>\n",
" <td>A00</td>\n",
" <td>Van't Kruijs Opening</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>5</td>\n",
" <td>16155</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>11147</td>\n",
" <td>10001</td>\n",
" <td>7721</td>\n",
" <td>72</td>\n",
" <td>NaN</td>\n",
" <td>82</td>\n",
" <td>NaN</td>\n",
" <td>27</td>\n",
" <td>1007</td>\n",
" <td>368</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.483617e+12</td>\n",
" <td>1.483618e+12</td>\n",
" <td>60.465999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1596.631868</td>\n",
" <td>NaN</td>\n",
" <td>1588.831987</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.816981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.850151e+10</td>\n",
" <td>2.850140e+10</td>\n",
" <td>33.570585</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>291.253376</td>\n",
" <td>NaN</td>\n",
" <td>291.036126</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.797152</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.376772e+12</td>\n",
" <td>1.376772e+12</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>784.000000</td>\n",
" <td>NaN</td>\n",
" <td>789.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.477548e+12</td>\n",
" <td>1.477548e+12</td>\n",
" <td>37.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1398.000000</td>\n",
" <td>NaN</td>\n",
" <td>1391.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.496010e+12</td>\n",
" <td>1.496010e+12</td>\n",
" <td>55.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1567.000000</td>\n",
" <td>NaN</td>\n",
" <td>1562.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.503170e+12</td>\n",
" <td>1.503170e+12</td>\n",
" <td>79.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1793.000000</td>\n",
" <td>NaN</td>\n",
" <td>1784.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.504493e+12</td>\n",
" <td>1.504494e+12</td>\n",
" <td>349.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2700.000000</td>\n",
" <td>NaN</td>\n",
" <td>2723.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>28.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id rated created_at last_move_at turns \\\n",
"count 20058 20058 2.005800e+04 2.005800e+04 20058.000000 \n",
"unique 19113 2 NaN NaN NaN \n",
"top XRuQPSzH True NaN NaN NaN \n",
"freq 5 16155 NaN NaN NaN \n",
"mean NaN NaN 1.483617e+12 1.483618e+12 60.465999 \n",
"std NaN NaN 2.850151e+10 2.850140e+10 33.570585 \n",
"min NaN NaN 1.376772e+12 1.376772e+12 1.000000 \n",
"25% NaN NaN 1.477548e+12 1.477548e+12 37.000000 \n",
"50% NaN NaN 1.496010e+12 1.496010e+12 55.000000 \n",
"75% NaN NaN 1.503170e+12 1.503170e+12 79.000000 \n",
"max NaN NaN 1.504493e+12 1.504494e+12 349.000000 \n",
"\n",
" victory_status winner increment_code white_id white_rating black_id \\\n",
"count 20058 20058 20058 20058 20058.000000 20058 \n",
"unique 4 3 400 9438 NaN 9331 \n",
"top resign white 10+0 taranga NaN taranga \n",
"freq 11147 10001 7721 72 NaN 82 \n",
"mean NaN NaN NaN NaN 1596.631868 NaN \n",
"std NaN NaN NaN NaN 291.253376 NaN \n",
"min NaN NaN NaN NaN 784.000000 NaN \n",
"25% NaN NaN NaN NaN 1398.000000 NaN \n",
"50% NaN NaN NaN NaN 1567.000000 NaN \n",
"75% NaN NaN NaN NaN 1793.000000 NaN \n",
"max NaN NaN NaN NaN 2700.000000 NaN \n",
"\n",
" black_rating moves opening_eco opening_name opening_ply \n",
"count 20058.000000 20058 20058 20058 20058.000000 \n",
"unique NaN 18920 365 1477 NaN \n",
"top NaN e4 e5 A00 Van't Kruijs Opening NaN \n",
"freq NaN 27 1007 368 NaN \n",
"mean 1588.831987 NaN NaN NaN 4.816981 \n",
"std 291.036126 NaN NaN NaN 2.797152 \n",
"min 789.000000 NaN NaN NaN 1.000000 \n",
"25% 1391.000000 NaN NaN NaN 3.000000 \n",
"50% 1562.000000 NaN NaN NaN 4.000000 \n",
"75% 1784.000000 NaN NaN NaN 6.000000 \n",
"max 2723.000000 NaN NaN NaN 28.000000 "
]
},
2021-05-15 18:18:35 +02:00
"execution_count": 51,
2021-03-21 18:41:56 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chess.describe(include='all')"
]
},
2021-03-21 19:12:28 +01:00
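{
"cell_type": "raw",
"id": "equal-resort",
"metadata": {},
"source": [
"Usunięcie zbędnych kolumn (m.in. id, czasu rozpoczęcia i zakończenia partii, id białych i czarnych oraz listy ruchów) - pozostają tylko turns, winner, white_rating i black_rating.\n",
"Zwycięzca zostaje zakodowany liczbowo: białe = 0, czarne = 1, pozostałe wartości (remis) = 2."
]
},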
2021-03-21 18:41:56 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "representative-lodge",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Keep only the columns used downstream. .copy() yields an independent frame,\n",
"# so the assignment below writes to it directly rather than to a slice of the\n",
"# loaded frame (element-wise chained indexing raised SettingWithCopyWarning).\n",
"cols=['turns','winner','white_rating','black_rating']\n",
"chess=chess[cols].copy()\n",
"\n",
"# Encode the result as an integer label: white -> 0, black -> 1, any other value -> 2.\n",
"# Values missing from the mapping become NaN in map() and are then filled with 2.\n",
"winner_codes={'white':0,'black':1}\n",
"chess['winner']=chess['winner'].map(winner_codes).fillna(2).astype(int)\n",
"\n",
"chess.to_csv(\"chess.csv\", index=False)"
]
},
2021-03-21 19:12:28 +01:00
{
"cell_type": "raw",
"id": "fitting-investigator",
"metadata": {},
"source": [
"Średnia, minimum, maksimum, odchylenie standardowe i mediana wartości poszczególnych parametrów"
]
},
2021-03-21 18:41:56 +01:00
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 53,
2021-03-21 19:12:28 +01:00
"id": "fiscal-vacation",
2021-03-21 18:41:56 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>turns</th>\n",
" <th>winner</th>\n",
" <th>white_rating</th>\n",
" <th>black_rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>20058.000000</td>\n",
2021-05-15 18:18:35 +02:00
" <td>20058.0</td>\n",
2021-03-21 18:41:56 +01:00
" <td>20058.000000</td>\n",
" <td>20058.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>NaN</td>\n",
2021-05-15 18:18:35 +02:00
" <td>3.0</td>\n",
2021-03-21 18:41:56 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>NaN</td>\n",
2021-05-15 18:18:35 +02:00
" <td>0.0</td>\n",
2021-03-21 18:41:56 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>NaN</td>\n",
2021-05-15 18:18:35 +02:00
" <td>10001.0</td>\n",
2021-03-21 18:41:56 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>60.465999</td>\n",
" <td>NaN</td>\n",
" <td>1596.631868</td>\n",
" <td>1588.831987</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>33.570585</td>\n",
" <td>NaN</td>\n",
" <td>291.253376</td>\n",
" <td>291.036126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>784.000000</td>\n",
" <td>789.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>37.000000</td>\n",
" <td>NaN</td>\n",
" <td>1398.000000</td>\n",
" <td>1391.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>55.000000</td>\n",
" <td>NaN</td>\n",
" <td>1567.000000</td>\n",
" <td>1562.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>79.000000</td>\n",
" <td>NaN</td>\n",
" <td>1793.000000</td>\n",
" <td>1784.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>349.000000</td>\n",
" <td>NaN</td>\n",
" <td>2700.000000</td>\n",
" <td>2723.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-05-15 18:18:35 +02:00
" turns winner white_rating black_rating\n",
"count 20058.000000 20058.0 20058.000000 20058.000000\n",
"unique NaN 3.0 NaN NaN\n",
"top NaN 0.0 NaN NaN\n",
"freq NaN 10001.0 NaN NaN\n",
"mean 60.465999 NaN 1596.631868 1588.831987\n",
"std 33.570585 NaN 291.253376 291.036126\n",
"min 1.000000 NaN 784.000000 789.000000\n",
"25% 37.000000 NaN 1398.000000 1391.000000\n",
"50% 55.000000 NaN 1567.000000 1562.000000\n",
"75% 79.000000 NaN 1793.000000 1784.000000\n",
"max 349.000000 NaN 2700.000000 2723.000000"
2021-03-21 18:41:56 +01:00
]
},
2021-05-15 18:18:35 +02:00
"execution_count": 53,
2021-03-21 18:41:56 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chess.describe(include='all')"
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 54,
2021-03-21 19:12:28 +01:00
"id": "painted-shift",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-05-15 18:18:35 +02:00
"!tail -n +2 chess.csv | shuf > chess.csv.shuf"
2021-03-21 18:41:56 +01:00
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 55,
2021-03-21 19:12:28 +01:00
"id": "thick-circular",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20059 chess.csv\r\n"
]
}
],
"source": [
"!wc -l chess.csv"
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 56,
2021-03-21 19:12:28 +01:00
"id": "adverse-scope",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-21 19:12:28 +01:00
"!head -n 2006 chess.csv.shuf > test.csv"
2021-03-21 18:41:56 +01:00
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 57,
2021-03-21 19:12:28 +01:00
"id": "fiscal-contemporary",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-21 19:12:28 +01:00
"!head -n 4012 chess.csv.shuf | tail -n 2006 > dev.csv"
2021-03-21 18:41:56 +01:00
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 58,
2021-03-21 19:12:28 +01:00
"id": "possible-witness",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-21 19:12:28 +01:00
"!tail -n +4013 chess.csv.shuf > train.csv"
2021-03-21 18:41:56 +01:00
]
},
{
2021-03-21 19:12:28 +01:00
"cell_type": "raw",
"id": "reflected-alias",
2021-03-21 18:41:56 +01:00
"metadata": {},
2021-03-21 19:12:28 +01:00
"source": [
"Wielkość zbiorów"
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": 59,
2021-03-21 19:12:28 +01:00
"id": "entire-mathematics",
"metadata": {
"scrolled": true
},
2021-03-21 18:41:56 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2021-05-15 18:18:35 +02:00
" 20059 chess.csv\r\n",
" 2006 dev.csv\r\n",
" 20059 games.csv\r\n",
" 2006 test.csv\r\n",
" 16046 train.csv\r\n",
" 60176 total\r\n"
2021-03-21 18:41:56 +01:00
]
}
],
"source": [
2021-03-21 19:12:28 +01:00
"!wc -l *.csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "genetic-moscow",
"metadata": {},
"outputs": [],
"source": [
"# The split files contain data rows only: the header was stripped with\n",
"# `tail -n +2` before shuffling. Pass the column names explicitly, otherwise\n",
"# read_csv would consume the first record of every split as the header.\n",
"split_cols=['turns','winner','white_rating','black_rating']\n",
"chess_test=pd.read_csv('test.csv', header=None, names=split_cols)\n",
"chess_train=pd.read_csv('train.csv', header=None, names=split_cols)\n",
"chess_dev=pd.read_csv('dev.csv', header=None, names=split_cols)"
]
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": null,
"id": "metropolitan-retention",
2021-03-21 19:12:28 +01:00
"metadata": {},
2021-05-15 18:18:35 +02:00
"outputs": [],
"source": []
2021-03-21 19:12:28 +01:00
},
{
"cell_type": "code",
2021-05-15 18:18:35 +02:00
"execution_count": null,
"id": "bound-beatles",
2021-03-21 19:12:28 +01:00
"metadata": {},
2021-05-15 18:18:35 +02:00
"outputs": [],
"source": []
2021-03-21 18:41:56 +01:00
},
{
"cell_type": "code",
"execution_count": null,
2021-05-15 18:18:35 +02:00
"id": "premium-prayer",
2021-03-21 18:41:56 +01:00
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}