ium_s437622/.ipynb_checkpoints/IUM-checkpoint.ipynb
2021-03-21 18:41:56 +01:00

1121 lines
40 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "consecutive-sunset",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/pogoda/.kaggle/kaggle.json'\n",
"Downloading chess.zip to /home/pogoda/dev/IUM\n",
"100%|██████████████████████████████████████| 2.77M/2.77M [00:00<00:00, 9.89MB/s]\n",
"100%|██████████████████████████████████████| 2.77M/2.77M [00:00<00:00, 9.05MB/s]\n"
]
}
],
"source": [
"# Fetch the datasnaek/chess dataset from Kaggle; it arrives as chess.zip in the\n",
"# working directory. The kaggle CLI reads its API key from ~/.kaggle/kaggle.json\n",
"# and warns when that file is readable by other users (chmod 600 silences it).\n",
"!kaggle datasets download -d datasnaek/chess"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "seven-grenada",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: chess.zip\n",
" inflating: games.csv \n"
]
}
],
"source": [
"# Unpack the archive into games.csv; -o overwrites an existing copy without\n",
"# prompting, so the cell can be re-run safely.\n",
"!unzip -o chess.zip"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "requested-phenomenon",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply\r",
"\r\n",
"TZJHLljE,FALSE,1.50421E+12,1.50421E+12,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4,D10,Slav Defense: Exchange Variation,5\r",
"\r\n",
"l1NXvwaE,TRUE,1.50413E+12,1.50413E+12,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6 Qe5+ Nxe5 c4 Bb4+,B00,Nimzowitsch Defense: Kennedy Variation,4\r",
"\r\n",
"mIICvQHh,TRUE,1.50413E+12,1.50413E+12,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc6 bxc6 Ra6 Nc4 a4 c3 a3 Nxa3 Rxa3 Rxa3 c4 dxc4 d5 cxd5 Qxd5 exd5 Be6 Ra8+ Ke7 Bc5+ Kf6 Bxf8 Kg6 Bxg7 Kxg7 dxe6 Kh6 exf7 Nf6 Rxh8 Nh5 Bxh5 Kg5 Rxh7 Kf5 Qf3+ Ke6 Bg4+ Kd6 Rh6+ Kc5 Qe3+ Kb5 c4+ Kb4 Qc3+ Ka4 Bd1#,C20,King's Pawn Game: Leonardis Variation,3\r",
"\r\n",
"kWKvrqYL,TRUE,1.50411E+12,1.50411E+12,61,mate,white,20+0,daniamurashov,1439,adivanov2009,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O-O O-O-O Nb5 Nb4 Rc1 Nxa2 Ra1 Nb4 Nxa7+ Kb8 Nb5 Bxc2 Bxc7+ Kc8 Qd2 Qc6 Na7+ Kd7 Nxc6 bxc6 Bxd8 Kxd8 Qxb4 e5 Qb8+ Ke7 dxe5 Be4 Ra7+ Ke6 Qe8+ Kf5 Qxf7+ Nf6 Nh4+ Kg5 g3 Ng4 Qf4+ Kh5 Qxg4+ Kh6 Qf4+ g5 Qf6+ Bg6 Nxg6 Bg7 Qxg7#,D02,Queen's Pawn Game: Zukertort Variation,3\r",
"\r\n"
]
}
],
"source": [
"# Peek at the raw file: the header line followed by the first four game records.\n",
"!head -n 5 games.csv"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "adjacent-jacob",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>rated</th>\n",
" <th>created_at</th>\n",
" <th>last_move_at</th>\n",
" <th>turns</th>\n",
" <th>victory_status</th>\n",
" <th>winner</th>\n",
" <th>increment_code</th>\n",
" <th>white_id</th>\n",
" <th>white_rating</th>\n",
" <th>black_id</th>\n",
" <th>black_rating</th>\n",
" <th>moves</th>\n",
" <th>opening_eco</th>\n",
" <th>opening_name</th>\n",
" <th>opening_ply</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>TZJHLljE</td>\n",
" <td>False</td>\n",
" <td>1.504210e+12</td>\n",
" <td>1.504210e+12</td>\n",
" <td>13</td>\n",
" <td>outoftime</td>\n",
" <td>white</td>\n",
" <td>15+2</td>\n",
" <td>bourgris</td>\n",
" <td>1500</td>\n",
" <td>a-00</td>\n",
" <td>1191</td>\n",
" <td>d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...</td>\n",
" <td>D10</td>\n",
" <td>Slav Defense: Exchange Variation</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>l1NXvwaE</td>\n",
" <td>True</td>\n",
" <td>1.504130e+12</td>\n",
" <td>1.504130e+12</td>\n",
" <td>16</td>\n",
" <td>resign</td>\n",
" <td>black</td>\n",
" <td>5+10</td>\n",
" <td>a-00</td>\n",
" <td>1322</td>\n",
" <td>skinnerua</td>\n",
" <td>1261</td>\n",
" <td>d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...</td>\n",
" <td>B00</td>\n",
" <td>Nimzowitsch Defense: Kennedy Variation</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>mIICvQHh</td>\n",
" <td>True</td>\n",
" <td>1.504130e+12</td>\n",
" <td>1.504130e+12</td>\n",
" <td>61</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>5+10</td>\n",
" <td>ischia</td>\n",
" <td>1496</td>\n",
" <td>a-00</td>\n",
" <td>1500</td>\n",
" <td>e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...</td>\n",
" <td>C20</td>\n",
" <td>King's Pawn Game: Leonardis Variation</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>kWKvrqYL</td>\n",
" <td>True</td>\n",
" <td>1.504110e+12</td>\n",
" <td>1.504110e+12</td>\n",
" <td>61</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>20+0</td>\n",
" <td>daniamurashov</td>\n",
" <td>1439</td>\n",
" <td>adivanov2009</td>\n",
" <td>1454</td>\n",
" <td>d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...</td>\n",
" <td>D02</td>\n",
" <td>Queen's Pawn Game: Zukertort Variation</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9tXo1AUZ</td>\n",
" <td>True</td>\n",
" <td>1.504030e+12</td>\n",
" <td>1.504030e+12</td>\n",
" <td>95</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>30+3</td>\n",
" <td>nik221107</td>\n",
" <td>1523</td>\n",
" <td>adivanov2009</td>\n",
" <td>1469</td>\n",
" <td>e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...</td>\n",
" <td>C41</td>\n",
" <td>Philidor Defense</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20053</th>\n",
" <td>EfqH7VVH</td>\n",
" <td>True</td>\n",
" <td>1.499791e+12</td>\n",
" <td>1.499791e+12</td>\n",
" <td>24</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+10</td>\n",
" <td>belcolt</td>\n",
" <td>1691</td>\n",
" <td>jamboger</td>\n",
" <td>1220</td>\n",
" <td>d4 f5 e3 e6 Nf3 Nf6 Nc3 b6 Be2 Bb7 O-O Be7 Ne5...</td>\n",
" <td>A80</td>\n",
" <td>Dutch Defense</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20054</th>\n",
" <td>WSJDhbPl</td>\n",
" <td>True</td>\n",
" <td>1.499698e+12</td>\n",
" <td>1.499699e+12</td>\n",
" <td>82</td>\n",
" <td>mate</td>\n",
" <td>black</td>\n",
" <td>10+0</td>\n",
" <td>jamboger</td>\n",
" <td>1233</td>\n",
" <td>farrukhasomiddinov</td>\n",
" <td>1196</td>\n",
" <td>d4 d6 Bf4 e5 Bg3 Nf6 e3 exd4 exd4 d5 c3 Bd6 Bd...</td>\n",
" <td>A41</td>\n",
" <td>Queen's Pawn</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20055</th>\n",
" <td>yrAas0Kj</td>\n",
" <td>True</td>\n",
" <td>1.499698e+12</td>\n",
" <td>1.499698e+12</td>\n",
" <td>35</td>\n",
" <td>mate</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>jamboger</td>\n",
" <td>1219</td>\n",
" <td>schaaksmurf3</td>\n",
" <td>1286</td>\n",
" <td>d4 d5 Bf4 Nc6 e3 Nf6 c3 e6 Nf3 Be7 Bd3 O-O Nbd...</td>\n",
" <td>D00</td>\n",
" <td>Queen's Pawn Game: Mason Attack</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20056</th>\n",
" <td>b0v4tRyF</td>\n",
" <td>True</td>\n",
" <td>1.499696e+12</td>\n",
" <td>1.499697e+12</td>\n",
" <td>109</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>marcodisogno</td>\n",
" <td>1360</td>\n",
" <td>jamboger</td>\n",
" <td>1227</td>\n",
" <td>e4 d6 d4 Nf6 e5 dxe5 dxe5 Qxd1+ Kxd1 Nd5 c4 Nb...</td>\n",
" <td>B07</td>\n",
" <td>Pirc Defense</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20057</th>\n",
" <td>N8G2JHGG</td>\n",
" <td>True</td>\n",
" <td>1.499643e+12</td>\n",
" <td>1.499644e+12</td>\n",
" <td>78</td>\n",
" <td>mate</td>\n",
" <td>black</td>\n",
" <td>10+0</td>\n",
" <td>jamboger</td>\n",
" <td>1235</td>\n",
" <td>ffbob</td>\n",
" <td>1339</td>\n",
" <td>d4 d5 Bf4 Na6 e3 e6 c3 Nf6 Nf3 Bd7 Nbd2 b5 Bd3...</td>\n",
" <td>D00</td>\n",
" <td>Queen's Pawn Game: Mason Attack</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20058 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" id rated created_at last_move_at turns victory_status \\\n",
"0 TZJHLljE False 1.504210e+12 1.504210e+12 13 outoftime \n",
"1 l1NXvwaE True 1.504130e+12 1.504130e+12 16 resign \n",
"2 mIICvQHh True 1.504130e+12 1.504130e+12 61 mate \n",
"3 kWKvrqYL True 1.504110e+12 1.504110e+12 61 mate \n",
"4 9tXo1AUZ True 1.504030e+12 1.504030e+12 95 mate \n",
"... ... ... ... ... ... ... \n",
"20053 EfqH7VVH True 1.499791e+12 1.499791e+12 24 resign \n",
"20054 WSJDhbPl True 1.499698e+12 1.499699e+12 82 mate \n",
"20055 yrAas0Kj True 1.499698e+12 1.499698e+12 35 mate \n",
"20056 b0v4tRyF True 1.499696e+12 1.499697e+12 109 resign \n",
"20057 N8G2JHGG True 1.499643e+12 1.499644e+12 78 mate \n",
"\n",
" winner increment_code white_id white_rating black_id \\\n",
"0 white 15+2 bourgris 1500 a-00 \n",
"1 black 5+10 a-00 1322 skinnerua \n",
"2 white 5+10 ischia 1496 a-00 \n",
"3 white 20+0 daniamurashov 1439 adivanov2009 \n",
"4 white 30+3 nik221107 1523 adivanov2009 \n",
"... ... ... ... ... ... \n",
"20053 white 10+10 belcolt 1691 jamboger \n",
"20054 black 10+0 jamboger 1233 farrukhasomiddinov \n",
"20055 white 10+0 jamboger 1219 schaaksmurf3 \n",
"20056 white 10+0 marcodisogno 1360 jamboger \n",
"20057 black 10+0 jamboger 1235 ffbob \n",
"\n",
" black_rating moves \\\n",
"0 1191 d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5... \n",
"1 1261 d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6... \n",
"2 1500 e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc... \n",
"3 1454 d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O... \n",
"4 1469 e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N... \n",
"... ... ... \n",
"20053 1220 d4 f5 e3 e6 Nf3 Nf6 Nc3 b6 Be2 Bb7 O-O Be7 Ne5... \n",
"20054 1196 d4 d6 Bf4 e5 Bg3 Nf6 e3 exd4 exd4 d5 c3 Bd6 Bd... \n",
"20055 1286 d4 d5 Bf4 Nc6 e3 Nf6 c3 e6 Nf3 Be7 Bd3 O-O Nbd... \n",
"20056 1227 e4 d6 d4 Nf6 e5 dxe5 dxe5 Qxd1+ Kxd1 Nd5 c4 Nb... \n",
"20057 1339 d4 d5 Bf4 Na6 e3 e6 c3 Nf6 Nf3 Bd7 Nbd2 b5 Bd3... \n",
"\n",
" opening_eco opening_name opening_ply \n",
"0 D10 Slav Defense: Exchange Variation 5 \n",
"1 B00 Nimzowitsch Defense: Kennedy Variation 4 \n",
"2 C20 King's Pawn Game: Leonardis Variation 3 \n",
"3 D02 Queen's Pawn Game: Zukertort Variation 3 \n",
"4 C41 Philidor Defense 5 \n",
"... ... ... ... \n",
"20053 A80 Dutch Defense 2 \n",
"20054 A41 Queen's Pawn 2 \n",
"20055 D00 Queen's Pawn Game: Mason Attack 3 \n",
"20056 B07 Pirc Defense 4 \n",
"20057 D00 Queen's Pawn Game: Mason Attack 3 \n",
"\n",
"[20058 rows x 16 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the raw Kaggle export; the bare name on the last line makes the notebook\n",
"# render the frame (pandas shows a truncated preview) as the cell output.\n",
"chess = pd.read_csv('games.csv')\n",
"chess"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "elegant-pursuit",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>rated</th>\n",
" <th>created_at</th>\n",
" <th>last_move_at</th>\n",
" <th>turns</th>\n",
" <th>victory_status</th>\n",
" <th>winner</th>\n",
" <th>increment_code</th>\n",
" <th>white_id</th>\n",
" <th>white_rating</th>\n",
" <th>black_id</th>\n",
" <th>black_rating</th>\n",
" <th>moves</th>\n",
" <th>opening_eco</th>\n",
" <th>opening_name</th>\n",
" <th>opening_ply</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>2.005800e+04</td>\n",
" <td>2.005800e+04</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>19113</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>400</td>\n",
" <td>9438</td>\n",
" <td>NaN</td>\n",
" <td>9331</td>\n",
" <td>NaN</td>\n",
" <td>18920</td>\n",
" <td>365</td>\n",
" <td>1477</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>XRuQPSzH</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>taranga</td>\n",
" <td>NaN</td>\n",
" <td>taranga</td>\n",
" <td>NaN</td>\n",
" <td>e4 e5</td>\n",
" <td>A00</td>\n",
" <td>Van't Kruijs Opening</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>5</td>\n",
" <td>16155</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>11147</td>\n",
" <td>10001</td>\n",
" <td>7721</td>\n",
" <td>72</td>\n",
" <td>NaN</td>\n",
" <td>82</td>\n",
" <td>NaN</td>\n",
" <td>27</td>\n",
" <td>1007</td>\n",
" <td>368</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.483617e+12</td>\n",
" <td>1.483618e+12</td>\n",
" <td>60.465999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1596.631868</td>\n",
" <td>NaN</td>\n",
" <td>1588.831987</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.816981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.850151e+10</td>\n",
" <td>2.850140e+10</td>\n",
" <td>33.570585</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>291.253376</td>\n",
" <td>NaN</td>\n",
" <td>291.036126</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.797152</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.376772e+12</td>\n",
" <td>1.376772e+12</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>784.000000</td>\n",
" <td>NaN</td>\n",
" <td>789.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.477548e+12</td>\n",
" <td>1.477548e+12</td>\n",
" <td>37.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1398.000000</td>\n",
" <td>NaN</td>\n",
" <td>1391.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.496010e+12</td>\n",
" <td>1.496010e+12</td>\n",
" <td>55.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1567.000000</td>\n",
" <td>NaN</td>\n",
" <td>1562.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.503170e+12</td>\n",
" <td>1.503170e+12</td>\n",
" <td>79.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1793.000000</td>\n",
" <td>NaN</td>\n",
" <td>1784.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.504493e+12</td>\n",
" <td>1.504494e+12</td>\n",
" <td>349.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2700.000000</td>\n",
" <td>NaN</td>\n",
" <td>2723.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>28.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id rated created_at last_move_at turns \\\n",
"count 20058 20058 2.005800e+04 2.005800e+04 20058.000000 \n",
"unique 19113 2 NaN NaN NaN \n",
"top XRuQPSzH True NaN NaN NaN \n",
"freq 5 16155 NaN NaN NaN \n",
"mean NaN NaN 1.483617e+12 1.483618e+12 60.465999 \n",
"std NaN NaN 2.850151e+10 2.850140e+10 33.570585 \n",
"min NaN NaN 1.376772e+12 1.376772e+12 1.000000 \n",
"25% NaN NaN 1.477548e+12 1.477548e+12 37.000000 \n",
"50% NaN NaN 1.496010e+12 1.496010e+12 55.000000 \n",
"75% NaN NaN 1.503170e+12 1.503170e+12 79.000000 \n",
"max NaN NaN 1.504493e+12 1.504494e+12 349.000000 \n",
"\n",
" victory_status winner increment_code white_id white_rating black_id \\\n",
"count 20058 20058 20058 20058 20058.000000 20058 \n",
"unique 4 3 400 9438 NaN 9331 \n",
"top resign white 10+0 taranga NaN taranga \n",
"freq 11147 10001 7721 72 NaN 82 \n",
"mean NaN NaN NaN NaN 1596.631868 NaN \n",
"std NaN NaN NaN NaN 291.253376 NaN \n",
"min NaN NaN NaN NaN 784.000000 NaN \n",
"25% NaN NaN NaN NaN 1398.000000 NaN \n",
"50% NaN NaN NaN NaN 1567.000000 NaN \n",
"75% NaN NaN NaN NaN 1793.000000 NaN \n",
"max NaN NaN NaN NaN 2700.000000 NaN \n",
"\n",
" black_rating moves opening_eco opening_name opening_ply \n",
"count 20058.000000 20058 20058 20058 20058.000000 \n",
"unique NaN 18920 365 1477 NaN \n",
"top NaN e4 e5 A00 Van't Kruijs Opening NaN \n",
"freq NaN 27 1007 368 NaN \n",
"mean 1588.831987 NaN NaN NaN 4.816981 \n",
"std 291.036126 NaN NaN NaN 2.797152 \n",
"min 789.000000 NaN NaN NaN 1.000000 \n",
"25% 1391.000000 NaN NaN NaN 3.000000 \n",
"50% 1562.000000 NaN NaN NaN 4.000000 \n",
"75% 1784.000000 NaN NaN NaN 6.000000 \n",
"max 2723.000000 NaN NaN NaN 28.000000 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-column summary. include='all' reports count/unique/top/freq for the\n",
"# non-numeric columns alongside the usual numeric statistics.\n",
"chess.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "coral-actress",
"metadata": {},
"outputs": [],
"source": [
"# Keep every column except the two timestamps (created_at, last_move_at) and\n",
"# write the reduced table to chess.csv without the pandas index column.\n",
"cols = [\n",
"    'id', 'rated', 'turns', 'victory_status', 'winner', 'increment_code',\n",
"    'white_id', 'white_rating', 'black_id', 'black_rating',\n",
"    'moves', 'opening_eco', 'opening_name', 'opening_ply',\n",
"]\n",
"chess = chess.loc[:, cols]\n",
"chess.to_csv(\"chess.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "prostate-lindsay",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>rated</th>\n",
" <th>turns</th>\n",
" <th>victory_status</th>\n",
" <th>winner</th>\n",
" <th>increment_code</th>\n",
" <th>white_id</th>\n",
" <th>white_rating</th>\n",
" <th>black_id</th>\n",
" <th>black_rating</th>\n",
" <th>moves</th>\n",
" <th>opening_eco</th>\n",
" <th>opening_name</th>\n",
" <th>opening_ply</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058</td>\n",
" <td>20058.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>19113</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>400</td>\n",
" <td>9438</td>\n",
" <td>NaN</td>\n",
" <td>9331</td>\n",
" <td>NaN</td>\n",
" <td>18920</td>\n",
" <td>365</td>\n",
" <td>1477</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>XRuQPSzH</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>resign</td>\n",
" <td>white</td>\n",
" <td>10+0</td>\n",
" <td>taranga</td>\n",
" <td>NaN</td>\n",
" <td>taranga</td>\n",
" <td>NaN</td>\n",
" <td>e4 e5</td>\n",
" <td>A00</td>\n",
" <td>Van't Kruijs Opening</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>5</td>\n",
" <td>16155</td>\n",
" <td>NaN</td>\n",
" <td>11147</td>\n",
" <td>10001</td>\n",
" <td>7721</td>\n",
" <td>72</td>\n",
" <td>NaN</td>\n",
" <td>82</td>\n",
" <td>NaN</td>\n",
" <td>27</td>\n",
" <td>1007</td>\n",
" <td>368</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>60.465999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1596.631868</td>\n",
" <td>NaN</td>\n",
" <td>1588.831987</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.816981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>33.570585</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>291.253376</td>\n",
" <td>NaN</td>\n",
" <td>291.036126</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.797152</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>784.000000</td>\n",
" <td>NaN</td>\n",
" <td>789.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>37.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1398.000000</td>\n",
" <td>NaN</td>\n",
" <td>1391.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>55.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1567.000000</td>\n",
" <td>NaN</td>\n",
" <td>1562.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>79.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1793.000000</td>\n",
" <td>NaN</td>\n",
" <td>1784.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>349.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2700.000000</td>\n",
" <td>NaN</td>\n",
" <td>2723.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>28.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id rated turns victory_status winner increment_code \\\n",
"count 20058 20058 20058.000000 20058 20058 20058 \n",
"unique 19113 2 NaN 4 3 400 \n",
"top XRuQPSzH True NaN resign white 10+0 \n",
"freq 5 16155 NaN 11147 10001 7721 \n",
"mean NaN NaN 60.465999 NaN NaN NaN \n",
"std NaN NaN 33.570585 NaN NaN NaN \n",
"min NaN NaN 1.000000 NaN NaN NaN \n",
"25% NaN NaN 37.000000 NaN NaN NaN \n",
"50% NaN NaN 55.000000 NaN NaN NaN \n",
"75% NaN NaN 79.000000 NaN NaN NaN \n",
"max NaN NaN 349.000000 NaN NaN NaN \n",
"\n",
" white_id white_rating black_id black_rating moves opening_eco \\\n",
"count 20058 20058.000000 20058 20058.000000 20058 20058 \n",
"unique 9438 NaN 9331 NaN 18920 365 \n",
"top taranga NaN taranga NaN e4 e5 A00 \n",
"freq 72 NaN 82 NaN 27 1007 \n",
"mean NaN 1596.631868 NaN 1588.831987 NaN NaN \n",
"std NaN 291.253376 NaN 291.036126 NaN NaN \n",
"min NaN 784.000000 NaN 789.000000 NaN NaN \n",
"25% NaN 1398.000000 NaN 1391.000000 NaN NaN \n",
"50% NaN 1567.000000 NaN 1562.000000 NaN NaN \n",
"75% NaN 1793.000000 NaN 1784.000000 NaN NaN \n",
"max NaN 2700.000000 NaN 2723.000000 NaN NaN \n",
"\n",
" opening_name opening_ply \n",
"count 20058 20058.000000 \n",
"unique 1477 NaN \n",
"top Van't Kruijs Opening NaN \n",
"freq 368 NaN \n",
"mean NaN 4.816981 \n",
"std NaN 2.797152 \n",
"min NaN 1.000000 \n",
"25% NaN 3.000000 \n",
"50% NaN 4.000000 \n",
"75% NaN 6.000000 \n",
"max NaN 28.000000 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summarise again to confirm the frame now holds only the columns listed in cols.\n",
"chess.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "periodic-behavior",
"metadata": {},
"outputs": [],
"source": [
"# Shuffle the data rows of the column-filtered chess.csv (not the raw games.csv).\n",
"# `tail -n +2` starts at line 2, i.e. it skips the CSV header, so the header can\n",
"# never end up inside a split and no game record is lost.\n",
"# NOTE: shuf is unseeded here, so every run produces a different ordering.\n",
"!tail -n +2 chess.csv | shuf > chess.csv.shuf"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "focal-embassy",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20059 chess.csv\r\n"
]
}
],
"source": [
"# Count the lines of chess.csv (the header line is included in the total).\n",
"!wc -l chess.csv"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "together-velvet",
"metadata": {},
"outputs": [],
"source": [
"# Test split: the first 2006 lines of the shuffled file.\n",
"!head -n 2006 chess.csv.shuf > chess.csv.test"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "passive-moment",
"metadata": {},
"outputs": [],
"source": [
"# Dev split: take the first 4012 shuffled lines and keep the last 2006 of them,\n",
"# i.e. lines 2007-4012, so it does not overlap the test split.\n",
"!head -n 4012 chess.csv.shuf | tail -n 2006 > chess.csv.dev"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "clean-spanish",
"metadata": {},
"outputs": [],
"source": [
"# Train split: everything from line 4013 of the shuffled file onward.\n",
"!tail -n +4013 chess.csv.shuf > chess.csv.train"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "convenient-singles",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 20059 chess.csv\r\n",
" 2006 chess.csv.dev\r\n",
" 20058 chess.csv.shuf\r\n",
" 2006 chess.csv.test\r\n",
" 16046 chess.csv.train\r\n",
" 60175 total\r\n"
]
}
],
"source": [
"# Sanity check: the test, dev and train line counts should add up to the line\n",
"# count of chess.csv.shuf.\n",
"!wc -l chess.csv*"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "spare-reporter",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}