ium_s437622/IUM.ipynb

52 KiB
Raw Blame History

# Download the datasnaek/chess dataset archive from Kaggle via the Kaggle CLI.
!kaggle datasets download -d datasnaek/chess
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/pogoda/.kaggle/kaggle.json'
Downloading chess.zip to /home/pogoda/dev/IUM
100%|██████████████████████████████████████| 2.77M/2.77M [00:00<00:00, 9.89MB/s]
100%|██████████████████████████████████████| 2.77M/2.77M [00:00<00:00, 9.05MB/s]
# Unpack the downloaded archive; -o overwrites existing files without prompting.
!unzip -o chess.zip 
Archive:  chess.zip
  inflating: games.csv               
# Peek at the first 5 lines of the extracted CSV (header line first).
!head -n 5 games.csv 
id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply

TZJHLljE,FALSE,1.50421E+12,1.50421E+12,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4,D10,Slav Defense: Exchange Variation,5

l1NXvwaE,TRUE,1.50413E+12,1.50413E+12,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6 Qe5+ Nxe5 c4 Bb4+,B00,Nimzowitsch Defense: Kennedy Variation,4

mIICvQHh,TRUE,1.50413E+12,1.50413E+12,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc6 bxc6 Ra6 Nc4 a4 c3 a3 Nxa3 Rxa3 Rxa3 c4 dxc4 d5 cxd5 Qxd5 exd5 Be6 Ra8+ Ke7 Bc5+ Kf6 Bxf8 Kg6 Bxg7 Kxg7 dxe6 Kh6 exf7 Nf6 Rxh8 Nh5 Bxh5 Kg5 Rxh7 Kf5 Qf3+ Ke6 Bg4+ Kd6 Rh6+ Kc5 Qe3+ Kb5 c4+ Kb4 Qc3+ Ka4 Bd1#,C20,King's Pawn Game: Leonardis Variation,3

kWKvrqYL,TRUE,1.50411E+12,1.50411E+12,61,mate,white,20+0,daniamurashov,1439,adivanov2009,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O-O O-O-O Nb5 Nb4 Rc1 Nxa2 Ra1 Nb4 Nxa7+ Kb8 Nb5 Bxc2 Bxc7+ Kc8 Qd2 Qc6 Na7+ Kd7 Nxc6 bxc6 Bxd8 Kxd8 Qxb4 e5 Qb8+ Ke7 dxe5 Be4 Ra7+ Ke6 Qe8+ Kf5 Qxf7+ Nf6 Nh4+ Kg5 g3 Ng4 Qf4+ Kh5 Qxg4+ Kh6 Qf4+ g5 Qf6+ Bg6 Nxg6 Bg7 Qxg7#,D02,Queen's Pawn Game: Zukertort Variation,3

Opis kolumn:
- id - identyfikator gry
- rated - czy gra rankingowa
- created_at - timestamp utworzenia gry
- last_move_at - timestamp ostatniego ruchu
- turns - liczba posunięć
- victory_status - sposób zakończenia rozgrywki
- winner - wygrany gracz
- increment_code - czas na partię i dodawany czas
- white_id - id grającego białymi
- white_rating - ranking grającego białymi
- black_id - id grającego czarnymi
- black_rating - ranking grającego czarnymi
- moves - lista wykonanych posunięć
- opening_eco - kod otwarcia
- opening_name - nazwa otwarcia
- opening_ply - liczba książkowych posunięć
import pandas as pd
# Load the extracted games table into a DataFrame; the bare name on the last
# line makes the notebook render the frame as the cell output.
chess = pd.read_csv('games.csv')
chess
id rated created_at last_move_at turns victory_status winner increment_code white_id white_rating black_id black_rating moves opening_eco opening_name opening_ply
0 TZJHLljE False 1.504210e+12 1.504210e+12 13 outoftime white 15+2 bourgris 1500 a-00 1191 d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5... D10 Slav Defense: Exchange Variation 5
1 l1NXvwaE True 1.504130e+12 1.504130e+12 16 resign black 5+10 a-00 1322 skinnerua 1261 d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6... B00 Nimzowitsch Defense: Kennedy Variation 4
2 mIICvQHh True 1.504130e+12 1.504130e+12 61 mate white 5+10 ischia 1496 a-00 1500 e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc... C20 King's Pawn Game: Leonardis Variation 3
3 kWKvrqYL True 1.504110e+12 1.504110e+12 61 mate white 20+0 daniamurashov 1439 adivanov2009 1454 d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O... D02 Queen's Pawn Game: Zukertort Variation 3
4 9tXo1AUZ True 1.504030e+12 1.504030e+12 95 mate white 30+3 nik221107 1523 adivanov2009 1469 e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N... C41 Philidor Defense 5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
20053 EfqH7VVH True 1.499791e+12 1.499791e+12 24 resign white 10+10 belcolt 1691 jamboger 1220 d4 f5 e3 e6 Nf3 Nf6 Nc3 b6 Be2 Bb7 O-O Be7 Ne5... A80 Dutch Defense 2
20054 WSJDhbPl True 1.499698e+12 1.499699e+12 82 mate black 10+0 jamboger 1233 farrukhasomiddinov 1196 d4 d6 Bf4 e5 Bg3 Nf6 e3 exd4 exd4 d5 c3 Bd6 Bd... A41 Queen's Pawn 2
20055 yrAas0Kj True 1.499698e+12 1.499698e+12 35 mate white 10+0 jamboger 1219 schaaksmurf3 1286 d4 d5 Bf4 Nc6 e3 Nf6 c3 e6 Nf3 Be7 Bd3 O-O Nbd... D00 Queen's Pawn Game: Mason Attack 3
20056 b0v4tRyF True 1.499696e+12 1.499697e+12 109 resign white 10+0 marcodisogno 1360 jamboger 1227 e4 d6 d4 Nf6 e5 dxe5 dxe5 Qxd1+ Kxd1 Nd5 c4 Nb... B07 Pirc Defense 4
20057 N8G2JHGG True 1.499643e+12 1.499644e+12 78 mate black 10+0 jamboger 1235 ffbob 1339 d4 d5 Bf4 Na6 e3 e6 c3 Nf6 Nf3 Bd7 Nbd2 b5 Bd3... D00 Queen's Pawn Game: Mason Attack 3

20058 rows × 16 columns

# Summary statistics for every column; include='all' also covers the
# non-numeric columns (count / unique / top / freq).
chess.describe(include='all')
id rated created_at last_move_at turns victory_status winner increment_code white_id white_rating black_id black_rating moves opening_eco opening_name opening_ply
count 20058 20058 2.005800e+04 2.005800e+04 20058.000000 20058 20058 20058 20058 20058.000000 20058 20058.000000 20058 20058 20058 20058.000000
unique 19113 2 NaN NaN NaN 4 3 400 9438 NaN 9331 NaN 18920 365 1477 NaN
top XRuQPSzH True NaN NaN NaN resign white 10+0 taranga NaN taranga NaN e4 e5 A00 Van't Kruijs Opening NaN
freq 5 16155 NaN NaN NaN 11147 10001 7721 72 NaN 82 NaN 27 1007 368 NaN
mean NaN NaN 1.483617e+12 1.483618e+12 60.465999 NaN NaN NaN NaN 1596.631868 NaN 1588.831987 NaN NaN NaN 4.816981
std NaN NaN 2.850151e+10 2.850140e+10 33.570585 NaN NaN NaN NaN 291.253376 NaN 291.036126 NaN NaN NaN 2.797152
min NaN NaN 1.376772e+12 1.376772e+12 1.000000 NaN NaN NaN NaN 784.000000 NaN 789.000000 NaN NaN NaN 1.000000
25% NaN NaN 1.477548e+12 1.477548e+12 37.000000 NaN NaN NaN NaN 1398.000000 NaN 1391.000000 NaN NaN NaN 3.000000
50% NaN NaN 1.496010e+12 1.496010e+12 55.000000 NaN NaN NaN NaN 1567.000000 NaN 1562.000000 NaN NaN NaN 4.000000
75% NaN NaN 1.503170e+12 1.503170e+12 79.000000 NaN NaN NaN NaN 1793.000000 NaN 1784.000000 NaN NaN NaN 6.000000
max NaN NaN 1.504493e+12 1.504494e+12 349.000000 NaN NaN NaN NaN 2700.000000 NaN 2723.000000 NaN NaN NaN 28.000000
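Usunięcie kolumn: id, rated, czasu rozpoczęcia i zakończenia partii (created_at, last_move_at), id białych i czarnych (white_id, black_id), listy ruchów (moves) oraz kodu otwarcia (opening_eco) i liczby książkowych posunięć (opening_ply)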
# Columns to retain; every other column of the loaded table is dropped.
cols = [
    'turns',
    'victory_status',
    'winner',
    'increment_code',
    'white_rating',
    'black_rating',
    'opening_name',
]
chess = chess.loc[:, cols]
# Persist the reduced table; index=False keeps the row index out of the file.
chess.to_csv("chess.csv", index=False)
Średnia, minimum, maksimum, odchylenie standardowe i mediana wartości poszczególnych parametrów
# Summary statistics of the reduced table (mean, std, min, quartiles, max for
# numeric columns; count / unique / top / freq for the categorical ones).
chess.describe(include='all')
turns victory_status winner increment_code white_rating black_rating opening_name
count 20058.000000 20058 20058 20058 20058.000000 20058.000000 20058
unique NaN 4 3 400 NaN NaN 1477
top NaN resign white 10+0 NaN NaN Van't Kruijs Opening
freq NaN 11147 10001 7721 NaN NaN 368
mean 60.465999 NaN NaN NaN 1596.631868 1588.831987 NaN
std 33.570585 NaN NaN NaN 291.253376 291.036126 NaN
min 1.000000 NaN NaN NaN 784.000000 789.000000 NaN
25% 37.000000 NaN NaN NaN 1398.000000 1391.000000 NaN
50% 55.000000 NaN NaN NaN 1567.000000 1562.000000 NaN
75% 79.000000 NaN NaN NaN 1793.000000 1784.000000 NaN
max 349.000000 NaN NaN NaN 2700.000000 2723.000000 NaN
# Shuffle only the data rows of chess.csv (the column-reduced export).
# `tail -n +2` skips the CSV header line so it is not shuffled in among the
# games, and no game row is discarded.
!tail -n +2 chess.csv | shuf > chess.csv.shuf
# Count the lines of chess.csv (the header line is included in the total).
!wc -l chess.csv
20059 chess.csv
# Carve the shuffled file into three disjoint parts by line position.
# Lines 1-2006 become the test set.
!head -n 2006 chess.csv.shuf > test.csv
# Lines 2007-4012 (the last 2006 of the first 4012) become the dev set.
!head -n 4012 chess.csv.shuf | tail -n 2006 > dev.csv
# Everything from line 4013 onward becomes the training set.
!tail -n +4013 chess.csv.shuf > train.csv
Wielkość zbiorów
# Line counts of every CSV file in the working directory, plus the total.
!wc -l *.csv
   20059 chess.csv
    2006 dev.csv
   20059 games.csv
    2006 test.csv
   16046 train.csv
   60176 total
# Load the three splits. They are cut by line position out of a shuffled file,
# so the first line of each is an ordinary game row rather than a header;
# header=None stops pandas from consuming that row as column names.
# Columns are therefore labelled 0..N-1 in file order.
chess_test = pd.read_csv('test.csv', header=None)
chess_train = pd.read_csv('train.csv', header=None)
chess_dev = pd.read_csv('dev.csv', header=None)
# Bar chart of how often each game-ending type (victory_status) occurs.
# Series.value_counts() is used because the top-level pd.value_counts() is
# deprecated.
chess['victory_status'].value_counts().plot.bar()
<AxesSubplot:>
# Bar chart of the distribution of the `winner` column.
# Series.value_counts() is used because the top-level pd.value_counts() is
# deprecated.
chess['winner'].value_counts().plot.bar()
<AxesSubplot:>