analiza-danych-sportowych/.ipynb_checkpoints/Wizualizacja-checkpoint.ipynb
2024-01-12 17:32:16 +01:00

5.3 KiB

import pandas as pd
import re

# Raise both display limits in a single call so that wide/long DataFrames are
# printed up to 10000 rows and 10000 columns.
pd.set_option(
    'display.max_rows', 10000,
    'display.max_columns', 10000,
)

# Path of the games dataset, relative to the notebook's working directory.
file_path = 'games.csv'

# Load the whole CSV into memory as a DataFrame.
data = pd.read_csv(file_path)
import re

def extract_square_names(moves_list):
    """Return the destination square of every move in *moves_list*.

    Each move is expected to be a string in algebraic notation such as
    ``'Qd1'``, ``'Qxd4'`` or ``'Qb7+'``.  Moves that contain no square
    (for example ``'O-O'``) are skipped, so the result may be shorter than
    the input.

    Args:
        moves_list: Iterable of move strings.

    Returns:
        A list of two-character square names (file ``a``-``h`` followed by
        rank ``1``-``8``), in the same order as the moves they came from.
    """
    square_names = []

    for move in moves_list:
        # A disambiguated move such as 'Qh4e1' names the origin square first
        # and the destination last, so take the LAST square in the string
        # rather than the first one.
        squares = re.findall(r'[a-h][1-8]', move)
        if squares:
            square_names.append(squares[-1])

    return square_names
['Qd1', 'Qc2', 'Qc3', 'Qb3', 'Qb7+', 'Qxd7#']
['Qd1', 'Qxd4', 'Qd1', 'Qa4', 'Qh4', 'Qxg5', 'Qg3', 'Qxg6']
Square names from list1: ['d1', 'c2', 'c3', 'b3', 'b7', 'd7']
Square names from list2: ['d1', 'd4', 'd1', 'a4', 'h4', 'g5', 'g3', 'g6']
# Count how many games were recorded under each opening name and peek at the
# ten most frequent ones.
opening_number_of_games = data['opening_name'].value_counts()
opening_number_of_games.head(10)
# openings = ['Sicilian Defense', 'Old Benoni Defense', "Queen's Pawn Game: Mason Attack"]

# Select the games whose opening name contains 'Old Benoni Defense'
# (case-insensitive) and remember how many there are.
old_benoni_games = data[
    data['opening_name'].str.contains('Old Benoni Defense', case = False)
]
old_benoni_count = len(old_benoni_games)

# Trace the queen moves of both sides through the first selected game.
for text in old_benoni_games.head(1)['moves']:
    moves = text.split()
    # Each trace is seeded with a starting entry: 'Qd1' for White, 'Qd8' for Black.
    white_queen_moves = ['Qd1']
    black_queen_moves = ['Qd8']
    for idx, move in enumerate(moves):
        if not move.startswith("Q"):
            continue
        # Even positions in the move list are treated as White's moves,
        # odd positions as Black's.
        (black_queen_moves if idx % 2 else white_queen_moves).append(move)
    print(f'White queen moves: {extract_square_names(white_queen_moves)}')
    print(f'Black queen moves: {extract_square_names(black_queen_moves)}')
# list1 = ['Qd1', 'Qc2', 'Qc3', 'Qb3', 'Qb7+', 'Qxd7#']
# list2 = ['Qd1', 'Qxd4', 'Qd1', 'Qa4', 'Qh4', 'Qxg5', 'Qg3', 'Qxg6']
# print(white_queen_moves)
# print(list1)
['Qd1', 'Qxd4', 'Qd1', 'Qa4', 'Qh4', 'Qxg5', 'Qg3', 'Qxg6']
White queen moves: ['d1', 'd4', 'd1', 'a4', 'h4', 'g5', 'g3', 'g6']
['Qd8', 'Qe7', 'Qc7', 'Qg7', 'Qxg6']
Black queen moves: ['d8', 'e7', 'c7', 'g7', 'g6']


# Keep only the games in which BOTH players are rated above 2200 Elo.
# The two conditions are combined into one boolean mask: chaining two separate
# selections (data[mask_a][mask_b]) applies a full-length mask to an already
# filtered frame, which makes pandas reindex the mask and emit
# "UserWarning: Boolean Series key will be reindexed to match DataFrame index".
filtered_data = data[(data['white_rating'] > 2200) & (data['black_rating'] > 2200)]

# Compute the mean opening_ply for every unique value of the 'opening_name' column.
average_opening_ply = filtered_data.groupby('opening_name')['opening_ply'].mean()

# Show the mean opening_ply per opening for games with Elo > 2200.
print(average_opening_ply)
opening_name
Alekhine Defense: Exchange Variation                 9.0
Anderssen Opening                                    1.0
Benko Gambit Accepted |  Fully Accepted Variation    9.0
Benko Gambit Declined |  Quiet Line                  7.0
Benoni Defense: Benoni-Indian Defense                4.0
                                                    ... 
Trompowsky Attack                                    3.0
Van Geet Opening: Dunst-Perrenet Gambit              5.0
Vienna Game #2                                       6.0
Vienna Game: Vienna Gambit |  Main Line              6.0
Yusupov-Rubinstein System                            5.0
Name: opening_ply, Length: 112, dtype: float64
/var/folders/lm/cbc3n48n4x94zd3vf6zbbly40000gn/T/ipykernel_2729/977498901.py:12: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  filtered_data = data[data['white_rating'] > 2200][data['black_rating'] > 2200]