import json
import os
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.offsetbox import OffsetImage
from matplotlib.patches import Arc, Rectangle, ConnectionPatch
from pandas import json_normalize
#import squarify
# Directory containing the event JSON files; every entry in it is read.
path = """C:\\\\Users\\\\Koushik\\\\Downloads\\\\open-data-master\\\\open-data-master\\\\data\\\\my_events\\\\"""

# Columns kept for every shot event.
shot_columns = ['location', 'shot_body_part_id', 'shot_end_location',
                'shot_one_on_one', 'shot_technique_id', 'shot_type_id',
                'under_pressure', 'shot_outcome_id']

# Collect one frame of shots per file and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole
# accumulator on every iteration.
shot_frames = []
for filename in os.listdir(path):
    with open(path + filename, encoding="utf8") as data_file:
        data = json.load(data_file)
    # Flatten nested event dicts into columns such as "type_name".
    df = json_normalize(data, sep="_")
    df = df[df['type_name'] == "Shot"]
    # reindex rather than .loc: a file whose shots never set an optional
    # field has no such column, and .loc would raise KeyError; reindex
    # fills it with NaN, which the later fillna() calls already expect.
    df = df.reindex(columns=shot_columns)
    shot_frames.append(df)

if shot_frames:
    Xg_req = pd.concat(shot_frames, ignore_index=True, sort=False)
else:
    # No input files: keep an empty frame that still has the expected columns.
    Xg_req = pd.DataFrame(columns=shot_columns)
# Shot start location: one [x, y] list per row, split into two columns.
shot_xy = pd.DataFrame(Xg_req.location.values.tolist(),
                       index=Xg_req.index).reindex(columns=range(2))
Xg_req['shot_X_axis'] = shot_xy[0]
Xg_req['shot_Y_axis'] = shot_xy[1]

# Shot end location: rows may carry two or three coordinates. Forcing three
# columns keeps the assignment valid even when no row has a third value.
shot_end_xyz = pd.DataFrame(Xg_req.shot_end_location.values.tolist(),
                            index=Xg_req.index).reindex(columns=range(3))
Xg_req['shot_end_X_axis'] = shot_end_xyz[0]
Xg_req['shot_end_Y_axis'] = shot_end_xyz[1]
# A missing third coordinate is treated as height 0.
Xg_req['shot_end_Z_axis'] = shot_end_xyz[2].fillna(0)

# The flags are only present on events where they are true; absent means
# False. Cast to bool so the columns are not left as object dtype, which
# downstream estimators may refuse.
Xg_req['shot_one_on_one'] = Xg_req.shot_one_on_one.fillna(False).astype(bool)
Xg_req['under_pressure'] = Xg_req.under_pressure.fillna(False).astype(bool)

# Drop the raw list-valued location columns, keeping only scalar features.
Xg_req = Xg_req.loc[:, ['shot_body_part_id', 'shot_one_on_one', 'shot_technique_id',
                        'shot_type_id', 'under_pressure', 'shot_outcome_id',
                        'shot_end_X_axis', 'shot_end_Y_axis', 'shot_end_Z_axis',
                        'shot_X_axis', 'shot_Y_axis']]
import math # to calculate distance from shot location to goal post ends

# Shot coordinates as float arrays.
x = np.asarray(Xg_req.shot_X_axis.values, dtype=float)
y = np.asarray(Xg_req.shot_Y_axis.values, dtype=float)

# Distances from each shot to the two goal-post ends at (120, 44) and (120, 36).
dist_ca = np.hypot(x - 120, y - 44)  # CA
dist_ab = np.hypot(x - 120, y - 36)  # AB

# Angle at the shot location between the two posts, via the law of cosines:
# inverse_cos((b^2 + c^2 - a^2) / (2 * b * c)), with a^2 = 64 being the
# squared distance between the posts (44 - 36 = 8).
# A shot taken exactly on a post gives 0/0 and therefore NaN.
with np.errstate(divide='ignore', invalid='ignore'):
    cos_A = (dist_ca ** 2 + dist_ab ** 2 - 64) / (2 * dist_ca * dist_ab)
# Rounding can push the cosine marginally outside [-1, 1] for shots in line
# with both posts; clip so arccos returns 0 or 180 degrees instead of failing.
angle_A = np.degrees(np.arccos(np.clip(cos_A, -1.0, 1.0)))

Xg_req['shot_angle'] = angle_A
# Shots that ended in a goal (shot_outcome_id 97 == GOAL), location columns only.
is_goal = Xg_req['shot_outcome_id'] == 97
goal = Xg_req[is_goal]
goal = goal.loc[:, ['shot_X_axis', 'shot_Y_axis', 'shot_end_X_axis', 'shot_end_Y_axis', 'shot_end_Z_axis']]

# Value to be predicted: 1.0 for a goal, 0.0 otherwise.
# The earlier loop had lost its `else`, so every label was reset to 0.
Pred_X = is_goal.values.astype(float)
def draw_pitch(ax):
    """Draw the outline of a full 120 x 80 football pitch on ``ax``.

    Adds the pitch boundary, halfway line, both penalty areas, both six-yard
    boxes, the centre circle and spot, and the penalty spots and arcs.

    Args:
        ax: Matplotlib axes the patches are added to. Axis limits are left
            for the caller to set.
    """
    #Pitch Outline & Centre Line
    Pitch = Rectangle([0,0], width = 120, height = 80, fill = False)
    #Left, Right Penalty Area and midline
    LeftPenalty = Rectangle([0,22.3], width = 14.6, height = 35.3, fill = False)
    RightPenalty = Rectangle([105.4,22.3], width = 14.6, height = 35.3, fill = False)
    midline = ConnectionPatch([60,0], [60,80], "data", "data")

    #Left, Right 6-yard Box
    LeftSixYard = Rectangle([0,32], width = 4.9, height = 16, fill = False)
    RightSixYard = Rectangle([115.1,32], width = 4.9, height = 16, fill = False)

    #Prepare Circles
    centreCircle = plt.Circle((60,40),8.1,color="black", fill = False)
    centreSpot = plt.Circle((60,40),0.71,color="black")
    #Penalty spots and Arcs around penalty boxes
    leftPenSpot = plt.Circle((9.7,40),0.71,color="black")
    rightPenSpot = plt.Circle((110.3,40),0.71,color="black")
    leftArc = Arc((9.7,40),height=16.2,width=16.2,angle=0,theta1=310,theta2=50,color="black")
    rightArc = Arc((110.3,40),height=16.2,width=16.2,angle=0,theta1=130,theta2=230,color="black")
    element = [Pitch, LeftPenalty, RightPenalty, midline, LeftSixYard, RightSixYard, centreCircle, 
               centreSpot, rightPenSpot, leftPenSpot, leftArc, rightArc]
    # Attach every shape to the axes; without this nothing is drawn.
    for patch in element:
        ax.add_patch(patch)
# Figure 1: the pitch outline.
fig = plt.figure() #set up the figures
fig.set_size_inches(7, 5)
ax = fig.add_subplot(1, 1, 1)  # axes that draw_pitch adds its patches to
draw_pitch(ax) #overlay our different objects on the pitch
plt.ylim(-2, 82)
plt.xlim(-2, 122)

# Figure 2: end position of every shot (Y against Z), goals in red.
# Opened as its own figure so these points are not drawn over the pitch.
plt.figure()
plt.scatter(Xg_req.shot_end_Y_axis, Xg_req.shot_end_Z_axis, alpha=0.5)
plt.scatter(goal.shot_end_Y_axis, goal.shot_end_Z_axis, alpha=0.5, color='red')

# Figure 3: end position of goals only, on fixed axis limits.
# Separate figure again, so these limits do not rescale the earlier plots.
plt.figure()
plt.ylim(0, 7)
plt.xlim(0, 80)

plt.scatter(goal.shot_end_Y_axis, goal.shot_end_Z_axis, alpha=0.5)
# Feature matrix and target for the models.
# NOTE(review): the shot_end_* columns describe where the shot finished, which
# looks like information only available after the outcome - confirm they
# belong in the feature set.
X = Xg_req[['shot_body_part_id', 'shot_one_on_one', 'shot_technique_id','shot_type_id', 'under_pressure','shot_end_X_axis',
       'shot_end_Y_axis', 'shot_end_Z_axis', 'shot_X_axis', 'shot_Y_axis','shot_angle']]
Y = Pred_X
#LogisticRegression model for predicting Xg
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
log_r = LogisticRegression()

# Hold out 20% of the shots for evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=52)
log_r.fit(X_train, y_train)
# Report both splits. The earlier single print scored the training split
# while labelling it "test accuracy".
print("Log regression train accuracy {:.3f}".format(log_r.score(X_train, y_train)))
print("Log regression test accuracy {:.3f}".format(log_r.score(X_test, y_test)))
# Output recorded in the original source (computed on the training split):
#   Log regression test accuracy 0.900
prediction = log_r.predict(X_test)
# Called through the module so the result can keep the name `confusion_matrix`
# without being computed from a name it is about to overwrite.
confusion_matrix = metrics.confusion_matrix(y_test, prediction)
print(confusion_matrix)
# Output recorded in the original source:
#   [[325   5]
#    [ 43   4]]
from sklearn.metrics import accuracy_score
from sklearn import metrics
import xgboost as xgb  #xgboost model
# Gradient-boosted classifier trained on the same split as the logistic model.
model = xgb.XGBClassifier()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
#prediction = log_r.predict(X_test)
# Called through the module so the result can keep the name `confusion_matrix`
# without being computed from a name it is about to overwrite.
confusion_matrix = metrics.confusion_matrix(y_test, prediction)
print(confusion_matrix)
# Output recorded in the original source:
#   [[325   5]
#    [ 14  33]]
#!pip install --upgrade tensorflow
