import pandas as pd
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Loading the Data - because the payloads contain many special characters, the CSV is read
# with a fixed number of columns and an explicit line terminator to avoid delimiter problems.
data = pd.read_csv('../datasets/out.csv', usecols=range(2), lineterminator='\n', header=None)
y_data = data[0]  # labels
x_data = data[1]  # raw payload strings

# 80/20 train/test split (positional split, no shuffling)
split = int(0.8 * data.shape[0])
x_train = x_data[:split]
x_test = x_data[split:]
y_train = y_data[:split]
y_test = y_data[split:]

# Extracting Features - bag-of-words counts over the training payloads
count_vector = CountVectorizer()
extracted_features = count_vector.fit_transform(x_train)

# Building and Training the Model - grid search over SVM kernels and hyperparameters
tuned_parameters = {'kernel': ['rbf', 'linear'],
                    'gamma': [1e-3, 1e-4],
                    'C': [1, 10, 100, 1000]}
model = GridSearchCV(svm.SVC(), tuned_parameters)
model.fit(extracted_features, y_train)

print("Model Trained Successfully!")
print("Accuracy of the model is: ", model.score(count_vector.transform(x_test), y_test) * 100)

# Persist the trained classifier and the fitted vectorizer for later reuse
joblib.dump(model, "nb.joblib")
joblib.dump(count_vector, "vec.joblib")
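
# --- Usage sketch (assumption, not part of the original script): reload the saved
# artifacts and classify a new payload. The sample string below is a hypothetical
# placeholder, not taken from the dataset; the label meaning depends on out.csv.
loaded_model = joblib.load("nb.joblib")        # GridSearchCV-wrapped SVC from above
loaded_vectorizer = joblib.load("vec.joblib")  # fitted CountVectorizer from above
sample_payload = ["<script>alert('test')</script>"]  # hypothetical payload string
print("Predicted label for sample payload:",
      loaded_model.predict(loaded_vectorizer.transform(sample_payload))[0])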