38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
import joblib
|
||
|
import matplotlib.pyplot as plt
|
||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||
|
from sklearn.naive_bayes import MultinomialNB, GaussianNB
|
||
|
from sklearn import svm
|
||
|
from sklearn.model_selection import GridSearchCV
|
||
|
|
||
|
# Loading the Data
# The payloads contain many special characters (including the delimiter), so
# only a fixed number of columns (the first two) is read to avoid delimiter
# problems. The file has no header row.
data = pd.read_csv(
    '../datasets/out.csv',
    usecols=range(2),
    lineterminator='\n',
    header=None,
)

# Column 0 is used as the training target, column 1 as the input text.
y_data = data[0]
x_data = data[1]

# Sequential 80/20 split: the first 80% of the rows form the training set and
# the remaining rows form the test set. Rows are taken in file order.
# NOTE(review): no shuffling is done here; if the CSV is ordered by label the
# test set may not be representative - confirm how the file is generated.
split = int(0.8 * data.shape[0])
x_train = x_data[:split]
x_test = x_data[split:]
y_train = y_data[:split]
y_test = y_data[split:]

# Extracting Features
# Fit the vectorizer on the training text only and convert that text into a
# token-count matrix; the same fitted vectorizer is reused later for the test
# text.
count_vector = CountVectorizer()
extracted_features = count_vector.fit_transform(x_train)

# Building and Training the Model
# Grid-search an SVM classifier over kernel / C / gamma.
# `gamma` only applies to the RBF kernel, so each kernel gets its own grid;
# crossing gamma with the linear kernel would just refit identical linear
# models once per gamma value.
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]
model = GridSearchCV(svm.SVC(), tuned_parameters)
model.fit(extracted_features, y_train)
print("Model Trained Successfully!")

# Score the fitted search object on the held-out rows (as a percentage),
# transforming the test text with the vectorizer fitted on the training text.
print("Accuracy of the model is: ",model.score(count_vector.transform(x_test),y_test)*100)

# Persist the fitted search object and the fitted vectorizer.
# NOTE: the model file is named "nb.joblib" even though it stores the SVM grid
# search; the name is kept unchanged because other code may load it by name.
joblib.dump(model, "nb.joblib")
joblib.dump(count_vector, "vec.joblib")