NN with on value
This commit is contained in:
parent
a8b9ffb939
commit
d36302317c
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,3 +7,4 @@ data_train.csv
|
||||
data.csv
|
||||
data_not_shuf.csv
|
||||
data_not_cutted.csv
|
||||
venv
|
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
34
.idea/inspectionProfiles/Project_Default.xml
Normal file
34
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,34 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="ignoredPackages">
|
||||
<value>
|
||||
<list size="7">
|
||||
<item index="0" class="java.lang.String" itemvalue="pl-core-news-sm" />
|
||||
<item index="1" class="java.lang.String" itemvalue="en-core-web-sm" />
|
||||
<item index="2" class="java.lang.String" itemvalue="livocat-core" />
|
||||
<item index="3" class="java.lang.String" itemvalue="tqdm" />
|
||||
<item index="4" class="java.lang.String" itemvalue="spacy" />
|
||||
<item index="5" class="java.lang.String" itemvalue="streamlit" />
|
||||
<item index="6" class="java.lang.String" itemvalue="requests" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<option name="ignoredErrors">
|
||||
<list>
|
||||
<option value="N802" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="ignoredIdentifiers">
|
||||
<list>
|
||||
<option value="translation_handler.fairseq_translation.FairseqTransferer" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
8
.idea/ium_444463.iml
Normal file
8
.idea/ium_444463.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.8 (ium_444463)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (ium_444463)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ium_444463.iml" filepath="$PROJECT_DIR$/.idea/ium_444463.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
113
main.py
Normal file
113
main.py
Normal file
@ -0,0 +1,113 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import scipy
|
||||
import torch
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
import kaggle
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from torch import nn
|
||||
from torch import optim
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def to_dense_tensor(matrix):
    """Convert a SciPy sparse matrix into a dense ``float32`` torch tensor.

    Args:
        matrix: Sparse matrix exposing ``toarray()`` (e.g. the CSR matrix
            returned by ``TfidfVectorizer``).

    Returns:
        A ``torch.float32`` tensor with the same shape as ``matrix``.
    """
    # ``toarray`` returns a plain ndarray (``todense`` gives ``np.matrix``) and
    # does not depend on ``scipy.sparse`` having been imported implicitly.
    return torch.as_tensor(matrix.toarray(), dtype=torch.float32)


def build_model(n_features, n_classes, hidden_size=64):
    """Build the one-hidden-layer classifier that outputs log-probabilities.

    Args:
        n_features: Width of the input feature vectors.
        n_classes: Number of target classes (width of the output layer).
        hidden_size: Number of units in the hidden layer.

    Returns:
        An ``nn.Sequential`` ending in ``LogSoftmax`` (pairs with ``NLLLoss``).
    """
    return nn.Sequential(
        nn.Linear(n_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, n_classes),
        nn.LogSoftmax(dim=1),
    )


def train_model(model, x_train, y_train, x_dev, y_dev, epochs=5, lr=0.002):
    """Train ``model`` with full-batch Adam and evaluate on the dev set.

    Args:
        model: Module mapping features to per-class log-probabilities.
        x_train: Training features.
        y_train: Training class indices (integer tensor).
        x_dev: Validation features.
        y_dev: Validation class indices (integer tensor).
        epochs: Number of full-batch optimisation steps.
        lr: Adam learning rate.

    Returns:
        Dict with the per-epoch lists ``train_loss``, ``dev_loss`` and
        ``dev_accuracy``; every entry is a plain Python float.
    """
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    history = {"train_loss": [], "dev_loss": [], "dev_accuracy": []}

    for e in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(x_train), y_train)
        loss.backward()
        optimizer.step()
        train_loss = loss.item()

        # Evaluate without building a graph; restore train mode afterwards.
        model.eval()
        with torch.no_grad():
            log_ps = model(x_dev)
            test_loss = criterion(log_ps, y_dev).item()
            hits = log_ps.argmax(dim=1) == y_dev
            test_accuracy = hits.float().mean().item()
        model.train()

        # Store floats (not 0-d tensors) so the history is directly plottable.
        history["train_loss"].append(train_loss)
        history["dev_loss"].append(test_loss)
        history["dev_accuracy"].append(test_accuracy)

        print(f"Epoch: {e + 1}/{epochs}.. ",
              f"Training Loss: {train_loss:.3f}.. ",
              f"Test Loss: {test_loss:.3f}.. ",
              f"Test Accuracy: {test_accuracy:.3f}")

    return history


def plot_history(history):
    """Plot the loss curves and the dev accuracy recorded by ``train_model``."""
    plt.figure(figsize=(12, 5))

    plt.subplot(121)
    plt.xlabel('epochs')
    plt.ylabel('negative log likelihood loss')
    plt.plot(history["train_loss"], label='Training loss')
    plt.plot(history["dev_loss"], label='Validation loss')
    plt.legend(frameon=False)

    plt.subplot(122)
    plt.xlabel('epochs')
    plt.ylabel('test accuracy')
    plt.plot(history["dev_accuracy"])

    plt.show()


def main():
    """Train a TF-IDF + feed-forward classifier on job-posting titles.

    Reads ``fake_job_postings.csv`` from the working directory, uses the
    ``title`` column as text input and the ``fraudulent`` column as target.
    """
    # The dataset can be fetched with the Kaggle API:
    # kaggle.api.authenticate()
    # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
    #                                   unzip=True)

    data = pd.read_csv('fake_job_postings.csv', engine='python')
    data = data.fillna('')

    # 3000 rows are held out, then halved into dev and test. The test half is
    # not used here but the second split is kept so the dev set is unchanged.
    data_train, data_rest = train_test_split(data, test_size=3000, random_state=1)
    data_dev, _ = train_test_split(data_rest, test_size=1500, random_state=1)

    vectorizer = TfidfVectorizer()
    x_train = to_dense_tensor(vectorizer.fit_transform(data_train["title"]))
    x_dev = to_dense_tensor(vectorizer.transform(data_dev["title"]))

    # NLLLoss expects int64 class indices.
    y_train = torch.as_tensor(data_train["fraudulent"].to_numpy(), dtype=torch.long)
    y_dev = torch.as_tensor(data_dev["fraudulent"].to_numpy(), dtype=torch.long)

    # Size the output layer by the label space, not by the number of distinct
    # titles: class indices must lie in [0, n_classes - 1].
    n_classes = int(torch.cat([y_train, y_dev]).max().item()) + 1

    model = build_model(x_train.shape[1], n_classes)
    history = train_model(model, x_train, y_train, x_dev, y_dev, epochs=5, lr=0.002)
    plot_history(history)

    print('Succes')


if __name__ == "__main__":
    main()
|
@ -1,3 +1,7 @@
|
||||
pandas
|
||||
numpy
|
||||
kaggle
|
||||
torch
|
||||
matplotlib
|
||||
scikit-learn
|
||||
scipy
|
||||
|
0
Jenkinsfile → stare_zadania/Jenkinsfile
vendored
0
Jenkinsfile → stare_zadania/Jenkinsfile
vendored
@ -4,7 +4,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "5e2107a5",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Skrypt do ściagnięcia zbiory danych\n"
|
||||
@ -14,7 +18,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "bcc889e5",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -29,14 +37,14 @@
|
||||
"Requirement already satisfied: python-slugify in /home/students/s444463/.local/lib/python3.8/site-packages (from kaggle) (6.1.1)\n",
|
||||
"Requirement already satisfied: python-dateutil in /usr/lib/python3/dist-packages (from kaggle) (2.7.3)\n",
|
||||
"Requirement already satisfied: text-unidecode>=1.3 in /home/students/s444463/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
|
||||
"\u001B[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001B[0m\n",
|
||||
"Requirement already satisfied: pandas in /usr/lib/python3/dist-packages (0.25.3)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
|
||||
"\u001B[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001B[0m\n",
|
||||
"Requirement already satisfied: numpy in /usr/lib/python3/dist-packages (1.17.4)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n"
|
||||
"\u001B[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001B[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -50,7 +58,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "02a4034f",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -70,7 +82,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "5035aef0",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -88,7 +104,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "14344d2f",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -105,8 +125,8 @@
|
||||
"Requirement already satisfied: kiwisolver>=1.0.1 in /home/students/s444463/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.7 in /usr/lib/python3/dist-packages (from matplotlib>=2.2->seaborn) (2.7.3)\n",
|
||||
"Requirement already satisfied: six in /usr/lib/python3/dist-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.14.0)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n"
|
||||
"\u001B[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001B[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -118,7 +138,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "0f5ebfab",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -534,7 +558,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "edbf49da",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -553,7 +581,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "e60b3f32",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -585,7 +617,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "ddb2fc38",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1001,7 +1037,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "c5ac75f5",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1373,7 +1413,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "4b0e77a4",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1399,7 +1443,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "5a1d8ec7",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -1411,8 +1459,8 @@
|
||||
"Requirement already satisfied: threadpoolctl>=2.0.0 in /home/students/s444463/.local/lib/python3.8/site-packages (from scikit-learn) (3.1.0)\n",
|
||||
"Requirement already satisfied: joblib>=0.11 in /usr/lib/python3/dist-packages (from scikit-learn) (0.14.0)\n",
|
||||
"Requirement already satisfied: scipy>=1.1.0 in /usr/lib/python3/dist-packages (from scikit-learn) (1.3.3)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
|
||||
"\u001B[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n",
|
||||
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001B[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
@ -1425,7 +1473,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "50813795",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@ -1461,7 +1513,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"id": "ea3c9f2e",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -1483,7 +1539,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "b20cc27a",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
3
stare_zadania/requirements.txt
Normal file
3
stare_zadania/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
pandas
|
||||
numpy
|
||||
kaggle
|
Loading…
Reference in New Issue
Block a user