add requirements.txt and update README
commit 0456ca00ee
parent 7400bc17e1
README.md | 14
@@ -1,2 +1,16 @@
# warsztaty-prefect

### Running

1. Download the `train.csv` and `test.csv` files from the [Kaggle challenge](https://www.kaggle.com/c/nlp-getting-started) and place them in the same directory as `main.py`

2. Install the required modules
```
> pip3 install -r requirements.txt
```

3. Run the script
```
> python3 main.py
```
main.py | 19
@@ -1,19 +1,15 @@
import pandas as pd
import matplotlib.pyplot as plt
import string
import re
import nltk
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from prefect import task, Flow
from prefect import task, Flow, context
from pandas import DataFrame
from typing import List

nltk.download('stopwords')
nltk.download('wordnet')
@@ -76,14 +72,19 @@ def preprocessing(text):

@task
def get_train_set() -> DataFrame:
    logger = context.get("logger")
    train = pd.read_csv('train.csv')
    train = train.drop(['keyword', 'location'], axis=1)
    logger.info(f"Train set: {len(train)} elements")
    return train


@task
def get_test_set() -> DataFrame:
    return pd.read_csv('test.csv')
    logger = context.get("logger")
    test = pd.read_csv('test.csv')
    logger.info(f"Test set: {len(test)} elements")
    return test


@task
@@ -141,16 +142,18 @@ def train_model(X: DataFrame, Y: DataFrame) -> LogisticRegression:

@task
def evaluate(model: LogisticRegression, X: DataFrame, Y: DataFrame) -> None:
    logger = context.get("logger")
    predictions = model.predict(X)
    count = 0
    for guess, answer in zip(predictions, Y):
        if guess == answer:
            count += 1
    print("> model score: ", count/len(Y))
    score = count/len(Y)
    logger.info(f"model score: {count/len(Y)}")


if __name__ == "__main__":
    with Flow("My First Flow!") as flow:
    with Flow("My First Prefect Flow!") as flow:
        train_data = get_train_set()
        test_data = get_test_set()
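The flow construction shown above is cut off after `get_train_set()` and `get_test_set()`. A minimal sketch of how tasks like these are typically assembled and executed with the Prefect 0.11 API used in this diff (the surrounding wiring and the `flow.run()` call are an assumption for illustration, not the repository's full `main.py`):

```
import pandas as pd
from pandas import DataFrame
from prefect import task, Flow, context


@task
def get_train_set() -> DataFrame:
    # Same pattern as in the diff: read the CSV and log its size.
    logger = context.get("logger")
    train = pd.read_csv("train.csv")
    logger.info(f"Train set: {len(train)} elements")
    return train


@task
def get_test_set() -> DataFrame:
    logger = context.get("logger")
    test = pd.read_csv("test.csv")
    logger.info(f"Test set: {len(test)} elements")
    return test


if __name__ == "__main__":
    with Flow("My First Prefect Flow!") as flow:
        # Calling tasks inside the Flow context only builds the DAG;
        # nothing executes yet.
        train_data = get_train_set()
        test_data = get_test_set()

    # Run the flow locally with Prefect's default executor.
    flow.run()
```

In Prefect 0.x, calling a task inside the `Flow` context only registers it in the DAG; `flow.run()` triggers execution, at which point the `context.get("logger")` calls emit their log lines.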
requirements.txt | 7 (new file)
@@ -0,0 +1,7 @@
nltk==3.5
numpy==1.18.5
pandas==1.0.4
prefect==0.11.5
scikit-learn==0.23.1
scipy==1.4.1
sklearn==0.0
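A quick way to confirm the pinned versions were picked up after `pip3 install -r requirements.txt` is a small check like the following (an illustrative snippet, not part of the repository):

```
# Print the installed version of each pinned package (illustrative only).
import nltk
import numpy
import pandas
import prefect
import scipy
import sklearn

for module in (nltk, numpy, pandas, prefect, scipy, sklearn):
    print(f"{module.__name__}=={module.__version__}")
```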