Multipipeline #wip

This commit is contained in:
sadurska@trui.pl 2021-05-16 22:03:44 +02:00
parent f530b9e628
commit a587a38012
2 changed files with 15 additions and 1 deletion

View File

@ -1,4 +1,4 @@
FROM ubuntu:20.04
FROM ubuntu:latest
RUN apt update && apt install -y python3 python3-pip

14
main.py
View File

@ -1,6 +1,20 @@
import string
import pandas as pd
from sklearn.model_selection import train_test_split
import nltk
# Fetch the NLTK "stopwords" corpus at import time; the stop-word list
# loaded further down via stopwords.words("english") reads from it.
nltk.download('stopwords')
from nltk.corpus import stopwords
def remove_punct(text: str) -> str:
    """Return *text* with every ASCII punctuation character removed.

    Characters listed in ``string.punctuation`` are deleted; all other
    characters (letters, digits, whitespace, non-ASCII symbols) are kept
    unchanged and in their original order.
    """
    # An empty mapping plus a deletion set makes str.translate drop the
    # punctuation characters in a single pass over the text.
    translator = str.maketrans("", "", string.punctuation)
    return text.translate(translator)
# English stop words held as a set, built once at import time so the
# per-word membership test in remove_stopwords is a hash lookup.
stop = set(stopwords.words("english"))
def remove_stopwords(text: str) -> str:
    """Return *text* lower-cased with stop words removed.

    The text is split on whitespace, each token is lower-cased, tokens
    found in the module-level ``stop`` set are dropped, and the remaining
    tokens are joined back together with single spaces.
    """
    # Lower-case each token once, then reuse it for both the membership
    # test and the output.
    lowered = (word.lower() for word in text.split())
    return " ".join(word for word in lowered if word not in stop)
def main():