Multipipeline #wip
This commit is contained in:
parent
f530b9e628
commit
a587a38012
@ -1,4 +1,4 @@
|
||||
FROM ubuntu:20.04
|
||||
FROM ubuntu:latest
|
||||
|
||||
RUN apt update && apt install -y python3 python3-pip
|
||||
|
||||
|
14
main.py
14
main.py
@ -1,6 +1,20 @@
|
||||
import string
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
import nltk
|
||||
# Fetch the NLTK 'stopwords' resource when this module is loaded; the
# stop-word list is read from nltk.corpus.stopwords further down.
nltk.download('stopwords')
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
|
||||
# Translation table that deletes every character in string.punctuation.
# Built once at import time instead of on every call.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def remove_punct(text):
    """Return *text* with all ASCII punctuation characters removed.

    Every character listed in ``string.punctuation`` is deleted; all other
    characters (letters, digits, whitespace) are kept unchanged.

    Args:
        text: String to clean.

    Returns:
        A new string without punctuation characters.
    """
    return text.translate(_PUNCT_TABLE)
|
||||
|
||||
|
||||
# English stop words from NLTK, stored as a set; remove_stopwords() checks
# each lowercased word for membership in it.
stop = set(stopwords.words("english"))
|
||||
def remove_stopwords(text, *, stop_words=None):
    """Lowercase *text* and drop every word that is a stop word.

    The text is split on whitespace, each word is lowercased, and the words
    not found in the stop-word collection are re-joined with single spaces.

    Args:
        text: String to filter.
        stop_words: Optional collection of lowercase words to remove.
            When omitted, the module-level ``stop`` set is used.

    Returns:
        The remaining lowercased words joined by a single space, or an
        empty string when nothing remains.
    """
    if stop_words is None:
        # Resolved at call time so the module-level set stays the default.
        stop_words = stop
    # Lowercase each word once, then filter on the lowercased form.
    lowered = (word.lower() for word in text.split())
    return " ".join(word for word in lowered if word not in stop_words)
|
||||
|
||||
|
||||
def main():
|
||||
|
Loading…
Reference in New Issue
Block a user