task(UM_03) Tweaks in main
This commit is contained in:
parent
78c8da15a8
commit
5484926efc
7
main.py
7
main.py
@ -3,7 +3,7 @@ from sklearn.model_selection import train_test_split
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
data = pd.read_csv('resources/data.csv', header=0, sep=',')
|
data = pd.read_csv('resources/Amazon_Consumer_Reviews.csv', header=0, sep=',')
|
||||||
|
|
||||||
columns = ['reviews.date', 'reviews.numHelpful', 'reviews.rating', 'reviews.doRecommend']
|
columns = ['reviews.date', 'reviews.numHelpful', 'reviews.rating', 'reviews.doRecommend']
|
||||||
string_columns = ['name', 'brand', 'categories', 'primaryCategories', 'keys', 'manufacturer', 'reviews.title',
|
string_columns = ['name', 'brand', 'categories', 'primaryCategories', 'keys', 'manufacturer', 'reviews.title',
|
||||||
@ -14,7 +14,8 @@ def main():
|
|||||||
for c in string_columns:
|
for c in string_columns:
|
||||||
data[c] = data[c].str.lower()
|
data[c] = data[c].str.lower()
|
||||||
|
|
||||||
# print(data.isnull().sum())
|
print("Empty rows summary:")
|
||||||
|
print(data.isnull().sum())
|
||||||
data.dropna()
|
data.dropna()
|
||||||
|
|
||||||
data.to_csv('resources/data.csv')
|
data.to_csv('resources/data.csv')
|
||||||
@ -25,7 +26,7 @@ def main():
|
|||||||
train.to_csv('resources/train.csv')
|
train.to_csv('resources/train.csv')
|
||||||
dev.to_csv('resources/dev.csv')
|
dev.to_csv('resources/dev.csv')
|
||||||
|
|
||||||
print("Mean reviews rating for each primary category: ")
|
print("\n\nMean reviews rating for each primary category: ")
|
||||||
print(data[["primaryCategories", "reviews.rating"]].groupby("primaryCategories").mean())
|
print(data[["primaryCategories", "reviews.rating"]].groupby("primaryCategories").mean())
|
||||||
|
|
||||||
print("\n\nCounted primary categories: ")
|
print("\n\nCounted primary categories: ")
|
||||||
|
5001
resources/Amazon_Consumer_Reviews.csv
Normal file
5001
resources/Amazon_Consumer_Reviews.csv
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user