TFIDF proj commit

This commit is contained in:
Bartusiak 2020-05-05 14:47:21 +02:00
parent 655535d50d
commit 830a36db52
8 changed files with 99868 additions and 99858 deletions

View File

@@ -7,4 +7,7 @@
<orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="renderExternalDocumentation" value="true" />
</component>
</module>

7
.idea/other.xml Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW" value="true" />
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -32,7 +32,7 @@ def train():
tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1,1)) #Konwertuje tekst w dokumencie do macierzy tfidf , ngram_range - lb słów w sekwencji
x = tfidf.fit_transform(created_dictionary)
#PCA - principal component analysis
pca = TruncatedSVD(n_components=100) # Liniowa redukcja wymiarów , n_components - Pożądana wymiarowość danych wyjściowych
pca = TruncatedSVD(n_components=200) # Liniowa redukcja wymiarów , n_components - Pożądana wymiarowość danych wyjściowych
x_pca = pca.fit_transform(x)
l_regression = LinearRegression()
l_regression.fit(x_pca,expected_dictionary)

View File

@@ -17,7 +17,7 @@ def predict():
testA_vector = tfidf.fit_transform(testA)
#print(testA_vector)
pca = TruncatedSVD(n_components=100)
pca = TruncatedSVD(n_components=200)
dev0_pca = pca.fit_transform(dev0_vector)
testA_pca = pca.fit_transform(testA_vector)

File diff suppressed because it is too large Load Diff

Binary file not shown.