TFIDF proj commit
This commit is contained in:
parent
655535d50d
commit
830a36db52
@ -7,4 +7,7 @@
|
||||
<orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyDocumentationSettings">
|
||||
<option name="renderExternalDocumentation" value="true" />
|
||||
</component>
|
||||
</module>
|
7
.idea/other.xml
Normal file
7
.idea/other.xml
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="PySciProjectComponent">
|
||||
<option name="PY_SCI_VIEW" value="true" />
|
||||
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
|
||||
</component>
|
||||
</project>
|
99856
dev-0/out.tsv
99856
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
BIN
l_regression.pkl
BIN
l_regression.pkl
Binary file not shown.
@ -32,7 +32,7 @@ def train():
|
||||
tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1,1)) #Konwertuje tekst w dokumencie do macierzy tfidf , ngram_range - lb słów w sekwencji
|
||||
x = tfidf.fit_transform(created_dictionary)
|
||||
#PCA - principal component analysis
|
||||
pca = TruncatedSVD(n_components=100) # Liniowa redukcja wymiarów , n_components - Pożądana wymiarowość danych wyjściowych
|
||||
pca = TruncatedSVD(n_components=200) # Liniowa redukcja wymiarów , n_components - Pożądana wymiarowość danych wyjściowych
|
||||
x_pca = pca.fit_transform(x)
|
||||
l_regression = LinearRegression()
|
||||
l_regression.fit(x_pca,expected_dictionary)
|
||||
|
@ -17,7 +17,7 @@ def predict():
|
||||
testA_vector = tfidf.fit_transform(testA)
|
||||
|
||||
#print(testA_vector)
|
||||
pca = TruncatedSVD(n_components=100)
|
||||
pca = TruncatedSVD(n_components=200)
|
||||
|
||||
dev0_pca = pca.fit_transform(dev0_vector)
|
||||
testA_pca = pca.fit_transform(testA_vector)
|
||||
|
99856
test-A/out.tsv
99856
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
BIN
tfidf_model.pkl
BIN
tfidf_model.pkl
Binary file not shown.
Loading…
Reference in New Issue
Block a user