This commit is contained in:
Klaudia 2021-04-25 00:51:40 +02:00
parent e4adfb04dc
commit 1ca4058a3f
3 changed files with 238 additions and 0 deletions

64
Skrypt.py Normal file
View File

@ -0,0 +1,64 @@
#!/usr/bin/env python
# coding: utf-8
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import string
from stop_words import get_stop_words
# Polish stop words; tokens found in this list are dropped from every
# document before vectorisation.
stop_words = get_stop_words('polish')

# --- dev-0: cluster the documents of dev-0/in.tsv (one document per line) ---
# The context manager guarantees the handle is closed even if reading fails.
with open('dev-0/in.tsv', 'r', encoding="utf-8") as inp1:
    linia1 = inp1.readlines()

# Pre-process each line: split on whitespace, remove commas from every token,
# and keep only the tokens that are not stop words.
prep = []
for x in linia1:
    tokens = (y.replace(",", "") for y in x.split())
    prep.append(" ".join(t for t in tokens if t not in stop_words))

# NOTE: despite its name, `vectorizer1` holds the TF-IDF document-term matrix
# returned by fit_transform, not the vectorizer object itself.
vectorizer1 = TfidfVectorizer().fit_transform(prep)

# Assign each document to one of 25 clusters.
predict1 = KMeans(n_clusters=25, max_iter=1000).fit_predict(vectorizer1)
print(predict1)

# Open the output only once the labels exist, so a failure above does not
# leave a truncated, empty out.tsv behind. One cluster label per line, in
# input order.
with open("dev-0/out.tsv", "w", encoding="utf-8") as out1:
    out1.writelines(str(x) + '\n' for x in predict1)
# --- test-A: cluster the documents of test-A/in.tsv (one document per line) ---
# The context manager guarantees the handle is closed even if reading fails.
with open('test-A/in.tsv', 'r', encoding="utf-8") as inp2:
    linia2 = inp2.readlines()

# Pre-process the test-A lines themselves: split on whitespace, remove commas
# from every token, and keep only the tokens that are not in `stop_words`.
# (The loop previously iterated over the dev-0 lines, so the test-A input was
# read but never used.)
prep2 = []
for x2 in linia2:
    tokens2 = (y2.replace(",", "") for y2 in x2.split())
    prep2.append(" ".join(t2 for t2 in tokens2 if t2 not in stop_words))

# Build the TF-IDF matrix from the test-A documents (previously fitted on the
# dev-0 documents by mistake). NOTE: despite its name, `vectorizer2` holds the
# document-term matrix returned by fit_transform, not the vectorizer object.
vectorizer2 = TfidfVectorizer().fit_transform(prep2)

# Assign each document to one of 25 clusters.
predict2 = KMeans(n_clusters=25, max_iter=1000).fit_predict(vectorizer2)
print(predict2)

# Open the output only once the labels exist, so a failure above does not
# leave a truncated, empty out.tsv behind. One cluster label per line, in
# input order.
with open("test-A/out.tsv", "w", encoding="utf-8") as out2:
    out2.writelines(str(y) + '\n' for y in predict2)

87
dev-0/out.tsv Normal file
View File

@ -0,0 +1,87 @@
7
0
1
6
22
20
3
4
8
5
4
20
0
7
21
11
7
22
12
17
4
24
9
2
18
15
10
9
24
0
3
3
3
7
21
23
7
7
10
6
8
14
3
6
14
7
23
0
16
3
18
1
3
15
13
2
0
0
3
19
3
7
3
0
20
19
3
14
7
23
22
12
14
3
0
4
18
13
6
7
5
4
5
17
3
15
0
1 7
2 0
3 1
4 6
5 22
6 20
7 3
8 4
9 8
10 5
11 4
12 20
13 0
14 7
15 21
16 11
17 7
18 22
19 12
20 17
21 4
22 24
23 9
24 2
25 18
26 15
27 10
28 9
29 24
30 0
31 3
32 3
33 3
34 7
35 21
36 23
37 7
38 7
39 10
40 6
41 8
42 14
43 3
44 6
45 14
46 7
47 23
48 0
49 16
50 3
51 18
52 1
53 3
54 15
55 13
56 2
57 0
58 0
59 3
60 19
61 3
62 7
63 3
64 0
65 20
66 19
67 3
68 14
69 7
70 23
71 22
72 12
73 14
74 3
75 0
76 4
77 18
78 13
79 6
80 7
81 5
82 4
83 5
84 17
85 3
86 15
87 0

87
test-A/out.tsv Normal file
View File

@ -0,0 +1,87 @@
9
0
2
16
10
18
13
23
18
3
6
18
0
9
12
12
21
10
14
0
11
24
20
22
23
4
15
20
20
0
7
7
7
8
12
1
8
9
15
16
18
17
13
19
17
1
1
0
18
7
23
2
7
4
5
22
0
0
7
24
1
21
13
0
18
24
7
17
8
1
10
14
17
7
0
0
23
5
19
0
3
6
3
10
7
9
0
1 9
2 0
3 2
4 16
5 10
6 18
7 13
8 23
9 18
10 3
11 6
12 18
13 0
14 9
15 12
16 12
17 21
18 10
19 14
20 0
21 11
22 24
23 20
24 22
25 23
26 4
27 15
28 20
29 20
30 0
31 7
32 7
33 7
34 8
35 12
36 1
37 8
38 9
39 15
40 16
41 18
42 17
43 13
44 19
45 17
46 1
47 1
48 0
49 18
50 7
51 23
52 2
53 7
54 4
55 5
56 22
57 0
58 0
59 7
60 24
61 1
62 21
63 13
64 0
65 18
66 24
67 7
68 17
69 8
70 1
71 10
72 14
73 17
74 7
75 0
76 0
77 23
78 5
79 19
80 0
81 3
82 6
83 3
84 10
85 7
86 9
87 0