Compare commits

..

5 Commits

Author SHA1 Message Date
Adrian Charkiewicz
6059bfb4a7 one too many row 2022-04-27 01:24:42 +02:00
Adrian Charkiewicz
8e63063f61 one columns 2022-04-27 00:57:26 +02:00
Adrian Charkiewicz
9326f20c59 one column instead of 2 2022-04-27 00:49:06 +02:00
Adrian Charkiewicz
b27e12d4d7 deletion of unnecessary run.py 2022-04-27 00:13:15 +02:00
Adrian Charkiewicz
8c8f903b11 change of dic 2022-04-26 23:55:07 +02:00
14 changed files with 428907 additions and 137526 deletions

View File

@ -1,4 +0,0 @@
<a name="2.0.0"></a>
## 2.0.0 (2020-05-22)
* Switch to probabilities as the main metric

View File

@ -1,6 +0,0 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,3 +0,0 @@
{
"notebooks": {}
}

274629
dev-0/out.tsv

File diff suppressed because it is too large Load Diff

View File

@ -1,175 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 90,
"id": "7dc5e391",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "a0825c64",
"metadata": {},
"outputs": [],
"source": [
"tsv_data = pd.read_csv('in.tsv', sep='\\t',header=None, quoting=csv.QUOTE_NONE)[0]"
]
},
{
"cell_type": "code",
"execution_count": 139,
"id": "4b9092a6",
"metadata": {},
"outputs": [],
"source": [
"expected = pd.read_csv('expected.tsv', sep='\\t',header=None)[0]"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "56c39aa1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"137314\n",
"137314\n"
]
}
],
"source": [
"print(len(expected))\n",
"print(len(tsv_data))"
]
},
{
"cell_type": "code",
"execution_count": 158,
"id": "d7b300ca",
"metadata": {},
"outputs": [],
"source": [
"male={'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer'}\n",
"female={'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta'}\n",
"male = {x[:6].lower() for x in male}\n",
"female = {x[:6].lower() for x in female}"
]
},
{
"cell_type": "code",
"execution_count": 159,
"id": "31b5864b",
"metadata": {},
"outputs": [],
"source": [
"trimmed_docs=[]\n",
"for document in tsv_data:\n",
" new_doc=[]\n",
" for word in str(document).lower().split():\n",
" new_doc.append(word[:6])\n",
" trimmed_docs.append(new_doc)"
]
},
{
"cell_type": "code",
"execution_count": 160,
"id": "c1f02d77",
"metadata": {},
"outputs": [],
"source": [
"male_or_female=[]\n",
"for doc in trimmed_docs:\n",
" male_or_female.append((len(male&set(doc)), len(female&set(doc))))"
]
},
{
"cell_type": "code",
"execution_count": 161,
"id": "6edfd944",
"metadata": {},
"outputs": [],
"source": [
"answers=[]\n",
"for i in male_or_female:\n",
" if i[0]>i[1]:\n",
" answers.append(1)\n",
" else:\n",
" answers.append(0)"
]
},
{
"cell_type": "code",
"execution_count": 162,
"id": "40369c2b",
"metadata": {},
"outputs": [],
"source": [
"result=[]\n",
"for i in range(len(answers)):\n",
" if answers[i]==expected[i]:\n",
" result.append(1)\n",
" else:\n",
" result.append(0)\n"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "e296921c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predykcja modelu wynosi 51.007909%\n"
]
}
],
"source": [
"print(f'Predykcja modelu wynosi {sum(result)/len(result)*100:.6f}%')"
]
},
{
"cell_type": "code",
"execution_count": 167,
"id": "fee431a4",
"metadata": {},
"outputs": [],
"source": [
"predictions = pd.Series(answers)\n",
"predictions.to_csv('out.tsv', sep='\\t', index=False, header=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -17,21 +17,17 @@ tsv_data = pd.read_csv('in.tsv', sep='\t',header=None, quoting=csv.QUOTE_NONE)[0
# In[139]:
expected = pd.read_csv('expected.tsv', sep='\t',header=None)[0]
#expected = pd.read_csv('expected.tsv', sep='\t',header=None)[0]
# In[94]:
print(len(expected))
print(len(tsv_data))
# In[158]:
male={'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer'}
female={'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta'}
male={'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer', 'piłka', 'metal'}
female={'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta', 'narzeczony', 'ślub'}
male = {x[:6].lower() for x in male}
female = {x[:6].lower() for x in female}
@ -51,41 +47,41 @@ for document in tsv_data:
male_or_female=[]
for doc in trimmed_docs:
male_or_female.append((len(male&set(doc)), len(female&set(doc))))
doc_mean = sum(map(len, trimmed_docs))/float(len(trimmed_docs))
# In[161]:
#print(doc_mean)
answers=[]
for i in male_or_female:
if i[0]>i[1]:
for i in range(len(male_or_female)):
if male_or_female[i][0]>male_or_female[i][1]:
answers.append(1)
else:
elif male_or_female[i][0]<male_or_female[i][1]:
answers.append(0)
else:
if len(trimmed_docs[i]) < doc_mean:
answers.append(0)
else:
answers.append(1)
# In[162]:
"""
result=[]
for i in range(len(answers)):
if answers[i]==expected[i]:
result.append(1)
else:
result.append(0)
"""
# In[163]:
print(f'Predykcja modelu wynosi {sum(result)/len(result)*100:.6f}%')
# In[167]:
df = pd.DataFrame(result)
df.to_csv('out.tsv', sep = '\t')
df = pd.Series(answers)
df.to_csv('out.tsv', sep = '\t', index=False, header=False)

View File

@ -0,0 +1 @@
,DESKTOP-QF2J2E3/riraa,DESKTOP-QF2J2E3,27.04.2022 01:17,file:///C:/Users/riraa/AppData/Roaming/LibreOffice/4;

1
dev-1/.~lock.out.tsv# Normal file
View File

@ -0,0 +1 @@
,DESKTOP-QF2J2E3/riraa,DESKTOP-QF2J2E3,27.04.2022 01:16,file:///C:/Users/riraa/AppData/Roaming/LibreOffice/4;

156606
dev-1/out.tsv Normal file

File diff suppressed because it is too large Load Diff

87
dev-1/run.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
# coding: utf-8
"""Keyword-based binary labelling of the documents in ``in.tsv``.

Reads the first tab-separated column of ``in.tsv``, labels every row and
writes one label per line to ``out.tsv`` (no index column, no header row).
"""

import csv

import pandas as pd

# Keywords and document tokens are both cut to this many leading characters
# before they are compared.
PREFIX_LEN = 6

MALE_WORDS = {'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer', 'piłka', 'metal'}
FEMALE_WORDS = {'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta', 'narzeczony', 'ślub'}

# Truncated keyword sets, e.g. 'windows' becomes 'window' and therefore also
# matches a token such as 'windowsa' after the same truncation.
male = {word[:PREFIX_LEN].lower() for word in MALE_WORDS}
female = {word[:PREFIX_LEN].lower() for word in FEMALE_WORDS}


def tokenize(document):
    """Return the lower-cased, whitespace-split tokens of ``document``,
    each cut to its first ``PREFIX_LEN`` characters.

    ``document`` is passed through ``str()`` first, so a non-string cell is
    tokenised by its string form instead of raising.
    """
    return [token[:PREFIX_LEN] for token in str(document).lower().split()]


def classify_documents(documents):
    """Return a list with one 0/1 label per item of ``documents``.

    A document is labelled 1 when it contains more distinct entries of
    ``male`` than of ``female``, and 0 in the opposite case. On a tie the
    token count decides: fewer tokens than the mean token count over all
    ``documents`` gives 0, otherwise 1. An empty ``documents`` gives ``[]``.
    """
    trimmed_docs = [tokenize(document) for document in documents]
    if not trimmed_docs:
        # Nothing to label; this also avoids a division by zero in the mean.
        return []
    doc_mean = sum(map(len, trimmed_docs)) / float(len(trimmed_docs))

    answers = []
    for doc in trimmed_docs:
        stems = set(doc)  # distinct tokens only: repeated keywords count once
        male_hits = len(male & stems)
        female_hits = len(female & stems)
        if male_hits > female_hits:
            answers.append(1)
        elif male_hits < female_hits:
            answers.append(0)
        else:
            # Tie (including no keyword at all): fall back to document length.
            answers.append(0 if len(doc) < doc_mean else 1)
    return answers


def main():
    """Label ``in.tsv`` and write the labels to ``out.tsv`` (both relative to
    the current working directory)."""
    # QUOTE_NONE makes pandas treat quote characters as ordinary text.
    tsv_data = pd.read_csv('in.tsv', sep='\t', header=None, quoting=csv.QUOTE_NONE)[0]
    answers = classify_documents(tsv_data)
    # index=False / header=False: exactly one label per input row, nothing else.
    pd.Series(answers).to_csv('out.tsv', sep='\t', index=False, header=False)


if __name__ == '__main__':
    main()

87
run.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
# coding: utf-8
"""Keyword-based binary labelling of the documents in ``in.tsv``.

Reads the first tab-separated column of ``in.tsv``, labels every row and
writes one label per line to ``out.tsv`` (no index column, no header row).
"""

import csv

import pandas as pd

# Keywords and document tokens are both cut to this many leading characters
# before they are compared.
PREFIX_LEN = 6

MALE_WORDS = {'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer', 'piłka', 'metal'}
FEMALE_WORDS = {'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta', 'narzeczony', 'ślub'}

# Truncated keyword sets, e.g. 'windows' becomes 'window' and therefore also
# matches a token such as 'windowsa' after the same truncation.
male = {word[:PREFIX_LEN].lower() for word in MALE_WORDS}
female = {word[:PREFIX_LEN].lower() for word in FEMALE_WORDS}


def tokenize(document):
    """Return the lower-cased, whitespace-split tokens of ``document``,
    each cut to its first ``PREFIX_LEN`` characters.

    ``document`` is passed through ``str()`` first, so a non-string cell is
    tokenised by its string form instead of raising.
    """
    return [token[:PREFIX_LEN] for token in str(document).lower().split()]


def classify_documents(documents):
    """Return a list with one 0/1 label per item of ``documents``.

    A document is labelled 1 when it contains more distinct entries of
    ``male`` than of ``female``, and 0 in the opposite case. On a tie the
    token count decides: fewer tokens than the mean token count over all
    ``documents`` gives 0, otherwise 1. An empty ``documents`` gives ``[]``.
    """
    trimmed_docs = [tokenize(document) for document in documents]
    if not trimmed_docs:
        # Nothing to label; this also avoids a division by zero in the mean.
        return []
    doc_mean = sum(map(len, trimmed_docs)) / float(len(trimmed_docs))

    answers = []
    for doc in trimmed_docs:
        stems = set(doc)  # distinct tokens only: repeated keywords count once
        male_hits = len(male & stems)
        female_hits = len(female & stems)
        if male_hits > female_hits:
            answers.append(1)
        elif male_hits < female_hits:
            answers.append(0)
        else:
            # Tie (including no keyword at all): fall back to document length.
            answers.append(0 if len(doc) < doc_mean else 1)
    return answers


def main():
    """Label ``in.tsv`` and write the labels to ``out.tsv`` (both relative to
    the current working directory)."""
    # QUOTE_NONE makes pandas treat quote characters as ordinary text.
    tsv_data = pd.read_csv('in.tsv', sep='\t', header=None, quoting=csv.QUOTE_NONE)[0]
    answers = classify_documents(tsv_data)
    # index=False / header=False: exactly one label per input row, nothing else.
    pd.Series(answers).to_csv('out.tsv', sep='\t', index=False, header=False)


if __name__ == '__main__':
    main()

134618
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

87
test-A/run.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
# coding: utf-8
"""Keyword-based binary labelling of the documents in ``in.tsv``.

Reads the first tab-separated column of ``in.tsv``, labels every row and
writes one label per line to ``out.tsv`` (no index column, no header row).
"""

import csv

import pandas as pd

# Keywords and document tokens are both cut to this many leading characters
# before they are compared.
PREFIX_LEN = 6

MALE_WORDS = {'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer', 'piłka', 'metal'}
FEMALE_WORDS = {'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta', 'narzeczony', 'ślub'}

# Truncated keyword sets, e.g. 'windows' becomes 'window' and therefore also
# matches a token such as 'windowsa' after the same truncation.
male = {word[:PREFIX_LEN].lower() for word in MALE_WORDS}
female = {word[:PREFIX_LEN].lower() for word in FEMALE_WORDS}


def tokenize(document):
    """Return the lower-cased, whitespace-split tokens of ``document``,
    each cut to its first ``PREFIX_LEN`` characters.

    ``document`` is passed through ``str()`` first, so a non-string cell is
    tokenised by its string form instead of raising.
    """
    return [token[:PREFIX_LEN] for token in str(document).lower().split()]


def classify_documents(documents):
    """Return a list with one 0/1 label per item of ``documents``.

    A document is labelled 1 when it contains more distinct entries of
    ``male`` than of ``female``, and 0 in the opposite case. On a tie the
    token count decides: fewer tokens than the mean token count over all
    ``documents`` gives 0, otherwise 1. An empty ``documents`` gives ``[]``.
    """
    trimmed_docs = [tokenize(document) for document in documents]
    if not trimmed_docs:
        # Nothing to label; this also avoids a division by zero in the mean.
        return []
    doc_mean = sum(map(len, trimmed_docs)) / float(len(trimmed_docs))

    answers = []
    for doc in trimmed_docs:
        stems = set(doc)  # distinct tokens only: repeated keywords count once
        male_hits = len(male & stems)
        female_hits = len(female & stems)
        if male_hits > female_hits:
            answers.append(1)
        elif male_hits < female_hits:
            answers.append(0)
        else:
            # Tie (including no keyword at all): fall back to document length.
            answers.append(0 if len(doc) < doc_mean else 1)
    return answers


def main():
    """Label ``in.tsv`` and write the labels to ``out.tsv`` (both relative to
    the current working directory)."""
    # QUOTE_NONE makes pandas treat quote characters as ordinary text.
    tsv_data = pd.read_csv('in.tsv', sep='\t', header=None, quoting=csv.QUOTE_NONE)[0]
    answers = classify_documents(tsv_data)
    # index=False / header=False: exactly one label per input row, nothing else.
    pd.Series(answers).to_csv('out.tsv', sep='\t', index=False, header=False)


if __name__ == '__main__':
    main()

87
train/run.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
# coding: utf-8
"""Keyword-based binary labelling of the documents in ``in.tsv``.

Reads the first tab-separated column of ``in.tsv``, labels every row and
writes one label per line to ``out.tsv`` (no index column, no header row).
"""

import csv

import pandas as pd

# Keywords and document tokens are both cut to this many leading characters
# before they are compared.
PREFIX_LEN = 6

MALE_WORDS = {'silnik', 'windows', 'gb', 'mb', 'mecz', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'system', 'serwer', 'piłka', 'metal'}
FEMALE_WORDS = {'ciąża', 'miesiączki', 'ciasto', 'ciąże', 'zadowolona', 'antykoncepcyjne', 'ginekologia', 'tabletki', 'porodzie', 'mąż', 'krwawienie', 'ciasta', 'narzeczony', 'ślub'}

# Truncated keyword sets, e.g. 'windows' becomes 'window' and therefore also
# matches a token such as 'windowsa' after the same truncation.
male = {word[:PREFIX_LEN].lower() for word in MALE_WORDS}
female = {word[:PREFIX_LEN].lower() for word in FEMALE_WORDS}


def tokenize(document):
    """Return the lower-cased, whitespace-split tokens of ``document``,
    each cut to its first ``PREFIX_LEN`` characters.

    ``document`` is passed through ``str()`` first, so a non-string cell is
    tokenised by its string form instead of raising.
    """
    return [token[:PREFIX_LEN] for token in str(document).lower().split()]


def classify_documents(documents):
    """Return a list with one 0/1 label per item of ``documents``.

    A document is labelled 1 when it contains more distinct entries of
    ``male`` than of ``female``, and 0 in the opposite case. On a tie the
    token count decides: fewer tokens than the mean token count over all
    ``documents`` gives 0, otherwise 1. An empty ``documents`` gives ``[]``.
    """
    trimmed_docs = [tokenize(document) for document in documents]
    if not trimmed_docs:
        # Nothing to label; this also avoids a division by zero in the mean.
        return []
    doc_mean = sum(map(len, trimmed_docs)) / float(len(trimmed_docs))

    answers = []
    for doc in trimmed_docs:
        stems = set(doc)  # distinct tokens only: repeated keywords count once
        male_hits = len(male & stems)
        female_hits = len(female & stems)
        if male_hits > female_hits:
            answers.append(1)
        elif male_hits < female_hits:
            answers.append(0)
        else:
            # Tie (including no keyword at all): fall back to document length.
            answers.append(0 if len(doc) < doc_mean else 1)
    return answers


def main():
    """Label ``in.tsv`` and write the labels to ``out.tsv`` (both relative to
    the current working directory)."""
    # QUOTE_NONE makes pandas treat quote characters as ordinary text.
    tsv_data = pd.read_csv('in.tsv', sep='\t', header=None, quoting=csv.QUOTE_NONE)[0]
    answers = classify_documents(tsv_data)
    # index=False / header=False: exactly one label per input row, nothing else.
    pd.Series(answers).to_csv('out.tsv', sep='\t', index=False, header=False)


if __name__ == '__main__':
    main()