Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

14 changed files with 0 additions and 292379 deletions

8
.idea/.gitignore vendored
View File

@ -1,8 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -1,6 +0,0 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@ -1,4 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>

View File

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/retroc2.iml" filepath="$PROJECT_DIR$/.idea/retroc2.iml" />
</modules>
</component>
</project>

View File

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

51
run.py
View File

@ -1,51 +0,0 @@
import lzma
import math
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
import pandas as pd
# Train a TF-IDF + linear-regression pipeline on a prefix of the training set.

# Upper bound on the number of training rows used to fit the model.
TRAIN_ROW_LIMIT = 50000

# Only the read needs the open handle; read_csv loads the whole frame eagerly.
# NOTE(review): three column names are supplied -- confirm train.tsv really has
# exactly three tab-separated fields; pandas turns surplus leading columns into
# the index, which would silently shift what 'End' and 'Text' refer to.
with lzma.open('train/train.tsv.xz', 'rt', encoding="utf-8") as f:
    data = pd.read_csv(f, sep='\t', names=['Begin', 'End', 'Text'])

# Keep the text column as the feature and the 'End' column as the target,
# then cap the number of rows used for fitting.
data = data[['Text', 'End']]
data = data[0:TRAIN_ROW_LIMIT]
X = data['Text']
y = data['End']

# Fitted pipeline used later in the script to score the evaluation splits.
model = make_pipeline(TfidfVectorizer(), LinearRegression())
model.fit(X, y)
def readFile(filename):
    """Return the first tab-separated field of every line in ``filename``.

    The file is decoded as UTF-8. For each line, everything from the first
    tab onwards is discarded and the remaining field is stripped of
    surrounding whitespace. The result is a list with one string per line.
    """
    with open(filename, 'r', encoding="utf-8") as handle:
        return [row.partition("\t")[0].strip() for row in handle]
def writePred(filename, predictions):
    """Write ``predictions`` to ``filename``, one ``str()``-formatted value per line.

    The file is opened in write mode, so any existing content is replaced.
    """
    rendered = (str(value) + "\n" for value in predictions)
    with open(filename, "w") as sink:
        sink.writelines(rendered)
# Score every evaluation split: read its inputs, predict, store the outputs.
for in_path, out_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('dev-1/in.tsv', 'dev-1/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    x = readFile(in_path)
    pred = model.predict(x)
    writePred(out_path, pred)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long