laptop commit linear regression

2020-05-02 20:28:50 +02:00 · 2020-05-02 20:28:50 +02:00 · 468db5f757
commit 468db5f757
parent 37e5b270f2
3 changed files with 1996 additions and 1996 deletions
--- a/code_regression.py
+++ b/code_regression.py
@@ -16,8 +16,8 @@ def define_vocabulary(file_to_learn_new_words):
 def tokenize_list(string_input):
    words=[]
    string=string_input.replace('\\n',' ')
-    text=re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', string)
-    text = re.sub(r'\\n+', " ", text)
+    #text=re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', string)
+    text = re.sub(r'\\n+', " ", string)
    text = re.sub(r'http\S+', " ", text)
    text = re.sub(r'\/[a-z]\/', " ", text)
    text = re.sub(r'[^a-z]', " ", text)
@@ -53,7 +53,7 @@ def read_words(input_path):
    return vocabulary

 def train(vocabulary,input_train,expected_train):
-    learning_rate=0.0000000000001
+    learning_rate=0.00000001
    learning_precision=0.0000001
    words_vocabulary={}
    with open(input_train,encoding='utf-8') as input_file, open(expected_train,encoding='utf-8') as expected_file:
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/test-A/out.tsv
+++ b/test-A/out.tsv