final touches

This commit is contained in:
wikbom 2023-06-30 14:08:09 +02:00
parent e14a69fe68
commit 3963380a6f
4 changed files with 7425 additions and 7433 deletions

View File

@ -2709,7 +2709,7 @@ and:0.11338878221348402 of:0.09016799163143611 the:0.05697801051737042 to:0.0486
The:0.18442335006040247 It:0.08975364466899911 </s>:0.07971427902420948 He:0.06090923008184781 I:0.04562043203457926 But:0.03662087921527296 In:0.03462087183437364 A:0.03320137536696113 Mr.:0.031188795005829002 There:0.030356795854886938 They:0.026558847528563127 This:0.023538138124877435 We:0.02107174062349002 When:0.01866845617419228 If:0.017528110409600464 You:0.014930477654899944 She:0.014055966750830129 And:0.013882504481669345 That:0.012702693175633209 :0.21065341192888223
The:0.2210015699275491 It:0.09633202922015899 </s>:0.058104758816525985 He:0.04770773441479557 In:0.04350799799673679 This:0.037706108281655 There:0.03502272582437218 We:0.027622810722481083 I:0.026305463398102743 They:0.025633733130409196 If:0.024716880924637495 A:0.02175767926972571 When:0.02037889776204328 And:0.01922936826171691 But:0.018614723280842866 No:0.015445581835439636 She:0.014850989942069116 As:0.014068065332500115 To:0.013960664588274734 :0.2180322170699635
of:0.3332532658016317 for:0.09141219814688935 was:0.06400509641238525 and:0.05805738184714952 at:0.04684922188698578 with:0.04451396894571793 in:0.04362115929754916 to:0.04306996705458996 is:0.03913620494571453 the:0.023684496280744663 or:0.0227236771930735 which:0.020887200291594243 that:0.01748104828604175 as:0.017207732013261753 on:0.016829166837274378 where:0.016675300994992873 are:0.012621116145861644 about:0.012288742939389483 will:0.011327520415729467 :0.06435553426342308
away:0.4650124186675302 to:0.052047859893078245 the:0.046775023185166326 with:0.04487618820003419 in:0.04160911207576728 on:0.03493778071877932 by:0.032328780579199266 into:0.03093138379799511 and:0.029550108067192733 from:0.026919230838816564 away.:0.026539547217604934 through:0.02501800850892528 out:0.023220858356804375 away,:0.02312232471530277 upon:0.021877624783487187 down:0.018977109652189666 for:0.01740393101921067 up:0.014062012154606784 them:0.01206666319056755 over:0.011725033378740696 :-0.9990009990009991
away:0.42316130098745236 to:0.04736355250270119 the:0.04256527109850135 with:0.0408373312620311 in:0.03786429198894821 on:0.03179338045408917 by:0.029419190327071323 into:0.028147559256175545 and:0.026890598341145378 from:0.024496500063323066 away.:0.024150987968020485 through:0.022766387743122 out:0.021130981104691974 away,:0.021041315490925517 upon:0.019908638552973333 down:0.01726916978349259 for:0.015837577227481702 up:0.01279643106069217 them:0.010980663503416466 over:0.01066978037465403 :0.09090909090909105
and:0.11338878221348402 of:0.09016799163143611 the:0.05697801051737042 to:0.04865093934936114 in:0.034102633095281956 The:0.027505283676094172 a:0.02062218809010432 for:0.019165883313140165 </s>:0.018017442967184074 that:0.017930190049347993 by:0.015705073898856066 with:0.015427543201442174 or:0.014697969980079764 at:0.01425839943329796 was:0.01367335277337582 is:0.013638612760441588 on:0.012988835830105475 In:0.01242169885351048 but:0.011883460798950245 :0.42877570756713607
days:0.17837730956774048 years:0.1565529923825594 feet:0.056340590290987404 miles:0.05610862688543958 days,:0.03886401356463544 (30):0.029250944503728282 or:0.029011563663700723 years,:0.02841703651601572 years.:0.02801520902404339 and:0.025016269418428067 minutes:0.02410372865867246 of:0.02234954523700669 thousand:0.02225467633536696 days.:0.02063195107786846 dollars:0.01849566593072363 cents:0.013765648766660747 five:0.012853254534667206 acres:0.0119156881134904 per:0.011691315699577367 :0.2159839698286876
same:0.034981987469035726 United:0.023065667430766986 first:0.022576981210231474 most:0.02120343946715527 said:0.01925999839891064 people:0.015997694621527073 other:0.015566848919230268 time:0.015258170895587952 State:0.014319229871176398 city:0.013845420717094813 last:0.013593655440957511 whole:0.012987246571466542 best:0.012868893348530523 great:0.012857791111561314 old:0.011285267097144074 amount:0.010983032643685626 country:0.010184319384680535 present:0.009517246197076161 state:0.009019129794347764 :0.7006279794098333

Can't render this file because it is too large.

View File

@ -8,6 +8,7 @@ params:
vocab-size: 25000
batch_s: 3200
top_k_words: 20
minimal_wildcard: 0.1
links:
-title: "repo"
url:

File diff suppressed because it is too large Load Diff

View File

@ -88,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@ -120,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@ -131,7 +131,7 @@
"learning_rate = 0.0001\n",
"epochs = 4\n",
"k = 20 #top k words\n",
"wildcard_minweight = 0.001"
"wildcard_minweight = 0.1"
]
},
{
@ -6101,7 +6101,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
@ -6136,14 +6136,14 @@
" for key in dic:\n",
" dic[key] = dic[key]/(probsum*(1+wildcard_minweight)) #plus, becouse it's denominator\n",
" tab = [(key, val) for key, val in dic.items()]\n",
" tab.append(('<unk>', 1-1-sum([val for val in dic.values()])))\n",
" tab.append(('<unk>', 1-sum([val for val in dic.values()])))\n",
" return tab\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 31,
"metadata": {},
"outputs": [
{
@ -6152,7 +6152,7 @@
"<All keys matched successfully>"
]
},
"execution_count": 16,
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@ -6163,18 +6163,9 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/gedin/.local/lib/python3.10/site-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" input = module(input)\n"
]
}
],
"outputs": [],
"source": [
"\n",
"with lzma.open(test_file, 'rt') as file:\n",