neural bigrams

This commit is contained in:
Krystian Wasilewski 2023-04-27 11:33:12 +02:00
parent c6c596193a
commit 9285bffcec
3 changed files with 17935 additions and 17935 deletions

View File

@ -82,8 +82,8 @@
"source": [ "source": [
"def get_words_from_line(line):\n", "def get_words_from_line(line):\n",
" line = clean_line(line)\n", " line = clean_line(line)\n",
" for m in re.finditer(r'[\\p{L}0-9\\*]+|\\p{P}+', line):\n", " for word in line.split():\n",
" yield m.group(0).lower()" " yield word"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff