remove row limits from data

This commit is contained in:
szypol 2021-09-30 17:50:44 +02:00
parent 2af8e67c62
commit 3dc4b5a074
3 changed files with 1185 additions and 1181 deletions

View File

@ -3,7 +3,6 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "5fcb7312",
"metadata": {},
"outputs": [],
"source": [
@ -19,7 +18,6 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "88ac1be8",
"metadata": {},
"outputs": [],
"source": [
@ -28,14 +26,22 @@
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4aa43416",
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py:73: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n"
]
}
],
"source": [
"#training\n",
"all_train_data_in = pd.read_csv('train/in.tsv.xz', compression='xz', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t', nrows=3000)\n",
"train_data_ex = pd.read_csv('train/expected.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t', nrows=3000)\n",
"all_train_data_in = pd.read_csv('train/in.tsv.xz', compression='xz', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"train_data_ex = pd.read_csv('train/expected.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"train_data_in = []\n",
"for value in all_train_data_in.values:\n",
" temp = \"\"\n",
@ -51,8 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"id": "15c47c24",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -77,8 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"id": "822b1e29",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -118,7 +122,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.8.3"
}
},
"nbformat": 4,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff