remove row limits from data

This commit is contained in:
szypol 2021-09-30 17:50:44 +02:00
parent 2af8e67c62
commit 3dc4b5a074
3 changed files with 1185 additions and 1181 deletions

View File

@ -3,7 +3,6 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"id": "5fcb7312",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -19,7 +18,6 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "88ac1be8",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -28,14 +26,22 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 3,
"id": "4aa43416",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py:73: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n"
]
}
],
"source": [ "source": [
"#training\n", "#training\n",
"all_train_data_in = pd.read_csv('train/in.tsv.xz', compression='xz', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t', nrows=3000)\n", "all_train_data_in = pd.read_csv('train/in.tsv.xz', compression='xz', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"train_data_ex = pd.read_csv('train/expected.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t', nrows=3000)\n", "train_data_ex = pd.read_csv('train/expected.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"train_data_in = []\n", "train_data_in = []\n",
"for value in all_train_data_in.values:\n", "for value in all_train_data_in.values:\n",
" temp = \"\"\n", " temp = \"\"\n",
@ -51,8 +57,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 4,
"id": "15c47c24",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -77,8 +82,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 5,
"id": "822b1e29",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -118,7 +122,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.8" "version": "3.8.3"
} }
}, },
"nbformat": 4, "nbformat": 4,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff