692 lines
71 KiB
Plaintext
692 lines
71 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Building train and test sets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"scrolled": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# If a library is missing, install it with pip (or pip3) - you can do this straight from the notebook.\n",
|
||
"# Example: !pip install tqdm\n",
|
||
"# On the lab machines it is also better to use the Python 3 kernel (ipython3 notebook).\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import scipy.sparse as sparse\n",
|
||
"import time\n",
|
||
"import random\n",
|
||
"import evaluation_measures as ev\n",
|
||
"import matplotlib\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"\n",
|
||
"# df = pd.DataFrame(np.loadtxt( './Datasets/ml-1m.dat',delimiter='::'))\n",
|
||
"df=pd.read_csv('./Datasets/ml-100k/u.data',delimiter='\\t', header=None)\n",
|
||
"df.columns=['user', 'item', 'rating', 'timestamp']\n",
|
||
"\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"train, test = train_test_split(df, test_size=0.2, random_state=30)\n",
|
||
"\n",
|
||
"train.to_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, index=False)\n",
|
||
"test.to_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Interactions properties"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### What does the data look like?"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>user</th>\n",
|
||
" <th>item</th>\n",
|
||
" <th>rating</th>\n",
|
||
" <th>timestamp</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>196</td>\n",
|
||
" <td>242</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>881250949</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>186</td>\n",
|
||
" <td>302</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>891717742</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>22</td>\n",
|
||
" <td>377</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>878887116</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>244</td>\n",
|
||
" <td>51</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>880606923</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>166</td>\n",
|
||
" <td>346</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>886397596</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" user item rating timestamp\n",
|
||
"0 196 242 3 881250949\n",
|
||
"1 186 302 3 891717742\n",
|
||
"2 22 377 1 878887116\n",
|
||
"3 244 51 2 880606923\n",
|
||
"4 166 346 1 886397596"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df[:5]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Sample properties"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"We have 943 users, 1682 items and 100000 ratings.\n",
|
||
"\n",
|
||
"Average number of ratings per user is 106.04. \n",
|
||
"\n",
|
||
"Average number of ratings per item is 59.453.\n",
|
||
"\n",
|
||
"Data sparsity (% of missing entries) is 6.3047%.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"users, items, ratings=len(set(df['user'])), len(set(df['item'])), len(df)\n",
|
||
"\n",
|
||
"print('We have {} users, {} items and {} ratings.\\n'.format(users, items, ratings))\n",
|
||
"\n",
|
||
"print('Average number of ratings per user is {}. \\n'.format(round(ratings/users,2)))\n",
|
||
"print('Average number of ratings per item is {}.\\n'.format(round(ratings/items,4)))\n",
|
||
"# Sparsity is the share of user-item pairs that have no rating, i.e. 100% minus the density.\n",
|
||
"print('Data sparsity (% of missing entries) is {}%.'.format(round(100*(1-ratings/(users*items)),4)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "\n",
|
||
"text/plain": [
|
||
"<Figure size 1152x576 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"items_per_user=df.groupby(['user']).count()['rating']\n",
|
||
"\n",
|
||
"plt.figure(figsize=(16,8))\n",
|
||
"plt.hist(items_per_user, bins=100)\n",
|
||
"\n",
|
||
"# Let's add median\n",
|
||
"t=items_per_user.median()\n",
|
||
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
|
||
"plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
|
||
"\n",
|
||
"# Let's add also some percentiles\n",
|
||
"t=items_per_user.quantile(0.25)\n",
|
||
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
|
||
"plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
|
||
"\n",
|
||
"t=items_per_user.quantile(0.75)\n",
|
||
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
|
||
"plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
|
||
"\n",
|
||
"plt.title('Number of ratings per user', fontsize=30)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "\n",
|
||
"text/plain": [
|
||
"<Figure size 1152x576 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Count how many ratings each item received (kept separate from the per-user counts above).\n",
|
||
"ratings_per_item=df.groupby(['item']).count()['rating']\n",
|
||
"\n",
|
||
"plt.figure(figsize=(16,8))\n",
|
||
"plt.hist(ratings_per_item, bins=100)\n",
|
||
"\n",
|
||
"# Mark the median with a dashed vertical line and a label\n",
|
||
"t=ratings_per_item.median()\n",
|
||
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
|
||
"plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
|
||
"\n",
|
||
"# Mark the lower and upper quartiles the same way\n",
|
||
"t=ratings_per_item.quantile(0.25)\n",
|
||
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
|
||
"plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
|
||
"\n",
|
||
"t=ratings_per_item.quantile(0.75)\n",
|
||
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
|
||
"plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
|
||
"\n",
|
||
"plt.title('Number of ratings per item', fontsize=30)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"rating\n",
|
||
"1 0.06110\n",
|
||
"2 0.11370\n",
|
||
"3 0.27145\n",
|
||
"4 0.34174\n",
|
||
"5 0.21201\n",
|
||
"Name: user, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.groupby(['rating']).count()['user']/len(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Item attributes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"genres = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n",
|
||
" encoding='latin-1')\n",
|
||
"genres=dict(zip(genres[1], genres[0]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{0: 'unknown',\n",
|
||
" 1: 'Action',\n",
|
||
" 2: 'Adventure',\n",
|
||
" 3: 'Animation',\n",
|
||
" 4: \"Children's\",\n",
|
||
" 5: 'Comedy',\n",
|
||
" 6: 'Crime',\n",
|
||
" 7: 'Documentary',\n",
|
||
" 8: 'Drama',\n",
|
||
" 9: 'Fantasy',\n",
|
||
" 10: 'Film-Noir',\n",
|
||
" 11: 'Horror',\n",
|
||
" 12: 'Musical',\n",
|
||
" 13: 'Mystery',\n",
|
||
" 14: 'Romance',\n",
|
||
" 15: 'Sci-Fi',\n",
|
||
" 16: 'Thriller',\n",
|
||
" 17: 'War',\n",
|
||
" 18: 'Western'}"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"genres"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"movies = pd.read_csv('./Datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>0</th>\n",
|
||
" <th>1</th>\n",
|
||
" <th>2</th>\n",
|
||
" <th>3</th>\n",
|
||
" <th>4</th>\n",
|
||
" <th>5</th>\n",
|
||
" <th>6</th>\n",
|
||
" <th>7</th>\n",
|
||
" <th>8</th>\n",
|
||
" <th>9</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>14</th>\n",
|
||
" <th>15</th>\n",
|
||
" <th>16</th>\n",
|
||
" <th>17</th>\n",
|
||
" <th>18</th>\n",
|
||
" <th>19</th>\n",
|
||
" <th>20</th>\n",
|
||
" <th>21</th>\n",
|
||
" <th>22</th>\n",
|
||
" <th>23</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Toy Story (1995)</td>\n",
|
||
" <td>01-Jan-1995</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>http://us.imdb.com/M/title-exact?Toy%20Story%2...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>GoldenEye (1995)</td>\n",
|
||
" <td>01-Jan-1995</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>http://us.imdb.com/M/title-exact?GoldenEye%20(...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Four Rooms (1995)</td>\n",
|
||
" <td>01-Jan-1995</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>http://us.imdb.com/M/title-exact?Four%20Rooms%...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>3 rows × 24 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" 0 1 2 3 \\\n",
|
||
"0 1 Toy Story (1995) 01-Jan-1995 NaN \n",
|
||
"1 2 GoldenEye (1995) 01-Jan-1995 NaN \n",
|
||
"2 3 Four Rooms (1995) 01-Jan-1995 NaN \n",
|
||
"\n",
|
||
" 4 5 6 7 8 9 ... \\\n",
|
||
"0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n",
|
||
"1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n",
|
||
"2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n",
|
||
"\n",
|
||
" 14 15 16 17 18 19 20 21 22 23 \n",
|
||
"0 0 0 0 0 0 0 0 0 0 0 \n",
|
||
"1 0 0 0 0 0 0 0 1 0 0 \n",
|
||
"2 0 0 0 0 0 0 0 1 0 0 \n",
|
||
"\n",
|
||
"[3 rows x 24 columns]"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"movies[:3]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"for i in range(19):\n",
|
||
" movies[i+5]=movies[i+5].apply(lambda x: genres[i] if x==1 else '')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"movies['genre']=movies.iloc[:, 5:].apply(lambda x: ', '.join(x[x!='']), axis = 1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"movies=movies[[0,1,'genre']]\n",
|
||
"movies.columns=['id', 'title', 'genres']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>genres</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Toy Story (1995)</td>\n",
|
||
" <td>Animation, Children's, Comedy</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>GoldenEye (1995)</td>\n",
|
||
" <td>Action, Adventure, Thriller</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Four Rooms (1995)</td>\n",
|
||
" <td>Thriller</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Get Shorty (1995)</td>\n",
|
||
" <td>Action, Comedy, Drama</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Copycat (1995)</td>\n",
|
||
" <td>Crime, Drama, Thriller</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id title genres\n",
|
||
"0 1 Toy Story (1995) Animation, Children's, Comedy\n",
|
||
"1 2 GoldenEye (1995) Action, Adventure, Thriller\n",
|
||
"2 3 Four Rooms (1995) Thriller\n",
|
||
"3 4 Get Shorty (1995) Action, Comedy, Drama\n",
|
||
"4 5 Copycat (1995) Crime, Drama, Thriller"
|
||
]
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"movies.to_csv('./Datasets/ml-100k/movies.csv', index=False)\n",
|
||
"movies[:5]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Toy example"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"# Create the toy-example folder together with any missing parent folders; does nothing if it already exists.\n",
|
||
"os.makedirs('./Datasets/toy-example/', exist_ok=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"toy_train=pd.DataFrame([[0,0,3,0], [0,10,4,0], [0,40,5,0], [0,70,4,0],\n",
|
||
" [10,10,1,0], [10,20,2,0], [10,30,3,0],\n",
|
||
" [20,30,5,0], [20,50,3,0], [20,60,4,0]])\n",
|
||
"toy_test=pd.DataFrame([[0,60,3,0],\n",
|
||
" [10,40,5,0],\n",
|
||
" [20,0,5,0], [20,20,4,0], [20,70,2,0]])\n",
|
||
"\n",
|
||
"toy_train.to_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, index=False)\n",
|
||
"toy_test.to_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.2"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|