4670 lines
126 KiB
Plaintext
4670 lines
126 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "spread-happiness",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%matplotlib inline\n",
|
||
|
"%load_ext autoreload\n",
|
||
|
"%autoreload 2\n",
|
||
|
"\n",
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"from IPython.display import Markdown, display, HTML\n",
|
||
|
"\n",
|
||
|
"# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n",
|
||
|
"import os\n",
|
||
|
"os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "approximate-classic",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Numpy\n",
|
||
|
"\n",
|
||
|
"For a detailed reference check out: https://numpy.org/doc/stable/reference/arrays.indexing.html."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "renewable-chase",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Creating numpy arrays"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "afraid-consortium",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Directly"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "textile-mainland",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[1. 2. 3.]\n",
|
||
|
" [4. 5. 6.]\n",
|
||
|
" [7. 8. 9.]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = np.array(\n",
|
||
|
" [[1.0, 2.0, 3.0], \n",
|
||
|
" [4.0, 5.0, 6.0], \n",
|
||
|
" [7.0, 8.0, 9.0]]\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"print(a)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "floating-junior",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### From a list"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "reasonable-mistress",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]\n",
|
||
|
"\n",
|
||
|
"[[1. 2. 3.]\n",
|
||
|
" [4. 5. 6.]\n",
|
||
|
" [7. 8. 9.]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = [[1.0, 2.0, 3.0], \n",
|
||
|
" [4.0, 5.0, 6.0], \n",
|
||
|
" [7.0, 8.0, 9.0]]\n",
|
||
|
"\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"a = np.array(a)\n",
|
||
|
"\n",
|
||
|
"print(a)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "incorrect-practitioner",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### From a list comprehension"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"id": "straight-cooling",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]\n",
|
||
|
"\n",
|
||
|
"[ 0 1 4 9 16 25 36 49 64 81]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = [i**2 for i in range(10)]\n",
|
||
|
"\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"print(np.array(a))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "suffering-myanmar",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Ready-made functions in numpy"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"id": "expensive-bowling",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"All zeros\n",
|
||
|
"[[0. 0. 0. 0.]\n",
|
||
|
" [0. 0. 0. 0.]\n",
|
||
|
" [0. 0. 0. 0.]]\n",
|
||
|
"\n",
|
||
|
"All chosen value (variant 1)\n",
|
||
|
"[[7. 7. 7. 7.]\n",
|
||
|
" [7. 7. 7. 7.]\n",
|
||
|
" [7. 7. 7. 7.]]\n",
|
||
|
"\n",
|
||
|
"All chosen value (variant 2)\n",
|
||
|
"[[7. 7. 7. 7.]\n",
|
||
|
" [7. 7. 7. 7.]\n",
|
||
|
" [7. 7. 7. 7.]]\n",
|
||
|
"\n",
|
||
|
"Random integers\n",
|
||
|
"[[7 5]\n",
|
||
|
" [9 8]\n",
|
||
|
" [6 3]]\n",
|
||
|
"\n",
|
||
|
"Random values from the normal distribution\n",
|
||
|
"[[ 3.88109518 -15.30896612]\n",
|
||
|
" [ 7.88779281 7.67458172]\n",
|
||
|
" [ -9.81026963 -6.02098263]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# All zeros\n",
|
||
|
"a = np.zeros((3, 4))\n",
|
||
|
"print(\"All zeros\")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"# All a chosen value\n",
|
||
|
"a = np.full((3, 4), 7.0)\n",
|
||
|
"print(\"All chosen value (variant 1)\")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"# or\n",
|
||
|
"\n",
|
||
|
"a = np.zeros((3, 4))\n",
|
||
|
"a[:] = 7.0\n",
|
||
|
"print(\"All chosen value (variant 2)\")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"# Random integers\n",
|
||
|
"\n",
|
||
|
"a = np.random.randint(low=0, high=10, size=(3, 2))\n",
|
||
|
"print(\"Random integers\")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"# Random values from the normal distribution (Gaussian)\n",
|
||
|
"\n",
|
||
|
"print(\"Random values from the normal distribution\")\n",
|
||
|
"a = np.random.normal(loc=0, scale=10, size=(3, 2))\n",
|
||
|
"print(a)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "aggressive-titanium",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Slicing numpy arrays"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "former-richardson",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Slicing in 1D\n",
|
||
|
"\n",
|
||
|
"To obtain only specific values from a numpy array one can use so-called slicing. It has the form\n",
|
||
|
"\n",
|
||
|
"**arr[low:high:step]**\n",
|
||
|
"\n",
|
||
|
"where low is the first index to be retrieved, high is the first index not to be retrieved (the upper bound is exclusive), and step means that every step-th element is taken."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"id": "desirable-documentary",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Original: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]\n",
|
||
|
"First 5 elements: [0, 1, 4, 9, 16]\n",
|
||
|
"Elements from index 3 to index 5: [9, 16, 25]\n",
|
||
|
"Last 3 elements (negative indexing): [49, 64, 81]\n",
|
||
|
"Printed in reverse order: [81, 64, 49, 36, 25, 16, 9, 4, 1, 0]\n",
|
||
|
"Every second element: [0, 4, 16, 36, 64]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = [i**2 for i in range(10)]\n",
|
||
|
"\n",
|
||
|
"print(\"Original: \", a)\n",
|
||
|
"print(\"First 5 elements:\", a[:5])\n",
|
||
|
"print(\"Elements from index 3 to index 5:\", a[3:6])\n",
|
||
|
"print(\"Last 3 elements (negative indexing):\", a[-3:])\n",
|
||
|
"print(\"Printed in reverse order:\", a[::-1])\n",
|
||
|
"print(\"Every second element:\", a[::2])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "micro-explosion",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Slicing in 2D\n",
|
||
|
"\n",
|
||
|
"In two dimensions it works similarly, except that a separate slice is given for each dimension, separated by a comma."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"id": "disciplinary-think",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Original: \n",
|
||
|
"[[ 0 1 2 3 4]\n",
|
||
|
" [ 5 6 7 8 9]\n",
|
||
|
" [10 11 12 13 14]\n",
|
||
|
" [15 16 17 18 19]\n",
|
||
|
" [20 21 22 23 24]]\n",
|
||
|
"\n",
|
||
|
"First 2 elements of the first 3 row:\n",
|
||
|
"[[ 0 1]\n",
|
||
|
" [ 5 6]\n",
|
||
|
" [10 11]]\n",
|
||
|
"\n",
|
||
|
"Middle 3 elements from the middle 3 rows:\n",
|
||
|
"[[ 6 7 8]\n",
|
||
|
" [11 12 13]\n",
|
||
|
" [16 17 18]]\n",
|
||
|
"\n",
|
||
|
"Bottom-right 3 by 3 submatrix (negative indexing):\n",
|
||
|
"[[12 13 14]\n",
|
||
|
" [17 18 19]\n",
|
||
|
" [22 23 24]]\n",
|
||
|
"\n",
|
||
|
"Reversed columns:\n",
|
||
|
"[[ 4 3 2 1 0]\n",
|
||
|
" [ 9 8 7 6 5]\n",
|
||
|
" [14 13 12 11 10]\n",
|
||
|
" [19 18 17 16 15]\n",
|
||
|
" [24 23 22 21 20]]\n",
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = np.array([i for i in range(25)]).reshape(5, 5)\n",
|
||
|
"\n",
|
||
|
"print(\"Original: \")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"print(\"First 2 elements of the first 3 row:\")\n",
|
||
|
"print(a[:3, :2])\n",
|
||
|
"print()\n",
|
||
|
"print(\"Middle 3 elements from the middle 3 rows:\")\n",
|
||
|
"print(a[1:4, 1:4])\n",
|
||
|
"print()\n",
|
||
|
"print(\"Bottom-right 3 by 3 submatrix (negative indexing):\")\n",
|
||
|
"print(a[-3:, -3:])\n",
|
||
|
"print()\n",
|
||
|
"print(\"Reversed columns:\")\n",
|
||
|
"print(a[:, ::-1])\n",
|
||
|
"print()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "removable-canyon",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Setting numpy array field values"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"id": "senior-serbia",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Original: \n",
|
||
|
"[[ 0 1 2 3 4]\n",
|
||
|
" [ 5 6 7 8 9]\n",
|
||
|
" [10 11 12 13 14]\n",
|
||
|
" [15 16 17 18 19]\n",
|
||
|
" [20 21 22 23 24]]\n",
|
||
|
"\n",
|
||
|
"Middle values changed to 5\n",
|
||
|
"[[ 0 1 2 3 4]\n",
|
||
|
" [ 5 5 5 5 9]\n",
|
||
|
" [10 5 5 5 14]\n",
|
||
|
" [15 5 5 5 19]\n",
|
||
|
" [20 21 22 23 24]]\n",
|
||
|
"\n",
|
||
|
"Second matrix\n",
|
||
|
"[[ 0 0 2]\n",
|
||
|
" [ 6 12 20]\n",
|
||
|
" [30 42 56]]\n",
|
||
|
"\n",
|
||
|
"Second matrix substituted into the middle of the first matrix\n",
|
||
|
"[[ 0 1 2 3 4]\n",
|
||
|
" [ 5 0 0 2 9]\n",
|
||
|
" [10 6 12 20 14]\n",
|
||
|
" [15 30 42 56 19]\n",
|
||
|
" [20 21 22 23 24]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = np.array([i for i in range(25)]).reshape(5, 5)\n",
|
||
|
"\n",
|
||
|
"print(\"Original: \")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"a[1:4, 1:4] = 5.0\n",
|
||
|
"\n",
|
||
|
"print(\"Middle values changed to 5\")\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"b = np.array([i**2 - i for i in range(9)]).reshape(3, 3)\n",
|
||
|
"\n",
|
||
|
"print(\"Second matrix\")\n",
|
||
|
"print(b)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"a[1:4, 1:4] = b\n",
|
||
|
"\n",
|
||
|
"print(\"Second matrix substituted into the middle of the first matrix\")\n",
|
||
|
"print(a)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "federal-wayne",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Operations on numpy arrays\n",
|
||
|
"\n",
|
||
|
"It is important to remember that arithmetic operations on numpy arrays are always element-wise."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"id": "southwest-biotechnology",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[ 0 1 4]\n",
|
||
|
" [ 9 16 25]\n",
|
||
|
" [36 49 64]]\n",
|
||
|
"\n",
|
||
|
"[[0. 1. 1.41421356]\n",
|
||
|
" [1.73205081 2. 2.23606798]\n",
|
||
|
" [2.44948974 2.64575131 2.82842712]]\n",
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = np.array([i**2 for i in range(9)]).reshape((3, 3))\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"b = np.array([i**0.5 for i in range(9)]).reshape((3, 3))\n",
|
||
|
"print(b)\n",
|
||
|
"print()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "intensive-gates",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Element-wise sum"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 14,
|
||
|
"id": "behavioral-safety",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[ 0. 2. 5.41421356]\n",
|
||
|
" [10.73205081 18. 27.23606798]\n",
|
||
|
" [38.44948974 51.64575131 66.82842712]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(a + b)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "occupied-trial",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Element-wise multiplication"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"id": "charming-pleasure",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[ 0. 1. 5.65685425]\n",
|
||
|
" [ 15.58845727 32. 55.90169944]\n",
|
||
|
" [ 88.18163074 129.64181424 181.01933598]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(a * b)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "efficient-league",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Matrix multiplication"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 17,
|
||
|
"id": "changing-community",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[ 11.53000978 12.58300524 13.54977648]\n",
|
||
|
" [ 88.95005649 107.14378278 119.21568782]\n",
|
||
|
" [241.63783311 303.32808391 341.49835513]]\n",
|
||
|
"\n",
|
||
|
"[[ 0. 1. 4.]\n",
|
||
|
" [ 9. 16. 25.]\n",
|
||
|
" [36. 49. 64.]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(np.matmul(a, b))\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"# Multiplication by the identity matrix (to check it works as expected)\n",
|
||
|
"id_matrix = np.array([[1.0, 0.0, 0.0], \n",
|
||
|
" [0.0, 1.0, 0.0], \n",
|
||
|
" [0.0, 0.0, 1.0]])\n",
|
||
|
"\n",
|
||
|
"print(np.matmul(id_matrix, a))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "assisted-communications",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Calculating the mean"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 22,
|
||
|
"id": "defensive-wrong",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[1 4 0 6 4]\n",
|
||
|
"\n",
|
||
|
"Mean (by sum): 3.0\n",
|
||
|
"Mean (by mean): 3.0\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = np.random.randint(low=0, high=10, size=(5))\n",
|
||
|
"\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"print(\"Mean (by sum): \", np.sum(a) / len(a))\n",
|
||
|
"print(\"Mean (by mean):\", np.mean(a))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "complex-karma",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Calculating the mean of every row"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 30,
|
||
|
"id": "correct-dietary",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[4 9 5]\n",
|
||
|
" [8 9 1]\n",
|
||
|
" [5 6 4]\n",
|
||
|
" [3 7 8]\n",
|
||
|
" [2 1 5]]\n",
|
||
|
"\n",
|
||
|
"(5, 3)\n",
|
||
|
"\n",
|
||
|
"Mean: [6. 6. 5. 6. 2.66666667]\n",
|
||
|
"Mean in the original matrix form:\n",
|
||
|
"[[6. ]\n",
|
||
|
" [6. ]\n",
|
||
|
" [5. ]\n",
|
||
|
" [6. ]\n",
|
||
|
" [2.66666667]]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = np.random.randint(low=0, high=10, size=(5, 3))\n",
|
||
|
"\n",
|
||
|
"print(a)\n",
|
||
|
"print()\n",
|
||
|
"print(a.shape)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"print(\"Mean:\", np.sum(a, axis=1) / a.shape[1])\n",
|
||
|
"\n",
|
||
|
"print(\"Mean in the original matrix form:\")\n",
|
||
|
"print((np.sum(a, axis=1) / a.shape[1]).reshape(-1, 1)) # -1 calculates the right size to use all elements"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "indian-orlando",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### More complex operations"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 31,
|
||
|
"id": "presidential-cologne",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Vector to power 2 (element-wise)\n",
|
||
|
"[1. 4. 9.]\n",
|
||
|
"\n",
|
||
|
"Euler number to the power a (element-wise)\n",
|
||
|
"[ 2.71828183 7.3890561 20.08553692]\n",
|
||
|
"\n",
|
||
|
"An even more complex expression\n",
|
||
|
"[0.61971364 1.89817602 4.84758949]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"a = [1.0, 2.0, 3.0]\n",
|
||
|
"\n",
|
||
|
"print(\"Vector to power 2 (element-wise)\")\n",
|
||
|
"print(np.power(a, 2))\n",
|
||
|
"print()\n",
|
||
|
"print(\"Euler number to the power a (element-wise)\")\n",
|
||
|
"print(np.exp(a))\n",
|
||
|
"print()\n",
|
||
|
"print(\"An even more complex expression\")\n",
|
||
|
"print((np.power(a, 2) + np.exp(a)) / np.sum(a))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "hearing-street",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Numpy tasks"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "regular-niger",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 1.** Calculate the sigmoid (logistic) function on every element of the following numpy array [0.3, 1.2, -1.4, 0.2, -0.1, 0.1, 0.8, -0.25] and print the last 5 elements. Use only vector operations."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "agreed-single",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "another-catch",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 2.** Calculate the dot product of the following two vectors:<br/>\n",
|
||
|
"$x = [3, 1, 4, 2, 6, 1, 4, 8]$<br/>\n",
|
||
|
"$y = [5, 2, 3, 12, 2, 4, 17, 11]$<br/>\n",
|
||
|
"a) by using element-wise multiplication and np.sum,<br/>\n",
|
||
|
"b) by using np.dot,<br/>\n",
|
||
|
"c) by using np.matmul and transposition (x.T)."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "forbidden-journalism",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "acute-amber",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 3.** Calculate the following expression<br/>\n",
|
||
|
"$$\\frac{1}{1 + e^{-x_0 \\theta_0 - \\ldots - x_9 \\theta_9 - \\theta_{10}}}$$\n",
|
||
|
"for<br/>\n",
|
||
|
"$x = [1.2, 2.3, 3.4, -0.7, 4.2, 2.7, -0.5, -2.1, -3.3, 0.2]$<br/>\n",
|
||
|
"$\\theta = [7.7, 0.33, -2.12, -1.73, 2.9, -5.8, -0.9, 12.11, 3.43, -0.5, 1.65]$<br/>\n",
|
||
|
"and print the result. Use only vector operations."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "falling-holder",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "latter-vector",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Pandas"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "contrary-vacuum",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Load datasets\n",
|
||
|
"\n",
|
||
|
"- Steam (https://www.kaggle.com/tamber/steam-video-games)\n",
|
||
|
"\n",
|
||
|
"- MovieLens (https://grouplens.org/datasets/movielens/)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 32,
|
||
|
"id": "alert-friday",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"steam_df = pd.read_csv(os.path.join(\"data\", \"steam\", \"steam-200k.csv\"), \n",
|
||
|
" names=['user-id', 'game-title', 'behavior-name', 'value', 'zero'])\n",
|
||
|
"\n",
|
||
|
"ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\"))\n",
|
||
|
"ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\"))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "personal-productivity",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Peek into the datasets"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 33,
|
||
|
"id": "musical-trust",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" user-id game-title behavior-name value zero\n",
|
||
|
"0 151603712 The Elder Scrolls V Skyrim purchase 1.0 0\n",
|
||
|
"1 151603712 The Elder Scrolls V Skyrim play 273.0 0\n",
|
||
|
"2 151603712 Fallout 4 purchase 1.0 0\n",
|
||
|
"3 151603712 Fallout 4 play 87.0 0\n",
|
||
|
"4 151603712 Spore purchase 1.0 0\n",
|
||
|
"5 151603712 Spore play 14.9 0\n",
|
||
|
"6 151603712 Fallout New Vegas purchase 1.0 0\n",
|
||
|
"7 151603712 Fallout New Vegas play 12.1 0\n",
|
||
|
"8 151603712 Left 4 Dead 2 purchase 1.0 0\n",
|
||
|
"9 151603712 Left 4 Dead 2 play 8.9 0"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 33,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"steam_df.head(10)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 34,
|
||
|
"id": "electrical-floor",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>userId</th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>rating</th>\n",
|
||
|
" <th>timestamp</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>964982703</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>964981247</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>964982224</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>47</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>964983815</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>50</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>964982931</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>70</td>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" <td>964982400</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>101</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>964980868</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>110</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>964982176</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>151</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>964984041</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>157</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>964984100</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" userId movieId rating timestamp\n",
|
||
|
"0 1 1 4.0 964982703\n",
|
||
|
"1 1 3 4.0 964981247\n",
|
||
|
"2 1 6 4.0 964982224\n",
|
||
|
"3 1 47 5.0 964983815\n",
|
||
|
"4 1 50 5.0 964982931\n",
|
||
|
"5 1 70 3.0 964982400\n",
|
||
|
"6 1 101 5.0 964980868\n",
|
||
|
"7 1 110 4.0 964982176\n",
|
||
|
"8 1 151 5.0 964984041\n",
|
||
|
"9 1 157 5.0 964984100"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 34,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"ml_ratings_df.head(10)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 36,
|
||
|
"id": "cordless-daniel",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>Jumanji (1995)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>Grumpier Old Men (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>Waiting to Exhale (1995)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Father of the Bride Part II (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>Heat (1995)</td>\n",
|
||
|
" <td>Action|Crime|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>Sabrina (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>Tom and Huck (1995)</td>\n",
|
||
|
" <td>Adventure|Children</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>Sudden Death (1995)</td>\n",
|
||
|
" <td>Action</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>GoldenEye (1995)</td>\n",
|
||
|
" <td>Action|Adventure|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" movieId title \\\n",
|
||
|
"0 1 Toy Story (1995) \n",
|
||
|
"1 2 Jumanji (1995) \n",
|
||
|
"2 3 Grumpier Old Men (1995) \n",
|
||
|
"3 4 Waiting to Exhale (1995) \n",
|
||
|
"4 5 Father of the Bride Part II (1995) \n",
|
||
|
"5 6 Heat (1995) \n",
|
||
|
"6 7 Sabrina (1995) \n",
|
||
|
"7 8 Tom and Huck (1995) \n",
|
||
|
"8 9 Sudden Death (1995) \n",
|
||
|
"9 10 GoldenEye (1995) \n",
|
||
|
"\n",
|
||
|
" genres \n",
|
||
|
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"1 Adventure|Children|Fantasy \n",
|
||
|
"2 Comedy|Romance \n",
|
||
|
"3 Comedy|Drama|Romance \n",
|
||
|
"4 Comedy \n",
|
||
|
"5 Action|Crime|Thriller \n",
|
||
|
"6 Comedy|Romance \n",
|
||
|
"7 Adventure|Children \n",
|
||
|
"8 Action \n",
|
||
|
"9 Action|Adventure|Thriller "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 36,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"ml_movies_df.head(10)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "alpha-portal",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Merge both MovieLens DataFrames into one"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 39,
|
||
|
"id": "affecting-disclosure",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>userId</th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>rating</th>\n",
|
||
|
" <th>timestamp</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>964982703</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>847434962</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4.5</td>\n",
|
||
|
" <td>1106635946</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>15</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>2.5</td>\n",
|
||
|
" <td>1510577970</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>17</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4.5</td>\n",
|
||
|
" <td>1305696483</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>18</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>3.5</td>\n",
|
||
|
" <td>1455209816</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>19</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4.0</td>\n",
|
||
|
" <td>965705637</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>21</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>3.5</td>\n",
|
||
|
" <td>1407618878</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>27</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" <td>962685262</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>31</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>850466616</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" userId movieId rating timestamp title \\\n",
|
||
|
"0 1 1 4.0 964982703 Toy Story (1995) \n",
|
||
|
"1 5 1 4.0 847434962 Toy Story (1995) \n",
|
||
|
"2 7 1 4.5 1106635946 Toy Story (1995) \n",
|
||
|
"3 15 1 2.5 1510577970 Toy Story (1995) \n",
|
||
|
"4 17 1 4.5 1305696483 Toy Story (1995) \n",
|
||
|
"5 18 1 3.5 1455209816 Toy Story (1995) \n",
|
||
|
"6 19 1 4.0 965705637 Toy Story (1995) \n",
|
||
|
"7 21 1 3.5 1407618878 Toy Story (1995) \n",
|
||
|
"8 27 1 3.0 962685262 Toy Story (1995) \n",
|
||
|
"9 31 1 5.0 850466616 Toy Story (1995) \n",
|
||
|
"\n",
|
||
|
" genres \n",
|
||
|
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"1 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"2 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"3 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"4 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"5 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"6 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"7 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"8 Adventure|Animation|Children|Comedy|Fantasy \n",
|
||
|
"9 Adventure|Animation|Children|Comedy|Fantasy "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 39,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='movieId')\n",
|
||
|
"ml_df.head(10)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "lightweight-constitution",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Choosing a row, a column or several columns"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"id": "excited-interface",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Choosing rows by index\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Choosing rows by position\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"display(HTML(steam_df.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"# Choosing rows by index\n",
|
||
|
"chosen_df = steam_df[3:6]\n",
|
||
|
"\n",
|
||
|
"print(\"Choosing rows by index\")\n",
|
||
|
"display(HTML(chosen_df.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"# Choosing rows by position\n",
|
||
|
"chosen_df = steam_df.iloc[3:6]\n",
|
||
|
"\n",
|
||
|
"print(\"Choosing rows by position\")\n",
|
||
|
"display(HTML(chosen_df.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 42,
|
||
|
"id": "reflected-banner",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"0 The Elder Scrolls V Skyrim\n",
|
||
|
"1 The Elder Scrolls V Skyrim\n",
|
||
|
"2 Fallout 4\n",
|
||
|
"3 Fallout 4\n",
|
||
|
"4 Spore\n",
|
||
|
"5 Spore\n",
|
||
|
"6 Fallout New Vegas\n",
|
||
|
"7 Fallout New Vegas\n",
|
||
|
"8 Left 4 Dead 2\n",
|
||
|
"9 Left 4 Dead 2\n",
|
||
|
"Name: game-title, dtype: object\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Choosing a column\n",
|
||
|
"chosen_df = steam_df['game-title']\n",
|
||
|
"\n",
|
||
|
"print(chosen_df.head(10))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"id": "efficient-humidity",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Choosing several columns\n",
|
||
|
"chosen_df = steam_df[['user-id', 'game-title']]\n",
|
||
|
"\n",
|
||
|
"display(HTML(chosen_df.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "popular-cause",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Splitting the dataset into training and test set"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 45,
|
||
|
"id": "continuous-cheat",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Shuffled range of indices\n",
|
||
|
"[ 88886 27084 35588 56116 183664 34019 190384 138109 48325 94171\n",
|
||
|
" 163304 35071 45875 187591 107927 62332 97588 3784 669 75931]\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>88886</th>\n",
|
||
|
" <td>173434036</td>\n",
|
||
|
" <td>Mortal Kombat X</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>27084</th>\n",
|
||
|
" <td>80779496</td>\n",
|
||
|
" <td>Sins of a Solar Empire Trinity</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>35588</th>\n",
|
||
|
" <td>109669093</td>\n",
|
||
|
" <td>Killing Floor</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>225.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>56116</th>\n",
|
||
|
" <td>94269421</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>10.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>183664</th>\n",
|
||
|
" <td>279406744</td>\n",
|
||
|
" <td>BLOCKADE 3D</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>34019</th>\n",
|
||
|
" <td>126269125</td>\n",
|
||
|
" <td>Grand Theft Auto San Andreas</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>190384</th>\n",
|
||
|
" <td>71335402</td>\n",
|
||
|
" <td>7 Days to Die</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.2</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>138109</th>\n",
|
||
|
" <td>156818121</td>\n",
|
||
|
" <td>Half-Life 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>22.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>48325</th>\n",
|
||
|
" <td>114617787</td>\n",
|
||
|
" <td>Garry's Mod</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.2</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>94171</th>\n",
|
||
|
" <td>156615447</td>\n",
|
||
|
" <td>LEGO MARVEL Super Heroes</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.7</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>170080</th>\n",
|
||
|
" <td>81591317</td>\n",
|
||
|
" <td>Warframe</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>85279</th>\n",
|
||
|
" <td>44472980</td>\n",
|
||
|
" <td>Serious Sam Double D XXL</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>132916</th>\n",
|
||
|
" <td>45592640</td>\n",
|
||
|
" <td>Penumbra Black Plague</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12193</th>\n",
|
||
|
" <td>64787956</td>\n",
|
||
|
" <td>Always Sometimes Monsters</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>46374</th>\n",
|
||
|
" <td>192538478</td>\n",
|
||
|
" <td>Heroes & Generals</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>0.4</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>89823</th>\n",
|
||
|
" <td>1936551</td>\n",
|
||
|
" <td>Castle Crashers</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>179113</th>\n",
|
||
|
" <td>132196353</td>\n",
|
||
|
" <td>Knights and Merchants</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>144002</th>\n",
|
||
|
" <td>13190476</td>\n",
|
||
|
" <td>Blood Bowl 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>6.3</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>35416</th>\n",
|
||
|
" <td>60296891</td>\n",
|
||
|
" <td>Mirror's Edge</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>120786</th>\n",
|
||
|
" <td>62990992</td>\n",
|
||
|
" <td>Rome Total War</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"160000\n",
|
||
|
"40000\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"shuffle = np.array(list(range(len(steam_df))))\n",
|
||
|
"\n",
|
||
|
"# alternatively\n",
|
||
|
"\n",
|
||
|
"shuffle = np.arange(len(steam_df))\n",
|
||
|
"\n",
|
||
|
"np.random.shuffle(shuffle)\n",
|
||
|
"# shuffle = list(shuffle)\n",
|
||
|
"print(\"Shuffled range of indices\")\n",
|
||
|
"print(shuffle[:20])\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"train_test_split = 0.8\n",
|
||
|
"split_index = int(len(steam_df) * train_test_split)\n",
|
||
|
"\n",
|
||
|
"training_set = steam_df.iloc[shuffle[:split_index]]\n",
|
||
|
"test_set = steam_df.iloc[shuffle[split_index:]]\n",
|
||
|
"\n",
|
||
|
"display(HTML(training_set.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"display(HTML(test_set.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"print(len(training_set))\n",
|
||
|
"print(len(test_set))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "outside-twist",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Filtering"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "otherwise-rachel",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Filtering columns"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 46,
|
||
|
"id": "numerical-pride",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"chosen_df = steam_df.loc[:, ['user-id', 'game-title']]\n",
|
||
|
"\n",
|
||
|
"display(HTML(chosen_df.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "interior-cleaner",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Filtering rows"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 47,
|
||
|
"id": "marine-growth",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"0 False\n",
|
||
|
"1 False\n",
|
||
|
"2 True\n",
|
||
|
"3 True\n",
|
||
|
"4 False\n",
|
||
|
"5 False\n",
|
||
|
"6 False\n",
|
||
|
"7 False\n",
|
||
|
"8 False\n",
|
||
|
"9 False\n",
|
||
|
"Name: game-title, dtype: bool\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3187</th>\n",
|
||
|
" <td>87445402</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3188</th>\n",
|
||
|
" <td>87445402</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>83.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5683</th>\n",
|
||
|
" <td>25096601</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5684</th>\n",
|
||
|
" <td>25096601</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.6</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6219</th>\n",
|
||
|
" <td>211925330</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6220</th>\n",
|
||
|
" <td>211925330</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>133.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7300</th>\n",
|
||
|
" <td>115396529</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7301</th>\n",
|
||
|
" <td>115396529</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>17.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"condition = steam_df['game-title'] == 'Fallout 4'\n",
|
||
|
"\n",
|
||
|
"print(condition.head(10))\n",
|
||
|
"\n",
|
||
|
"chosen_df = steam_df.loc[condition]\n",
|
||
|
"\n",
|
||
|
"display(HTML(chosen_df.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "functioning-condition",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Filtering rows and columns at once"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 48,
|
||
|
"id": "advanced-religion",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3188</th>\n",
|
||
|
" <td>87445402</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>83.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5684</th>\n",
|
||
|
" <td>25096601</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>1.6</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6220</th>\n",
|
||
|
" <td>211925330</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>133.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7301</th>\n",
|
||
|
" <td>115396529</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>17.9</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7527</th>\n",
|
||
|
" <td>4834220</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>19.8</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7617</th>\n",
|
||
|
" <td>65229865</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>0.5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7712</th>\n",
|
||
|
" <td>65958466</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>123.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9963</th>\n",
|
||
|
" <td>91800733</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>63.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10700</th>\n",
|
||
|
" <td>43913966</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>65.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"condition = (steam_df['game-title'] == 'Fallout 4') & (steam_df['behavior-name'] == 'play')\n",
|
||
|
"\n",
|
||
|
"chosen_df = steam_df.loc[condition, ['user-id', 'game-title', 'value']]\n",
|
||
|
"\n",
|
||
|
"display(HTML(chosen_df.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "frequent-match",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Simple operations on columns"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "described-sister",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Multiply a column by 2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 51,
|
||
|
"id": "injured-sweet",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>546.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>174.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>29.8</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>24.2</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>purchase</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>17.8</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"steam_df_copy = steam_df.copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"steam_df_copy.loc[:, 'value'] = steam_df_copy['value'] * 2\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "executed-processor",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Choose the first n letters of a string"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 52,
|
||
|
"id": "forbidden-mining",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>Jumanji (1995)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>Grumpier Old Men (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>Waiting to Exhale (1995)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Father of the Bride Part II (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>Heat (1995)</td>\n",
|
||
|
" <td>Action|Crime|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>Sabrina (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>Tom and Huck (1995)</td>\n",
|
||
|
" <td>Adventure|Children</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>Sudden Death (1995)</td>\n",
|
||
|
" <td>Action</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>GoldenEye (1995)</td>\n",
|
||
|
" <td>Action|Adventure|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy St</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>Jumanj</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>Grumpi</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>Waitin</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Father</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>Heat (</td>\n",
|
||
|
" <td>Action|Crime|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>Sabrin</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>Tom an</td>\n",
|
||
|
" <td>Adventure|Children</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>Sudden</td>\n",
|
||
|
" <td>Action</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>Golden</td>\n",
|
||
|
" <td>Action|Adventure|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"ml_movies_df_copy = ml_movies_df.copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(ml_movies_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"ml_movies_df_copy.loc[:, 'title'] = ml_movies_df_copy['title'].str[:6]\n",
|
||
|
"\n",
|
||
|
"display(HTML(ml_movies_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "incorporated-entrance",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Take the mean of a column"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 53,
|
||
|
"id": "selected-trial",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"17.874384000000475\n",
|
||
|
"17.874384000000475\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Option 1\n",
|
||
|
"print(steam_df['value'].mean())\n",
|
||
|
"\n",
|
||
|
"# Option 2\n",
|
||
|
"print(np.mean(steam_df['value']))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "discrete-cheese",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Simple operation on filtered data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 54,
|
||
|
"id": "bridal-greenhouse",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>73</th>\n",
|
||
|
" <td>59945701</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>58.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1066</th>\n",
|
||
|
" <td>92107940</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>110.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1168</th>\n",
|
||
|
" <td>250006052</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>465.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1388</th>\n",
|
||
|
" <td>11373749</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>220.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2065</th>\n",
|
||
|
" <td>54103616</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>35.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2569</th>\n",
|
||
|
" <td>56038151</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.6</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3188</th>\n",
|
||
|
" <td>87445402</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>83.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3233</th>\n",
|
||
|
" <td>94088853</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>320.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>174.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>73</th>\n",
|
||
|
" <td>59945701</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>58.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1066</th>\n",
|
||
|
" <td>92107940</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>110.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1168</th>\n",
|
||
|
" <td>250006052</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>465.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1388</th>\n",
|
||
|
" <td>11373749</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>220.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2065</th>\n",
|
||
|
" <td>54103616</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>35.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2569</th>\n",
|
||
|
" <td>56038151</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.6</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3188</th>\n",
|
||
|
" <td>87445402</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>166.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3233</th>\n",
|
||
|
" <td>94088853</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>320.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"steam_df_copy = steam_df.loc[((steam_df['game-title'] == 'Fallout 4') | (steam_df['game-title'] == 'The Elder Scrolls V Skyrim')) \n",
|
||
|
" & (steam_df['behavior-name'] == 'play')].copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"condition = (steam_df_copy['game-title'] == 'Fallout 4') & (steam_df_copy['behavior-name'] == 'play')\n",
|
||
|
"\n",
|
||
|
"steam_df_copy.loc[condition, 'value'] = steam_df_copy.loc[condition, 'value'] * 2\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "relevant-strap",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Advanced operations on columns"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 55,
|
||
|
"id": "female-french",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>7.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>3.3</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.8</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.766319</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.572612</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.292535</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.251292</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.208274</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.140066</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.458615</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.335001</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"def reduce_outliers(x):\n",
|
||
|
" return min(np.log(1 + x), 4)\n",
|
||
|
"\n",
|
||
|
"steam_df_copy = steam_df.loc[steam_df['behavior-name'] == 'play'].copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"steam_df_copy.loc[:, 'value'] = steam_df_copy['value'].apply(reduce_outliers)\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "supported-graphic",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### The same apply operation can be achieved with the use of a lambda function"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 56,
|
||
|
"id": "objective-survey",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>7.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>3.3</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.8</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.766319</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.572612</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.292535</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.251292</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.208274</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.140066</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.458615</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1.335001</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"steam_df_copy = steam_df.loc[steam_df['behavior-name'] == 'play'].copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"steam_df_copy.loc[:, 'value'] = steam_df_copy['value'].apply(lambda x: min(np.log(1 + x), 4))\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "guilty-single",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Apply on two columns at once"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 58,
|
||
|
"id": "thrown-geneva",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>7.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>3.3</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.8</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" <th>value_2</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>273.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>87.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>14.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.766319</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>12.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.572612</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.9</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.292535</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.251292</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>8.1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.208274</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>7.5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.140066</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>3.3</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1.458615</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>2.8</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1.335001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user-id</th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>behavior-name</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" <th>zero</th>\n",
|
||
|
" <th>value_2</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>The Elder Scrolls V Skyrim</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>1092.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout 4</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>348.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Spore</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>41.218155</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.766319</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Fallout New Vegas</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>31.128608</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.572612</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>20.403559</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.292535</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>HuniePop</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>19.135980</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.251292</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Path of Exile</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>17.887023</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.208274</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Poly Bridge</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>16.050496</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>2.140066</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Left 4 Dead</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>4.813430</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1.458615</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>151603712</td>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>play</td>\n",
|
||
|
" <td>3.738003</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1.335001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"steam_df_copy = steam_df.loc[steam_df['behavior-name'] == 'play'].copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"steam_df_copy.loc[:, 'value_2'] = steam_df_copy['value'].apply(lambda x: min(np.log(1 + x), 4))\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"steam_df_copy.loc[:, 'value'] = steam_df_copy[['value', 'value_2']].apply(lambda x: x['value'] * x['value_2'], axis=1)  # access row fields by label; positional x[0] on a labelled Series is deprecated in pandas >= 2.1\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 59,
|
||
|
"id": "governing-alexandria",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>Jumanji (1995)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>Grumpier Old Men (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>Waiting to Exhale (1995)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Father of the Bride Part II (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>Heat (1995)</td>\n",
|
||
|
" <td>Action|Crime|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>Sabrina (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>Tom and Huck (1995)</td>\n",
|
||
|
" <td>Adventure|Children</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>Sudden Death (1995)</td>\n",
|
||
|
" <td>Action</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>GoldenEye (1995)</td>\n",
|
||
|
" <td>Action|Adventure|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>movieId</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" <th>title|genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" <td>Toy Story (1995)|Adventure|Animation|Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>Jumanji (1995)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" <td>Jumanji (1995)|Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>Grumpier Old Men (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" <td>Grumpier Old Men (1995)|Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>Waiting to Exhale (1995)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" <td>Waiting to Exhale (1995)|Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Father of the Bride Part II (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" <td>Father of the Bride Part II (1995)|Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>Heat (1995)</td>\n",
|
||
|
" <td>Action|Crime|Thriller</td>\n",
|
||
|
" <td>Heat (1995)|Action|Crime|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>Sabrina (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" <td>Sabrina (1995)|Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>Tom and Huck (1995)</td>\n",
|
||
|
" <td>Adventure|Children</td>\n",
|
||
|
" <td>Tom and Huck (1995)|Adventure|Children</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>Sudden Death (1995)</td>\n",
|
||
|
" <td>Action</td>\n",
|
||
|
" <td>Sudden Death (1995)|Action</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>GoldenEye (1995)</td>\n",
|
||
|
" <td>Action|Adventure|Thriller</td>\n",
|
||
|
" <td>GoldenEye (1995)|Action|Adventure|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"ml_movies_df_copy = ml_movies_df.copy()\n",
|
||
|
"\n",
|
||
|
"display(HTML(ml_movies_df_copy.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"ml_movies_df_copy.loc[:, 'title|genres'] = ml_movies_df_copy[['title', 'genres']].apply(lambda x: x['title'] + \"|\" + x['genres'], axis=1)  # access row fields by label; positional x[0] on a labelled Series is deprecated in pandas >= 2.1\n",
|
||
|
"\n",
|
||
|
"display(HTML(ml_movies_df_copy.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "critical-fields",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Grouping and aggregating"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "biological-light",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Find the most popular games (in terms of purchases)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 61,
|
||
|
"id": "greenhouse-scout",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>007 Legends</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0RBITALIS</th>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby)</th>\n",
|
||
|
" <td>7.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10 Second Ninja</th>\n",
|
||
|
" <td>6.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10,000,000</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>100% Orange Juice</th>\n",
|
||
|
" <td>10.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1000 Amps</th>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12 Labours of Hercules</th>\n",
|
||
|
" <td>10.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12 Labours of Hercules II The Cretan Bull</th>\n",
|
||
|
" <td>12.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12 Labours of Hercules III Girl Power</th>\n",
|
||
|
" <td>6.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>game-title</th>\n",
|
||
|
" <th>value</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>Dota 2</td>\n",
|
||
|
" <td>4841.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>Team Fortress 2</td>\n",
|
||
|
" <td>2323.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>Unturned</td>\n",
|
||
|
" <td>1563.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>Counter-Strike Global Offensive</td>\n",
|
||
|
" <td>1412.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>Half-Life 2 Lost Coast</td>\n",
|
||
|
" <td>981.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>Counter-Strike Source</td>\n",
|
||
|
" <td>978.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>Left 4 Dead 2</td>\n",
|
||
|
" <td>951.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>Counter-Strike</td>\n",
|
||
|
" <td>856.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>Warframe</td>\n",
|
||
|
" <td>847.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>Half-Life 2 Deathmatch</td>\n",
|
||
|
" <td>823.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"steam_grouped = steam_df.loc[steam_df['behavior-name'] == 'purchase', ['game-title', 'value']]\n",
|
||
|
"steam_grouped = steam_grouped.groupby('game-title').sum()\n",
|
||
|
"display(HTML(steam_grouped.head(10).to_html()))\n",
|
||
|
"\n",
|
||
|
"steam_grouped = steam_grouped.sort_values(by='value', ascending=False).reset_index()\n",
|
||
|
"\n",
|
||
|
"display(HTML(steam_grouped.head(10).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "indie-calcium",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Iterating over a DataFrame (if possible, use column operations instead)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 63,
|
||
|
"id": "laden-intersection",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[0, 151603712, The Elder Scrolls V Skyrim, purchase]\n",
|
||
|
"[1, 151603712, The Elder Scrolls V Skyrim, play]\n",
|
||
|
"[2, 151603712, Fallout 4, purchase]\n",
|
||
|
"[3, 151603712, Fallout 4, play]\n",
|
||
|
"[4, 151603712, Spore, purchase]\n",
|
||
|
"[5, 151603712, Spore, play]\n",
|
||
|
"[6, 151603712, Fallout New Vegas, purchase]\n",
|
||
|
"[7, 151603712, Fallout New Vegas, play]\n",
|
||
|
"[8, 151603712, Left 4 Dead 2, purchase]\n",
|
||
|
"[9, 151603712, Left 4 Dead 2, play]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"i = 0\n",
|
||
|
"for idx, row in steam_df.iterrows():\n",
|
||
|
" print(\"[{}, {}, {}, {}]\".format(idx, row['user-id'], row['game-title'], row['behavior-name']))\n",
|
||
|
" i += 1\n",
|
||
|
" if i == 10:\n",
|
||
|
" break"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "objective-associate",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Pandas tasks - Steam dataset"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "floppy-american",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 4.** How many people made a purchase in the Steam dataset? Remember that a person could buy many games, but you need to count every person only once."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "decimal-grass",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "protected-glossary",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 5.** How many people made a purchase of \"The Elder Scrolls V Skyrim\"?"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "distant-overview",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "vocational-weekly",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 6.** How many purchases did people make on average?"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "reflected-cathedral",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "signed-transaction",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 7.** Who bought the most games?"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "handmade-revolution",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "piano-bobby",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 8.** How many hours on average did people play \"The Elder Scrolls V Skyrim\"?"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "hydraulic-observation",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "stuffed-creativity",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 9.** Which games were played the most (in terms of the number of hours played)? Print the first 10 titles and respective numbers of hours."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "challenging-truck",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "crude-petroleum",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 10.** Which games are the most consistently played (in terms of the average number of hours played)? Print the first 10 titles and respective numbers of hours."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "surgical-lawsuit",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "monetary-toyota",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
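"**Task 11\\*\\*.** Correct the above calculation for the fact that zero hours played is never listed explicitly: when a user bought a game but never played it, only a purchase is recorded, and such a user should count as having played 0 hours."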
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "protective-report",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ceramic-awareness",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 12.** Apply the sigmoid function\n",
|
||
|
"$$f(x) = \\frac{1}{1 + e^{-\\frac{1}{100}x}}$$\n",
|
||
|
"to hours played and print the first 10 rows from the entire Steam dataset after this change."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "optical-announcement",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "actual-spotlight",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Pandas tasks - MovieLens dataset"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "inclusive-crash",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 13\\*.** Calculate popularity (by the number of users who watched a movie) of all genres."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "developmental-seven",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "personalized-finland",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 14\\*.** Calculate average rating for all genres."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "inside-personal",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "vertical-stick",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 15.** Calculate each movie's rating bias (the deviation of its average rating from the mean of all movies' average ratings). Print the first 10 in the form: title, average rating, bias."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "greatest-screen",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "hawaiian-haiti",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 16.** Calculate each user's rating bias (the deviation of their average rating from the mean of all users' average ratings). Print the first 10 in the form: user_id, average rating, bias."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "charitable-guyana",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "intimate-porcelain",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 17.** Randomly choose 10 movies and 10 users and print their interaction matrix in the form of a DataFrame with user_id as index and movie titles as columns (use HTML Display for that). You can iterate over the DataFrame in this task."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "brazilian-frost",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "boolean-modem",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Pandas + numpy tasks"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "worldwide-disclaimer",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 18.** Create the entire interaction matrix for the MovieLens dataset."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "marine-initial",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "micro-vision",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 19.** Calculate the matrix of size (n_users, n_users) where the entry at position (i, j) is the number of movies watched by both user i and user j. Print the submatrix of the first 10 rows and 10 columns."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "swedish-lambda",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "shaped-advance",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Task 20.** Calculate the matrix of size (n_items, n_items) where the entry at position (i, j) is the number of users who watched both movie i and movie j. Print the submatrix of the first 10 rows and 10 columns."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "quality-bubble",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Write your code here"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.6.9"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|