remove duplicate columns

This commit is contained in:
Marysia 2024-12-11 12:06:46 +01:00
parent 7da7a5288e
commit e6bb2e6970
5 changed files with 103964 additions and 103974 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -438,8 +438,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2024-12-09T22:45:09.915862Z", "end_time": "2024-12-11T11:04:32.648126Z",
"start_time": "2024-12-09T22:45:09.344595Z" "start_time": "2024-12-11T11:04:31.503628Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -449,13 +449,40 @@
], ],
"id": "f7b5130c72ad35af", "id": "f7b5130c72ad35af",
"outputs": [], "outputs": [],
"execution_count": 43 "execution_count": 53
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# train_data = train_data.drop(columns=['similarUsers_y'])\n",
"# test_data = test_data.drop(columns=['similarUsers_y'])"
],
"id": "4cd347cc3bfd35aa",
"outputs": [],
"execution_count": null
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2024-12-09T22:45:09.942641Z", "end_time": "2024-12-11T11:03:13.267007Z",
"start_time": "2024-12-09T22:45:09.916835Z" "start_time": "2024-12-11T11:03:13.258841Z"
}
},
"cell_type": "code",
"source": [
"# train_data.rename(columns={'similarUsers_x': 'similarUsers'}, inplace=True)\n",
"# test_data.rename(columns={'similarUsers_x': 'similarUsers'}, inplace=True)"
],
"id": "aa8a10762dd70a4d",
"outputs": [],
"execution_count": 50
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-11T11:04:43.594264Z",
"start_time": "2024-12-11T11:04:43.495639Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -469,13 +496,13 @@
], ],
"id": "20dba13e7a3d105b", "id": "20dba13e7a3d105b",
"outputs": [], "outputs": [],
"execution_count": 44 "execution_count": 54
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2024-12-09T22:45:09.968052Z", "end_time": "2024-12-11T11:04:45.194584Z",
"start_time": "2024-12-09T22:45:09.943175Z" "start_time": "2024-12-11T11:04:45.132035Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -485,13 +512,13 @@
], ],
"id": "be9f6106c5e4b04a", "id": "be9f6106c5e4b04a",
"outputs": [], "outputs": [],
"execution_count": 45 "execution_count": 55
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2024-12-09T22:49:01.766906Z", "end_time": "2024-12-11T11:03:47.746798Z",
"start_time": "2024-12-09T22:49:01.744809Z" "start_time": "2024-12-11T11:03:47.714278Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -527,20 +554,20 @@
"73175 [Action, Sci-Fi] 3.63 \n", "73175 [Action, Sci-Fi] 3.63 \n",
"73176 [Action, Crime, Drama, Thriller] 3.71 \n", "73176 [Action, Crime, Drama, Thriller] 3.71 \n",
"\n", "\n",
" similarUsers_x similarUsers_y popularity \n", " similarUsers popularity \n",
"0 4.50 4.50 33.81 \n", "0 4.50 33.81 \n",
"1 4.00 4.00 8.81 \n", "1 4.00 8.81 \n",
"2 3.00 3.00 16.19 \n", "2 3.00 16.19 \n",
"3 3.88 3.88 33.20 \n", "3 3.88 33.20 \n",
"4 4.75 4.75 32.38 \n", "4 4.75 32.38 \n",
"... ... ... ... \n", "... ... ... \n",
"73172 NaN NaN 0.82 \n", "73172 NaN 0.82 \n",
"73173 5.00 5.00 1.02 \n", "73173 5.00 1.02 \n",
"73174 NaN NaN 2.66 \n", "73174 NaN 2.66 \n",
"73175 4.50 4.50 4.30 \n", "73175 4.50 4.30 \n",
"73176 NaN NaN 0.41 \n", "73176 NaN 0.41 \n",
"\n", "\n",
"[73177 rows x 10 columns]" "[73177 rows x 9 columns]"
], ],
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
@ -568,8 +595,7 @@
" <th>title</th>\n", " <th>title</th>\n",
" <th>genres</th>\n", " <th>genres</th>\n",
" <th>genreMatch</th>\n", " <th>genreMatch</th>\n",
" <th>similarUsers_x</th>\n", " <th>similarUsers</th>\n",
" <th>similarUsers_y</th>\n",
" <th>popularity</th>\n", " <th>popularity</th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
@ -584,7 +610,6 @@
" <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n", " <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n",
" <td>4.44</td>\n", " <td>4.44</td>\n",
" <td>4.50</td>\n", " <td>4.50</td>\n",
" <td>4.50</td>\n",
" <td>33.81</td>\n", " <td>33.81</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -597,7 +622,6 @@
" <td>[Comedy, Romance]</td>\n", " <td>[Comedy, Romance]</td>\n",
" <td>4.29</td>\n", " <td>4.29</td>\n",
" <td>4.00</td>\n", " <td>4.00</td>\n",
" <td>4.00</td>\n",
" <td>8.81</td>\n", " <td>8.81</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -610,7 +634,6 @@
" <td>[Action, Crime, Thriller]</td>\n", " <td>[Action, Crime, Thriller]</td>\n",
" <td>4.27</td>\n", " <td>4.27</td>\n",
" <td>3.00</td>\n", " <td>3.00</td>\n",
" <td>3.00</td>\n",
" <td>16.19</td>\n", " <td>16.19</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -623,7 +646,6 @@
" <td>[Mystery, Thriller]</td>\n", " <td>[Mystery, Thriller]</td>\n",
" <td>4.16</td>\n", " <td>4.16</td>\n",
" <td>3.88</td>\n", " <td>3.88</td>\n",
" <td>3.88</td>\n",
" <td>33.20</td>\n", " <td>33.20</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -636,7 +658,6 @@
" <td>[Crime, Mystery, Thriller]</td>\n", " <td>[Crime, Mystery, Thriller]</td>\n",
" <td>4.22</td>\n", " <td>4.22</td>\n",
" <td>4.75</td>\n", " <td>4.75</td>\n",
" <td>4.75</td>\n",
" <td>32.38</td>\n", " <td>32.38</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -650,7 +671,6 @@
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>73172</th>\n", " <th>73172</th>\n",
@ -662,7 +682,6 @@
" <td>[Drama, Horror, Thriller]</td>\n", " <td>[Drama, Horror, Thriller]</td>\n",
" <td>3.65</td>\n", " <td>3.65</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.82</td>\n", " <td>0.82</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -675,7 +694,6 @@
" <td>[Action, Crime, Thriller]</td>\n", " <td>[Action, Crime, Thriller]</td>\n",
" <td>3.66</td>\n", " <td>3.66</td>\n",
" <td>5.00</td>\n", " <td>5.00</td>\n",
" <td>5.00</td>\n",
" <td>1.02</td>\n", " <td>1.02</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -688,7 +706,6 @@
" <td>[Horror]</td>\n", " <td>[Horror]</td>\n",
" <td>3.51</td>\n", " <td>3.51</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.66</td>\n", " <td>2.66</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -701,7 +718,6 @@
" <td>[Action, Sci-Fi]</td>\n", " <td>[Action, Sci-Fi]</td>\n",
" <td>3.63</td>\n", " <td>3.63</td>\n",
" <td>4.50</td>\n", " <td>4.50</td>\n",
" <td>4.50</td>\n",
" <td>4.30</td>\n", " <td>4.30</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -714,27 +730,26 @@
" <td>[Action, Crime, Drama, Thriller]</td>\n", " <td>[Action, Crime, Drama, Thriller]</td>\n",
" <td>3.71</td>\n", " <td>3.71</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.41</td>\n", " <td>0.41</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>73177 rows × 10 columns</p>\n", "<p>73177 rows × 9 columns</p>\n",
"</div>" "</div>"
] ]
}, },
"execution_count": 46, "execution_count": 51,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"execution_count": 46 "execution_count": 51
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2024-12-09T22:49:14.765827Z", "end_time": "2024-12-11T11:04:01.183022Z",
"start_time": "2024-12-09T22:49:14.748582Z" "start_time": "2024-12-11T11:04:01.159989Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@ -770,33 +785,20 @@
"27657 Return of Martin Guerre, The (Retour de Martin... \n", "27657 Return of Martin Guerre, The (Retour de Martin... \n",
"27658 Tin Drum, The (Blechtrommel, Die) (1979) \n", "27658 Tin Drum, The (Blechtrommel, Die) (1979) \n",
"\n", "\n",
" genres genreMatch similarUsers_x \\\n", " genres genreMatch similarUsers popularity \n",
"0 [Drama] 0.75 2.00 \n", "0 [Drama] 0.75 2.00 5.74 \n",
"1 [Drama, War] 0.62 3.67 \n", "1 [Drama, War] 0.62 3.67 31.97 \n",
"2 [Action, Crime, Drama, War] 1.33 3.00 \n", "2 [Action, Crime, Drama, War] 1.33 3.00 5.74 \n",
"3 [Action, Adventure, Comedy, War] 1.95 1.50 \n", "3 [Action, Adventure, Comedy, War] 1.95 1.50 3.28 \n",
"4 [Adventure, Animation, Comedy] 1.41 4.50 \n", "4 [Adventure, Animation, Comedy] 1.41 4.50 5.74 \n",
"... ... ... ... \n", "... ... ... ... ... \n",
"27654 [Comedy, Drama, Romance] 3.28 NaN \n", "27654 [Comedy, Drama, Romance] 3.28 NaN 4.10 \n",
"27655 [Drama] 3.37 NaN \n", "27655 [Drama] 3.37 NaN 1.64 \n",
"27656 [Drama, Romance] 3.28 NaN \n", "27656 [Drama, Romance] 3.28 NaN 7.38 \n",
"27657 [Drama] 3.37 NaN \n", "27657 [Drama] 3.37 NaN 0.82 \n",
"27658 [Drama, War] 3.43 NaN \n", "27658 [Drama, War] 3.43 NaN 0.82 \n",
"\n", "\n",
" similarUsers_y popularity \n", "[27659 rows x 9 columns]"
"0 2.00 5.74 \n",
"1 3.67 31.97 \n",
"2 3.00 5.74 \n",
"3 1.50 3.28 \n",
"4 4.50 5.74 \n",
"... ... ... \n",
"27654 NaN 4.10 \n",
"27655 NaN 1.64 \n",
"27656 NaN 7.38 \n",
"27657 NaN 0.82 \n",
"27658 NaN 0.82 \n",
"\n",
"[27659 rows x 10 columns]"
], ],
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
@ -824,8 +826,7 @@
" <th>title</th>\n", " <th>title</th>\n",
" <th>genres</th>\n", " <th>genres</th>\n",
" <th>genreMatch</th>\n", " <th>genreMatch</th>\n",
" <th>similarUsers_x</th>\n", " <th>similarUsers</th>\n",
" <th>similarUsers_y</th>\n",
" <th>popularity</th>\n", " <th>popularity</th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
@ -840,7 +841,6 @@
" <td>[Drama]</td>\n", " <td>[Drama]</td>\n",
" <td>0.75</td>\n", " <td>0.75</td>\n",
" <td>2.00</td>\n", " <td>2.00</td>\n",
" <td>2.00</td>\n",
" <td>5.74</td>\n", " <td>5.74</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -853,7 +853,6 @@
" <td>[Drama, War]</td>\n", " <td>[Drama, War]</td>\n",
" <td>0.62</td>\n", " <td>0.62</td>\n",
" <td>3.67</td>\n", " <td>3.67</td>\n",
" <td>3.67</td>\n",
" <td>31.97</td>\n", " <td>31.97</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -866,7 +865,6 @@
" <td>[Action, Crime, Drama, War]</td>\n", " <td>[Action, Crime, Drama, War]</td>\n",
" <td>1.33</td>\n", " <td>1.33</td>\n",
" <td>3.00</td>\n", " <td>3.00</td>\n",
" <td>3.00</td>\n",
" <td>5.74</td>\n", " <td>5.74</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -879,7 +877,6 @@
" <td>[Action, Adventure, Comedy, War]</td>\n", " <td>[Action, Adventure, Comedy, War]</td>\n",
" <td>1.95</td>\n", " <td>1.95</td>\n",
" <td>1.50</td>\n", " <td>1.50</td>\n",
" <td>1.50</td>\n",
" <td>3.28</td>\n", " <td>3.28</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -892,7 +889,6 @@
" <td>[Adventure, Animation, Comedy]</td>\n", " <td>[Adventure, Animation, Comedy]</td>\n",
" <td>1.41</td>\n", " <td>1.41</td>\n",
" <td>4.50</td>\n", " <td>4.50</td>\n",
" <td>4.50</td>\n",
" <td>5.74</td>\n", " <td>5.74</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -906,7 +902,6 @@
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>27654</th>\n", " <th>27654</th>\n",
@ -918,7 +913,6 @@
" <td>[Comedy, Drama, Romance]</td>\n", " <td>[Comedy, Drama, Romance]</td>\n",
" <td>3.28</td>\n", " <td>3.28</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.10</td>\n", " <td>4.10</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -931,7 +925,6 @@
" <td>[Drama]</td>\n", " <td>[Drama]</td>\n",
" <td>3.37</td>\n", " <td>3.37</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.64</td>\n", " <td>1.64</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -944,7 +937,6 @@
" <td>[Drama, Romance]</td>\n", " <td>[Drama, Romance]</td>\n",
" <td>3.28</td>\n", " <td>3.28</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.38</td>\n", " <td>7.38</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -957,7 +949,6 @@
" <td>[Drama]</td>\n", " <td>[Drama]</td>\n",
" <td>3.37</td>\n", " <td>3.37</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.82</td>\n", " <td>0.82</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -970,21 +961,20 @@
" <td>[Drama, War]</td>\n", " <td>[Drama, War]</td>\n",
" <td>3.43</td>\n", " <td>3.43</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.82</td>\n", " <td>0.82</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>27659 rows × 10 columns</p>\n", "<p>27659 rows × 9 columns</p>\n",
"</div>" "</div>"
] ]
}, },
"execution_count": 47, "execution_count": 52,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"execution_count": 47 "execution_count": 52
} }
], ],
"metadata": { "metadata": {