remove duplicate columns

This commit is contained in:
Marysia 2024-12-11 12:06:46 +01:00
parent 7da7a5288e
commit e6bb2e6970
5 changed files with 103964 additions and 103974 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -438,8 +438,8 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T22:45:09.915862Z",
"start_time": "2024-12-09T22:45:09.344595Z"
"end_time": "2024-12-11T11:04:32.648126Z",
"start_time": "2024-12-11T11:04:31.503628Z"
}
},
"cell_type": "code",
@ -449,13 +449,40 @@
],
"id": "f7b5130c72ad35af",
"outputs": [],
"execution_count": 43
"execution_count": 53
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# train_data = train_data.drop(columns=['similarUsers_y'])\n",
"# test_data = test_data.drop(columns=['similarUsers_y'])"
],
"id": "4cd347cc3bfd35aa",
"outputs": [],
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T22:45:09.942641Z",
"start_time": "2024-12-09T22:45:09.916835Z"
"end_time": "2024-12-11T11:03:13.267007Z",
"start_time": "2024-12-11T11:03:13.258841Z"
}
},
"cell_type": "code",
"source": [
"# train_data.rename(columns={'similarUsers_x': 'similarUsers'}, inplace=True)\n",
"# test_data.rename(columns={'similarUsers_x': 'similarUsers'}, inplace=True)"
],
"id": "aa8a10762dd70a4d",
"outputs": [],
"execution_count": 50
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-11T11:04:43.594264Z",
"start_time": "2024-12-11T11:04:43.495639Z"
}
},
"cell_type": "code",
@ -469,13 +496,13 @@
],
"id": "20dba13e7a3d105b",
"outputs": [],
"execution_count": 44
"execution_count": 54
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T22:45:09.968052Z",
"start_time": "2024-12-09T22:45:09.943175Z"
"end_time": "2024-12-11T11:04:45.194584Z",
"start_time": "2024-12-11T11:04:45.132035Z"
}
},
"cell_type": "code",
@ -485,13 +512,13 @@
],
"id": "be9f6106c5e4b04a",
"outputs": [],
"execution_count": 45
"execution_count": 55
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T22:49:01.766906Z",
"start_time": "2024-12-09T22:49:01.744809Z"
"end_time": "2024-12-11T11:03:47.746798Z",
"start_time": "2024-12-11T11:03:47.714278Z"
}
},
"cell_type": "code",
@ -527,20 +554,20 @@
"73175 [Action, Sci-Fi] 3.63 \n",
"73176 [Action, Crime, Drama, Thriller] 3.71 \n",
"\n",
" similarUsers_x similarUsers_y popularity \n",
"0 4.50 4.50 33.81 \n",
"1 4.00 4.00 8.81 \n",
"2 3.00 3.00 16.19 \n",
"3 3.88 3.88 33.20 \n",
"4 4.75 4.75 32.38 \n",
"... ... ... ... \n",
"73172 NaN NaN 0.82 \n",
"73173 5.00 5.00 1.02 \n",
"73174 NaN NaN 2.66 \n",
"73175 4.50 4.50 4.30 \n",
"73176 NaN NaN 0.41 \n",
" similarUsers popularity \n",
"0 4.50 33.81 \n",
"1 4.00 8.81 \n",
"2 3.00 16.19 \n",
"3 3.88 33.20 \n",
"4 4.75 32.38 \n",
"... ... ... \n",
"73172 NaN 0.82 \n",
"73173 5.00 1.02 \n",
"73174 NaN 2.66 \n",
"73175 4.50 4.30 \n",
"73176 NaN 0.41 \n",
"\n",
"[73177 rows x 10 columns]"
"[73177 rows x 9 columns]"
],
"text/html": [
"<div>\n",
@ -568,8 +595,7 @@
" <th>title</th>\n",
" <th>genres</th>\n",
" <th>genreMatch</th>\n",
" <th>similarUsers_x</th>\n",
" <th>similarUsers_y</th>\n",
" <th>similarUsers</th>\n",
" <th>popularity</th>\n",
" </tr>\n",
" </thead>\n",
@ -584,7 +610,6 @@
" <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n",
" <td>4.44</td>\n",
" <td>4.50</td>\n",
" <td>4.50</td>\n",
" <td>33.81</td>\n",
" </tr>\n",
" <tr>\n",
@ -597,7 +622,6 @@
" <td>[Comedy, Romance]</td>\n",
" <td>4.29</td>\n",
" <td>4.00</td>\n",
" <td>4.00</td>\n",
" <td>8.81</td>\n",
" </tr>\n",
" <tr>\n",
@ -610,7 +634,6 @@
" <td>[Action, Crime, Thriller]</td>\n",
" <td>4.27</td>\n",
" <td>3.00</td>\n",
" <td>3.00</td>\n",
" <td>16.19</td>\n",
" </tr>\n",
" <tr>\n",
@ -623,7 +646,6 @@
" <td>[Mystery, Thriller]</td>\n",
" <td>4.16</td>\n",
" <td>3.88</td>\n",
" <td>3.88</td>\n",
" <td>33.20</td>\n",
" </tr>\n",
" <tr>\n",
@ -636,7 +658,6 @@
" <td>[Crime, Mystery, Thriller]</td>\n",
" <td>4.22</td>\n",
" <td>4.75</td>\n",
" <td>4.75</td>\n",
" <td>32.38</td>\n",
" </tr>\n",
" <tr>\n",
@ -650,7 +671,6 @@
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73172</th>\n",
@ -662,7 +682,6 @@
" <td>[Drama, Horror, Thriller]</td>\n",
" <td>3.65</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.82</td>\n",
" </tr>\n",
" <tr>\n",
@ -675,7 +694,6 @@
" <td>[Action, Crime, Thriller]</td>\n",
" <td>3.66</td>\n",
" <td>5.00</td>\n",
" <td>5.00</td>\n",
" <td>1.02</td>\n",
" </tr>\n",
" <tr>\n",
@ -688,7 +706,6 @@
" <td>[Horror]</td>\n",
" <td>3.51</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.66</td>\n",
" </tr>\n",
" <tr>\n",
@ -701,7 +718,6 @@
" <td>[Action, Sci-Fi]</td>\n",
" <td>3.63</td>\n",
" <td>4.50</td>\n",
" <td>4.50</td>\n",
" <td>4.30</td>\n",
" </tr>\n",
" <tr>\n",
@ -714,27 +730,26 @@
" <td>[Action, Crime, Drama, Thriller]</td>\n",
" <td>3.71</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.41</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>73177 rows × 10 columns</p>\n",
"<p>73177 rows × 9 columns</p>\n",
"</div>"
]
},
"execution_count": 46,
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 46
"execution_count": 51
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T22:49:14.765827Z",
"start_time": "2024-12-09T22:49:14.748582Z"
"end_time": "2024-12-11T11:04:01.183022Z",
"start_time": "2024-12-11T11:04:01.159989Z"
}
},
"cell_type": "code",
@ -770,33 +785,20 @@
"27657 Return of Martin Guerre, The (Retour de Martin... \n",
"27658 Tin Drum, The (Blechtrommel, Die) (1979) \n",
"\n",
" genres genreMatch similarUsers_x \\\n",
"0 [Drama] 0.75 2.00 \n",
"1 [Drama, War] 0.62 3.67 \n",
"2 [Action, Crime, Drama, War] 1.33 3.00 \n",
"3 [Action, Adventure, Comedy, War] 1.95 1.50 \n",
"4 [Adventure, Animation, Comedy] 1.41 4.50 \n",
"... ... ... ... \n",
"27654 [Comedy, Drama, Romance] 3.28 NaN \n",
"27655 [Drama] 3.37 NaN \n",
"27656 [Drama, Romance] 3.28 NaN \n",
"27657 [Drama] 3.37 NaN \n",
"27658 [Drama, War] 3.43 NaN \n",
" genres genreMatch similarUsers popularity \n",
"0 [Drama] 0.75 2.00 5.74 \n",
"1 [Drama, War] 0.62 3.67 31.97 \n",
"2 [Action, Crime, Drama, War] 1.33 3.00 5.74 \n",
"3 [Action, Adventure, Comedy, War] 1.95 1.50 3.28 \n",
"4 [Adventure, Animation, Comedy] 1.41 4.50 5.74 \n",
"... ... ... ... ... \n",
"27654 [Comedy, Drama, Romance] 3.28 NaN 4.10 \n",
"27655 [Drama] 3.37 NaN 1.64 \n",
"27656 [Drama, Romance] 3.28 NaN 7.38 \n",
"27657 [Drama] 3.37 NaN 0.82 \n",
"27658 [Drama, War] 3.43 NaN 0.82 \n",
"\n",
" similarUsers_y popularity \n",
"0 2.00 5.74 \n",
"1 3.67 31.97 \n",
"2 3.00 5.74 \n",
"3 1.50 3.28 \n",
"4 4.50 5.74 \n",
"... ... ... \n",
"27654 NaN 4.10 \n",
"27655 NaN 1.64 \n",
"27656 NaN 7.38 \n",
"27657 NaN 0.82 \n",
"27658 NaN 0.82 \n",
"\n",
"[27659 rows x 10 columns]"
"[27659 rows x 9 columns]"
],
"text/html": [
"<div>\n",
@ -824,8 +826,7 @@
" <th>title</th>\n",
" <th>genres</th>\n",
" <th>genreMatch</th>\n",
" <th>similarUsers_x</th>\n",
" <th>similarUsers_y</th>\n",
" <th>similarUsers</th>\n",
" <th>popularity</th>\n",
" </tr>\n",
" </thead>\n",
@ -840,7 +841,6 @@
" <td>[Drama]</td>\n",
" <td>0.75</td>\n",
" <td>2.00</td>\n",
" <td>2.00</td>\n",
" <td>5.74</td>\n",
" </tr>\n",
" <tr>\n",
@ -853,7 +853,6 @@
" <td>[Drama, War]</td>\n",
" <td>0.62</td>\n",
" <td>3.67</td>\n",
" <td>3.67</td>\n",
" <td>31.97</td>\n",
" </tr>\n",
" <tr>\n",
@ -866,7 +865,6 @@
" <td>[Action, Crime, Drama, War]</td>\n",
" <td>1.33</td>\n",
" <td>3.00</td>\n",
" <td>3.00</td>\n",
" <td>5.74</td>\n",
" </tr>\n",
" <tr>\n",
@ -879,7 +877,6 @@
" <td>[Action, Adventure, Comedy, War]</td>\n",
" <td>1.95</td>\n",
" <td>1.50</td>\n",
" <td>1.50</td>\n",
" <td>3.28</td>\n",
" </tr>\n",
" <tr>\n",
@ -892,7 +889,6 @@
" <td>[Adventure, Animation, Comedy]</td>\n",
" <td>1.41</td>\n",
" <td>4.50</td>\n",
" <td>4.50</td>\n",
" <td>5.74</td>\n",
" </tr>\n",
" <tr>\n",
@ -906,7 +902,6 @@
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27654</th>\n",
@ -918,7 +913,6 @@
" <td>[Comedy, Drama, Romance]</td>\n",
" <td>3.28</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.10</td>\n",
" </tr>\n",
" <tr>\n",
@ -931,7 +925,6 @@
" <td>[Drama]</td>\n",
" <td>3.37</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.64</td>\n",
" </tr>\n",
" <tr>\n",
@ -944,7 +937,6 @@
" <td>[Drama, Romance]</td>\n",
" <td>3.28</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.38</td>\n",
" </tr>\n",
" <tr>\n",
@ -957,7 +949,6 @@
" <td>[Drama]</td>\n",
" <td>3.37</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.82</td>\n",
" </tr>\n",
" <tr>\n",
@ -970,21 +961,20 @@
" <td>[Drama, War]</td>\n",
" <td>3.43</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.82</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>27659 rows × 10 columns</p>\n",
"<p>27659 rows × 9 columns</p>\n",
"</div>"
]
},
"execution_count": 47,
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 47
"execution_count": 52
}
],
"metadata": {