remove duplicate columns
This commit is contained in:
parent
7da7a5288e
commit
e6bb2e6970
55320
datasets/test_all.csv
55320
datasets/test_all.csv
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
146356
datasets/train_all.csv
146356
datasets/train_all.csv
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -438,8 +438,8 @@
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-09T22:45:09.915862Z",
|
||||
"start_time": "2024-12-09T22:45:09.344595Z"
|
||||
"end_time": "2024-12-11T11:04:32.648126Z",
|
||||
"start_time": "2024-12-11T11:04:31.503628Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -449,13 +449,40 @@
|
||||
],
|
||||
"id": "f7b5130c72ad35af",
|
||||
"outputs": [],
|
||||
"execution_count": 43
|
||||
"execution_count": 53
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# train_data = train_data.drop(columns=['similarUsers_y'])\n",
|
||||
"# test_data = test_data.drop(columns=['similarUsers_y'])"
|
||||
],
|
||||
"id": "4cd347cc3bfd35aa",
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-09T22:45:09.942641Z",
|
||||
"start_time": "2024-12-09T22:45:09.916835Z"
|
||||
"end_time": "2024-12-11T11:03:13.267007Z",
|
||||
"start_time": "2024-12-11T11:03:13.258841Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# train_data.rename(columns={'similarUsers_x': 'similarUsers'}, inplace=True)\n",
|
||||
"# test_data.rename(columns={'similarUsers_x': 'similarUsers'}, inplace=True)"
|
||||
],
|
||||
"id": "aa8a10762dd70a4d",
|
||||
"outputs": [],
|
||||
"execution_count": 50
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-11T11:04:43.594264Z",
|
||||
"start_time": "2024-12-11T11:04:43.495639Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -469,13 +496,13 @@
|
||||
],
|
||||
"id": "20dba13e7a3d105b",
|
||||
"outputs": [],
|
||||
"execution_count": 44
|
||||
"execution_count": 54
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-09T22:45:09.968052Z",
|
||||
"start_time": "2024-12-09T22:45:09.943175Z"
|
||||
"end_time": "2024-12-11T11:04:45.194584Z",
|
||||
"start_time": "2024-12-11T11:04:45.132035Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -485,13 +512,13 @@
|
||||
],
|
||||
"id": "be9f6106c5e4b04a",
|
||||
"outputs": [],
|
||||
"execution_count": 45
|
||||
"execution_count": 55
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-09T22:49:01.766906Z",
|
||||
"start_time": "2024-12-09T22:49:01.744809Z"
|
||||
"end_time": "2024-12-11T11:03:47.746798Z",
|
||||
"start_time": "2024-12-11T11:03:47.714278Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -527,20 +554,20 @@
|
||||
"73175 [Action, Sci-Fi] 3.63 \n",
|
||||
"73176 [Action, Crime, Drama, Thriller] 3.71 \n",
|
||||
"\n",
|
||||
" similarUsers_x similarUsers_y popularity \n",
|
||||
"0 4.50 4.50 33.81 \n",
|
||||
"1 4.00 4.00 8.81 \n",
|
||||
"2 3.00 3.00 16.19 \n",
|
||||
"3 3.88 3.88 33.20 \n",
|
||||
"4 4.75 4.75 32.38 \n",
|
||||
"... ... ... ... \n",
|
||||
"73172 NaN NaN 0.82 \n",
|
||||
"73173 5.00 5.00 1.02 \n",
|
||||
"73174 NaN NaN 2.66 \n",
|
||||
"73175 4.50 4.50 4.30 \n",
|
||||
"73176 NaN NaN 0.41 \n",
|
||||
" similarUsers popularity \n",
|
||||
"0 4.50 33.81 \n",
|
||||
"1 4.00 8.81 \n",
|
||||
"2 3.00 16.19 \n",
|
||||
"3 3.88 33.20 \n",
|
||||
"4 4.75 32.38 \n",
|
||||
"... ... ... \n",
|
||||
"73172 NaN 0.82 \n",
|
||||
"73173 5.00 1.02 \n",
|
||||
"73174 NaN 2.66 \n",
|
||||
"73175 4.50 4.30 \n",
|
||||
"73176 NaN 0.41 \n",
|
||||
"\n",
|
||||
"[73177 rows x 10 columns]"
|
||||
"[73177 rows x 9 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
@ -568,8 +595,7 @@
|
||||
" <th>title</th>\n",
|
||||
" <th>genres</th>\n",
|
||||
" <th>genreMatch</th>\n",
|
||||
" <th>similarUsers_x</th>\n",
|
||||
" <th>similarUsers_y</th>\n",
|
||||
" <th>similarUsers</th>\n",
|
||||
" <th>popularity</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
@ -584,7 +610,6 @@
|
||||
" <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n",
|
||||
" <td>4.44</td>\n",
|
||||
" <td>4.50</td>\n",
|
||||
" <td>4.50</td>\n",
|
||||
" <td>33.81</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -597,7 +622,6 @@
|
||||
" <td>[Comedy, Romance]</td>\n",
|
||||
" <td>4.29</td>\n",
|
||||
" <td>4.00</td>\n",
|
||||
" <td>4.00</td>\n",
|
||||
" <td>8.81</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -610,7 +634,6 @@
|
||||
" <td>[Action, Crime, Thriller]</td>\n",
|
||||
" <td>4.27</td>\n",
|
||||
" <td>3.00</td>\n",
|
||||
" <td>3.00</td>\n",
|
||||
" <td>16.19</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -623,7 +646,6 @@
|
||||
" <td>[Mystery, Thriller]</td>\n",
|
||||
" <td>4.16</td>\n",
|
||||
" <td>3.88</td>\n",
|
||||
" <td>3.88</td>\n",
|
||||
" <td>33.20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -636,7 +658,6 @@
|
||||
" <td>[Crime, Mystery, Thriller]</td>\n",
|
||||
" <td>4.22</td>\n",
|
||||
" <td>4.75</td>\n",
|
||||
" <td>4.75</td>\n",
|
||||
" <td>32.38</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -650,7 +671,6 @@
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>73172</th>\n",
|
||||
@ -662,7 +682,6 @@
|
||||
" <td>[Drama, Horror, Thriller]</td>\n",
|
||||
" <td>3.65</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.82</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -675,7 +694,6 @@
|
||||
" <td>[Action, Crime, Thriller]</td>\n",
|
||||
" <td>3.66</td>\n",
|
||||
" <td>5.00</td>\n",
|
||||
" <td>5.00</td>\n",
|
||||
" <td>1.02</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -688,7 +706,6 @@
|
||||
" <td>[Horror]</td>\n",
|
||||
" <td>3.51</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2.66</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -701,7 +718,6 @@
|
||||
" <td>[Action, Sci-Fi]</td>\n",
|
||||
" <td>3.63</td>\n",
|
||||
" <td>4.50</td>\n",
|
||||
" <td>4.50</td>\n",
|
||||
" <td>4.30</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -714,27 +730,26 @@
|
||||
" <td>[Action, Crime, Drama, Thriller]</td>\n",
|
||||
" <td>3.71</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.41</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>73177 rows × 10 columns</p>\n",
|
||||
"<p>73177 rows × 9 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 46
|
||||
"execution_count": 51
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-09T22:49:14.765827Z",
|
||||
"start_time": "2024-12-09T22:49:14.748582Z"
|
||||
"end_time": "2024-12-11T11:04:01.183022Z",
|
||||
"start_time": "2024-12-11T11:04:01.159989Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -770,33 +785,20 @@
|
||||
"27657 Return of Martin Guerre, The (Retour de Martin... \n",
|
||||
"27658 Tin Drum, The (Blechtrommel, Die) (1979) \n",
|
||||
"\n",
|
||||
" genres genreMatch similarUsers_x \\\n",
|
||||
"0 [Drama] 0.75 2.00 \n",
|
||||
"1 [Drama, War] 0.62 3.67 \n",
|
||||
"2 [Action, Crime, Drama, War] 1.33 3.00 \n",
|
||||
"3 [Action, Adventure, Comedy, War] 1.95 1.50 \n",
|
||||
"4 [Adventure, Animation, Comedy] 1.41 4.50 \n",
|
||||
"... ... ... ... \n",
|
||||
"27654 [Comedy, Drama, Romance] 3.28 NaN \n",
|
||||
"27655 [Drama] 3.37 NaN \n",
|
||||
"27656 [Drama, Romance] 3.28 NaN \n",
|
||||
"27657 [Drama] 3.37 NaN \n",
|
||||
"27658 [Drama, War] 3.43 NaN \n",
|
||||
" genres genreMatch similarUsers popularity \n",
|
||||
"0 [Drama] 0.75 2.00 5.74 \n",
|
||||
"1 [Drama, War] 0.62 3.67 31.97 \n",
|
||||
"2 [Action, Crime, Drama, War] 1.33 3.00 5.74 \n",
|
||||
"3 [Action, Adventure, Comedy, War] 1.95 1.50 3.28 \n",
|
||||
"4 [Adventure, Animation, Comedy] 1.41 4.50 5.74 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"27654 [Comedy, Drama, Romance] 3.28 NaN 4.10 \n",
|
||||
"27655 [Drama] 3.37 NaN 1.64 \n",
|
||||
"27656 [Drama, Romance] 3.28 NaN 7.38 \n",
|
||||
"27657 [Drama] 3.37 NaN 0.82 \n",
|
||||
"27658 [Drama, War] 3.43 NaN 0.82 \n",
|
||||
"\n",
|
||||
" similarUsers_y popularity \n",
|
||||
"0 2.00 5.74 \n",
|
||||
"1 3.67 31.97 \n",
|
||||
"2 3.00 5.74 \n",
|
||||
"3 1.50 3.28 \n",
|
||||
"4 4.50 5.74 \n",
|
||||
"... ... ... \n",
|
||||
"27654 NaN 4.10 \n",
|
||||
"27655 NaN 1.64 \n",
|
||||
"27656 NaN 7.38 \n",
|
||||
"27657 NaN 0.82 \n",
|
||||
"27658 NaN 0.82 \n",
|
||||
"\n",
|
||||
"[27659 rows x 10 columns]"
|
||||
"[27659 rows x 9 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
@ -824,8 +826,7 @@
|
||||
" <th>title</th>\n",
|
||||
" <th>genres</th>\n",
|
||||
" <th>genreMatch</th>\n",
|
||||
" <th>similarUsers_x</th>\n",
|
||||
" <th>similarUsers_y</th>\n",
|
||||
" <th>similarUsers</th>\n",
|
||||
" <th>popularity</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
@ -840,7 +841,6 @@
|
||||
" <td>[Drama]</td>\n",
|
||||
" <td>0.75</td>\n",
|
||||
" <td>2.00</td>\n",
|
||||
" <td>2.00</td>\n",
|
||||
" <td>5.74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -853,7 +853,6 @@
|
||||
" <td>[Drama, War]</td>\n",
|
||||
" <td>0.62</td>\n",
|
||||
" <td>3.67</td>\n",
|
||||
" <td>3.67</td>\n",
|
||||
" <td>31.97</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -866,7 +865,6 @@
|
||||
" <td>[Action, Crime, Drama, War]</td>\n",
|
||||
" <td>1.33</td>\n",
|
||||
" <td>3.00</td>\n",
|
||||
" <td>3.00</td>\n",
|
||||
" <td>5.74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -879,7 +877,6 @@
|
||||
" <td>[Action, Adventure, Comedy, War]</td>\n",
|
||||
" <td>1.95</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>3.28</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -892,7 +889,6 @@
|
||||
" <td>[Adventure, Animation, Comedy]</td>\n",
|
||||
" <td>1.41</td>\n",
|
||||
" <td>4.50</td>\n",
|
||||
" <td>4.50</td>\n",
|
||||
" <td>5.74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -906,7 +902,6 @@
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>27654</th>\n",
|
||||
@ -918,7 +913,6 @@
|
||||
" <td>[Comedy, Drama, Romance]</td>\n",
|
||||
" <td>3.28</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>4.10</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -931,7 +925,6 @@
|
||||
" <td>[Drama]</td>\n",
|
||||
" <td>3.37</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.64</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -944,7 +937,6 @@
|
||||
" <td>[Drama, Romance]</td>\n",
|
||||
" <td>3.28</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>7.38</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -957,7 +949,6 @@
|
||||
" <td>[Drama]</td>\n",
|
||||
" <td>3.37</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.82</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@ -970,21 +961,20 @@
|
||||
" <td>[Drama, War]</td>\n",
|
||||
" <td>3.43</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.82</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>27659 rows × 10 columns</p>\n",
|
||||
"<p>27659 rows × 9 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 47
|
||||
"execution_count": 52
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
Loading…
Reference in New Issue
Block a user