From 6a24c1a4bc3e5b895432d7c82b3fff06d3fa71ee Mon Sep 17 00:00:00 2001
From: Mateusz <matpiw1@st.amu.edu.pl>
Date: Sun, 9 Jun 2024 16:41:14 +0200
Subject: [PATCH] init

---
 .gitignore      |    3 +
 464913.docx     |  Bin 0 -> 17385 bytes
 6_Projekt.ipynb | 1576 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1579 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 464913.docx
 create mode 100644 6_Projekt.ipynb
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..accd1fd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+twitter_training.csv
+twitter_validation.csv
+twitter-entity-sentiment-analysis.zip
\ No newline at end of file
diff --git a/464913.docx b/464913.docx
new file mode 100644
index 0000000000000000000000000000000000000000..665fe82a0a4e05608855f0974357a32ea19e9cb4
GIT binary patch
literal 17385
zcmeHv1AAr5wsvgW>Daby+eyc^(P787Z9D1Mww;b`J74y}J^So)zF%<fT+gg$t(x_Y
zS!>OARE;re)RLD120;M;1AqVk03ZNhjI~uW0R#Xj1O)&<27mz460)^%GPZHjQ+BsA
zcGRJDv$pz{2LeQ%0|4~Z{{N2u#lJv9!k9%LJ%Y$%;77nr)3RhMd13!ZUbHc`*$ZG8
zv#jvFV1D~Y=Po-#Au$8<$l$lBb<ZgXhJcyXG%HBNMpu$kbe<%?cwKWgs+FZZ8XqG}
z((MHLO@R?sUn}PJ4oqoQKzgFazPJe`D7lRMi~v;iCx9r*#AsDw&OuCgLGN(Pt5@Ai
zKdDQkUkSyl0kGh7@}YqeE2nARY{ZB#b!RQ+74?#&g7lhU!p@yeCvuXVQsm+-=RGZO
zZep+z_|hd*L%=Sr@U3Yjs78i}b8R*A18TgKRqfj9ifF~waCR?cvq+jh&e8IceZ^gE
zWM)(=rfsHt68dP)PzWjL;Ab^m1;4LibdMo3cWzwAAVF3E);L}WtM}a9TA*}|a|uw6
zW`>mdjTts9W+H?x0x<vnSi#XsSBjnIh~oWLnfMf4`{<dkddO_&n{s!ownQ3tzj7<k
z5r<L&yyDen6wLR<tj|wi0QrAYPrO*H=F6`uvR~Q<{iU9I4#rlFbhLkz|EHG!i*@l&
zU%eu(=SwYN_%8$A17<oDR(mk=W$6s3*DzMVA+#i<k=K?jmOkFOmzRNbj`hXHX6NFk
zJsdMcowpKnuCS6+V8c2f7oT<ev|iiY01^Y+3mM!N?RH?&4<1b3M~NgTMgzh%P{XFM
zz+#_5QYN~Q_bG)R_KTs-2`Q%L4T<aXv+Pxt9#g!Ta#9%<7A?fJJi*I&L^^|TTPHES
zp-lK{lbL8@qI);%bo!M~;(swlT2S0mV_DJDkz~XgR6t=}wa=V*<aZShga%SW=0b+j
zy?U8wBWZpdh8<w?v0f5rH0W*HVrS?G+<r|`|EVANU^LW3@BjdnssI3pUq6bgt%DJr
zk*%S#_17Hs$K-XUy%v=&j@7AW{u}Uqp<;gHz{*U<17z8#PBKrGlPUJWL^;T!Z8b&E
zcXz8?p};Jg0PEc1H$ijDs#Di6_sGXU57=2zPq0y8s<6db-qR_c_op8C+dKTF3Em@G
zFk<dUJo0u1-*(NVeDc#S85v(k(^La8ZCLCjVf4$>h=hNBJU;E`OY$W;I<4~o)7cPC
z%vFB4$QCz8YQMeR%&EvE$dtSjFOjS`kq2@%m(yZC8(V)*2Po`D)-bf!3k5`JKt9A^
zeat7C2pI>*p$?aYO$gc*JrVh11|*c>n~jXnk+ksfF^_e8%r9Vq7<m*{35iUJQl7yD
zFamcn!R`ex5@R+=t)EJ+`NTGgCzqlUs|5w;qUL>>Ch2gpz-N6E*jSVDFnWFVwn4qe
z-0Z-;fMFvcZp5;_q}6Cf2-n42b|7+<<m1H6Oo86EqB>_1HUfdX9I3${A|G!nC+>qU
zA`f2P8a#d-E`7ar9`Ie^9ptoGuxm%Oo?bD)Qe}<0Aw1JW_xX^Jsz>x|iCF6yD8iep
z6kF%xR4+6@xyB_tAl3onFqZz-%Tv~Dw~UkN6i{B!mW)W{WUDpuxb<5`V0i6#Ze$Ay
z2~-Ur+5W!21o)lp#*7rErya0(OmR0N;LI^qK{LlGBj7ZsRLw&DoJ}M_*4@dF@|3T7
zXwR;ZDu*LCB-k7RBR$SEQ1T(eR0aCTcWi(tsT@Ws3NF95>&f%L9&@WJ5xpvSHU%w#
zQaN!>j|SAkV9a=rYs<+4lyEuHW<~zM%8LcZ5h$@opwYGfd_d2L(wU8Z(pjCfn&AEA
z7?ry?LNaeobV7QD)eZYh$WPz_X0w@0ywZfaO>7-Vjb5)Rfxs-IntP<B%`bD*>EQsh
z6WY$RrzQjJ6>y;2?tU1?q7hU!EXrwThgy_;%6{#;9o`HvJ9O`b62II>#^XXa-ujdE
zeix!OXP97*#SCUIGgk}i!|VxBANV`UfT*zi^8vlB;Uiha?jl(xIW0?iKgr56+`XB4
zP2d5;L|Q%o$9Mmo!PIWkb0wS?k^GYVAIT!&d1y3f@%i(MV})mxD7|1ZEbgdD2~Xfk
zLpy|->;-pzso(k$;ePMLOZAE2X%N!-XJpp&!&Z&Y_&$MfAlr;EK-r6u-B-oTrt1&0
zN5=z@ivT{fpJGrpuMd38yOf=eEkPp<u~!_^z1|MZ_Fr~+3s<h29ko5z3#^v<=akum
zh2_#$P!pK(eX5DbE7eImQu(W1Gzd=PGZ@S{=7Rbob@e!%9Rw7WbGNYKT6%Xo<v=}6
zwH9a6xLl4vDnXAiXzW_<1k4B3E%Xb{wAU4p+h{Ubdg3TEb$pZ(9PGDyxX|!<jzF8L
zaaW%T;sSjrZwQsGW#yG|m~n9qr{x?4DVJ9(>%<ce#8Do_fP2j-3kIKLP9DjJMU5j;
zpV|95HM?O=X$?;|)9A!h3aEgI)VoQkmP0g`!^l{G*<4!<(6LEg{8pk3%T?Aj$o@{N
zGnJC<F!wZ8Oz#&Z)&A|`4J~^y{AXhD5~VLVm`f7mAtAo_Ecs^fUJ1O~-c}W4Nj=Cq
z4cKJ$5o2h4V0MNB-8DubJ~m6=ql(gmhVj(+E`oawxGnFFE>gR8OdGtE{J1;xp@KW&
z0XlHf8lF_N9vuMK@DPm~I&7P5*>Sh=rpn0tM326I^T;)3@|Nf1&(^Uv_;k4C8O$Ui
z!9Mqs8F_&vbK|h*_a&cZi~_S&@<MzJ4%f#3km)`{J>5QPznaqC4~7L5!6+HExDa8j
zh1U1f-#+i?heWhj^5bZ003)lbj}BpjhVb<yKjC)mf4lc*>o#B(3XE9s5VaoNqlFJV
zr$ypv;fLAb+}i|nZYDd!KJJX4n2uB0I}DOKlsZq!$5+vQE`q7NB<0+O^<3p=#69`x
zlaF32&7xW_8esCHD|kIJTp=g*Cb8@0Amqd=-`d=o6|Z}x3M=h3V>OzzF;bP~NqC?~
z14hLkzkBX^KQ{~`QwtHhO4Nw?;o9l(qnhm>!0{)<+x<Qt$|Wp&2S1(lxMWpX8Ld>!
zYhcA0e)A1>*z5Wn*^eUVnH|`4fR4+t7)r$MH%HYhv%m({H9Y(HBtAp07_^M9BhkXZ
zs<Xh4ea$FjbC)<lKtm!ZSu#b0s`caFsGv`*n@!Me*upk}{Gq#e!qP-jmS$Y7bGW7H
zBb)aG<%SVVo?`Soojp~T)$e(V`hH`K{_Am^W)(_lAKnV%92GCXb(3&1-w=b;KFRxZ
zt9)sIW26jt3F3%z%F@Fy;tf}ZLM(tiwXi&#0Tz+1>ocRj0d(8qtqG%&rapa^cX<wb
zj=mI~9o{wDdM`B!%G29qRjyhqx>-M)boG<pHxp6EO2R5SIcUX=0B?#1g;Z*9ee;Kv
zh>^}C%P7sosRi9Mvw;m5N1WT;M{}Ex1IQ@B_XInJeUPj}t)ANNX00p@W!>Flb)09X
zFtm%(u*z9<`XZ1RWv-pL^iDd3Q_quD?@-SdqCIWVkzB9Zz>ch-t*_GDVC}^4=d^aS
z)-MTj)uHlOce6M0D6!aewQSQ8Fd`Tag+Q>B1iYYIy#%SY;~Jc01eJUQL#%>YsnPi*
z_0)$-JMjU>6`-ULt(D@MxX3TSrM8a)_aD7z^~nsm4u|RJ(6(krVSI+cS`JgydR=D1
zdUNv5eHP3Y*j3-u$>w`V@Nn)NUdO+4siaWJFumCl+TM|DjJurpac9-d7C@igl8Zve
z$&G$%IV<Js4do67=G+(+a>P|dGnNzKqkjj(&)9aG0xc{PfFw?CRcQmUht5=$F7OS5
zANq)R>SA~5T$)u=gY0(<VMHrmci^)n--i&G|D|>I!W?$pkj86}GVB12BFZ4$JmiX>
zJ7m6!4ro-uc)05>{)3sQI0Jifs6quvj)T)b6@%&;{pjHW*p=zAxB^IM4nuRfxFbS7
zbXD!?S<dt5(_SR=;D*R1c75~n0AX6%kemXjS~4)&NOGCU4w|PK*O?15g}NDBC=R=_
zG3Q=;U1bpXIf(XgVf))*DDHDwuB(`Hv_*T4RT(>#jBBcRP`C*KRyg_7&GAP^y|h=h
zG-OsSD)(8uiO;Azd0HJ|odNYn#vwNP)oO@-@q?%~N-(+Xvehwyz-l+k6@us!I`{xa
zf+)`3dvDZnEy-yN*!@JEP9@lNID1la+MSZ$y%Ev7i?C`G=d#=l&8!dw<1Q@6B#ucv
zxRR`)#0()@q14ayqYEOf%uV>LUm;f1q)xJl2%3+Vb1a_+k!`u(uIpl`1kBH|*2?Q0
zTs1CFO>R-lrg+Y8Ad-ntxZ-&-azWdI4Ot1tTBC7NmYf9GQ(3A3R}J(ik|vc~DjC)$
z5JhHcs9#f{w>OfngQ52(2w+vpW6puDSbqm=oLsBjG-KQGb7VM5<*cVElJl)#+cAq=
z8MO-8$9#;Kj+sN(=1EJH6KQh!pX?^!Q<87j3oUE;U<Vt+5yH!hWi<yd!%s<_D$M#r
zYq5vpVA&W2p3zDhBB=r=o=wQF$sBTqK$pl}@QF$F514@{xi|daBExfkCc$^!Q`y;J
z;p+HgCT@wbdxc%s^c7>g9KWmKeSm7tR^p_=J1{0XcM-~Y^ASO};sEXRApdOYG*__Y
zHLSKndkhb12$%C=j-B$SdVO$wsDxN#yp2$Ku!m90iM^&YYfoTsOE7Vt5M%Jt7U)r7
zPmTsU3dqeJJ!6*zmTi{}=at&MHwYgY`8WXLYGF}4PzAI9o$FuNZU}1xtArjnG`I<v
zzvy)!o2JU9g+ph2{{3dSh`A=n;fMZp%uU0_3WE!OuVINWMo9a&m3yY|V)WN4fCFM|
zj(W_}0<&;Fq}6Sydd{*4m8fx(PpcHd^B_9-MW!0jR(qADC4@+AMwzK^k<;Kxm;)$*
zVqUlG)5UOietDsdwV*C9z4VV9*e$trG(!+g3(?@Bf-+47xjK&!8hDbN)eR`NYfYy?
zZg1{g9;BxjJL2s*6iuVE-tOp_?OAME_;ltJQK}oB>|yMLw(0E?gzdypW^h)i=;gLF
z7q*!vEft2{(cf?D3@{96k{bL*9<{PfxP9xLeepD~N`%g9o3o;TdGZQjNQBCY>2&bm
zMBF=;o<mxjtb8tFIy;|Je?~-SM&aWmL<<gpbE(^*tjh-_m6o27tBp7+8=Ap6$EtK#
zbCc->c~Vca!mTEis;PPx<nI>dl@#T{T8v<+ZvnamfhWB_?Lg!i9p{Z4{35Z>u^xV$
zQ>*L@#T?k!yNDqpwgvzV(T&$aa2{x?Xwqn}T!-DMky@<Hi>$17)KXK<_m=;p+J{tW
zCf2Gfm1=N9onLN}97fkoUr^y{cX)G4B9=*rSY0K*vIr^KZ2C?wtRZ1ovVypG(t5|x
zk0|3vPrtXiZ3=PYXH{hW5wmlKQKZw4S)%&8=47FiAVjFg;?&i8pwy-b57ZgOu*4>V
zANBjix78Ol6f!YfmytzHOHFhUoq8o{bN;lB2k2H?xAtqu$f5;c$W(df2bD;4>ft$+
z2UMN<pfpXM!*tWkr0<O;e22#*t<IOfr!M(+wQNFdE_v%7&FeRO+BsR0i7?k{A7;*(
z-)=_yE}oS$5{h=79@+z)@?cT9#MR?%azc{|RMnvCeAWu6jMt<S&Ks$Q54XQ*`UloI
z|G<iEgE#!V=zD*)B5Qw+5IQJ@1+I`q<`~I(Jub|3NtbjoeDhtu__6t1?%)}dKe54K
zbEDY1V<9bcQ2g3rcrZIQG3{*j@ZpLw4eGp7`fVJPK=T)~Z#5CxMfmF@xxw1J`y1P(
z8dV<i7Pg+C*7dY%!5B#zB8YTdjspg;;iztjBdN9v#KTyz2)6IGvH5oxCFP27JLN#O
zLKfAef@L6jl4r%Lxp$VcHunAtR4hZLezP=Auy3CI4*|tzkKi}&-<O02#uipQ^|bpp
zSMl8UKA1ok+7iMc@L2F=OL4H)Xx)BhKAjM>f$Um7%%M?_KzoqxgYcH8C47p@(tG<U
zx)uXAj5!1~7;%R#>4`B%9b3I`^syDTA&a|VKt1qglYU}L?#?b<XLQ`X7O(?f$Vb0d
zh+dFa$gQ3!2A^Lsto9G75xe+GNyy-zpb{40*Jsn5R{EGe-1vO?%m3+7*LQ~Ff`0kh
zO~3#EH~<iUfAgsS9(MY(U;WPz6yR56_^b8*?xP}p;*ZGa&&$BhfMrh_E^8skSVsy!
zZLLQ@BhPoCd&2@5&8{CJl1RZ|a{*CI2Bw{X3m&(Xs9ZU>9V|d~#GpY%b<Vb$fcOIs
z+n_0A<`$(D0)Rvw%X^Qmr)<h`y)l<BJ@qjL?Y>cF>h>4qaE*7NxYIwA2RUgd#FuUa
zRI}?uI>qNmT<2hhs~K0S(6(IeiE0dRuJ~4(%wM4NtQ(iijzmEp#Zg#q36xsVB)Uf=
z1ktY6Ai_%^m?<-WaPGB#pw(}lIfv3(c@eN{IU<auAfT9B1KA~XnXnFB;l$_G20>|*
zK@P2?1-OMlUI*^KX2)lwkRf>AuifDPR~ii=g?V0ottoh50RW(WwfGy2oXm`^jp_bY
zGW;z#qtO$!#){B^e#8gS%9Ug-(S&{t)?`IyT{lA5CJ{?SGlOO*$xLqH8u=}G4?65&
zQ^-#@tN}DFFWBmpX$?)rbC@onfI7UvoY=aFAI=guiQS9ID+3db;`Al59z=^Q#w4VG
zr_Fw%mL?^LSn#;Ql#d;6iL)t@Ax0D+9_8VTWc&>K72n4R_pBC!bb9B-yb0XHWqjG7
zT6ap27Hoy3&X?jc9Vrc9Fx^Qtnw)*#1zFpcGfGbmK+>8D6q3!8fB9x!)0e}ay7@w;
znEdC1H^(r<ZFYjOAcBDhagZ<7givXInec7UFQYN)?s-nYU#7SEL=GM>Qz_Q^F$|kC
z^|;Tqr>{$=mL%gYN0c6h&l<;-D0%KmCo?NXl}D{vr;keyBmni+>m@ulVjrtnAinf_
z$h5zXSl1@Hb@Wl??Ja9~tl+g^VXK#It+!vZo-EuTDQz}r-rR7Md6oGGDw_iz!8Us2
zo9P4wD!T%0x!!Zv7O#*{3=<}sr=t#={T-SthZDB6MBSDght3b6baPK6<4|8H<`k>s
z5W5){kgigu87`A+L17Bfv}m_bep+J4og^5wA2VnA?Q+p}CFx;`*MR%wsf6@eNfDc=
zYlzSI-=E2~sA&1MbNE~zyosGoJj0_;>FRR7KJPr=hMWGzMzQsCDx&<jpBwAy;$Q=2
zLVbE!IP7}A3B(*$-uem>JRX);f4E(<EV;d(Cxb=uf+DNr+a>fcW=y;G;0^(l{_>rM
z06eATX7LrHUhH;F1^@=O0dN-MDQJgF@nyRm!Wzp@Ie@|RZQd0Ih7-ub>t$vY7?6j$
zW0x~Ms5P(Jxd65^z~WqnI3U-%@)9MD7l2_&3Qie9AOAiYUU5X^2mxymjWE0J;VUS~
z$Y{JsWCRPq=&^?((=cC}V3YeZ*w~zKIKT4wU@Mp9+p>{2gq#!c)DK4>fr^%E(qT;T
zhzJ3Hl|9^U1LTo>5?GdC(}<fH0@6ite+`q_7O`ZkjWypTBEx0)E5@1FgxnTLcW{Kf
zeVkNq25~ge$&kZ5|99eaWPd+r@swDQ8T1gdTXtV3(_i>EkuN%nx4_JEA<E_i9u5|o
z5HUyLcA>=pML-(A$>$&?t1TW>COXSj)L352Onkao+9>C4oOH8#zT%dAF}8fbvWSCX
zR-xOO(u>a#Q;b-SxmMyE_lDhs2hHUQEb9?K?zRfOyh7BxVLkrQi!nswb&1CsCN$!M
zk5Ck>t7y3Fa&Y3x;@FWlBk@!Ksw23zG>5N9R0ZB`9{rl>$(5;IgtU+l8$LpUn9^i3
zj4@`(z0EN(Ol>G=I>6yk^j-0R?G<^w$+@8YJ{VbA^gHI@^bx`tQ16Vb$#};)lk>$J
z%+l3VzjS2_Sdb)JkBJkQBLqO$29j}3ZBP{>v+M>~BV7(ZLmEp+up0r3J;pfqEvxqJ
znF!SH&M5eZZq|%?&W-cDfVibRU4~VoKIjsS1(;YtG_<>o)EGWN7{qBJkg`rKFzht#
zEt%I0qVUOEs&2R4Wp(Gj^&O)GSp^qZ+;sMa)IRd`<Ow*)zK}=BRzgl5|LDfuZ^Z8P
zLmXXRO-Ia=bWqb>Cs!*kEO{qVrBU!jXHY*Uw_u>#y786~OAR(^<)G^5NE<gI^}pL}
zyI4yU4pwA#wfbhT#Jm*!X&bMmuHvj`sFH9op}9n3xlyIF^UekmsL7HdDnrjIUtYl3
zD8-FH-dCyQxP)iF($-_L7zUfitvkB7hz-H$KF;JbELX{4l3D;Y^^`9t|6#7J3`9Xw
zK89KE5a#iKJapP7Fb8cRJA}Qko^(3TsoSM;>Zx1KaDz2zH|;f=*}GG)jcOSEYzV7c
zrQ0C>yf~Irz-_GuEulzugQJ~1dSfh=U)o_sF*&kWd(=$R&vV;v8`V~M(OvDV^=?0b
z3&+`LvjuM<#AgOS)YpCLG$dzzD(t`EO=b74P<+7VpL6gMmGk>}an+`N#dYm{a`>E2
z=Q(Yvy*=q7vHjumzXtD#&?n?RK)%e3uK@o41n(V<ot(^VOdbE29~#tt+wQW$c;nZ6
z5)9wYJ~bon&IaIewQa6$<Q~(1w+*W#7PKCV*(++%|9mB137ac-G2^-;iG7a6r%1~?
zg2%@%cewM#Bx}%(IES|6?sb-hR=h;p<-1UgUvcXPErV8`#iAP&8y~`_^Z7XLBiK=g
zkl!WBDREcc55Lw8Ax)EY?;JwiL2u|`@zoloB;JZWFG4kVv}PJHU-%}#T0p$R?o3UE
z@yP7NV55P1j}jlY7va!~I!m0lMl)jnSxmDm_2xoKF|h`<B<P;Os4WQ7aNs#>H=){*
z2;1F3>i#rCidUw1ifeTWy2c}~nnA6Ll*Md@F~M+n<zJ=<E`HKe`xtSw_X_0osHLp@
z-A2p88L#D&6X+0AAZB-H$$s1;ib6f>HfG0}yC*alCbTi28Pri!zM(qLP8&D)E&jk%
zyB%TR!`bNtSEi5#$t?P*l#qnTdjsqpd7~EVW@0Q^`8&8yJgiU!*Y6+CD(+5=<rG5c
zNWFfHI07V*%1$@vx*=~$Qg0=DjPMSCD=j~(ZX;(coI<e19oRjl>!>Zc69G`TH>yC9
z_))IGk_%7t-9r^N%u(FJR~6TDCp4$Q8t@RTNE=LW+xb{MwacTf(C;`SJ)?eaKjSeZ
zK#woNj{12@SoWBe-b1}`p-LTD!bIuV=C-caXu9GeQ(GwaJY+ldU~BSAaS789(fP>D
z^1C-X$B2rdaB>rYcE+3Qg&T&dA-tt|_1Wuq*|dD?E^+zbBYaW-h*7(G1BKeZSl&Vv
z%?o7ih-bPdRV6XEfiO(aSpZcYZQ~$XqcPd?<=@}ncSjV3r<}^Br`$65LDoex<m1nn
zq4Wqhdc&J5IDIlF-)H++4mW?-?xoL7{!4dVky5s7krB>|?t(D!$alGP_g5~Y{mwUS
z5>aCNod&*XIm9IgCZ=>t&{QTif-K9mp4pJ`KKGq8gx^g-OJ=)VHc*aXfRGjR;^QHP
zm5jUI{{1?ngE&8{(AR#*d4!Fh+u>D5grI|ISO|NK={zLf;W9Q?q%zpvChktY1l_<u
z{>Ya(bMrR1Lk_;+zLpk80>52OIW@KtLK+|TS<td*(KQ*DJrRJ4Xp#NF1s&-e1D4dt
z*f}@uLNkS-)#UUhZ*1zp$)rLI-d8PMZM2K4k~x5+f-O3IBpMZ^R9dV52wBR~*<)9{
z<7l*D!_1!Q1W+xnQI!K~#-FH#iu#j{VzbQIJz&{p>zpM;0jhcL5IMQskl{6Prs;SZ
zSV(3`zA^0Fy@RB5fTgq{qdC2?sZ1Y=Jz%x<>84vbE9ee~5D->UM|w$|g|l!iA-;O4
zjzzy;WE@g6;_`T}d5!kdePpq&dK-J5S;uqz>}=o{aa%`CVXAL25HII^pUL4}N<MvY
z-#qZ>zKKAa9t@+{*yNoyjU#w!vGL}&+s1KxBflh2MKD8f6Z-OXq2fg;ir=$!1>o9V
zSXZWbw2)rWSQ##Tu*x{+Any%*V(^IGKLsGsg}a-zg~FhCMq5S{7B(!CkHodEh1Coc
zTii##k&S)Wr13JSs(k8DI~<KdDj&V5TL)<z=Rvm<<6Ja?4N?lE4tgu3oQr@?RPQk#
zIXn1(4o#j!*PfT#OP7?xZ@$uOl7{T&4K<YhPW{LX%;j?2v5Rxp;JjvJOYaZ;I&IEc
z>@c5YEk5V6wca&6pXvUqLCu9HlsWz7lktC9(@6g`sBN6B4U8SWhRZ)j%kktjn_X6f
zpiAgGT#ya2`H?17b<<dxG(wR!@)`;c8L0-5pWuE4XHPDAMbXnkt7=E`WNQj-F?$X7
zk>>GC*`j<Leb=w>bQv64dCZ9h$1h7+rwFm55n&Uv@8`$clrFf|HN*7r{?r>hGZ(?>
zmtCG+Qo<u@!hX5HFuCzMv9ufODTD&z#T?t}dX8z7rTeSe#c@O}x^oFRnaY?@+`r2^
zQ6-CnEw|S7YX?@X?T*LjHH)s`GHB=p&7%?t=)i;cuo#6d<=4SMk@>HJ{C<lC<|{ZF
z<)8TFNqh?_6HAjJdY>4ZD30~#HD`!Kd62kfR=i}t{s2q{MXdRi6lMs_w*Y2eku|mh
z)eXw(B!bD}`Q76dEuhpb3jxv183fNH9}>FXSX4cw5v9dRiX?9Ev+TAxm#oo;$qDZ#
ztS2@11uh|7konw88Nuqp#UmpP2V#*TEJZKB7=@E;copYWDYN>x*6@QCkQImJxl14)
z{lZ6S!^YB@9RM6;*9zC<$#9p<T?b&BIb!Be41#R-ifHdP2tFRSL>iI(yWc=DC<sqC
zHwiFZcoHHTOaflrn!{0dg9XQZl{y>`#3_7*Uv&TnkkLN5FhZ04a!)IMt&>Te7f#g%
zk<pLZ9*#wwS#?6pNB?@b=0h<L^z_@f6ETvp9sc~o89exXXH%|B#0pZ5Xy_nbx|Asn
z+MJUcX7_c7(yK55;TQ`zqj05j-}j$Froksv?xhFkx@&YA$IE5=oBa7fYOCZKX>ug4
zvfezs=D|VmDWQbZ`xIOnk50{&Li1uGSM@BlD7oizaWp|x-O=s%Xz-8Cv~^VNMnNCa
zO+R_gp=Ve3UK8_H=v*RK8dn#u{2S4>H-^^eqjo(4t<Ql1bKxi%Km%SE!{1U5-MvwT
zPz}+A&%v#afG<haNDTX(`pJF;tQJ=5eViTmNUmv!E=Iavm)8NKKCqnM&FX-t5ObiV
zml41nW;Kdr)>;ICQ$#fFoFw#eytN-vjZiLWO`jY2Ljjl?MGB;XsVc~$E?bhkWg+Ra
z06$`ChiMtmOsa}TEeu5GnfI=bC@SP-$Qij7OqQ=GKf(@`qTMN{cXU|jb-gjDHg{*s
z&7Hf}G<}ZT#aa|Q?t+xT&zSqaLjy(1{`wa6c^MH-%ec!Q96tGrvx}B)H_y%h(&scH
zUC(TAqCK?m?VQoZJ{FTrOS)p=Q(<rbJ<a~s%m9*)Z+BY4b*;Tb>Bp+-5~=nAGns})
z<Gc&wRZiWYKFYk#g@o&Rr!96@&9p~F+g^Itf7O3@U--{aSO9<<KLCI)d+F~EwWE`}
zmGR$9rL#;a$2E4Op-ZjC-+qO27d^52!5IaEsdbrd=bBvC!jpS2u`z)R5@r&k(a(3#
zzTZI0Aj4n{bR3r@Rj6Z6@bNZ2!MYFWx;lyNuY~I}uJ4?f-JAKK%&=}k3gL_M@gI{t
zFU16OW0(mU<c^Pz5)bnA_}<@bySRPE!<SMH`~V7`hlsgxX~GTe?YSrWNMyu6qN>d(
zU1;5sVg$V=@CG4A@RrIuV#ufzYdFF+t~q&^e`iNAlb#A~hl4^No8na=r=g8{XUwQ0
zxTn#rP*$5w{V<#24p+s5H_r+eC9@xqf{?Nt--GZ)LNnn$4ByE#i>QhfguET;d`alX
z?q-0q0|1zd>D<D&WHF?smVpbm%P^nNkL|H9j{po=B=7IhSAl;POT(_<rpzOOQ+qa}
zww4e`BGyY24zt{{kMxu=@*&$EIt@-RHNu0A3(4UJ5y<Jj@tH}!iGC5+n|E0Oc@|(k
zi<ejH@0)TGShSBFt6tG5>N>GKnIe%3=XU!Lzlo{!RFF|n?G>(7Gc}(SHGn5^w@+f4
zZFQvJYq)^C?OpboBxJ=JL%QV~m?T5Wj)H?Lqd~6&i?n8tz%R2dce}h8tQa{CDe~kq
zahTJ#lM^FeOV5<(tF=*oUNXC|cul8t6A>&LB?$Kq>t{&4kTM%L%Hm}IkpJ=AnMC{T
z)c;fvCFf1ca42U;gcoteSe2BcHksDO?MXaegTN&tN?F5TH~O?K;#!)xZf*u#rD&Nw
zAF|CrL0D-w_&84F?l7*1&jHK2>C>e&E_M^)#+i~AKTBE#rQiK-pQL~)b+vypK|I?l
z5v=y@KFu0R?clTYJMwub>njNfV;6Y5_0KY1DEn_T4*8Ae<M*m-3)S+VxSc8jVzw&N
z*IKB|Kg#`OH_LJ!ZB=4=-{_zX%!y~aUU;a%<X!ElErcDZ4TK!2H3c216$KorCHWnx
z{h|)kcQ7Vue^U4>T}`6}iq_z3=5?gEc}CdnUf?0E|7sHhV7s{Zc{#X|`FrD6+guB_
ztLuZy?uG24n+?YTR8`>ktU=8vS_V1=otgY}@-}lERFdZ1%l3#sw23a(F`$wOD-QyE
zQK0aT-?#*l1sA*S1WTTPdLYmbHIrt%zjl~^<T2>Y_zK5vxr3NAsGBC?*@qrS(3*+P
z#k{~+`N+2*!OS`{J|d)3-E2jQ%nds}stD6b<ieTNdh1HY(!&d6#?bHfl@EqCzuEQO
zZ2ur3=y~RhDkWP~<0B|i?^^<cV)8Q2q3=`s<Bk6Jr*SoO^R(iuS!OxhBJ-@5$_P2V
z_dnjq%yHkR7r|aOd#d(R4y4ydNqic*qBA5HQ-CcQ4&o^ik^=MvWB(Xu{zZi8K0IuJ
zIh;Rxu3kT}>@ewmhqeBSF=r81?cM<u3q50zR2A%M7H}Y?%`z3=ZjnXix6r<g)P_1|
z#39gjQ=gSuc_>nDhFY3zW=gD^jJ&#SK{Rj$u?DdfXWYVu*<i>ce5J;bnRsy*`^?&i
zqgxAW`KzL-K5ps6ydYR}klX7nKB!t(jTw2YZm6d~3UBBwRrDjIo^aQ=A~lXrp7xmJ
znQp{F!l7tU5?1jgI;1@}G4{U2Vq}2P<`IxStI?hwNfcD1kO^`<~lH}An!#98_
zcZI8dY$bfiw;?&s&*E&TtWXiVmy^;)(=0f6ie=DS`r!m(Ja@uSb5g0)WCrEy4;j6~
z45L{5RUOeW%P^>uP<En`vJ5A;>7?QCDZ)>;6j+MQOJc*DUomtF8=iic!LYaYZ}3=f
za?X10Tl-)69?rQ@d}XCobq~SVyfS86L2`%*Yy(%{c9(<<v43qxT5ox=JP?w)PgWGm
zXS#Q$X<d6YyFTs6-MWp}4{$y#`Jqm0!K?Dy>J0BX#~tu$Rk-0{SGaYqoiZ4qO!X0P
z47{oh653A6)9Yn@oi<-5uRMJ<9ENvb6d<;D>K{d=EaWXSDE;@&o$=GF%VX+jz*|&P
zr~|4=lyS8Tij-=0t~Z?N0YPd1cUT6nS+#LYaUWnA&1s_nrnn9dLGRFG;A=bIhJp=a
zF9KWEZF6_TX7B=TQC!aqt7LXP*$;`s%`#FA-qZc<5b{vNNgiCSH5m5MI`{KcS|(?*
zp)Jbw0phR6C&pagFk(ZsI97kt6mfo1ZgIzX=H7>wBtoyXDE2Bd?&MQ!-1yx#<(#&r
za;kXYo!Z)@nc3j9pp3jd)xcx<x4MW^-;-JMwF&7=4flmP<XFZ<)B1QEb`lv6jehE-
zrzT}4D|Or5RjN^RrJvm_D&IZU6p|O2j)F3FGuj%ec%!ivM3$3?f)Hho2F7ST=rq?m
zvtjck8D1NYsE7-csH}^mt<DOf4$!M?b_&i4gM?C|njR0}O=p_$<7h@#rMvrjr+shf
zr;N^$!A`;fv)Mk*1E2lEFyn*wS*Fcd$fo;H)KRAIS%_}xutk)lgp1_mx{=tOtfC7D
zr&bRlU~**+53QDfXR;M21?`GOH+6T^5f5$+(X?P7M1wr|ILaABg1lc#ygUIYjdBP-
zG%DfO6=eDh`CJ~^lIcy6l9(MrWpRPO*MA`hfd1BO!QRG}#TALNASVzC(JWUGM6<-A
zlbye08jnob?D7j?kND?n2!3dk!hbX+&joTM%N2yuEcr*3=BC9Ft+H7(GG&A77vvWN
zs^Z_f_KzF=tv~<SMHMn=a0t}Z-JekBIzOQ(XaxgN{-<w$hy3x8MEUy#@&EKbr0QwD
znA|#fMea(Br&8_gk<Yo1>VBg=aZMQaK}@BwuKwkBxZUKl;k3u@cCM!FT|kZd_%BXj
zYmQ1AhXoSE66EaPw5^v{49CFutp%5X@`uuZZza_v>_raqs5eC<>U+4(BCY0-CYfS7
zD@zR^eOn4@vns6>w33K@i1ZhdOhMQE2AheMu#U?EIvlI?KY7*@Q(=Xcdvu<c>AB&I
z95$PHFIME{US1UWkD$Z?><q}90!i*v5~FhIUpOaZk4|~Ga;ve=K@#{XG1?8!$s2;t
z$=TF|QuTGtX_dJEPc6jl99X)4wPEFD=)fx2R0V}1e${oiuJUlKLCRKvVYsRZCDfM_
zj7}`rk@vM?A=Oj`fmD=(!wsuOw0%{tEFkP3Sg^PJ>k>iF$O6Fbg++*i3yW||3syzk
z%-<jcf4!Mbm8zHmR+XsxGX0QDFO1N-uNB5lT)qBLjV5CSu9_5Bk9{M^50z(}W=+OS
z6SBMetjy`9NEe{dY*m=eS{IbnK9_{_a{KZ^R<$<h1=;RatY*!+KZ+A47MsTTV~cf*
zk&CaQbYkJxf?|64Mtptd*4`f#bAMD6e?dvw{*K>Om9nlHtV~+f=CR{%GFaD%)S_|z
zo4mF!c@~a;$UFF_sJMlH!V6{9zK|||6#syK;Tl{1vmeK*DfIBszG)=HR6ssc;cbM^
zb>I5%WuJ&9%qDlqut2@jJpwHDpIWY8s76$6Uy-}$gtJG;a<H4JMD~%tv&g?+ahB;R
zkwhBH0KE&+cW9`nXIH6Y*K1dl-rGJr(oGf(iJ7+2Lbc>0IoG~W-+-XByPqdg+R6%h
zc8z!gElBPYA!{{_$(J^%4QNwKs9tP<*xzH*%<)!M=vh99AD@(y>siO^{X8_Q>a^Z;
zV@;OBM&_}zE4Ub)rQ@G-IN0CKu*uqK{k9R^8Zy~D6wAlCiAUz@J+rp=F$lefm6VP?
zJTh^354|Ty@mZz|o;)OmuM8y5r&_x0B&=*8Ut7RBR;~ha^j1e~r7PgNd;`_$kbX_d
zA$9h%*P3`<PPiVV#=Fk`hh>&W)wPSV2A+>qC<Lx2HfmCrrCANr0d%S;*Ga{!Rq@TJ
zv=yudZhe~t`3Y0>7&{Vvl@jLs)C-A|u}nU?VqX6TG}cga!5fR-iVxOYAWGIU*NPf^
z@A4`iUG}kA<JA0?Eoek~B1M<Wki>Hm_z#sZmYEU6s=~Gf5=9};R#TYZQ*Ecn(WSL}
z%Yw3V&67-ngN6|!t<qW*@Wr2BR>msw*5_QDwK!Gv#_EH1OXGz8a;<c1D_C79o~Aqz
zrkg<Vv&W>vmVfnOLY4S_y3``^Vssoi*99HDi#1jOR)^aCDAB}872kCHs|-8NDM>IU
z+&=3b`9khhB(PS1QAtCjc>U0zp3<`IWxWgJl3eU>7x|99;20|P;w0xFQF9HLfPHW@
z+oHq;MS;tP@P>1e{zk3FqsTW;FVauzL%mcK)|JPsk)^$?<8X}Tc5a3A_mT3MN>*?<
zR1GGCGw;*t`Ywy+W-8&yL{_<;<qKnH82x(BWcBZlxF=M`3i?y??W&WK`>5xf&Q7rM
zJfs&BrLElYF!4>Cs(CJrEb|$=YnpA2O)N$+o8%kXAQ`vRd8wT0^0GH7r8m|rt=m3X
zQS%Ji8l;uKU3FxuPPX;v05hyk_x<9X8{O;}O$>0tjuz-|e{YPBIZJQ6TaTrkD199N
zcehW(bZ)}xYx4*BSJ(&TpKhP4v4PT`!sKTu*0$;N2)<o%t3Lc*wzaSf2rQ-ZDw(7*
zbMwx8-GF53^&CcAg~!#|GP;sW=9KBJd%$5R_@);l9d@P(;bY(=0iIQnz8+{hjwoR5
zv^gmE7HHmGZ5(60-Ri2z(t;DF+d=794j&&IB3#zbJJ`*O8|FY_r4!1i^)+=(siLtI
zlvODl!9hzlj*yxRr?mH!iqfCzY^I<kK^nCPipw&>2zlFZ`FcWr`-&)USYeGjjpt`X
z6NN;A)|4|TJe5CSD?xQwv7l@s!o@~t<QNq2>Dk=@74xRQe>b1fq34R`Okf%+iBIqX
zk`4kKh6bQ{>Z2EJG8CSyn?l^DhvTsmUfF%iWa8m(!Tb5`dK13O?G|q=p{XKh`++5_
zou<CRpy&koxdwm?d|mX}tK8tlTqVE+`dIt|5^7xt!Rh<wxx;a?>>hmPX7Cm$`to|C
zI>p6=4eV+u9hNMFgd>(*>pN&d`l-!jmP9kAsO~A&8I#^>2O4fevw_OH=nwD-seM#+
z=o9pg+ugEx-8wneMr@eeHu~rfUOMKAEv^;-$g1p>)m}6L&n(z_ZSGsMl*$V+-hSp2
z@bj_jT%<ZLUF0|u3K_G_L?h{sPZ{r|JR`w+NZk5QmzCdPug-cwe3q{wk~axRIQd2y
znq+hA7(-R|W>LGfj#07Rt|tAMVb>4D_;WsS0aRk-L@A_ZINHT?9PVia@x($#XBk8V
z+20>WB%}w~s@SAI`QWQE6s5^G(adqQ#mF~q=kN&P=8P(aX0%2Ob87`d#k_%EB!w)0
zU3f=IK%Otr;Ws~Ye6k(#{nr(oxh+n@;4gN*L;fi!Y4Wv$P+8x=%J`3<<b2|qO*%b>
z?<LteF2W;ha7KQSN<8bFjYqBXYNMfaO^~%wI*zWHT+POLXlwGaEieUQ19UlaX^!u+
zwa>@9e*D-ek5nd2Y#0=;Ye3c|=TAdKh0fGAJv>!(<Rt~HYluSs_!iG^qaB?dpBD~n
z;h;;SejnNs7+Gzhi!0-|D6$YjWn_%Kxr}IW1hCN#y9~r#`zxSm)Iw=h=92e4&_8_k
zk}%xiMEV&9_Efhr5UZ?x<l<05{L6oul;xdI=AQqIV5Fc|fI1kT^&djm7n7zp^TilY
zBu6~OlP($`&dc%KT0@e`2I#Z#tpUW)ue%&fNq~X(Sma<n8N|fpU@o)&ph?C0Hp)fN
z>zx?fl6t|C6y}uR)TeSe%=9Bx6jD(lQ@mBi&aEFe<$B`kY&Qrxbde(FJ19pD1P(N-
znw_~#llTLxP0@EK`r2{_M~*g3F_;%0-kL^nYi=QJawPr*v_i)v{9eo_lbOhO#P#5h
z<$GzNjBlkO`Y5n{gg8W^b3&+z{sg+pM#{SdVYaaar1yD}GqB2g7U#gl-0Or$-E6ze
zNUW=Oazx7xq{Xp0i(rFyP=K(A@Ck_f>+zYlBPN?B{q5_qzrm4#p91Bv8nA1LIyQ%K
z_i#2VRpmsJ5HRI}!h>Rur9{sR<?SZf>QGt=gt5q`mN5eSN*fKVU^rL46B5?Rh)F*Z
zbN(L4`Q+2~3WWdlysJ2MmM}P@U!0>I&eot{^R4m$Ui66WyC=+<KlvmrwBUJ(qOF;y
z*RaxP1$e@VCxi+l-^bha>ua?GX=T`%+rmnjnPQM$xtV?tl&wXW=-k@1Tg%sk*!4?!
z3s80J&Ngh+zxCxSV*Z~wPRSkkbFp89e*K~@;#ZE-S7MXAgRPw-ouRG6-&p&#x9<NE
zo4#fnuecr?Yyt%S%fuUastZf?HHWzc4<eFSxkjUrDx`bS?>h*WK>qe2KhEgxk;^{R
zmUTRFixq5{#&;QDP}HiO6We`nM~^5*z#@oq0%Iksi}!{sl62$xiI(EjMEerXy5q}l
zcBBZbXd?mwnb2@3uOyhwv!S_2<U&}B=flZLTJVJhhj#MqS%y+tSsJyip0JK$Y9T8D
zTlR*{iKMM<iNOm|I%aKj7N81p<ON`{cqYIwd1@i59<puBs4j;!w<mjxk7}3as&((b
zL+B%j(qO$#9lgK3)^%8vIWbWI7El)qE9iq73&`n5ygh7Vg1C(RKr-C+&9~q82Y-5{
z(y;k>bMb`m&eyzCw*l+&)1m)L4&WFgB<+DNrXD;zi(=ObaT?&6+<oPxj9F@Pm_%&y
z&ZbtT_iKA-@F_LP4?tgdi@T{Kd1+veXg@_TLJ~`K4d79|5M^sF$D|0hN-4C8tE;AB
z9brmhdZ;(QvHFxy$H#i=vR}vQs;sl;{rJue)yk3Svvl(n`Tb8G$EF20{r=+d_7}?$
z{>)>2JG(yy-~XfW7l*%&45c2MHF}s1_+>tfR<^o$0xe*Xff$1$eJ$XA>!~zpvR)&J
znB|<6rRX~JsTGehxy#tQIb?~@^yEo2v5*nBW|M^qBT)8p?A-^;2YH{Cm;SJyqb#Ey
z;Fd1_{Wg2At_IjYd_%w#1(0YfR{$9**yUaTG9{TLxkej!U7$h-c^<7Tnb$qu*ZqWe
z>BdNc(Lt8@9Z^LESjnrRo>XTBPtWLh7K`~k6z8Ww%Mtb---<I?@VI)0N=kSrwbJ4C
z40II?3Jl60V)nGJ07J;|v3HiBd%($A!BG=MPjV}g@Jk(#5VD18M5ziMMr}pKvc19w
zeq6Q;1NqBIfovFpzi?o2k0@SIpz1{ufum+}f4f0j;4QNgcZh|Jl0IvwtI0(hR$T${
z|AY<Ul6P;{GOp_(s*1{)86r&8W8UJR@-6{svA1+lo*@smBF7ewwid@Qww{iA81`hR
zNjJC^Q2hbuH`beVhX<k8dMYRWNV5T}$Dk!M@qh<bj?ATGddT#lu<=AoE(Uu*ln3@H
z4WIka?9TIXcg4t_`WoNUM@ybr?AX!1>?<L8Mh*oV>4Y<if#B*CXm$&DSG^|WSoO>o
z>w04ER@-1#AJ^#}I?1s5^O|Dh_t&HSyHWrG(R^9Q|NTD={>Fem760WQ4&<f&UBJKZ
zy8aUuAp1+x{<H`CufV@<NBtMH3+%saPW>zV-*a#O1^$|)(Eb7c|77C+RnlM6FaIS=
z9pnF#jQLj)f6dPMmk2Mse=a}$$qD%@{IBcP|ALni{R94QtJi<U|8>3VPk6F_-u{19
zzy1pTtJm}|a1P5qz<=?f{wm?GmhHbJ@Us6y!oS(Mf5rd1iSaKq0Kh#b0Kk739DjxX
j`^f%hcn8m);Qtus<)uKs=mh`(^YsG!V(2XYAD{jo`?9$t

literal 0
HcmV?d00001

diff --git a/6_Projekt.ipynb b/6_Projekt.ipynb
new file mode 100644
index 0000000..96d86ae
--- /dev/null
+++ b/6_Projekt.ipynb
@@ -0,0 +1,1576 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5K66MlDpZDnE"
+      },
+      "source": [
+        "## Analiza sentymentu w opiniach z Twittera\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "OaqaYfQFZDnH"
+      },
+      "source": [
+        "### Download dataset and prepare data\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "23k83t7RNCJa"
+      },
+      "source": [
+        "#### Installation of packages\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 98,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "I5pSpk6PNCJb",
+        "outputId": "3f30ecd9-104a-496a-fd52-447f4d64e814"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.0.3)\n",
+            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
+            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
+            "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1)\n",
+            "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
+            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
+            "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n",
+            "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.25.2)\n",
+            "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.11.4)\n",
+            "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n",
+            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n",
+            "Requirement already satisfied: emoji in /usr/local/lib/python3.10/dist-packages (2.12.1)\n",
+            "Requirement already satisfied: typing-extensions>=4.7.0 in /usr/local/lib/python3.10/dist-packages (from emoji) (4.12.1)\n",
+            "Requirement already satisfied: gensim in /usr/local/lib/python3.10/dist-packages (4.3.2)\n",
+            "Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.10/dist-packages (from gensim) (1.25.2)\n",
+            "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim) (1.11.4)\n",
+            "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim) (6.4.0)\n"
+          ]
+        }
+      ],
+      "source": [
+        "%pip install pandas\n",
+        "%pip install scikit-learn\n",
+        "%pip install emoji\n",
+        "%pip install gensim"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "FA_aZGAkNCJd"
+      },
+      "source": [
+        "#### Importing libraries\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 99,
+      "metadata": {
+        "id": "yQvOCaX2NCJd"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "import emoji\n",
+        "from gensim.utils import simple_preprocess"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gp8ITdbPNCJe"
+      },
+      "source": [
+        "#### Download the dataset\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 100,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "DlcNiu4UNCJe",
+        "outputId": "015b3ad1-6b9d-4845-dd98-0b0c085b12c9"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Dataset URL: https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis\n",
+            "License(s): CC0-1.0\n",
+            "twitter-entity-sentiment-analysis.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
+          ]
+        }
+      ],
+      "source": [
+        "!kaggle datasets download -d jp797498e/twitter-entity-sentiment-analysis"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yU4XFDrUNCJf"
+      },
+      "source": [
+        "#### Unzip the dataset\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 101,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "G2gaml-MNCJf",
+        "outputId": "e327c071-a0cd-480f-92d3-66388fd4dfcb"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Archive:  twitter-entity-sentiment-analysis.zip\n",
+            "  inflating: twitter_training.csv    \n",
+            "  inflating: twitter_validation.csv  \n"
+          ]
+        }
+      ],
+      "source": [
+        "!unzip -o twitter-entity-sentiment-analysis.zip"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bBO6YHwyNCJg"
+      },
+      "source": [
+        "#### Load the dataset\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 102,
+      "metadata": {
+        "id": "9KlnXJTtNCJg"
+      },
+      "outputs": [],
+      "source": [
+        "cols = [\"tweetid\", \"entity\", \"sentiment\", \"content\"]\n",
+        "twitter_training = pd.read_csv(\"twitter_training.csv\", names=cols)\n",
+        "twitter_validation = pd.read_csv(\"twitter_validation.csv\", names=cols)\n",
+        "dataset = pd.concat([twitter_training, twitter_validation])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XIslo9YQNCJg"
+      },
+      "source": [
+        "#### Info about the dataset\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 103,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "rnh5-0SZNCJh",
+        "outputId": "99319b5c-f4e2-4aee-e963-13e8d2e938ee"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "<class 'pandas.core.frame.DataFrame'>\n",
+            "Index: 75682 entries, 0 to 999\n",
+            "Data columns (total 4 columns):\n",
+            " #   Column     Non-Null Count  Dtype \n",
+            "---  ------     --------------  ----- \n",
+            " 0   tweetid    75682 non-null  int64 \n",
+            " 1   entity     75682 non-null  object\n",
+            " 2   sentiment  75682 non-null  object\n",
+            " 3   content    74996 non-null  object\n",
+            "dtypes: int64(1), object(3)\n",
+            "memory usage: 2.9+ MB\n"
+          ]
+        }
+      ],
+      "source": [
+        "dataset.info()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 104,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "rpHNMU57NCJh",
+        "outputId": "576fba81-c5fc-47ee-aae9-4f1734081e97"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(75682, 4)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 104
+        }
+      ],
+      "source": [
+        "dataset.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 105,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "eylMuu0GNCJj",
+        "outputId": "d04a8e0a-42ac-4f70-f277-5b9300e97016"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "sentiment\n",
+              "Negative      22808\n",
+              "Positive      21109\n",
+              "Neutral       18603\n",
+              "Irrelevant    13162\n",
+              "Name: count, dtype: int64"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 105
+        }
+      ],
+      "source": [
+        "dataset[\"sentiment\"].value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 106,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fm7H57JINCJj",
+        "outputId": "6af989a7-c3e7-4666-afef-c2859265d027"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "tweetid        0\n",
+              "entity         0\n",
+              "sentiment      0\n",
+              "content      686\n",
+              "dtype: int64"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 106
+        }
+      ],
+      "source": [
+        "dataset.isna().sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 107,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "AF_ZNH6pNCJk",
+        "outputId": "f3191e1e-1176-4c08-9c3e-31eee38020d8"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "3217"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 107
+        }
+      ],
+      "source": [
+        "dataset.duplicated().sum()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "LeIs8ceHNCJl"
+      },
+      "source": [
+        "#### Prepare the dataset\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GJfxQkbWNCJl"
+      },
+      "source": [
+        "##### Drop tweetid and entity columns\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 108,
+      "metadata": {
+        "id": "X3GwAqSQNCJl"
+      },
+      "outputs": [],
+      "source": [
+        "dataset = dataset.drop(columns=[\"tweetid\", \"entity\"], axis=1)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WpBDzbx6NCJm"
+      },
+      "source": [
+        "##### Drop null values\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 109,
+      "metadata": {
+        "id": "ixlmP6cwNCJm"
+      },
+      "outputs": [],
+      "source": [
+        "dataset.dropna(inplace=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Z_0UNES2NCJm"
+      },
+      "source": [
+        "##### Remove emojis\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 110,
+      "metadata": {
+        "id": "I9Mr8rAQNCJm"
+      },
+      "outputs": [],
+      "source": [
+        "dataset[\"content\"] = dataset[\"content\"].apply(\n",
+        "    lambda x: emoji.replace_emoji(x, replace=\"\")\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wLptSqrzNCJm"
+      },
+      "source": [
+        "##### Tokenize with simple_preprocess and re-join into text\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 111,
+      "metadata": {
+        "id": "gw8HC9XBNCJm"
+      },
+      "outputs": [],
+      "source": [
+        "dataset[\"content\"] = dataset[\"content\"].apply(lambda x: \" \".join(simple_preprocess(x)))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vayfwdnkNCJm"
+      },
+      "source": [
+        "##### Drop null values\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 112,
+      "metadata": {
+        "id": "6r1_Hk1JNCJn"
+      },
+      "outputs": [],
+      "source": [
+        "dataset.dropna(inplace=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "k2aDiHxrNCJn"
+      },
+      "source": [
+        "##### Drop duplicates\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 113,
+      "metadata": {
+        "id": "56YaoLvjNCJn"
+      },
+      "outputs": [],
+      "source": [
+        "dataset.drop_duplicates(inplace=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "exgWXEmNNCJn"
+      },
+      "source": [
+        "#### Info about the dataset after cleaning\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 114,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1oaFMRqANCJn",
+        "outputId": "05560ac6-9dd5-4397-8730-59239af28fc6"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "<class 'pandas.core.frame.DataFrame'>\n",
+            "Index: 65839 entries, 0 to 991\n",
+            "Data columns (total 2 columns):\n",
+            " #   Column     Non-Null Count  Dtype \n",
+            "---  ------     --------------  ----- \n",
+            " 0   sentiment  65839 non-null  object\n",
+            " 1   content    65839 non-null  object\n",
+            "dtypes: object(2)\n",
+            "memory usage: 1.5+ MB\n"
+          ]
+        }
+      ],
+      "source": [
+        "dataset.info()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 115,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "92f8IAAINCJo",
+        "outputId": "383826e9-6f8b-4e66-c7f9-2efacb8a5c96"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(65839, 2)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 115
+        }
+      ],
+      "source": [
+        "dataset.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 116,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "F7a05XcCNCJo",
+        "outputId": "4b189aa3-8df7-44be-aa20-9066d4cde04a"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "sentiment\n",
+              "Negative      20147\n",
+              "Positive      17868\n",
+              "Neutral       16193\n",
+              "Irrelevant    11631\n",
+              "Name: count, dtype: int64"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 116
+        }
+      ],
+      "source": [
+        "dataset[\"sentiment\"].value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 117,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GG3Qgk44NCJo",
+        "outputId": "4959a695-513d-47e0-cbe9-b05d149478cf"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "sentiment    0\n",
+              "content      0\n",
+              "dtype: int64"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 117
+        }
+      ],
+      "source": [
+        "dataset.isna().sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 118,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "u5g9cVa1NCJo",
+        "outputId": "214e37d0-71b0-4616-fb37-2b47e365ee14"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 118
+        }
+      ],
+      "source": [
+        "dataset.duplicated().sum()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "eVZuSi-SNCJo"
+      },
+      "source": [
+        "#### Split the dataset into training and testing sets\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 119,
+      "metadata": {
+        "id": "BCy_q1GHNCJp"
+      },
+      "outputs": [],
+      "source": [
+        "X_train, X_test, y_train, y_test = train_test_split(\n",
+        "    dataset[\"content\"], dataset[\"sentiment\"], test_size=0.2, random_state=0\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 120,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-qSKofcjNCJt",
+        "outputId": "d2e09e82-4174-4e87-a0be-5e9158668733"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "((52671,), (13168,), (52671,), (13168,))"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 120
+        }
+      ],
+      "source": [
+        "X_train.shape, X_test.shape, y_train.shape, y_test.shape"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "UmMEl5AYNCJt"
+      },
+      "source": [
+        "### TF-IDF - Logistic Regression\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2_scDhXqZDnJ"
+      },
+      "source": [
+        "#### Importing libraries\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 121,
+      "metadata": {
+        "id": "ugm_fVSiZDnK"
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "from sklearn.linear_model import LogisticRegression\n",
+        "from sklearn.metrics import classification_report"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "X8DY5eStNCJu"
+      },
+      "source": [
+        "#### Text Vectorization Using TF-IDF\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 122,
+      "metadata": {
+        "id": "IBAy8zjcNCJu"
+      },
+      "outputs": [],
+      "source": [
+        "vectorizer = TfidfVectorizer()\n",
+        "X_train_tfidf = vectorizer.fit_transform(X_train)\n",
+        "X_test_tfidf = vectorizer.transform(X_test)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "rS5pptZINCJu"
+      },
+      "source": [
+        "#### Training a Logistic Regression model\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 123,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 75
+        },
+        "id": "m3tmiTWVNCJu",
+        "outputId": "e9372b78-9ea9-4a4a-8289-f3e9ee6ba511"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "LogisticRegression(max_iter=1000)"
+            ],
+            "text/html": [
+              "<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(max_iter=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" checked><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression(max_iter=1000)</pre></div></div></div></div></div>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 123
+        }
+      ],
+      "source": [
+        "model = LogisticRegression(solver=\"lbfgs\", penalty=\"l2\", max_iter=1000)\n",
+        "model.fit(X_train_tfidf, y_train)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GiY_P6PKNCJu"
+      },
+      "source": [
+        "#### Predicting\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 124,
+      "metadata": {
+        "id": "CJ_9qh6ONCJu"
+      },
+      "outputs": [],
+      "source": [
+        "y_pred = model.predict(X_test_tfidf)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "koeb78PsNCJu"
+      },
+      "source": [
+        "#### Classification report\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 125,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "hsABx8mJNCJv",
+        "outputId": "c4c23ca6-c88a-4db9-fe66-36a7febd3594"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "  Irrelevant       0.82      0.70      0.75      2304\n",
+            "    Negative       0.80      0.86      0.83      4024\n",
+            "     Neutral       0.79      0.74      0.77      3169\n",
+            "    Positive       0.78      0.82      0.80      3671\n",
+            "\n",
+            "    accuracy                           0.79     13168\n",
+            "   macro avg       0.80      0.78      0.79     13168\n",
+            "weighted avg       0.79      0.79      0.79     13168\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "print(classification_report(y_test, y_pred))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Y17ccTy1NCJv"
+      },
+      "source": [
+        "### TF-IDF - Random Forest Classifier\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yyk_baF-NCJv"
+      },
+      "source": [
+        "#### Importing libraries\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 126,
+      "metadata": {
+        "id": "-xjXLHpQNCJv"
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "from sklearn.ensemble import RandomForestClassifier\n",
+        "from sklearn.metrics import classification_report"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Tl6mOx92NCJw"
+      },
+      "source": [
+        "#### Text Vectorization Using TF-IDF\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 127,
+      "metadata": {
+        "id": "bE9h15BcNCJw"
+      },
+      "outputs": [],
+      "source": [
+        "vectorizer = TfidfVectorizer()\n",
+        "X_train_tfidf = vectorizer.fit_transform(X_train)\n",
+        "X_test_tfidf = vectorizer.transform(X_test)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cNUGJWXINCJw"
+      },
+      "source": [
+        "#### Training a Random Forest Classifier model\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 128,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 75
+        },
+        "id": "WTrPtycbNCJw",
+        "outputId": "e97b690c-f698-414a-cc40-4843d12e2073"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "RandomForestClassifier()"
+            ],
+            "text/html": [
+              "<style>#sk-container-id-4 {color: black;background-color: white;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier()</pre></div></div></div></div></div>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 128
+        }
+      ],
+      "source": [
+        "model = RandomForestClassifier(criterion=\"gini\")\n",
+        "model.fit(X_train_tfidf, y_train)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HPlAbp8PNCJx"
+      },
+      "source": [
+        "#### Predicting\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 129,
+      "metadata": {
+        "id": "0ePAr1uZNCJx"
+      },
+      "outputs": [],
+      "source": [
+        "y_pred = model.predict(X_test_tfidf)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "oPnSaSB-NCJx"
+      },
+      "source": [
+        "#### Classification report\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 130,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "gqRJRLcKNCJx",
+        "outputId": "b4b1bbfb-5b76-4936-cb74-e200dc72e1c6"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "  Irrelevant       0.95      0.87      0.91      2304\n",
+            "    Negative       0.92      0.95      0.93      4024\n",
+            "     Neutral       0.94      0.91      0.93      3169\n",
+            "    Positive       0.90      0.94      0.92      3671\n",
+            "\n",
+            "    accuracy                           0.93     13168\n",
+            "   macro avg       0.93      0.92      0.92     13168\n",
+            "weighted avg       0.93      0.93      0.92     13168\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "print(classification_report(y_test, y_pred))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "18jz3yhuNCJy"
+      },
+      "source": [
+        "### Word2Vec - LSTM\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0gizZeVCNCJy"
+      },
+      "source": [
+        "#### Installation of packages\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 131,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Sy0x-OwPNCJy",
+        "outputId": "9815f9df-920a-48c0-f8c3-c174b2544ee4"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: tensorflow in /usr/local/lib/python3.10/dist-packages (2.15.0)\n",
+            "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.4.0)\n",
+            "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.6.3)\n",
+            "Requirement already satisfied: flatbuffers>=23.5.26 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (24.3.25)\n",
+            "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.5.4)\n",
+            "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0)\n",
+            "Requirement already satisfied: h5py>=2.9.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.9.0)\n",
+            "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (18.1.1)\n",
+            "Requirement already satisfied: ml-dtypes~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0)\n",
+            "Requirement already satisfied: numpy<2.0.0,>=1.23.5 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.25.2)\n",
+            "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.3.0)\n",
+            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorflow) (24.0)\n",
+            "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.20.3)\n",
+            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow) (67.7.2)\n",
+            "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0)\n",
+            "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.4.0)\n",
+            "Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (4.12.1)\n",
+            "Requirement already satisfied: wrapt<1.15,>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.14.1)\n",
+            "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.37.0)\n",
+            "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.64.1)\n",
+            "Requirement already satisfied: tensorboard<2.16,>=2.15 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.15.2)\n",
+            "Requirement already satisfied: tensorflow-estimator<2.16,>=2.15.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.15.0)\n",
+            "Requirement already satisfied: keras<2.16,>=2.15.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.15.0)\n",
+            "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n",
+            "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.16,>=2.15->tensorflow) (2.27.0)\n",
+            "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.16,>=2.15->tensorflow) (1.2.0)\n",
+            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.16,>=2.15->tensorflow) (3.6)\n",
+            "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.16,>=2.15->tensorflow) (2.31.0)\n",
+            "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.16,>=2.15->tensorflow) (0.7.2)\n",
+            "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.16,>=2.15->tensorflow) (3.0.3)\n",
+            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow) (5.3.3)\n",
+            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow) (0.4.0)\n",
+            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow) (4.9)\n",
+            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow) (1.3.1)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow) (3.3.2)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow) (3.7)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow) (2.0.7)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow) (2024.6.2)\n",
+            "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.16,>=2.15->tensorflow) (2.1.5)\n",
+            "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow) (0.6.0)\n",
+            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow) (3.2.2)\n",
+            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.25.2)\n"
+          ]
+        }
+      ],
+      "source": [
+        "%pip install tensorflow\n",
+        "%pip install numpy"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nyh33SHPNCJy"
+      },
+      "source": [
+        "#### Importing libraries\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 132,
+      "metadata": {
+        "id": "WGINcl6pNCJy"
+      },
+      "outputs": [],
+      "source": [
+        "from gensim.models import Word2Vec\n",
+        "import numpy as np\n",
+        "import tensorflow as tf\n",
+        "from sklearn.calibration import LabelEncoder"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "JrQ66Il0NCJy"
+      },
+      "source": [
+        "#### Function to convert a text to its averaged Word2Vec vector\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 133,
+      "metadata": {
+        "id": "3MEhNRL0NCJz"
+      },
+      "outputs": [],
+      "source": [
+        "def text_to_vector(text, word2vec, vector_size):\n",
+        "    \"\"\"Average the Word2Vec vectors of the tokens of `text` found in `word2vec.wv`.\n",
+        "\n",
+        "    Returns an all-zero vector of length `vector_size` when no token is found.\n",
+        "    \"\"\"\n",
+        "    total, hits = np.zeros(vector_size), 0\n",
+        "    for token in simple_preprocess(text):\n",
+        "        if token in word2vec.wv:\n",
+        "            total += word2vec.wv[token]\n",
+        "            hits += 1\n",
+        "    return total / hits if hits else total"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "JH_t6_ZtNCJz"
+      },
+      "source": [
+        "#### Tokenize texts\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 134,
+      "metadata": {
+        "id": "KVzBCEbWNCJz"
+      },
+      "outputs": [],
+      "source": [
+        "tokenized_text = dataset[\"content\"].apply(simple_preprocess)  # same tokenizer as text_to_vector, so vocabulary keys match the lookups"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "o_fLSc_uNCJz"
+      },
+      "source": [
+        "#### Vector size parameter\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 147,
+      "metadata": {
+        "id": "sLY4J1nTNCJ0"
+      },
+      "outputs": [],
+      "source": [
+        "vector_size = 100"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XIMHHrRqNCJ0"
+      },
+      "source": [
+        "#### Train Word2Vec model\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 148,
+      "metadata": {
+        "id": "UysosPtiNCJ1"
+      },
+      "outputs": [],
+      "source": [
+        "model_word2vec = Word2Vec(  # min_count=2 drops words that occur only once in the corpus\n",
+        "    sentences=tokenized_text, vector_size=vector_size, window=5, min_count=2, epochs=20, workers=4\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "xufmoLDlNCJ1"
+      },
+      "source": [
+        "#### Convert texts to Word2Vec vectors\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 149,
+      "metadata": {
+        "id": "QrY3vXcXNCJ1"
+      },
+      "outputs": [],
+      "source": [
+        "# Embed every document of both splits as one averaged Word2Vec vector.\n",
+        "# Each result is a 2-D array: one row per document, vector_size columns\n",
+        "# (text_to_vector always returns a vector of that length).\n",
+        "train_vectors, test_vectors = (\n",
+        "    np.array([text_to_vector(doc, model_word2vec, vector_size) for doc in split])\n",
+        "    for split in (X_train, X_test)\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-3hER130NCJ2"
+      },
+      "source": [
+        "#### Find the maximum sequence length in the training set\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 150,
+      "metadata": {
+        "id": "gcxbUr4lNCJ2"
+      },
+      "outputs": [],
+      "source": [
+        "max_len = max(map(len, train_vectors))  # every row is a fixed-size averaged vector, so this equals vector_size"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ezJ1OadFNCJ3"
+      },
+      "source": [
+        "#### Pad sequences to the same length\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 151,
+      "metadata": {
+        "id": "1oGWbRZtNCJ3"
+      },
+      "outputs": [],
+      "source": [
+        "X_train_emb, X_test_emb = (  # post-pad both splits to max_len and cast to float32\n",
+        "    tf.keras.preprocessing.sequence.pad_sequences(\n",
+        "        vectors, maxlen=max_len, dtype=\"float32\", padding=\"post\"\n",
+        "    )\n",
+        "    for vectors in (train_vectors, test_vectors)\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "LaBGGtm3NCJ4"
+      },
+      "source": [
+        "#### Encode labels\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 152,
+      "metadata": {
+        "id": "Xe96PgKtNCJ4"
+      },
+      "outputs": [],
+      "source": [
+        "label_encoder = LabelEncoder().fit(y_train)  # class ids are learned from the training labels only\n",
+        "y_train_enc = label_encoder.transform(y_train)\n",
+        "y_test_enc = label_encoder.transform(y_test)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "P_fI-cZHNCJ4"
+      },
+      "source": [
+        "#### Define LSTM model\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [],
+      "metadata": {
+        "id": "sEUnVQJEP-hy"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 153,
+      "metadata": {
+        "id": "pF5HZSRKNCJ4"
+      },
+      "outputs": [],
+      "source": [
+        "# Inputs are averaged Word2Vec vectors (floats), not token ids: feed them to the LSTM as length-1 sequences, no Embedding.\n",
+        "model = tf.keras.Sequential([\n",
+        "    tf.keras.Input(shape=X_train_emb.shape[1:]),\n",
+        "    tf.keras.layers.Reshape((1, X_train_emb.shape[1])),\n",
+        "    tf.keras.layers.LSTM(128),\n",
+        "    tf.keras.layers.Dense(64, activation=\"relu\"),\n",
+        "    tf.keras.layers.Dense(32, activation=\"relu\"),\n",
+        "    tf.keras.layers.Dense(4, activation=\"softmax\"),\n",
+        "])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YNk8m5lnNCJ4"
+      },
+      "source": [
+        "#### Compile the model\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 154,
+      "metadata": {
+        "id": "IqDLE1FuNCJ5"
+      },
+      "outputs": [],
+      "source": [
+        "model.compile(\n",
+        "    loss=\"sparse_categorical_crossentropy\",  # targets are integer class ids from LabelEncoder\n",
+        "    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n",
+        "    metrics=[\"accuracy\"],\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cIRnkLT0NCJ5"
+      },
+      "source": [
+        "#### Train the model\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 155,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "QFpStkX9NCJ5",
+        "outputId": "af188e61-04a4-45e7-e6f3-ef50cba478da"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/50\n",
+            "823/823 [==============================] - 10s 9ms/step - loss: 1.3439 - accuracy: 0.3438\n",
+            "Epoch 2/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.3261 - accuracy: 0.3678\n",
+            "Epoch 3/50\n",
+            "823/823 [==============================] - 6s 8ms/step - loss: 1.3163 - accuracy: 0.3774\n",
+            "Epoch 4/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.3020 - accuracy: 0.3975\n",
+            "Epoch 5/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.2904 - accuracy: 0.4119\n",
+            "Epoch 6/50\n",
+            "823/823 [==============================] - 8s 9ms/step - loss: 1.2814 - accuracy: 0.4186\n",
+            "Epoch 7/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.2741 - accuracy: 0.4262\n",
+            "Epoch 8/50\n",
+            "823/823 [==============================] - 8s 9ms/step - loss: 1.2667 - accuracy: 0.4325\n",
+            "Epoch 9/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.2588 - accuracy: 0.4372\n",
+            "Epoch 10/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.2513 - accuracy: 0.4407\n",
+            "Epoch 11/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.2451 - accuracy: 0.4450\n",
+            "Epoch 12/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 1.2365 - accuracy: 0.4491\n",
+            "Epoch 13/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.2291 - accuracy: 0.4560\n",
+            "Epoch 14/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.2218 - accuracy: 0.4593\n",
+            "Epoch 15/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.2144 - accuracy: 0.4636\n",
+            "Epoch 16/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.2066 - accuracy: 0.4669\n",
+            "Epoch 17/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.1989 - accuracy: 0.4707\n",
+            "Epoch 18/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.1887 - accuracy: 0.4759\n",
+            "Epoch 19/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.1810 - accuracy: 0.4803\n",
+            "Epoch 20/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.1717 - accuracy: 0.4846\n",
+            "Epoch 21/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.1631 - accuracy: 0.4883\n",
+            "Epoch 22/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 1.1533 - accuracy: 0.4948\n",
+            "Epoch 23/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.1426 - accuracy: 0.4983\n",
+            "Epoch 24/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.1338 - accuracy: 0.5040\n",
+            "Epoch 25/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.1229 - accuracy: 0.5075\n",
+            "Epoch 26/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 1.1126 - accuracy: 0.5125\n",
+            "Epoch 27/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.1042 - accuracy: 0.5167\n",
+            "Epoch 28/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 1.0920 - accuracy: 0.5237\n",
+            "Epoch 29/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.0809 - accuracy: 0.5266\n",
+            "Epoch 30/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 1.0730 - accuracy: 0.5307\n",
+            "Epoch 31/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.0628 - accuracy: 0.5357\n",
+            "Epoch 32/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.0536 - accuracy: 0.5422\n",
+            "Epoch 33/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.0399 - accuracy: 0.5480\n",
+            "Epoch 34/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.0350 - accuracy: 0.5503\n",
+            "Epoch 35/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.0237 - accuracy: 0.5553\n",
+            "Epoch 36/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 1.0217 - accuracy: 0.5550\n",
+            "Epoch 37/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 1.0073 - accuracy: 0.5633\n",
+            "Epoch 38/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 0.9927 - accuracy: 0.5703\n",
+            "Epoch 39/50\n",
+            "823/823 [==============================] - 6s 8ms/step - loss: 0.9848 - accuracy: 0.5732\n",
+            "Epoch 40/50\n",
+            "823/823 [==============================] - 6s 8ms/step - loss: 0.9786 - accuracy: 0.5748\n",
+            "Epoch 41/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 0.9735 - accuracy: 0.5774\n",
+            "Epoch 42/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 0.9633 - accuracy: 0.5839\n",
+            "Epoch 43/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 0.9530 - accuracy: 0.5873\n",
+            "Epoch 44/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 0.9506 - accuracy: 0.5893\n",
+            "Epoch 45/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 0.9364 - accuracy: 0.5958\n",
+            "Epoch 46/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 0.9260 - accuracy: 0.6006\n",
+            "Epoch 47/50\n",
+            "823/823 [==============================] - 7s 9ms/step - loss: 0.9257 - accuracy: 0.6008\n",
+            "Epoch 48/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 0.9155 - accuracy: 0.6048\n",
+            "Epoch 49/50\n",
+            "823/823 [==============================] - 7s 8ms/step - loss: 0.9103 - accuracy: 0.6066\n",
+            "Epoch 50/50\n",
+            "823/823 [==============================] - 6s 7ms/step - loss: 0.8999 - accuracy: 0.6122\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "<keras.src.callbacks.History at 0x790a27fa2b60>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 155
+        }
+      ],
+      "source": [
+        "model.fit(X_train_emb, y_train_enc, epochs=50, batch_size=64)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "CAoGoGZ7NCJ5"
+      },
+      "source": [
+        "#### Predicting\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 156,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "LCtJlNP9NCJ5",
+        "outputId": "15942b31-270a-4817-9b55-f5a48663dbed"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "412/412 [==============================] - 2s 4ms/step\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Softmax scores for every test sample: one row per sample, one column per class.\n",
+        "y_pred = model.predict(X_test_emb)\n",
+        "\n",
+        "# Predicted class id = column holding the largest score in each row.\n",
+        "y_preds_argmax = list(y_pred.argmax(axis=1))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ee3GIUHJNCJ6"
+      },
+      "source": [
+        "#### Classification report\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 157,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "MMCmZDLgNCJ6",
+        "outputId": "67ad7bb2-386c-432f-dd95-0c3105a13f0c"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "           0       0.32      0.20      0.25      2304\n",
+            "           1       0.46      0.62      0.53      4024\n",
+            "           2       0.44      0.43      0.44      3169\n",
+            "           3       0.45      0.39      0.42      3671\n",
+            "\n",
+            "    accuracy                           0.44     13168\n",
+            "   macro avg       0.42      0.41      0.41     13168\n",
+            "weighted avg       0.43      0.44      0.42     13168\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "print(classification_report(y_test_enc, y_preds_argmax))"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file