This commit is contained in:
BartoszOwczarek22 2025-01-15 12:24:10 +01:00
parent b5b17859e9
commit 7ed72c8c04
13 changed files with 365 additions and 618 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@ -3,7 +3,7 @@
BitSent to aplikacja do analizy nastrojów rynku kryptowalut na podstawie ostatnich tweetów z hashtagiem `#BTC`. Narzędzie wykorzystuje historyczne dane o tweetach oraz bieżące tweety, aby przewidzieć, czy nastrój rynku jest **wzrostowy** czy **spadkowy**.
## Funkcjonalności
- Pobieranie najnowszych tweetów z hashtagiem `#BTC` za pomocą `snscrape`.
- Pobieranie najnowszych tweetów z hashtagiem `#BTC` za pomocą `tweepy`.
- Analiza sentymentu tweetów na podstawie wcześniej wytrenowanego modelu.
- Wyświetlenie przewidywanego nastroju rynku.
@ -19,4 +19,8 @@
1. Sklonuj repozytorium:
```bash
git clone https://github.com/yourusername/bitsent.git
cd bitsent
cd bitsent
```
2. Upewnij się, że Python jest zainstalowany:
```bash
python --version
```
3. Uruchom skrypt z katalogu, w którym znajduje się plik `main.py`:
```bash
python main.py
```

BIN
data/.DS_Store vendored

Binary file not shown.

219
data/sentiment.csv Normal file
View File

@ -0,0 +1,219 @@
timestamp,vader_sentiment,Gain/Loss
2021-02-07 10:00:00,-0.4588,-0.6788333333315677
2021-02-07 12:00:00,0.21332,6.400249999991502
2021-02-07 14:00:00,0.01019,1.3066666666636593
2021-02-07 16:00:00,0.009415384615384612,0.7320000000036089
2021-02-07 18:00:00,0.09711666666666667,-3.5455833333326154
2021-02-07 20:00:00,0.15535714285714283,2.6428333333387855
2021-02-07 22:00:00,0.5515625,1.1895833333328483
2021-02-08 00:00:00,0.20713750000000003,-0.19383333333826158
2021-03-07 02:00:00,0.1654875,0.5811666666704696
2021-03-07 04:00:00,0.551075,0.3430000000007567
2021-03-07 06:00:00,0.4704,-1.1864166666637175
2021-03-07 08:00:00,0.1485625,-1.993833333333896
2021-03-07 10:00:00,-0.1795888888888889,-0.22058333332824986
2021-03-07 12:00:00,0.2149857142857143,7.492249999995693
2021-03-07 14:00:00,0.56125,2.582249999999476
2021-03-07 16:00:00,0.4662307692307692,-1.4725000000034925
2021-03-07 18:00:00,0.4016777777777778,-2.1731666666601086
2021-03-07 20:00:00,-0.030000000000000002,3.4504999999990105
2021-03-07 22:00:00,0.16445,-4.590250000001106
2021-03-08 00:00:00,0.4204,-0.09775000000081491
2021-04-07 02:00:00,0.38886666666666664,-0.3353333333361661
2021-04-07 04:00:00,0.3067,-1.1171666666705278
2021-04-07 06:00:00,0.3860428571428572,2.4347500000003492
2021-04-07 08:00:00,0.4668666666666667,6.90849999999773
2021-04-07 10:00:00,0.1549,-3.6265000000057626
2021-04-07 12:00:00,0.16542857142857142,-2.6046666666588862
2021-04-07 14:00:00,0.2331,-10.873500000001513
2021-04-07 16:00:00,0.46502,4.004333333337854
2021-04-07 18:00:00,0.4878166666666666,-2.458166666670877
2021-04-07 20:00:00,0.3259666666666667,-2.629416666663019
2021-04-07 22:00:00,0.2620625,2.2340000000040163
2021-04-08 00:00:00,0.779925,2.775166666673613
2021-04-08 22:00:00,-0.5245,2.317250000000058
2021-04-09 00:00:00,0.434575,-0.5196666666670353
2021-05-02 14:00:00,-0.11995000000000003,10.756000000001222
2021-05-02 20:00:00,0.0,1.5450000000055297
2021-05-03 00:00:00,0.0,0.12341666666907258
2021-05-04 16:00:00,0.4404,-6.38216666666267
2021-05-04 18:00:00,0.0,-13.469833333336283
2021-05-04 22:00:00,0.0,1.6697500000009313
2021-05-05 00:00:00,0.1366,-4.916749999996682
2021-05-07 02:00:00,0.7772333333333333,-0.12258333333011251
2021-05-07 04:00:00,0.2733666666666667,-0.992500000007567
2021-05-07 06:00:00,0.16635,-3.8770000000004075
2021-05-07 08:00:00,0.3377666666666667,2.7214999999996508
2021-05-07 10:00:00,0.44745999999999997,-3.0332500000004075
2021-05-07 12:00:00,0.09823333333333334,3.53041666666104
2021-05-07 14:00:00,0.17858000000000002,-3.262749999994412
2021-05-07 16:00:00,0.39808571428571426,5.318583333340939
2021-05-07 18:00:00,0.47093333333333337,5.213916666667501
2021-05-07 20:00:00,0.4985555555555556,4.666250000002037
2021-05-07 22:00:00,0.4201,-4.9601666666640085
2021-05-08 00:00:00,0.33936,-2.767500000009022
2021-05-08 02:00:00,0.65265,1.1218333333308692
2021-05-08 04:00:00,0.25,2.6674166666634846
2021-05-08 06:00:00,0.6531,3.3850000000020373
2021-05-08 08:00:00,0.0343,2.966166666665231
2021-05-08 10:00:00,0.1341,6.882833333336748
2021-05-08 12:00:00,0.2842,1.922166666656267
2021-05-08 14:00:00,0.09386666666666665,-2.432749999999942
2021-05-08 16:00:00,0.16463333333333333,1.1237500000061118
2021-05-08 18:00:00,0.43216666666666664,-13.482499999998254
2021-05-08 20:00:00,0.10203999999999999,7.5151666666642996
2021-05-08 22:00:00,0.11926666666666663,1.2554166666668607
2021-05-09 00:00:00,0.3818,-2.3267500000001746
2021-06-02 02:00:00,0.0,4.328083333326504
2021-06-02 06:00:00,-0.4019,3.2506666666668025
2021-06-02 08:00:00,0.0,4.16608333332988
2021-06-02 10:00:00,0.5411,5.885833333326445
2021-06-02 12:00:00,0.0,2.295749999997497
2021-06-02 16:00:00,0.1806,5.274499999999534
2021-06-02 20:00:00,0.4569,-0.5001666666576057
2021-06-02 22:00:00,0.0,0.4448333333275514
2021-06-03 00:00:00,0.2233,1.529916666666395
2021-06-04 02:00:00,0.18036666666666668,6.992166666670528
2021-06-04 08:00:00,0.0,-7.88749999999709
2021-06-04 14:00:00,0.0,-3.3074999999953434
2021-06-04 16:00:00,-0.11315,1.0437500000043656
2021-06-04 18:00:00,-0.10313333333333334,4.11699999999837
2021-06-04 20:00:00,0.0,1.5028333333320916
2021-06-05 00:00:00,0.6474500000000001,1.6735833333295886
2021-06-08 02:00:00,0.8519,-5.279750000001513
2021-06-08 04:00:00,0.03889999999999999,2.045250000002852
2021-06-08 06:00:00,-0.212,-4.252083333332848
2021-06-08 08:00:00,0.0421,1.2044166666673846
2021-06-08 10:00:00,0.4404,-1.4051666666637175
2021-06-08 12:00:00,-0.27875000000000005,-1.4900833333304035
2021-06-08 14:00:00,0.12541250000000004,1.7237500000046566
2021-06-08 16:00:00,0.13175,-7.653499999993073
2021-06-08 18:00:00,0.42533333333333334,-5.440916666670091
2021-06-08 20:00:00,0.38624444444444445,2.150583333335817
2021-06-08 22:00:00,0.10626666666666668,4.075833333332412
2021-06-09 00:00:00,0.0,4.534416666661855
2021-07-02 02:00:00,0.148,2.257833333336748
2021-07-02 04:00:00,-0.1531,-0.28416666667180834
2021-07-02 06:00:00,0.2764,-3.5486666666693054
2021-07-02 14:00:00,0.3094,0.11041666667006211
2021-07-02 16:00:00,-0.586,5.521916666664765
2021-07-04 02:00:00,-0.3818,1.9293333333334886
2021-07-04 04:00:00,0.24209999999999998,3.0608333333366318
2021-07-04 08:00:00,0.5994,3.5314999999973224
2021-07-04 10:00:00,0.38775,1.960416666668607
2021-07-04 12:00:00,0.3818,-0.5524166666655219
2021-07-04 14:00:00,0.8067,-1.1629166666607489
2021-07-04 16:00:00,-0.20095,-1.7917499999966822
2021-07-04 18:00:00,0.37565000000000004,0.6822499999980209
2021-07-04 20:00:00,0.1206,1.4725833333286573
2021-07-05 00:00:00,0.0,3.2391666666590027
2021-07-08 02:00:00,0.3825,-5.219250000001921
2021-07-08 04:00:00,0.60815,-3.7955833333326154
2021-07-08 06:00:00,0.0,2.691416666668374
2021-07-08 08:00:00,0.1779,-3.7397500000006403
2021-07-08 10:00:00,-0.049671428571428575,-2.146083333333081
2021-07-08 12:00:00,0.4199,-2.81741666666494
2021-07-08 14:00:00,0.19883125000000001,1.0437500000007276
2021-07-08 16:00:00,0.1419,-1.3783333333340124
2021-07-08 18:00:00,0.11880000000000002,4.8664166666640085
2021-07-08 20:00:00,0.3992,-1.0279999999984284
2021-07-08 22:00:00,0.48712,-1.814666666672565
2021-07-09 00:00:00,0.2766,-2.90625
2021-08-02 02:00:00,0.5411,-11.591249999990396
2021-08-02 08:00:00,0.5411,0.4622499999968568
2021-08-02 12:00:00,0.0,-6.393333333326154
2021-08-02 14:00:00,-0.3382,1.262333333339484
2021-08-02 16:00:00,0.2732,-0.7651666666642996
2021-08-02 18:00:00,-0.47209999999999996,-1.2415000000037253
2021-08-02 22:00:00,0.6249,-4.872749999994994
2021-08-03 00:00:00,0.7040500000000001,-1.4337500000110595
2021-08-04 02:00:00,0.0,-9.005250000001979
2021-08-04 04:00:00,0.0,-3.34641666666721
2021-08-04 06:00:00,0.05382,-5.943916666663426
2021-08-04 08:00:00,0.0,0.0850833333315677
2021-08-04 10:00:00,0.0,-3.796166666666977
2021-08-04 12:00:00,0.4939,-2.1774999999979627
2021-08-04 14:00:00,0.0,-1.9329166666648234
2021-08-04 20:00:00,0.0073999999999999995,0.26041666666424135
2021-08-04 22:00:00,0.0,3.473999999994703
2021-08-05 00:00:00,0.0,-0.08133333332807524
2021-08-08 02:00:00,0.08147777777777777,9.928249999997206
2021-08-08 04:00:00,0.3189888888888889,-1.5674999999973807
2021-08-08 06:00:00,0.3427142857142857,2.9571666666670353
2021-08-08 08:00:00,0.3369375,8.09100000000035
2021-08-08 10:00:00,0.23824444444444445,-2.268083333328832
2021-08-08 12:00:00,0.4447181818181818,-3.1174166666605743
2021-08-08 14:00:00,0.26695,-0.7463333333362243
2021-08-08 16:00:00,0.48765,2.492999999994936
2021-08-08 18:00:00,0.4374909090909091,-2.580166666666628
2021-08-08 20:00:00,0.2960714285714286,-3.052000000003318
2021-08-08 22:00:00,0.8264,2.9570833333418705
2021-08-09 00:00:00,0.23571,7.135500000003958
2021-09-02 08:00:00,-0.4451,1.1718333333337796
2021-09-02 10:00:00,0.4588,0.37950000000273576
2021-09-02 16:00:00,-0.296,-1.260083333334478
2021-09-02 20:00:00,-0.14049999999999999,2.1992499999978463
2021-09-03 00:00:00,0.0,4.547500000000582
2021-09-04 02:00:00,-0.4329,3.0987499999973807
2021-09-04 04:00:00,0.0,-1.6913333333359333
2021-09-04 10:00:00,0.27055,-0.7534166666664532
2021-09-04 12:00:00,0.0,2.776916666669422
2021-09-04 14:00:00,0.12319999999999999,-3.861333333326911
2021-09-04 16:00:00,0.0,1.6710000000093714
2021-09-04 18:00:00,0.07433333333333335,-4.692749999994703
2021-09-04 20:00:00,0.4885,0.19258333333709743
2021-09-04 22:00:00,0.5083,0.4804166666654055
2021-09-08 02:00:00,0.24712222222222222,4.934333333330869
2021-09-08 04:00:00,0.42169999999999996,1.886000000005879
2021-09-08 06:00:00,0.099675,-7.976333333324874
2021-09-08 08:00:00,0.4492416666666667,-5.861250000001746
2021-09-08 10:00:00,0.47060434782608696,-9.397583333331568
2021-09-08 12:00:00,0.2766888888888889,12.831749999997555
2021-09-08 14:00:00,0.3398764705882353,3.402249999999185
2021-09-08 16:00:00,0.483352,-2.9609999999956926
2021-09-08 18:00:00,0.3543045454545454,0.34724999999889405
2021-09-08 20:00:00,0.58472,1.786083333332499
2021-09-08 22:00:00,0.35475625,5.9135000000023865
2021-09-09 00:00:00,0.21288333333333334,-3.1002499999958673
2021-10-02 02:00:00,0.0,-0.22308333333785413
2021-10-02 04:00:00,0.5719,-0.6001666666634264
2021-10-02 06:00:00,0.0,0.16516666666575475
2021-10-02 08:00:00,0.296,2.641250000000582
2021-10-02 16:00:00,-0.5093,1.5320833333316841
2021-10-02 18:00:00,0.0,1.3044999999983702
2021-10-02 22:00:00,0.0,3.1049166666707606
2021-10-03 00:00:00,0.4404,-0.6039999999920838
2021-10-04 04:00:00,0.0,-4.226499999997031
2021-10-04 08:00:00,0.41965,3.0988333333370974
2021-10-04 10:00:00,0.34,1.2683333333334303
2021-10-04 12:00:00,0.09,0.37924999999813735
2021-10-04 16:00:00,0.18494000000000002,4.97591666666267
2021-10-04 18:00:00,-0.1027,0.11441666666360106
2021-10-04 20:00:00,0.0,7.5363333333298215
2021-10-04 22:00:00,0.6124,-1.9645000000018626
2021-10-05 00:00:00,-0.4939,-1.784749999998894
2021-10-09 08:00:00,0.6354000000000001,2.367500000000291
2021-10-09 10:00:00,0.418175,2.1697500000009313
2021-10-09 12:00:00,0.26295,-0.12108333333162591
2021-10-09 14:00:00,0.36776666666666663,0.3730833333393093
2021-10-09 16:00:00,0.31528333333333336,4.474083333341696
2021-10-09 18:00:00,0.16872307692307692,-1.6348333333298797
2021-10-09 20:00:00,0.25524444444444444,-1.0650833333347691
2021-10-09 22:00:00,0.4478166666666667,-1.1366666666581295
2021-10-10 00:00:00,0.29610000000000003,1.2497500000026776
2021-11-04 00:00:00,0.3182,-1.052416666665522
2021-11-04 12:00:00,0.0,-0.0565000000060536
2021-11-04 20:00:00,0.4019,0.5690833333283081
2021-11-05 00:00:00,0.5411,1.189250000003085
2021-12-03 06:00:00,0.0,-0.6670833333264454
2021-12-03 08:00:00,0.6124,2.875500000001921
2021-12-03 12:00:00,0.23835,0.46766666667099344
2021-12-03 14:00:00,0.6486,4.723916666662262
2021-12-03 16:00:00,0.0,2.0446666666684905
2021-12-03 18:00:00,0.2023,-3.9345833333354676
2021-12-03 20:00:00,0.5574,-9.742916666669771
2021-12-04 00:00:00,0.35645000000000004,-8.961499999997613
2021-12-04 12:00:00,0.2985,1.570833333331393
2021-12-04 14:00:00,0.0,-5.483083333332615
2021-12-04 16:00:00,0.0,-3.8566666666665697
2021-12-04 20:00:00,0.0,-4.480749999995169
1 timestamp vader_sentiment Gain/Loss
2 2021-02-07 10:00:00 -0.4588 -0.6788333333315677
3 2021-02-07 12:00:00 0.21332 6.400249999991502
4 2021-02-07 14:00:00 0.01019 1.3066666666636593
5 2021-02-07 16:00:00 0.009415384615384612 0.7320000000036089
6 2021-02-07 18:00:00 0.09711666666666667 -3.5455833333326154
7 2021-02-07 20:00:00 0.15535714285714283 2.6428333333387855
8 2021-02-07 22:00:00 0.5515625 1.1895833333328483
9 2021-02-08 00:00:00 0.20713750000000003 -0.19383333333826158
10 2021-03-07 02:00:00 0.1654875 0.5811666666704696
11 2021-03-07 04:00:00 0.551075 0.3430000000007567
12 2021-03-07 06:00:00 0.4704 -1.1864166666637175
13 2021-03-07 08:00:00 0.1485625 -1.993833333333896
14 2021-03-07 10:00:00 -0.1795888888888889 -0.22058333332824986
15 2021-03-07 12:00:00 0.2149857142857143 7.492249999995693
16 2021-03-07 14:00:00 0.56125 2.582249999999476
17 2021-03-07 16:00:00 0.4662307692307692 -1.4725000000034925
18 2021-03-07 18:00:00 0.4016777777777778 -2.1731666666601086
19 2021-03-07 20:00:00 -0.030000000000000002 3.4504999999990105
20 2021-03-07 22:00:00 0.16445 -4.590250000001106
21 2021-03-08 00:00:00 0.4204 -0.09775000000081491
22 2021-04-07 02:00:00 0.38886666666666664 -0.3353333333361661
23 2021-04-07 04:00:00 0.3067 -1.1171666666705278
24 2021-04-07 06:00:00 0.3860428571428572 2.4347500000003492
25 2021-04-07 08:00:00 0.4668666666666667 6.90849999999773
26 2021-04-07 10:00:00 0.1549 -3.6265000000057626
27 2021-04-07 12:00:00 0.16542857142857142 -2.6046666666588862
28 2021-04-07 14:00:00 0.2331 -10.873500000001513
29 2021-04-07 16:00:00 0.46502 4.004333333337854
30 2021-04-07 18:00:00 0.4878166666666666 -2.458166666670877
31 2021-04-07 20:00:00 0.3259666666666667 -2.629416666663019
32 2021-04-07 22:00:00 0.2620625 2.2340000000040163
33 2021-04-08 00:00:00 0.779925 2.775166666673613
34 2021-04-08 22:00:00 -0.5245 2.317250000000058
35 2021-04-09 00:00:00 0.434575 -0.5196666666670353
36 2021-05-02 14:00:00 -0.11995000000000003 10.756000000001222
37 2021-05-02 20:00:00 0.0 1.5450000000055297
38 2021-05-03 00:00:00 0.0 0.12341666666907258
39 2021-05-04 16:00:00 0.4404 -6.38216666666267
40 2021-05-04 18:00:00 0.0 -13.469833333336283
41 2021-05-04 22:00:00 0.0 1.6697500000009313
42 2021-05-05 00:00:00 0.1366 -4.916749999996682
43 2021-05-07 02:00:00 0.7772333333333333 -0.12258333333011251
44 2021-05-07 04:00:00 0.2733666666666667 -0.992500000007567
45 2021-05-07 06:00:00 0.16635 -3.8770000000004075
46 2021-05-07 08:00:00 0.3377666666666667 2.7214999999996508
47 2021-05-07 10:00:00 0.44745999999999997 -3.0332500000004075
48 2021-05-07 12:00:00 0.09823333333333334 3.53041666666104
49 2021-05-07 14:00:00 0.17858000000000002 -3.262749999994412
50 2021-05-07 16:00:00 0.39808571428571426 5.318583333340939
51 2021-05-07 18:00:00 0.47093333333333337 5.213916666667501
52 2021-05-07 20:00:00 0.4985555555555556 4.666250000002037
53 2021-05-07 22:00:00 0.4201 -4.9601666666640085
54 2021-05-08 00:00:00 0.33936 -2.767500000009022
55 2021-05-08 02:00:00 0.65265 1.1218333333308692
56 2021-05-08 04:00:00 0.25 2.6674166666634846
57 2021-05-08 06:00:00 0.6531 3.3850000000020373
58 2021-05-08 08:00:00 0.0343 2.966166666665231
59 2021-05-08 10:00:00 0.1341 6.882833333336748
60 2021-05-08 12:00:00 0.2842 1.922166666656267
61 2021-05-08 14:00:00 0.09386666666666665 -2.432749999999942
62 2021-05-08 16:00:00 0.16463333333333333 1.1237500000061118
63 2021-05-08 18:00:00 0.43216666666666664 -13.482499999998254
64 2021-05-08 20:00:00 0.10203999999999999 7.5151666666642996
65 2021-05-08 22:00:00 0.11926666666666663 1.2554166666668607
66 2021-05-09 00:00:00 0.3818 -2.3267500000001746
67 2021-06-02 02:00:00 0.0 4.328083333326504
68 2021-06-02 06:00:00 -0.4019 3.2506666666668025
69 2021-06-02 08:00:00 0.0 4.16608333332988
70 2021-06-02 10:00:00 0.5411 5.885833333326445
71 2021-06-02 12:00:00 0.0 2.295749999997497
72 2021-06-02 16:00:00 0.1806 5.274499999999534
73 2021-06-02 20:00:00 0.4569 -0.5001666666576057
74 2021-06-02 22:00:00 0.0 0.4448333333275514
75 2021-06-03 00:00:00 0.2233 1.529916666666395
76 2021-06-04 02:00:00 0.18036666666666668 6.992166666670528
77 2021-06-04 08:00:00 0.0 -7.88749999999709
78 2021-06-04 14:00:00 0.0 -3.3074999999953434
79 2021-06-04 16:00:00 -0.11315 1.0437500000043656
80 2021-06-04 18:00:00 -0.10313333333333334 4.11699999999837
81 2021-06-04 20:00:00 0.0 1.5028333333320916
82 2021-06-05 00:00:00 0.6474500000000001 1.6735833333295886
83 2021-06-08 02:00:00 0.8519 -5.279750000001513
84 2021-06-08 04:00:00 0.03889999999999999 2.045250000002852
85 2021-06-08 06:00:00 -0.212 -4.252083333332848
86 2021-06-08 08:00:00 0.0421 1.2044166666673846
87 2021-06-08 10:00:00 0.4404 -1.4051666666637175
88 2021-06-08 12:00:00 -0.27875000000000005 -1.4900833333304035
89 2021-06-08 14:00:00 0.12541250000000004 1.7237500000046566
90 2021-06-08 16:00:00 0.13175 -7.653499999993073
91 2021-06-08 18:00:00 0.42533333333333334 -5.440916666670091
92 2021-06-08 20:00:00 0.38624444444444445 2.150583333335817
93 2021-06-08 22:00:00 0.10626666666666668 4.075833333332412
94 2021-06-09 00:00:00 0.0 4.534416666661855
95 2021-07-02 02:00:00 0.148 2.257833333336748
96 2021-07-02 04:00:00 -0.1531 -0.28416666667180834
97 2021-07-02 06:00:00 0.2764 -3.5486666666693054
98 2021-07-02 14:00:00 0.3094 0.11041666667006211
99 2021-07-02 16:00:00 -0.586 5.521916666664765
100 2021-07-04 02:00:00 -0.3818 1.9293333333334886
101 2021-07-04 04:00:00 0.24209999999999998 3.0608333333366318
102 2021-07-04 08:00:00 0.5994 3.5314999999973224
103 2021-07-04 10:00:00 0.38775 1.960416666668607
104 2021-07-04 12:00:00 0.3818 -0.5524166666655219
105 2021-07-04 14:00:00 0.8067 -1.1629166666607489
106 2021-07-04 16:00:00 -0.20095 -1.7917499999966822
107 2021-07-04 18:00:00 0.37565000000000004 0.6822499999980209
108 2021-07-04 20:00:00 0.1206 1.4725833333286573
109 2021-07-05 00:00:00 0.0 3.2391666666590027
110 2021-07-08 02:00:00 0.3825 -5.219250000001921
111 2021-07-08 04:00:00 0.60815 -3.7955833333326154
112 2021-07-08 06:00:00 0.0 2.691416666668374
113 2021-07-08 08:00:00 0.1779 -3.7397500000006403
114 2021-07-08 10:00:00 -0.049671428571428575 -2.146083333333081
115 2021-07-08 12:00:00 0.4199 -2.81741666666494
116 2021-07-08 14:00:00 0.19883125000000001 1.0437500000007276
117 2021-07-08 16:00:00 0.1419 -1.3783333333340124
118 2021-07-08 18:00:00 0.11880000000000002 4.8664166666640085
119 2021-07-08 20:00:00 0.3992 -1.0279999999984284
120 2021-07-08 22:00:00 0.48712 -1.814666666672565
121 2021-07-09 00:00:00 0.2766 -2.90625
122 2021-08-02 02:00:00 0.5411 -11.591249999990396
123 2021-08-02 08:00:00 0.5411 0.4622499999968568
124 2021-08-02 12:00:00 0.0 -6.393333333326154
125 2021-08-02 14:00:00 -0.3382 1.262333333339484
126 2021-08-02 16:00:00 0.2732 -0.7651666666642996
127 2021-08-02 18:00:00 -0.47209999999999996 -1.2415000000037253
128 2021-08-02 22:00:00 0.6249 -4.872749999994994
129 2021-08-03 00:00:00 0.7040500000000001 -1.4337500000110595
130 2021-08-04 02:00:00 0.0 -9.005250000001979
131 2021-08-04 04:00:00 0.0 -3.34641666666721
132 2021-08-04 06:00:00 0.05382 -5.943916666663426
133 2021-08-04 08:00:00 0.0 0.0850833333315677
134 2021-08-04 10:00:00 0.0 -3.796166666666977
135 2021-08-04 12:00:00 0.4939 -2.1774999999979627
136 2021-08-04 14:00:00 0.0 -1.9329166666648234
137 2021-08-04 20:00:00 0.0073999999999999995 0.26041666666424135
138 2021-08-04 22:00:00 0.0 3.473999999994703
139 2021-08-05 00:00:00 0.0 -0.08133333332807524
140 2021-08-08 02:00:00 0.08147777777777777 9.928249999997206
141 2021-08-08 04:00:00 0.3189888888888889 -1.5674999999973807
142 2021-08-08 06:00:00 0.3427142857142857 2.9571666666670353
143 2021-08-08 08:00:00 0.3369375 8.09100000000035
144 2021-08-08 10:00:00 0.23824444444444445 -2.268083333328832
145 2021-08-08 12:00:00 0.4447181818181818 -3.1174166666605743
146 2021-08-08 14:00:00 0.26695 -0.7463333333362243
147 2021-08-08 16:00:00 0.48765 2.492999999994936
148 2021-08-08 18:00:00 0.4374909090909091 -2.580166666666628
149 2021-08-08 20:00:00 0.2960714285714286 -3.052000000003318
150 2021-08-08 22:00:00 0.8264 2.9570833333418705
151 2021-08-09 00:00:00 0.23571 7.135500000003958
152 2021-09-02 08:00:00 -0.4451 1.1718333333337796
153 2021-09-02 10:00:00 0.4588 0.37950000000273576
154 2021-09-02 16:00:00 -0.296 -1.260083333334478
155 2021-09-02 20:00:00 -0.14049999999999999 2.1992499999978463
156 2021-09-03 00:00:00 0.0 4.547500000000582
157 2021-09-04 02:00:00 -0.4329 3.0987499999973807
158 2021-09-04 04:00:00 0.0 -1.6913333333359333
159 2021-09-04 10:00:00 0.27055 -0.7534166666664532
160 2021-09-04 12:00:00 0.0 2.776916666669422
161 2021-09-04 14:00:00 0.12319999999999999 -3.861333333326911
162 2021-09-04 16:00:00 0.0 1.6710000000093714
163 2021-09-04 18:00:00 0.07433333333333335 -4.692749999994703
164 2021-09-04 20:00:00 0.4885 0.19258333333709743
165 2021-09-04 22:00:00 0.5083 0.4804166666654055
166 2021-09-08 02:00:00 0.24712222222222222 4.934333333330869
167 2021-09-08 04:00:00 0.42169999999999996 1.886000000005879
168 2021-09-08 06:00:00 0.099675 -7.976333333324874
169 2021-09-08 08:00:00 0.4492416666666667 -5.861250000001746
170 2021-09-08 10:00:00 0.47060434782608696 -9.397583333331568
171 2021-09-08 12:00:00 0.2766888888888889 12.831749999997555
172 2021-09-08 14:00:00 0.3398764705882353 3.402249999999185
173 2021-09-08 16:00:00 0.483352 -2.9609999999956926
174 2021-09-08 18:00:00 0.3543045454545454 0.34724999999889405
175 2021-09-08 20:00:00 0.58472 1.786083333332499
176 2021-09-08 22:00:00 0.35475625 5.9135000000023865
177 2021-09-09 00:00:00 0.21288333333333334 -3.1002499999958673
178 2021-10-02 02:00:00 0.0 -0.22308333333785413
179 2021-10-02 04:00:00 0.5719 -0.6001666666634264
180 2021-10-02 06:00:00 0.0 0.16516666666575475
181 2021-10-02 08:00:00 0.296 2.641250000000582
182 2021-10-02 16:00:00 -0.5093 1.5320833333316841
183 2021-10-02 18:00:00 0.0 1.3044999999983702
184 2021-10-02 22:00:00 0.0 3.1049166666707606
185 2021-10-03 00:00:00 0.4404 -0.6039999999920838
186 2021-10-04 04:00:00 0.0 -4.226499999997031
187 2021-10-04 08:00:00 0.41965 3.0988333333370974
188 2021-10-04 10:00:00 0.34 1.2683333333334303
189 2021-10-04 12:00:00 0.09 0.37924999999813735
190 2021-10-04 16:00:00 0.18494000000000002 4.97591666666267
191 2021-10-04 18:00:00 -0.1027 0.11441666666360106
192 2021-10-04 20:00:00 0.0 7.5363333333298215
193 2021-10-04 22:00:00 0.6124 -1.9645000000018626
194 2021-10-05 00:00:00 -0.4939 -1.784749999998894
195 2021-10-09 08:00:00 0.6354000000000001 2.367500000000291
196 2021-10-09 10:00:00 0.418175 2.1697500000009313
197 2021-10-09 12:00:00 0.26295 -0.12108333333162591
198 2021-10-09 14:00:00 0.36776666666666663 0.3730833333393093
199 2021-10-09 16:00:00 0.31528333333333336 4.474083333341696
200 2021-10-09 18:00:00 0.16872307692307692 -1.6348333333298797
201 2021-10-09 20:00:00 0.25524444444444444 -1.0650833333347691
202 2021-10-09 22:00:00 0.4478166666666667 -1.1366666666581295
203 2021-10-10 00:00:00 0.29610000000000003 1.2497500000026776
204 2021-11-04 00:00:00 0.3182 -1.052416666665522
205 2021-11-04 12:00:00 0.0 -0.0565000000060536
206 2021-11-04 20:00:00 0.4019 0.5690833333283081
207 2021-11-05 00:00:00 0.5411 1.189250000003085
208 2021-12-03 06:00:00 0.0 -0.6670833333264454
209 2021-12-03 08:00:00 0.6124 2.875500000001921
210 2021-12-03 12:00:00 0.23835 0.46766666667099344
211 2021-12-03 14:00:00 0.6486 4.723916666662262
212 2021-12-03 16:00:00 0.0 2.0446666666684905
213 2021-12-03 18:00:00 0.2023 -3.9345833333354676
214 2021-12-03 20:00:00 0.5574 -9.742916666669771
215 2021-12-04 00:00:00 0.35645000000000004 -8.961499999997613
216 2021-12-04 12:00:00 0.2985 1.570833333331393
217 2021-12-04 14:00:00 0.0 -5.483083333332615
218 2021-12-04 16:00:00 0.0 -3.8566666666665697
219 2021-12-04 20:00:00 0.0 -4.480749999995169

View File

@ -1,4 +1,5 @@
tweepy~=4.14.0
numpy~=2.1.3
pandas~=2.2.3
vaderSentiment~=3.3.2
vaderSentiment~=3.3.2
scikit-learn~=1.6.1

Binary file not shown.

Binary file not shown.

View File

@ -1,18 +1,33 @@
import pandas as pd
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import requests
def fetch_bitcoin_price(timeout=10):
    """Fetch the current Bitcoin price in USD from the CoinGecko API.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The value found at ``['bitcoin']['usd']`` in the JSON response, or
        ``None`` if the request fails or the response does not have the
        expected shape. Failures are reported on stdout.
    """
    url = "https://api.coingecko.com/api/v3/simple/price"
    params = {
        "ids": "bitcoin",
        "vs_currencies": "usd",
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        return data['bitcoin']['usd']
    except requests.RequestException as e:
        print(f"Error fetching Bitcoin price: {e}")
    except (KeyError, TypeError, ValueError) as e:
        # The request succeeded but the payload is not the expected
        # {"bitcoin": {"usd": ...}} mapping (or is not valid JSON).
        print(f"Error fetching Bitcoin price: unexpected response ({e!r})")
    return None
def load_csv(file_path):
    """Read the CSV file at ``file_path`` into a pandas DataFrame.

    The column index of the loaded frame is printed to stdout before the
    frame is returned.
    """
    frame = pd.read_csv(file_path)
    column_index = frame.columns
    print(f"CSV loaded. Columns: {column_index}")
    return frame
_ANALYZER = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = SentimentIntensityAnalyzer()
    return _ANALYZER


def get_sentiment(text):
    """Return the VADER compound sentiment score for ``text``.

    The analyzer is constructed once and reused across calls instead of
    being rebuilt for every piece of text, which matters when this function
    is applied to each row of a DataFrame.

    Args:
        text: The text to score.

    Returns:
        The ``'compound'`` entry of ``polarity_scores(text)``, i.e. the
        overall sentiment score.
    """
    sentiment = _get_analyzer().polarity_scores(text)
    return sentiment['compound']
def classify_sentiment(compound_score):
@ -28,8 +43,36 @@ def analyze_sentiment(df):
df['vader_sentiment_class'] = df['vader_sentiment'].apply(classify_sentiment)
return df
def save_to_csv(df, output_path):
    """Write ``df`` to ``output_path`` as CSV, without the index column.

    A confirmation line naming the destination is printed afterwards.
    """
    df.to_csv(output_path, index=False)
    confirmation = f"Results saved to {output_path}"
    print(confirmation)
def date_format(df):
    """Add a ``timestamp`` column holding ``created_at`` rounded up to 2 hours.

    The frame is modified in place and also returned:

    * the ``author_id`` column is removed if it is present;
    * ``created_at`` is parsed as datetimes, rounded up to the next 2-hour
      boundary and stored in ``timestamp`` as ``'%Y-%m-%d %H:%M:%S'``
      strings.

    The previous second ``pd.to_datetime(..., format='%Y-%d-%m %H:%M:%S')``
    pass was dropped: it ran on a column that had already been converted to
    datetimes, so the (day/month-swapped) format string had no effect.

    Args:
        df: DataFrame with a ``created_at`` column.

    Returns:
        The same DataFrame object, with the changes above applied.
    """
    # errors='ignore' keeps the function usable on frames without author_id.
    df.drop(columns='author_id', inplace=True, errors='ignore')
    created = pd.to_datetime(df['created_at'])
    df['timestamp'] = created.dt.ceil('2h').dt.strftime('%Y-%m-%d %H:%M:%S')
    return df
def btc_data_(date, csv_path='../data/bitstamp.csv'):
    """Load Bitstamp price data and return 2-hour averages for ``date``.

    The CSV is read, its ``Timestamp`` column (seconds since the epoch) is
    turned into a datetime index, and all columns are averaged over 2-hour
    bins. The bins selected by ``.loc[date:date]`` are kept, ``High`` and
    ``Low`` are dropped, and ``Gain/Loss`` is added as ``Close - Open`` of
    the averaged values.

    Args:
        date: Label used for both ends of the ``.loc`` slice on the
            resampled datetime index (presumably a ``'YYYY-MM-DD'`` day
            string; confirm against callers).
        csv_path: Location of the Bitstamp CSV. Defaults to the path that
            used to be hard-coded, which is relative to the current
            working directory.

    Returns:
        DataFrame with ``timestamp`` as a regular column followed by the
        remaining averaged columns and ``Gain/Loss``.
    """
    btc_data = pd.read_csv(csv_path)
    btc_data['timestamp'] = pd.to_datetime(btc_data['Timestamp'], unit='s')
    btc_data.set_index('timestamp', inplace=True)
    btc_data.drop(columns=['Timestamp'], inplace=True)
    btc_data = btc_data.resample('2h').mean()

    columns_to_drop_btc = ["High", "Low"]
    filtered_btc_data = btc_data.loc[date:date].drop(columns=columns_to_drop_btc)
    filtered_btc_data['Gain/Loss'] = (
        filtered_btc_data['Close'] - filtered_btc_data['Open']
    )
    filtered_btc_data.reset_index(inplace=True)
    return filtered_btc_data
def merge(btc, tweets):
    """Join price rows with tweet rows that share the same ``timestamp``.

    The ``timestamp`` column of both input frames is converted to datetimes
    in place. ``btc`` is then left-joined with ``tweets`` on ``timestamp``
    and every joined row whose ``text`` is missing is discarded.

    Returns:
        The joined DataFrame containing only rows that have ``text``.
    """
    for frame in (btc, tweets):
        frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    joined = btc.merge(tweets, on='timestamp', how='left')
    return joined.dropna(subset=['text'])

View File

@ -6,14 +6,14 @@
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-01-14T17:06:55.821536Z",
"start_time": "2025-01-14T17:06:48.631852Z"
"end_time": "2025-01-15T11:14:44.304056Z",
"start_time": "2025-01-15T11:14:36.737765Z"
}
},
"source": [
"import pandas as pd\n",
"\n",
"from src.analysis import analyze_sentiment\n",
"from src.analysis import analyze_sentiment, save_to_csv\n",
"\n",
"btc_tweets = pd.read_csv('../data/Bitcoin_tweets.csv')\n",
"btc_data = pd.read_csv('../data/bitstamp.csv')"
@ -23,18 +23,18 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/7p/w3y96f6s701d9vv4h5vptdz00000gn/T/ipykernel_14923/4274364300.py:5: DtypeWarning: Columns (5,6,7,12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"/var/folders/7p/w3y96f6s701d9vv4h5vptdz00000gn/T/ipykernel_19189/1429944408.py:5: DtypeWarning: Columns (5,6,7,12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" btc_tweets = pd.read_csv('../data/Bitcoin_tweets.csv')\n"
]
}
],
"execution_count": 31
"execution_count": 49
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T17:06:59.590206Z",
"start_time": "2025-01-14T17:06:58.392209Z"
"end_time": "2025-01-15T11:14:50.559541Z",
"start_time": "2025-01-15T11:14:49.318953Z"
}
},
"cell_type": "code",
@ -44,13 +44,13 @@
],
"id": "67e2290f7b4ac803",
"outputs": [],
"execution_count": 32
"execution_count": 50
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T17:07:02.514048Z",
"start_time": "2025-01-14T17:07:01.025617Z"
"end_time": "2025-01-15T11:14:54.134908Z",
"start_time": "2025-01-15T11:14:52.720595Z"
}
},
"cell_type": "code",
@ -61,26 +61,26 @@
],
"id": "b7cd551da68b0193",
"outputs": [],
"execution_count": 33
"execution_count": 51
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T17:07:04.510210Z",
"start_time": "2025-01-14T17:07:03.912184Z"
"end_time": "2025-01-15T11:14:55.478557Z",
"start_time": "2025-01-15T11:14:54.885194Z"
}
},
"cell_type": "code",
"source": "btc_data = btc_data.resample('4h').mean()",
"source": "btc_data = btc_data.resample('2h').mean()",
"id": "89ca0634382ae0a8",
"outputs": [],
"execution_count": 34
"execution_count": 52
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T17:07:07.786155Z",
"start_time": "2025-01-14T17:07:06.155961Z"
"end_time": "2025-01-15T11:14:58.284073Z",
"start_time": "2025-01-15T11:14:56.702218Z"
}
},
"cell_type": "code",
@ -99,483 +99,68 @@
],
"id": "50dcf3fac2b8f0ef",
"outputs": [],
"execution_count": 35
"execution_count": 53
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T20:19:53.479883Z",
"start_time": "2025-01-14T20:19:53.434390Z"
"end_time": "2025-01-15T11:15:23.983659Z",
"start_time": "2025-01-15T11:15:23.973039Z"
}
},
"cell_type": "code",
"source": [
"start_date = \"2021-02-07\"\n",
"end_date = \"2021-12-04\"\n",
"start_date = \"2021-01-01\"\n",
"end_date = \"2021-12-29\"\n",
"\n",
"filtered_btc_data = btc_data.loc[start_date:end_date]\n",
"columns_to_drop_btc = [\n",
" \"High\", \"Low\"\n",
"]\n",
"filtered_btc_data = filtered_btc_data.drop(columns=columns_to_drop_btc)\n",
"filtered_btc_data['Gain/Loss'] = filtered_btc_data['Open'] - filtered_btc_data['Close']\n",
"\n",
"print(filtered_btc_data)"
"filtered_btc_data['Gain/Loss'] = filtered_btc_data['Close'] - filtered_btc_data['Open']\n"
],
"id": "b06730669d1a9264",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Open Close Volume Gain/Loss\n",
"timestamp \n",
"2021-02-07 00:00:00 40165.899167 40165.286000 4.759081 0.613167\n",
"2021-02-07 04:00:00 39218.887125 39213.334292 6.409186 5.552833\n",
"2021-02-07 08:00:00 38588.391458 38588.979458 2.909470 -0.588000\n",
"2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 -3.853458\n",
"2021-02-07 16:00:00 38829.270625 38827.863833 4.306438 1.406792\n",
"... ... ... ... ...\n",
"2021-12-04 04:00:00 53281.233417 53281.023333 0.929084 0.210083\n",
"2021-12-04 08:00:00 50036.703292 50009.813667 18.521395 26.889625\n",
"2021-12-04 12:00:00 47438.188292 47436.232167 9.826317 1.956125\n",
"2021-12-04 16:00:00 47215.739500 47219.201833 5.740048 -3.462333\n",
"2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 -0.056708\n",
"\n",
"[1806 rows x 4 columns]\n"
]
}
],
"execution_count": 64
"outputs": [],
"execution_count": 54
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T17:07:12.251364Z",
"start_time": "2025-01-14T17:07:12.172844Z"
"end_time": "2025-01-15T11:15:25.778595Z",
"start_time": "2025-01-15T11:15:25.745985Z"
}
},
"cell_type": "code",
"source": [
"btc_tweets_cleaned['timestamp'] = btc_tweets_cleaned['date'].dt.ceil('4h')\n",
"btc_tweets_cleaned = btc_tweets_cleaned.sort_values('timestamp')\n",
"btc_tweets_cleaned"
"btc_tweets_cleaned['timestamp'] = btc_tweets_cleaned['date'].dt.ceil('2h')\n",
"btc_tweets_cleaned = btc_tweets_cleaned.sort_values('timestamp')"
],
"id": "e2cbd50d300a0e9f",
"outputs": [
{
"data": {
"text/plain": [
" user_followers date \\\n",
"344135 30925.0 2021-02-07 11:10:55 \n",
"344628 59242.0 2021-02-07 10:45:42 \n",
"344627 33.0 2021-02-07 10:45:48 \n",
"344626 52.0 2021-02-07 10:45:52 \n",
"344625 22.0 2021-02-07 10:45:53 \n",
"... ... ... \n",
"48958 2155.0 2021-12-04 16:53:52 \n",
"48957 3605.0 2021-12-04 16:54:04 \n",
"48956 9.0 2021-12-04 16:54:44 \n",
"48962 169.0 2021-12-04 16:51:37 \n",
"48931 531.0 2021-12-04 17:00:42 \n",
"\n",
" text \\\n",
"344135 Yoh my friends, I know you all love buying and... \n",
"344628 Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle... \n",
"344627 @MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR... \n",
"344626 New distribution: MooniWarPrizes: 50.000 MWar ... \n",
"344625 #China #elsalvadorbitcoin #Nigeria #Bitcoin #P... \n",
"... ... \n",
"48958 $MYST #MYST #BTC #Bitcoin \n",
"48957 Team Bankroller on Telegram25 BNKRX prize for ... \n",
"48956 I set up my @cryptocom account to buy $100 #bt... \n",
"48962 Riot Blockchain Bitcoin production jumps 80% o... \n",
"48931 An ECB Board Member Attacks Bitcoin, Says It '... \n",
"\n",
" hashtags timestamp \n",
"344135 ['Crypto', 'Bitcoin', 'BZExchange'] 2021-02-07 12:00:00 \n",
"344628 ['bitcoin'] 2021-02-07 12:00:00 \n",
"344627 ['Bitcoin'] 2021-02-07 12:00:00 \n",
"344626 ['Airdrop', 'Cryptocurrency', 'Free', 'Income'... 2021-02-07 12:00:00 \n",
"344625 ['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit... 2021-02-07 12:00:00 \n",
"... ... ... \n",
"48958 ['MYST', 'BTC', 'Bitcoin'] 2021-12-04 20:00:00 \n",
"48957 ['TRX', 'TRON'] 2021-12-04 20:00:00 \n",
"48956 ['btc', 'cro', 'crypto', 'bitcoin'] 2021-12-04 20:00:00 \n",
"48962 ['bitcoin'] 2021-12-04 20:00:00 \n",
"48931 NaN 2021-12-04 20:00:00 \n",
"\n",
"[297728 rows x 5 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_followers</th>\n",
" <th>date</th>\n",
" <th>text</th>\n",
" <th>hashtags</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>344135</th>\n",
" <td>30925.0</td>\n",
" <td>2021-02-07 11:10:55</td>\n",
" <td>Yoh my friends, I know you all love buying and...</td>\n",
" <td>['Crypto', 'Bitcoin', 'BZExchange']</td>\n",
" <td>2021-02-07 12:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344628</th>\n",
" <td>59242.0</td>\n",
" <td>2021-02-07 10:45:42</td>\n",
" <td>Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle...</td>\n",
" <td>['bitcoin']</td>\n",
" <td>2021-02-07 12:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344627</th>\n",
" <td>33.0</td>\n",
" <td>2021-02-07 10:45:48</td>\n",
" <td>@MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR...</td>\n",
" <td>['Bitcoin']</td>\n",
" <td>2021-02-07 12:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344626</th>\n",
" <td>52.0</td>\n",
" <td>2021-02-07 10:45:52</td>\n",
" <td>New distribution: MooniWarPrizes: 50.000 MWar ...</td>\n",
" <td>['Airdrop', 'Cryptocurrency', 'Free', 'Income'...</td>\n",
" <td>2021-02-07 12:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344625</th>\n",
" <td>22.0</td>\n",
" <td>2021-02-07 10:45:53</td>\n",
" <td>#China #elsalvadorbitcoin #Nigeria #Bitcoin #P...</td>\n",
" <td>['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit...</td>\n",
" <td>2021-02-07 12:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48958</th>\n",
" <td>2155.0</td>\n",
" <td>2021-12-04 16:53:52</td>\n",
" <td>$MYST #MYST #BTC #Bitcoin</td>\n",
" <td>['MYST', 'BTC', 'Bitcoin']</td>\n",
" <td>2021-12-04 20:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48957</th>\n",
" <td>3605.0</td>\n",
" <td>2021-12-04 16:54:04</td>\n",
" <td>Team Bankroller on Telegram25 BNKRX prize for ...</td>\n",
" <td>['TRX', 'TRON']</td>\n",
" <td>2021-12-04 20:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48956</th>\n",
" <td>9.0</td>\n",
" <td>2021-12-04 16:54:44</td>\n",
" <td>I set up my @cryptocom account to buy $100 #bt...</td>\n",
" <td>['btc', 'cro', 'crypto', 'bitcoin']</td>\n",
" <td>2021-12-04 20:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48962</th>\n",
" <td>169.0</td>\n",
" <td>2021-12-04 16:51:37</td>\n",
" <td>Riot Blockchain Bitcoin production jumps 80% o...</td>\n",
" <td>['bitcoin']</td>\n",
" <td>2021-12-04 20:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48931</th>\n",
" <td>531.0</td>\n",
" <td>2021-12-04 17:00:42</td>\n",
" <td>An ECB Board Member Attacks Bitcoin, Says It '...</td>\n",
" <td>NaN</td>\n",
" <td>2021-12-04 20:00:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>297728 rows × 5 columns</p>\n",
"</div>"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 37
"outputs": [],
"execution_count": 55
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T17:07:14.783610Z",
"start_time": "2025-01-14T17:07:14.687756Z"
"end_time": "2025-01-15T11:15:27.471413Z",
"start_time": "2025-01-15T11:15:27.291684Z"
}
},
"cell_type": "code",
"source": [
"result = pd.merge(filtered_btc_data, btc_tweets_cleaned, on='timestamp', how='left')\n",
"result.dropna(subset=['text'], inplace=True)\n",
"result"
"result.dropna(subset=['text'], inplace=True)"
],
"id": "66cff75a7a827a8d",
"outputs": [
{
"data": {
"text/plain": [
" timestamp Open Close Volume \\\n",
"3 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
"4 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
"5 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
"6 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
"7 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
"... ... ... ... ... \n",
"299375 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
"299376 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
"299377 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
"299378 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
"299379 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
"\n",
" user_followers date \\\n",
"3 30925.0 2021-02-07 11:10:55 \n",
"4 59242.0 2021-02-07 10:45:42 \n",
"5 33.0 2021-02-07 10:45:48 \n",
"6 52.0 2021-02-07 10:45:52 \n",
"7 22.0 2021-02-07 10:45:53 \n",
"... ... ... \n",
"299375 2155.0 2021-12-04 16:53:52 \n",
"299376 3605.0 2021-12-04 16:54:04 \n",
"299377 9.0 2021-12-04 16:54:44 \n",
"299378 169.0 2021-12-04 16:51:37 \n",
"299379 531.0 2021-12-04 17:00:42 \n",
"\n",
" text \\\n",
"3 Yoh my friends, I know you all love buying and... \n",
"4 Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle... \n",
"5 @MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR... \n",
"6 New distribution: MooniWarPrizes: 50.000 MWar ... \n",
"7 #China #elsalvadorbitcoin #Nigeria #Bitcoin #P... \n",
"... ... \n",
"299375 $MYST #MYST #BTC #Bitcoin \n",
"299376 Team Bankroller on Telegram25 BNKRX prize for ... \n",
"299377 I set up my @cryptocom account to buy $100 #bt... \n",
"299378 Riot Blockchain Bitcoin production jumps 80% o... \n",
"299379 An ECB Board Member Attacks Bitcoin, Says It '... \n",
"\n",
" hashtags \n",
"3 ['Crypto', 'Bitcoin', 'BZExchange'] \n",
"4 ['bitcoin'] \n",
"5 ['Bitcoin'] \n",
"6 ['Airdrop', 'Cryptocurrency', 'Free', 'Income'... \n",
"7 ['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit... \n",
"... ... \n",
"299375 ['MYST', 'BTC', 'Bitcoin'] \n",
"299376 ['TRX', 'TRON'] \n",
"299377 ['btc', 'cro', 'crypto', 'bitcoin'] \n",
"299378 ['bitcoin'] \n",
"299379 NaN \n",
"\n",
"[297728 rows x 8 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>Open</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>user_followers</th>\n",
" <th>date</th>\n",
" <th>text</th>\n",
" <th>hashtags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021-02-07 12:00:00</td>\n",
" <td>39308.021458</td>\n",
" <td>39311.874917</td>\n",
" <td>3.499237</td>\n",
" <td>30925.0</td>\n",
" <td>2021-02-07 11:10:55</td>\n",
" <td>Yoh my friends, I know you all love buying and...</td>\n",
" <td>['Crypto', 'Bitcoin', 'BZExchange']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021-02-07 12:00:00</td>\n",
" <td>39308.021458</td>\n",
" <td>39311.874917</td>\n",
" <td>3.499237</td>\n",
" <td>59242.0</td>\n",
" <td>2021-02-07 10:45:42</td>\n",
" <td>Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle...</td>\n",
" <td>['bitcoin']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2021-02-07 12:00:00</td>\n",
" <td>39308.021458</td>\n",
" <td>39311.874917</td>\n",
" <td>3.499237</td>\n",
" <td>33.0</td>\n",
" <td>2021-02-07 10:45:48</td>\n",
" <td>@MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR...</td>\n",
" <td>['Bitcoin']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2021-02-07 12:00:00</td>\n",
" <td>39308.021458</td>\n",
" <td>39311.874917</td>\n",
" <td>3.499237</td>\n",
" <td>52.0</td>\n",
" <td>2021-02-07 10:45:52</td>\n",
" <td>New distribution: MooniWarPrizes: 50.000 MWar ...</td>\n",
" <td>['Airdrop', 'Cryptocurrency', 'Free', 'Income'...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2021-02-07 12:00:00</td>\n",
" <td>39308.021458</td>\n",
" <td>39311.874917</td>\n",
" <td>3.499237</td>\n",
" <td>22.0</td>\n",
" <td>2021-02-07 10:45:53</td>\n",
" <td>#China #elsalvadorbitcoin #Nigeria #Bitcoin #P...</td>\n",
" <td>['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299375</th>\n",
" <td>2021-12-04 20:00:00</td>\n",
" <td>48301.942792</td>\n",
" <td>48301.999500</td>\n",
" <td>3.640812</td>\n",
" <td>2155.0</td>\n",
" <td>2021-12-04 16:53:52</td>\n",
" <td>$MYST #MYST #BTC #Bitcoin</td>\n",
" <td>['MYST', 'BTC', 'Bitcoin']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299376</th>\n",
" <td>2021-12-04 20:00:00</td>\n",
" <td>48301.942792</td>\n",
" <td>48301.999500</td>\n",
" <td>3.640812</td>\n",
" <td>3605.0</td>\n",
" <td>2021-12-04 16:54:04</td>\n",
" <td>Team Bankroller on Telegram25 BNKRX prize for ...</td>\n",
" <td>['TRX', 'TRON']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299377</th>\n",
" <td>2021-12-04 20:00:00</td>\n",
" <td>48301.942792</td>\n",
" <td>48301.999500</td>\n",
" <td>3.640812</td>\n",
" <td>9.0</td>\n",
" <td>2021-12-04 16:54:44</td>\n",
" <td>I set up my @cryptocom account to buy $100 #bt...</td>\n",
" <td>['btc', 'cro', 'crypto', 'bitcoin']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299378</th>\n",
" <td>2021-12-04 20:00:00</td>\n",
" <td>48301.942792</td>\n",
" <td>48301.999500</td>\n",
" <td>3.640812</td>\n",
" <td>169.0</td>\n",
" <td>2021-12-04 16:51:37</td>\n",
" <td>Riot Blockchain Bitcoin production jumps 80% o...</td>\n",
" <td>['bitcoin']</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299379</th>\n",
" <td>2021-12-04 20:00:00</td>\n",
" <td>48301.942792</td>\n",
" <td>48301.999500</td>\n",
" <td>3.640812</td>\n",
" <td>531.0</td>\n",
" <td>2021-12-04 17:00:42</td>\n",
" <td>An ECB Board Member Attacks Bitcoin, Says It '...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>297728 rows × 8 columns</p>\n",
"</div>"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 38
"outputs": [],
"execution_count": 56
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T20:20:03.203053Z",
"start_time": "2025-01-14T20:19:59.756279Z"
"end_time": "2025-01-15T11:15:33.655160Z",
"start_time": "2025-01-15T11:15:30.228882Z"
}
},
"cell_type": "code",
@ -589,140 +174,31 @@
],
"id": "f2d740d611c43cbc",
"outputs": [],
"execution_count": 65
"execution_count": 57
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-14T20:20:13.500329Z",
"start_time": "2025-01-14T20:20:13.493261Z"
"end_time": "2025-01-15T11:15:34.200051Z",
"start_time": "2025-01-15T11:15:34.194465Z"
}
},
"cell_type": "code",
"source": "final",
"source": [
"from analysis import save_to_csv\n",
"save_to_csv(final, '../data/sentiment.csv')"
],
"id": "9c29a057abb9a8e1",
"outputs": [
{
"data": {
"text/plain": [
" timestamp vader_sentiment Gain/Loss\n",
"0 2021-02-07 12:00:00 0.159488 -3.853458\n",
"1 2021-02-07 16:00:00 0.206750 1.406792\n",
"2 2021-02-07 20:00:00 0.342183 -1.916208\n",
"3 2021-02-08 00:00:00 0.392456 -0.425292\n",
"4 2021-03-07 04:00:00 0.301357 0.421708\n",
".. ... ... ...\n",
"125 2021-12-03 20:00:00 0.263333 4.646417\n",
"126 2021-12-04 00:00:00 0.135840 5.825708\n",
"127 2021-12-04 12:00:00 0.000000 1.956125\n",
"128 2021-12-04 16:00:00 0.000000 -3.462333\n",
"129 2021-12-04 20:00:00 0.066533 -0.056708\n",
"\n",
"[130 rows x 3 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>vader_sentiment</th>\n",
" <th>Gain/Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2021-02-07 12:00:00</td>\n",
" <td>0.159488</td>\n",
" <td>-3.853458</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2021-02-07 16:00:00</td>\n",
" <td>0.206750</td>\n",
" <td>1.406792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2021-02-07 20:00:00</td>\n",
" <td>0.342183</td>\n",
" <td>-1.916208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021-02-08 00:00:00</td>\n",
" <td>0.392456</td>\n",
" <td>-0.425292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021-03-07 04:00:00</td>\n",
" <td>0.301357</td>\n",
" <td>0.421708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>2021-12-03 20:00:00</td>\n",
" <td>0.263333</td>\n",
" <td>4.646417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>2021-12-04 00:00:00</td>\n",
" <td>0.135840</td>\n",
" <td>5.825708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>2021-12-04 12:00:00</td>\n",
" <td>0.000000</td>\n",
" <td>1.956125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>2021-12-04 16:00:00</td>\n",
" <td>0.000000</td>\n",
" <td>-3.462333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>129</th>\n",
" <td>2021-12-04 20:00:00</td>\n",
" <td>0.066533</td>\n",
" <td>-0.056708</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>130 rows × 3 columns</p>\n",
"</div>"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"Results saved to ../data/sentiment.csv\n"
]
}
],
"execution_count": 66
"execution_count": 58
}
],
"metadata": {

View File

@ -1,7 +1,5 @@
import tweepy
import csv
import os
import time
import pandas as pd
# Twitter API credentials
# NOTE(review): the key below is a literal committed to the repository;
# presumably it should be read from the environment or a secrets store
# instead (and rotated) — confirm with the owner of the account.
API_KEY = "gDLV23ofFr7xEj38SkBHJeCMl"
@ -30,40 +28,10 @@ def fetch_tweets_with_hashtags(hashtags, max_results=10):
"text": tweet.text
})
except tweepy.TooManyRequests as e:
print("Rate limit reached. Waiting for 15 minutes...")
time.sleep(15 * 60)
return fetch_tweets_with_hashtags(hashtags, max_results)
print("Rate limit reached.")
tweets = pd.read_csv('../data/tweets_with_hashtags.csv')
return tweets
except Exception as e:
print(f"Error occurred while fetching tweets: {e}")
return tweets_data
def save_to_csv(data):
    """Append tweet records to ``tweets_with_hashtags.csv``.

    The file lives in the ``data`` directory next to this module's parent
    (``<module dir>/../data``); the directory is created when missing. A
    header row is written only when the file does not exist yet.

    Args:
        data: Iterable of dicts with the keys ``author_id``, ``created_at``
            and ``text``.

    Any exception raised while saving is caught and reported on stdout;
    nothing is re-raised (best-effort persistence).
    """
    try:
        target_dir = os.path.join(os.path.dirname(__file__), "..", "data")
        os.makedirs(target_dir, exist_ok=True)
        filepath = os.path.join(target_dir, "tweets_with_hashtags.csv")

        # Decide about the header before opening: mode "a" creates the file.
        needs_header = not os.path.isfile(filepath)

        with open(filepath, "a", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(
                handle, fieldnames=["author_id", "created_at", "text"]
            )
            if needs_header:
                writer.writeheader()
            writer.writerows(data)

        print(f"Data successfully appended to {filepath}")
    except Exception as e:
        print(f"Error while saving data to CSV: {e}")
if __name__ == "__main__":
    # Script entry point: fetch recent tweets for the Bitcoin hashtags and
    # append them to the CSV store.
    hashtags = ["btc", "bitcoin"]
    max_results = 100
    tweets = fetch_tweets_with_hashtags(hashtags, max_results=max_results)
    save_to_csv(tweets)

View File

@ -0,0 +1,16 @@
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def model(sentiment):
    """Predict price gain/loss from sentiment scores via linear regression.

    A ``LinearRegression`` is fitted on every row of
    ``../data/sentiment.csv`` (feature ``vader_sentiment``, target
    ``Gain/Loss``) and then applied to the ``vader_sentiment`` column of
    *sentiment*. The CSV path is relative, so it is resolved against the
    current working directory.

    Args:
        sentiment: DataFrame-like object exposing a ``vader_sentiment``
            column.

    Returns:
        The value returned by ``LinearRegression.predict`` for the rows of
        *sentiment*.
    """
    history = pd.read_csv('../data/sentiment.csv')

    # The complete history is used for fitting; no hold-out split is made.
    regressor = LinearRegression().fit(
        history[['vader_sentiment']],
        history['Gain/Loss'],
    )
    return regressor.predict(sentiment[['vader_sentiment']])

20
src/main.py Normal file
View File

@ -0,0 +1,20 @@
from fetch import *
from learning import *
from analysis import *
import os
if __name__ == "__main__":
    # Pipeline: fetch tweets -> score sentiment -> regress -> print forecast.
    hashtags = ["btc", "bitcoin"]
    max_results = 10
    tweets = fetch_tweets_with_hashtags(hashtags, max_results)

    # The tweets are round-tripped through a temporary CSV file.
    # NOTE(review): presumably this normalises the fetch result into a
    # DataFrame regardless of what the fetcher returned — confirm.
    save_to_csv(tweets, '../data/new_tweets.csv')
    tweets = pd.read_csv('../data/new_tweets.csv')
    os.remove('../data/new_tweets.csv')

    # NOTE(review): the return value is ignored, so date_format presumably
    # modifies `tweets` in place — verify against analysis.py.
    date_format(tweets)
    sentiment = analyze_sentiment(tweets)
    daily_sentiment = sentiment.groupby('timestamp')['vader_sentiment'].mean().reset_index()
    x = daily_sentiment['vader_sentiment']
    print(f"Tweets sentiment score: {x.iloc[0]}")
    prediction = model(daily_sentiment)

    # Query the price exactly once so the printed price and the base of the
    # forecast are the same quote (and only one request is made).
    current_price = fetch_bitcoin_price()
    print(f"Current BTC price:{current_price}")
    # NOTE(review): the factor 4 is not explained anywhere in this file.
    print(f"Price prediction in one hour:{current_price + 4*prediction[0]}")