changes
This commit is contained in:
parent
b5b17859e9
commit
7ed72c8c04
@ -3,7 +3,7 @@
|
||||
BitSent to aplikacja do analizy nastrojów rynku kryptowalut na podstawie ostatnich tweetów z hashtagiem `#BTC`. Narzędzie wykorzystuje historyczne dane o tweetach oraz bieżące tweety, aby przewidzieć, czy nastrój rynku jest **wzrostowy** czy **spadkowy**.
|
||||
|
||||
## Funkcjonalności
|
||||
- Pobieranie najnowszych tweetów z hashtagiem `#BTC` za pomocą `snscrape`.
|
||||
- Pobieranie najnowszych tweetów z hashtagiem `#BTC` za pomocą `tweepy`.
|
||||
- Analiza sentymentu tweetów na podstawie wcześniej wytrenowanego modelu.
|
||||
- Wyświetlenie przewidywanego nastroju rynku.
|
||||
|
||||
@ -19,4 +19,8 @@
|
||||
1. Sklonuj repozytorium:
|
||||
```bash
|
||||
git clone https://github.com/yourusername/bitsent.git
|
||||
cd bitsent
|
||||
cd bitsent
|
||||
2. Upewnij się, że Python jest zainstalowany:
|
||||
```python --version```
|
||||
3. Uruchom skrypt z lokalizacji pliku
|
||||
```python main.py```
|
BIN
data/.DS_Store
vendored
BIN
data/.DS_Store
vendored
Binary file not shown.
219
data/sentiment.csv
Normal file
219
data/sentiment.csv
Normal file
@ -0,0 +1,219 @@
|
||||
timestamp,vader_sentiment,Gain/Loss
|
||||
2021-02-07 10:00:00,-0.4588,-0.6788333333315677
|
||||
2021-02-07 12:00:00,0.21332,6.400249999991502
|
||||
2021-02-07 14:00:00,0.01019,1.3066666666636593
|
||||
2021-02-07 16:00:00,0.009415384615384612,0.7320000000036089
|
||||
2021-02-07 18:00:00,0.09711666666666667,-3.5455833333326154
|
||||
2021-02-07 20:00:00,0.15535714285714283,2.6428333333387855
|
||||
2021-02-07 22:00:00,0.5515625,1.1895833333328483
|
||||
2021-02-08 00:00:00,0.20713750000000003,-0.19383333333826158
|
||||
2021-03-07 02:00:00,0.1654875,0.5811666666704696
|
||||
2021-03-07 04:00:00,0.551075,0.3430000000007567
|
||||
2021-03-07 06:00:00,0.4704,-1.1864166666637175
|
||||
2021-03-07 08:00:00,0.1485625,-1.993833333333896
|
||||
2021-03-07 10:00:00,-0.1795888888888889,-0.22058333332824986
|
||||
2021-03-07 12:00:00,0.2149857142857143,7.492249999995693
|
||||
2021-03-07 14:00:00,0.56125,2.582249999999476
|
||||
2021-03-07 16:00:00,0.4662307692307692,-1.4725000000034925
|
||||
2021-03-07 18:00:00,0.4016777777777778,-2.1731666666601086
|
||||
2021-03-07 20:00:00,-0.030000000000000002,3.4504999999990105
|
||||
2021-03-07 22:00:00,0.16445,-4.590250000001106
|
||||
2021-03-08 00:00:00,0.4204,-0.09775000000081491
|
||||
2021-04-07 02:00:00,0.38886666666666664,-0.3353333333361661
|
||||
2021-04-07 04:00:00,0.3067,-1.1171666666705278
|
||||
2021-04-07 06:00:00,0.3860428571428572,2.4347500000003492
|
||||
2021-04-07 08:00:00,0.4668666666666667,6.90849999999773
|
||||
2021-04-07 10:00:00,0.1549,-3.6265000000057626
|
||||
2021-04-07 12:00:00,0.16542857142857142,-2.6046666666588862
|
||||
2021-04-07 14:00:00,0.2331,-10.873500000001513
|
||||
2021-04-07 16:00:00,0.46502,4.004333333337854
|
||||
2021-04-07 18:00:00,0.4878166666666666,-2.458166666670877
|
||||
2021-04-07 20:00:00,0.3259666666666667,-2.629416666663019
|
||||
2021-04-07 22:00:00,0.2620625,2.2340000000040163
|
||||
2021-04-08 00:00:00,0.779925,2.775166666673613
|
||||
2021-04-08 22:00:00,-0.5245,2.317250000000058
|
||||
2021-04-09 00:00:00,0.434575,-0.5196666666670353
|
||||
2021-05-02 14:00:00,-0.11995000000000003,10.756000000001222
|
||||
2021-05-02 20:00:00,0.0,1.5450000000055297
|
||||
2021-05-03 00:00:00,0.0,0.12341666666907258
|
||||
2021-05-04 16:00:00,0.4404,-6.38216666666267
|
||||
2021-05-04 18:00:00,0.0,-13.469833333336283
|
||||
2021-05-04 22:00:00,0.0,1.6697500000009313
|
||||
2021-05-05 00:00:00,0.1366,-4.916749999996682
|
||||
2021-05-07 02:00:00,0.7772333333333333,-0.12258333333011251
|
||||
2021-05-07 04:00:00,0.2733666666666667,-0.992500000007567
|
||||
2021-05-07 06:00:00,0.16635,-3.8770000000004075
|
||||
2021-05-07 08:00:00,0.3377666666666667,2.7214999999996508
|
||||
2021-05-07 10:00:00,0.44745999999999997,-3.0332500000004075
|
||||
2021-05-07 12:00:00,0.09823333333333334,3.53041666666104
|
||||
2021-05-07 14:00:00,0.17858000000000002,-3.262749999994412
|
||||
2021-05-07 16:00:00,0.39808571428571426,5.318583333340939
|
||||
2021-05-07 18:00:00,0.47093333333333337,5.213916666667501
|
||||
2021-05-07 20:00:00,0.4985555555555556,4.666250000002037
|
||||
2021-05-07 22:00:00,0.4201,-4.9601666666640085
|
||||
2021-05-08 00:00:00,0.33936,-2.767500000009022
|
||||
2021-05-08 02:00:00,0.65265,1.1218333333308692
|
||||
2021-05-08 04:00:00,0.25,2.6674166666634846
|
||||
2021-05-08 06:00:00,0.6531,3.3850000000020373
|
||||
2021-05-08 08:00:00,0.0343,2.966166666665231
|
||||
2021-05-08 10:00:00,0.1341,6.882833333336748
|
||||
2021-05-08 12:00:00,0.2842,1.922166666656267
|
||||
2021-05-08 14:00:00,0.09386666666666665,-2.432749999999942
|
||||
2021-05-08 16:00:00,0.16463333333333333,1.1237500000061118
|
||||
2021-05-08 18:00:00,0.43216666666666664,-13.482499999998254
|
||||
2021-05-08 20:00:00,0.10203999999999999,7.5151666666642996
|
||||
2021-05-08 22:00:00,0.11926666666666663,1.2554166666668607
|
||||
2021-05-09 00:00:00,0.3818,-2.3267500000001746
|
||||
2021-06-02 02:00:00,0.0,4.328083333326504
|
||||
2021-06-02 06:00:00,-0.4019,3.2506666666668025
|
||||
2021-06-02 08:00:00,0.0,4.16608333332988
|
||||
2021-06-02 10:00:00,0.5411,5.885833333326445
|
||||
2021-06-02 12:00:00,0.0,2.295749999997497
|
||||
2021-06-02 16:00:00,0.1806,5.274499999999534
|
||||
2021-06-02 20:00:00,0.4569,-0.5001666666576057
|
||||
2021-06-02 22:00:00,0.0,0.4448333333275514
|
||||
2021-06-03 00:00:00,0.2233,1.529916666666395
|
||||
2021-06-04 02:00:00,0.18036666666666668,6.992166666670528
|
||||
2021-06-04 08:00:00,0.0,-7.88749999999709
|
||||
2021-06-04 14:00:00,0.0,-3.3074999999953434
|
||||
2021-06-04 16:00:00,-0.11315,1.0437500000043656
|
||||
2021-06-04 18:00:00,-0.10313333333333334,4.11699999999837
|
||||
2021-06-04 20:00:00,0.0,1.5028333333320916
|
||||
2021-06-05 00:00:00,0.6474500000000001,1.6735833333295886
|
||||
2021-06-08 02:00:00,0.8519,-5.279750000001513
|
||||
2021-06-08 04:00:00,0.03889999999999999,2.045250000002852
|
||||
2021-06-08 06:00:00,-0.212,-4.252083333332848
|
||||
2021-06-08 08:00:00,0.0421,1.2044166666673846
|
||||
2021-06-08 10:00:00,0.4404,-1.4051666666637175
|
||||
2021-06-08 12:00:00,-0.27875000000000005,-1.4900833333304035
|
||||
2021-06-08 14:00:00,0.12541250000000004,1.7237500000046566
|
||||
2021-06-08 16:00:00,0.13175,-7.653499999993073
|
||||
2021-06-08 18:00:00,0.42533333333333334,-5.440916666670091
|
||||
2021-06-08 20:00:00,0.38624444444444445,2.150583333335817
|
||||
2021-06-08 22:00:00,0.10626666666666668,4.075833333332412
|
||||
2021-06-09 00:00:00,0.0,4.534416666661855
|
||||
2021-07-02 02:00:00,0.148,2.257833333336748
|
||||
2021-07-02 04:00:00,-0.1531,-0.28416666667180834
|
||||
2021-07-02 06:00:00,0.2764,-3.5486666666693054
|
||||
2021-07-02 14:00:00,0.3094,0.11041666667006211
|
||||
2021-07-02 16:00:00,-0.586,5.521916666664765
|
||||
2021-07-04 02:00:00,-0.3818,1.9293333333334886
|
||||
2021-07-04 04:00:00,0.24209999999999998,3.0608333333366318
|
||||
2021-07-04 08:00:00,0.5994,3.5314999999973224
|
||||
2021-07-04 10:00:00,0.38775,1.960416666668607
|
||||
2021-07-04 12:00:00,0.3818,-0.5524166666655219
|
||||
2021-07-04 14:00:00,0.8067,-1.1629166666607489
|
||||
2021-07-04 16:00:00,-0.20095,-1.7917499999966822
|
||||
2021-07-04 18:00:00,0.37565000000000004,0.6822499999980209
|
||||
2021-07-04 20:00:00,0.1206,1.4725833333286573
|
||||
2021-07-05 00:00:00,0.0,3.2391666666590027
|
||||
2021-07-08 02:00:00,0.3825,-5.219250000001921
|
||||
2021-07-08 04:00:00,0.60815,-3.7955833333326154
|
||||
2021-07-08 06:00:00,0.0,2.691416666668374
|
||||
2021-07-08 08:00:00,0.1779,-3.7397500000006403
|
||||
2021-07-08 10:00:00,-0.049671428571428575,-2.146083333333081
|
||||
2021-07-08 12:00:00,0.4199,-2.81741666666494
|
||||
2021-07-08 14:00:00,0.19883125000000001,1.0437500000007276
|
||||
2021-07-08 16:00:00,0.1419,-1.3783333333340124
|
||||
2021-07-08 18:00:00,0.11880000000000002,4.8664166666640085
|
||||
2021-07-08 20:00:00,0.3992,-1.0279999999984284
|
||||
2021-07-08 22:00:00,0.48712,-1.814666666672565
|
||||
2021-07-09 00:00:00,0.2766,-2.90625
|
||||
2021-08-02 02:00:00,0.5411,-11.591249999990396
|
||||
2021-08-02 08:00:00,0.5411,0.4622499999968568
|
||||
2021-08-02 12:00:00,0.0,-6.393333333326154
|
||||
2021-08-02 14:00:00,-0.3382,1.262333333339484
|
||||
2021-08-02 16:00:00,0.2732,-0.7651666666642996
|
||||
2021-08-02 18:00:00,-0.47209999999999996,-1.2415000000037253
|
||||
2021-08-02 22:00:00,0.6249,-4.872749999994994
|
||||
2021-08-03 00:00:00,0.7040500000000001,-1.4337500000110595
|
||||
2021-08-04 02:00:00,0.0,-9.005250000001979
|
||||
2021-08-04 04:00:00,0.0,-3.34641666666721
|
||||
2021-08-04 06:00:00,0.05382,-5.943916666663426
|
||||
2021-08-04 08:00:00,0.0,0.0850833333315677
|
||||
2021-08-04 10:00:00,0.0,-3.796166666666977
|
||||
2021-08-04 12:00:00,0.4939,-2.1774999999979627
|
||||
2021-08-04 14:00:00,0.0,-1.9329166666648234
|
||||
2021-08-04 20:00:00,0.0073999999999999995,0.26041666666424135
|
||||
2021-08-04 22:00:00,0.0,3.473999999994703
|
||||
2021-08-05 00:00:00,0.0,-0.08133333332807524
|
||||
2021-08-08 02:00:00,0.08147777777777777,9.928249999997206
|
||||
2021-08-08 04:00:00,0.3189888888888889,-1.5674999999973807
|
||||
2021-08-08 06:00:00,0.3427142857142857,2.9571666666670353
|
||||
2021-08-08 08:00:00,0.3369375,8.09100000000035
|
||||
2021-08-08 10:00:00,0.23824444444444445,-2.268083333328832
|
||||
2021-08-08 12:00:00,0.4447181818181818,-3.1174166666605743
|
||||
2021-08-08 14:00:00,0.26695,-0.7463333333362243
|
||||
2021-08-08 16:00:00,0.48765,2.492999999994936
|
||||
2021-08-08 18:00:00,0.4374909090909091,-2.580166666666628
|
||||
2021-08-08 20:00:00,0.2960714285714286,-3.052000000003318
|
||||
2021-08-08 22:00:00,0.8264,2.9570833333418705
|
||||
2021-08-09 00:00:00,0.23571,7.135500000003958
|
||||
2021-09-02 08:00:00,-0.4451,1.1718333333337796
|
||||
2021-09-02 10:00:00,0.4588,0.37950000000273576
|
||||
2021-09-02 16:00:00,-0.296,-1.260083333334478
|
||||
2021-09-02 20:00:00,-0.14049999999999999,2.1992499999978463
|
||||
2021-09-03 00:00:00,0.0,4.547500000000582
|
||||
2021-09-04 02:00:00,-0.4329,3.0987499999973807
|
||||
2021-09-04 04:00:00,0.0,-1.6913333333359333
|
||||
2021-09-04 10:00:00,0.27055,-0.7534166666664532
|
||||
2021-09-04 12:00:00,0.0,2.776916666669422
|
||||
2021-09-04 14:00:00,0.12319999999999999,-3.861333333326911
|
||||
2021-09-04 16:00:00,0.0,1.6710000000093714
|
||||
2021-09-04 18:00:00,0.07433333333333335,-4.692749999994703
|
||||
2021-09-04 20:00:00,0.4885,0.19258333333709743
|
||||
2021-09-04 22:00:00,0.5083,0.4804166666654055
|
||||
2021-09-08 02:00:00,0.24712222222222222,4.934333333330869
|
||||
2021-09-08 04:00:00,0.42169999999999996,1.886000000005879
|
||||
2021-09-08 06:00:00,0.099675,-7.976333333324874
|
||||
2021-09-08 08:00:00,0.4492416666666667,-5.861250000001746
|
||||
2021-09-08 10:00:00,0.47060434782608696,-9.397583333331568
|
||||
2021-09-08 12:00:00,0.2766888888888889,12.831749999997555
|
||||
2021-09-08 14:00:00,0.3398764705882353,3.402249999999185
|
||||
2021-09-08 16:00:00,0.483352,-2.9609999999956926
|
||||
2021-09-08 18:00:00,0.3543045454545454,0.34724999999889405
|
||||
2021-09-08 20:00:00,0.58472,1.786083333332499
|
||||
2021-09-08 22:00:00,0.35475625,5.9135000000023865
|
||||
2021-09-09 00:00:00,0.21288333333333334,-3.1002499999958673
|
||||
2021-10-02 02:00:00,0.0,-0.22308333333785413
|
||||
2021-10-02 04:00:00,0.5719,-0.6001666666634264
|
||||
2021-10-02 06:00:00,0.0,0.16516666666575475
|
||||
2021-10-02 08:00:00,0.296,2.641250000000582
|
||||
2021-10-02 16:00:00,-0.5093,1.5320833333316841
|
||||
2021-10-02 18:00:00,0.0,1.3044999999983702
|
||||
2021-10-02 22:00:00,0.0,3.1049166666707606
|
||||
2021-10-03 00:00:00,0.4404,-0.6039999999920838
|
||||
2021-10-04 04:00:00,0.0,-4.226499999997031
|
||||
2021-10-04 08:00:00,0.41965,3.0988333333370974
|
||||
2021-10-04 10:00:00,0.34,1.2683333333334303
|
||||
2021-10-04 12:00:00,0.09,0.37924999999813735
|
||||
2021-10-04 16:00:00,0.18494000000000002,4.97591666666267
|
||||
2021-10-04 18:00:00,-0.1027,0.11441666666360106
|
||||
2021-10-04 20:00:00,0.0,7.5363333333298215
|
||||
2021-10-04 22:00:00,0.6124,-1.9645000000018626
|
||||
2021-10-05 00:00:00,-0.4939,-1.784749999998894
|
||||
2021-10-09 08:00:00,0.6354000000000001,2.367500000000291
|
||||
2021-10-09 10:00:00,0.418175,2.1697500000009313
|
||||
2021-10-09 12:00:00,0.26295,-0.12108333333162591
|
||||
2021-10-09 14:00:00,0.36776666666666663,0.3730833333393093
|
||||
2021-10-09 16:00:00,0.31528333333333336,4.474083333341696
|
||||
2021-10-09 18:00:00,0.16872307692307692,-1.6348333333298797
|
||||
2021-10-09 20:00:00,0.25524444444444444,-1.0650833333347691
|
||||
2021-10-09 22:00:00,0.4478166666666667,-1.1366666666581295
|
||||
2021-10-10 00:00:00,0.29610000000000003,1.2497500000026776
|
||||
2021-11-04 00:00:00,0.3182,-1.052416666665522
|
||||
2021-11-04 12:00:00,0.0,-0.0565000000060536
|
||||
2021-11-04 20:00:00,0.4019,0.5690833333283081
|
||||
2021-11-05 00:00:00,0.5411,1.189250000003085
|
||||
2021-12-03 06:00:00,0.0,-0.6670833333264454
|
||||
2021-12-03 08:00:00,0.6124,2.875500000001921
|
||||
2021-12-03 12:00:00,0.23835,0.46766666667099344
|
||||
2021-12-03 14:00:00,0.6486,4.723916666662262
|
||||
2021-12-03 16:00:00,0.0,2.0446666666684905
|
||||
2021-12-03 18:00:00,0.2023,-3.9345833333354676
|
||||
2021-12-03 20:00:00,0.5574,-9.742916666669771
|
||||
2021-12-04 00:00:00,0.35645000000000004,-8.961499999997613
|
||||
2021-12-04 12:00:00,0.2985,1.570833333331393
|
||||
2021-12-04 14:00:00,0.0,-5.483083333332615
|
||||
2021-12-04 16:00:00,0.0,-3.8566666666665697
|
||||
2021-12-04 20:00:00,0.0,-4.480749999995169
|
|
@ -1,4 +1,5 @@
|
||||
tweepy~=4.14.0
|
||||
numpy~=2.1.3
|
||||
pandas~=2.2.3
|
||||
vaderSentiment~=3.3.2
|
||||
vaderSentiment~=3.3.2
|
||||
scikit-learn~=1.6.1
|
Binary file not shown.
BIN
src/__pycache__/fetch.cpython-311.pyc
Normal file
BIN
src/__pycache__/fetch.cpython-311.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/learning.cpython-311.pyc
Normal file
BIN
src/__pycache__/learning.cpython-311.pyc
Normal file
Binary file not shown.
@ -1,18 +1,33 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
import requests
|
||||
|
||||
|
||||
def fetch_bitcoin_price():
    """Return the current Bitcoin price in USD from the CoinGecko API.

    Returns:
        The value stored under ``data['bitcoin']['usd']`` in the JSON
        response, or ``None`` when the request fails or the response body
        does not have that shape.
    """
    url = "https://api.coingecko.com/api/v3/simple/price"
    params = {
        "ids": "bitcoin",
        "vs_currencies": "usd"
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection and the caller never gets its None fallback.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        return data['bitcoin']['usd']
    except requests.RequestException as e:
        print(f"Error fetching Bitcoin price: {e}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # A successful HTTP status can still carry a body that is not valid
        # JSON or lacks the expected keys; report it the same way instead of
        # letting the exception escape.
        print(f"Error fetching Bitcoin price: unexpected response ({e!r})")
        return None
|
||||
|
||||
def load_csv(file_path):
    """Read the CSV at *file_path* into a DataFrame, print its columns, and return it."""
    frame = pd.read_csv(file_path)
    column_index = frame.columns
    print(f"CSV loaded. Columns: {column_index}")
    return frame
|
||||
|
||||
|
||||
# Shared analyzer instance; created lazily by _get_analyzer().
_ANALYZER = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        # Constructing the analyzer loads VADER's lexicon, so build it once
        # rather than once per scored text.
        _ANALYZER = SentimentIntensityAnalyzer()
    return _ANALYZER


def get_sentiment(text):
    """Return the VADER compound sentiment score for *text*.

    Args:
        text: The text to score.

    Returns:
        The ``'compound'`` entry of ``polarity_scores(text)``, i.e. VADER's
        single overall sentiment value for the text.
    """
    return _get_analyzer().polarity_scores(text)['compound']
|
||||
|
||||
|
||||
def classify_sentiment(compound_score):
|
||||
@ -28,8 +43,36 @@ def analyze_sentiment(df):
|
||||
df['vader_sentiment_class'] = df['vader_sentiment'].apply(classify_sentiment)
|
||||
return df
|
||||
|
||||
|
||||
def save_to_csv(df, output_path):
    """Write *df* to *output_path* as CSV without the index, then print a confirmation."""
    df.to_csv(output_path, index=False)
    confirmation = f"Results saved to {output_path}"
    print(confirmation)
|
||||
|
||||
def date_format(df):
    """Add a 2-hour-bucketed ``timestamp`` string column derived from ``created_at``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a ``created_at`` column that ``pd.to_datetime``
            can parse. An ``author_id`` column, if present, is removed.

    Returns:
        The same DataFrame, with ``timestamp`` holding each ``created_at``
        rounded up to the next 2-hour boundary and formatted as
        ``'%Y-%m-%d %H:%M:%S'``.
    """
    # Tolerate frames that never had (or already lost) the author_id column.
    df.drop(columns='author_id', errors='ignore', inplace=True)
    # created_at is parsed exactly once. The former second to_datetime call
    # with format='%Y-%d-%m ...' ran on an already-parsed column, so it did
    # nothing while suggesting a day/month swap.
    parsed = pd.to_datetime(df['created_at'])
    df['timestamp'] = parsed.dt.ceil('2h').dt.strftime('%Y-%m-%d %H:%M:%S')
    return df
|
||||
|
||||
def btc_data_(date, csv_path='../data/bitstamp.csv'):
    """Load Bitcoin price data, bucket it into 2-hour means, and return one date's rows.

    Args:
        date: Date label (e.g. ``'2021-02-07'``) used as both ends of the
            ``.loc`` slice on the resampled datetime index.
        csv_path: Location of the price CSV. It must contain a ``Timestamp``
            column in Unix seconds plus ``Open``, ``High``, ``Low`` and
            ``Close`` columns. The default keeps the original
            working-directory-relative path.

    Returns:
        DataFrame with a ``timestamp`` column, the remaining averaged price
        columns (``High``/``Low`` removed) and ``Gain/Loss``.
    """
    btc_data = pd.read_csv(csv_path)
    btc_data['timestamp'] = pd.to_datetime(btc_data['Timestamp'], unit='s')
    btc_data.set_index('timestamp', inplace=True)
    btc_data.drop(columns=['Timestamp'], inplace=True)
    btc_data = btc_data.resample('2h').mean()

    # drop() returns a new frame, so adding a column below does not write
    # into a slice of btc_data.
    filtered_btc_data = btc_data.loc[date:date].drop(columns=["High", "Low"])
    # NOTE(review): this is mean(Close) - mean(Open) over the bucket, not
    # last Close minus first Open -- confirm that is the intended measure.
    filtered_btc_data['Gain/Loss'] = filtered_btc_data['Close'] - filtered_btc_data['Open']
    filtered_btc_data.reset_index(inplace=True)
    return filtered_btc_data
|
||||
|
||||
def merge(btc, tweets):
    """Join price rows with tweets on ``timestamp`` and keep rows that have tweet text.

    Neither input frame is modified; the datetime conversion is done on
    copies produced by ``assign``.

    Args:
        btc: DataFrame with a ``timestamp`` column parseable by ``pd.to_datetime``.
        tweets: DataFrame with ``timestamp`` and ``text`` columns.

    Returns:
        Left merge of *btc* with *tweets* on ``timestamp``, minus every row
        whose ``text`` is missing (price rows with no matching tweet, and
        tweets whose text is itself null).
    """
    # Both keys must share a datetime dtype for the merge to match rows.
    btc_ts = btc.assign(timestamp=pd.to_datetime(btc['timestamp']))
    tweets_ts = tweets.assign(timestamp=pd.to_datetime(tweets['timestamp']))
    result = pd.merge(btc_ts, tweets_ts, on='timestamp', how='left')
    return result.dropna(subset=['text'])
|
@ -6,14 +6,14 @@
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:06:55.821536Z",
|
||||
"start_time": "2025-01-14T17:06:48.631852Z"
|
||||
"end_time": "2025-01-15T11:14:44.304056Z",
|
||||
"start_time": "2025-01-15T11:14:36.737765Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from src.analysis import analyze_sentiment\n",
|
||||
"from src.analysis import analyze_sentiment, save_to_csv\n",
|
||||
"\n",
|
||||
"btc_tweets = pd.read_csv('../data/Bitcoin_tweets.csv')\n",
|
||||
"btc_data = pd.read_csv('../data/bitstamp.csv')"
|
||||
@ -23,18 +23,18 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/7p/w3y96f6s701d9vv4h5vptdz00000gn/T/ipykernel_14923/4274364300.py:5: DtypeWarning: Columns (5,6,7,12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||||
"/var/folders/7p/w3y96f6s701d9vv4h5vptdz00000gn/T/ipykernel_19189/1429944408.py:5: DtypeWarning: Columns (5,6,7,12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||||
" btc_tweets = pd.read_csv('../data/Bitcoin_tweets.csv')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 31
|
||||
"execution_count": 49
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:06:59.590206Z",
|
||||
"start_time": "2025-01-14T17:06:58.392209Z"
|
||||
"end_time": "2025-01-15T11:14:50.559541Z",
|
||||
"start_time": "2025-01-15T11:14:49.318953Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -44,13 +44,13 @@
|
||||
],
|
||||
"id": "67e2290f7b4ac803",
|
||||
"outputs": [],
|
||||
"execution_count": 32
|
||||
"execution_count": 50
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:07:02.514048Z",
|
||||
"start_time": "2025-01-14T17:07:01.025617Z"
|
||||
"end_time": "2025-01-15T11:14:54.134908Z",
|
||||
"start_time": "2025-01-15T11:14:52.720595Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -61,26 +61,26 @@
|
||||
],
|
||||
"id": "b7cd551da68b0193",
|
||||
"outputs": [],
|
||||
"execution_count": 33
|
||||
"execution_count": 51
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:07:04.510210Z",
|
||||
"start_time": "2025-01-14T17:07:03.912184Z"
|
||||
"end_time": "2025-01-15T11:14:55.478557Z",
|
||||
"start_time": "2025-01-15T11:14:54.885194Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "btc_data = btc_data.resample('4h').mean()",
|
||||
"source": "btc_data = btc_data.resample('2h').mean()",
|
||||
"id": "89ca0634382ae0a8",
|
||||
"outputs": [],
|
||||
"execution_count": 34
|
||||
"execution_count": 52
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:07:07.786155Z",
|
||||
"start_time": "2025-01-14T17:07:06.155961Z"
|
||||
"end_time": "2025-01-15T11:14:58.284073Z",
|
||||
"start_time": "2025-01-15T11:14:56.702218Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -99,483 +99,68 @@
|
||||
],
|
||||
"id": "50dcf3fac2b8f0ef",
|
||||
"outputs": [],
|
||||
"execution_count": 35
|
||||
"execution_count": 53
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T20:19:53.479883Z",
|
||||
"start_time": "2025-01-14T20:19:53.434390Z"
|
||||
"end_time": "2025-01-15T11:15:23.983659Z",
|
||||
"start_time": "2025-01-15T11:15:23.973039Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"start_date = \"2021-02-07\"\n",
|
||||
"end_date = \"2021-12-04\"\n",
|
||||
"start_date = \"2021-01-01\"\n",
|
||||
"end_date = \"2021-12-29\"\n",
|
||||
"\n",
|
||||
"filtered_btc_data = btc_data.loc[start_date:end_date]\n",
|
||||
"columns_to_drop_btc = [\n",
|
||||
" \"High\", \"Low\"\n",
|
||||
"]\n",
|
||||
"filtered_btc_data = filtered_btc_data.drop(columns=columns_to_drop_btc)\n",
|
||||
"filtered_btc_data['Gain/Loss'] = filtered_btc_data['Open'] - filtered_btc_data['Close']\n",
|
||||
"\n",
|
||||
"print(filtered_btc_data)"
|
||||
"filtered_btc_data['Gain/Loss'] = filtered_btc_data['Close'] - filtered_btc_data['Open']\n"
|
||||
],
|
||||
"id": "b06730669d1a9264",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Open Close Volume Gain/Loss\n",
|
||||
"timestamp \n",
|
||||
"2021-02-07 00:00:00 40165.899167 40165.286000 4.759081 0.613167\n",
|
||||
"2021-02-07 04:00:00 39218.887125 39213.334292 6.409186 5.552833\n",
|
||||
"2021-02-07 08:00:00 38588.391458 38588.979458 2.909470 -0.588000\n",
|
||||
"2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 -3.853458\n",
|
||||
"2021-02-07 16:00:00 38829.270625 38827.863833 4.306438 1.406792\n",
|
||||
"... ... ... ... ...\n",
|
||||
"2021-12-04 04:00:00 53281.233417 53281.023333 0.929084 0.210083\n",
|
||||
"2021-12-04 08:00:00 50036.703292 50009.813667 18.521395 26.889625\n",
|
||||
"2021-12-04 12:00:00 47438.188292 47436.232167 9.826317 1.956125\n",
|
||||
"2021-12-04 16:00:00 47215.739500 47219.201833 5.740048 -3.462333\n",
|
||||
"2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 -0.056708\n",
|
||||
"\n",
|
||||
"[1806 rows x 4 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 64
|
||||
"outputs": [],
|
||||
"execution_count": 54
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:07:12.251364Z",
|
||||
"start_time": "2025-01-14T17:07:12.172844Z"
|
||||
"end_time": "2025-01-15T11:15:25.778595Z",
|
||||
"start_time": "2025-01-15T11:15:25.745985Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"btc_tweets_cleaned['timestamp'] = btc_tweets_cleaned['date'].dt.ceil('4h')\n",
|
||||
"btc_tweets_cleaned = btc_tweets_cleaned.sort_values('timestamp')\n",
|
||||
"btc_tweets_cleaned"
|
||||
"btc_tweets_cleaned['timestamp'] = btc_tweets_cleaned['date'].dt.ceil('2h')\n",
|
||||
"btc_tweets_cleaned = btc_tweets_cleaned.sort_values('timestamp')"
|
||||
],
|
||||
"id": "e2cbd50d300a0e9f",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" user_followers date \\\n",
|
||||
"344135 30925.0 2021-02-07 11:10:55 \n",
|
||||
"344628 59242.0 2021-02-07 10:45:42 \n",
|
||||
"344627 33.0 2021-02-07 10:45:48 \n",
|
||||
"344626 52.0 2021-02-07 10:45:52 \n",
|
||||
"344625 22.0 2021-02-07 10:45:53 \n",
|
||||
"... ... ... \n",
|
||||
"48958 2155.0 2021-12-04 16:53:52 \n",
|
||||
"48957 3605.0 2021-12-04 16:54:04 \n",
|
||||
"48956 9.0 2021-12-04 16:54:44 \n",
|
||||
"48962 169.0 2021-12-04 16:51:37 \n",
|
||||
"48931 531.0 2021-12-04 17:00:42 \n",
|
||||
"\n",
|
||||
" text \\\n",
|
||||
"344135 Yoh my friends, I know you all love buying and... \n",
|
||||
"344628 Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle... \n",
|
||||
"344627 @MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR... \n",
|
||||
"344626 New distribution: MooniWarPrizes: 50.000 MWar ... \n",
|
||||
"344625 #China #elsalvadorbitcoin #Nigeria #Bitcoin #P... \n",
|
||||
"... ... \n",
|
||||
"48958 $MYST #MYST #BTC #Bitcoin \n",
|
||||
"48957 Team Bankroller on Telegram25 BNKRX prize for ... \n",
|
||||
"48956 I set up my @cryptocom account to buy $100 #bt... \n",
|
||||
"48962 Riot Blockchain Bitcoin production jumps 80% o... \n",
|
||||
"48931 An ECB Board Member Attacks Bitcoin, Says It '... \n",
|
||||
"\n",
|
||||
" hashtags timestamp \n",
|
||||
"344135 ['Crypto', 'Bitcoin', 'BZExchange'] 2021-02-07 12:00:00 \n",
|
||||
"344628 ['bitcoin'] 2021-02-07 12:00:00 \n",
|
||||
"344627 ['Bitcoin'] 2021-02-07 12:00:00 \n",
|
||||
"344626 ['Airdrop', 'Cryptocurrency', 'Free', 'Income'... 2021-02-07 12:00:00 \n",
|
||||
"344625 ['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit... 2021-02-07 12:00:00 \n",
|
||||
"... ... ... \n",
|
||||
"48958 ['MYST', 'BTC', 'Bitcoin'] 2021-12-04 20:00:00 \n",
|
||||
"48957 ['TRX', 'TRON'] 2021-12-04 20:00:00 \n",
|
||||
"48956 ['btc', 'cro', 'crypto', 'bitcoin'] 2021-12-04 20:00:00 \n",
|
||||
"48962 ['bitcoin'] 2021-12-04 20:00:00 \n",
|
||||
"48931 NaN 2021-12-04 20:00:00 \n",
|
||||
"\n",
|
||||
"[297728 rows x 5 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>user_followers</th>\n",
|
||||
" <th>date</th>\n",
|
||||
" <th>text</th>\n",
|
||||
" <th>hashtags</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>344135</th>\n",
|
||||
" <td>30925.0</td>\n",
|
||||
" <td>2021-02-07 11:10:55</td>\n",
|
||||
" <td>Yoh my friends, I know you all love buying and...</td>\n",
|
||||
" <td>['Crypto', 'Bitcoin', 'BZExchange']</td>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>344628</th>\n",
|
||||
" <td>59242.0</td>\n",
|
||||
" <td>2021-02-07 10:45:42</td>\n",
|
||||
" <td>Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle...</td>\n",
|
||||
" <td>['bitcoin']</td>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>344627</th>\n",
|
||||
" <td>33.0</td>\n",
|
||||
" <td>2021-02-07 10:45:48</td>\n",
|
||||
" <td>@MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR...</td>\n",
|
||||
" <td>['Bitcoin']</td>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>344626</th>\n",
|
||||
" <td>52.0</td>\n",
|
||||
" <td>2021-02-07 10:45:52</td>\n",
|
||||
" <td>New distribution: MooniWarPrizes: 50.000 MWar ...</td>\n",
|
||||
" <td>['Airdrop', 'Cryptocurrency', 'Free', 'Income'...</td>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>344625</th>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>2021-02-07 10:45:53</td>\n",
|
||||
" <td>#China #elsalvadorbitcoin #Nigeria #Bitcoin #P...</td>\n",
|
||||
" <td>['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit...</td>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>48958</th>\n",
|
||||
" <td>2155.0</td>\n",
|
||||
" <td>2021-12-04 16:53:52</td>\n",
|
||||
" <td>$MYST #MYST #BTC #Bitcoin</td>\n",
|
||||
" <td>['MYST', 'BTC', 'Bitcoin']</td>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>48957</th>\n",
|
||||
" <td>3605.0</td>\n",
|
||||
" <td>2021-12-04 16:54:04</td>\n",
|
||||
" <td>Team Bankroller on Telegram25 BNKRX prize for ...</td>\n",
|
||||
" <td>['TRX', 'TRON']</td>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>48956</th>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>2021-12-04 16:54:44</td>\n",
|
||||
" <td>I set up my @cryptocom account to buy $100 #bt...</td>\n",
|
||||
" <td>['btc', 'cro', 'crypto', 'bitcoin']</td>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>48962</th>\n",
|
||||
" <td>169.0</td>\n",
|
||||
" <td>2021-12-04 16:51:37</td>\n",
|
||||
" <td>Riot Blockchain Bitcoin production jumps 80% o...</td>\n",
|
||||
" <td>['bitcoin']</td>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>48931</th>\n",
|
||||
" <td>531.0</td>\n",
|
||||
" <td>2021-12-04 17:00:42</td>\n",
|
||||
" <td>An ECB Board Member Attacks Bitcoin, Says It '...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>297728 rows × 5 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 37
|
||||
"outputs": [],
|
||||
"execution_count": 55
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T17:07:14.783610Z",
|
||||
"start_time": "2025-01-14T17:07:14.687756Z"
|
||||
"end_time": "2025-01-15T11:15:27.471413Z",
|
||||
"start_time": "2025-01-15T11:15:27.291684Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"result = pd.merge(filtered_btc_data, btc_tweets_cleaned, on='timestamp', how='left')\n",
|
||||
"result.dropna(subset=['text'], inplace=True)\n",
|
||||
"result"
|
||||
"result.dropna(subset=['text'], inplace=True)"
|
||||
],
|
||||
"id": "66cff75a7a827a8d",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" timestamp Open Close Volume \\\n",
|
||||
"3 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
|
||||
"4 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
|
||||
"5 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
|
||||
"6 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
|
||||
"7 2021-02-07 12:00:00 39308.021458 39311.874917 3.499237 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"299375 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
|
||||
"299376 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
|
||||
"299377 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
|
||||
"299378 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
|
||||
"299379 2021-12-04 20:00:00 48301.942792 48301.999500 3.640812 \n",
|
||||
"\n",
|
||||
" user_followers date \\\n",
|
||||
"3 30925.0 2021-02-07 11:10:55 \n",
|
||||
"4 59242.0 2021-02-07 10:45:42 \n",
|
||||
"5 33.0 2021-02-07 10:45:48 \n",
|
||||
"6 52.0 2021-02-07 10:45:52 \n",
|
||||
"7 22.0 2021-02-07 10:45:53 \n",
|
||||
"... ... ... \n",
|
||||
"299375 2155.0 2021-12-04 16:53:52 \n",
|
||||
"299376 3605.0 2021-12-04 16:54:04 \n",
|
||||
"299377 9.0 2021-12-04 16:54:44 \n",
|
||||
"299378 169.0 2021-12-04 16:51:37 \n",
|
||||
"299379 531.0 2021-12-04 17:00:42 \n",
|
||||
"\n",
|
||||
" text \\\n",
|
||||
"3 Yoh my friends, I know you all love buying and... \n",
|
||||
"4 Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle... \n",
|
||||
"5 @MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR... \n",
|
||||
"6 New distribution: MooniWarPrizes: 50.000 MWar ... \n",
|
||||
"7 #China #elsalvadorbitcoin #Nigeria #Bitcoin #P... \n",
|
||||
"... ... \n",
|
||||
"299375 $MYST #MYST #BTC #Bitcoin \n",
|
||||
"299376 Team Bankroller on Telegram25 BNKRX prize for ... \n",
|
||||
"299377 I set up my @cryptocom account to buy $100 #bt... \n",
|
||||
"299378 Riot Blockchain Bitcoin production jumps 80% o... \n",
|
||||
"299379 An ECB Board Member Attacks Bitcoin, Says It '... \n",
|
||||
"\n",
|
||||
" hashtags \n",
|
||||
"3 ['Crypto', 'Bitcoin', 'BZExchange'] \n",
|
||||
"4 ['bitcoin'] \n",
|
||||
"5 ['Bitcoin'] \n",
|
||||
"6 ['Airdrop', 'Cryptocurrency', 'Free', 'Income'... \n",
|
||||
"7 ['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit... \n",
|
||||
"... ... \n",
|
||||
"299375 ['MYST', 'BTC', 'Bitcoin'] \n",
|
||||
"299376 ['TRX', 'TRON'] \n",
|
||||
"299377 ['btc', 'cro', 'crypto', 'bitcoin'] \n",
|
||||
"299378 ['bitcoin'] \n",
|
||||
"299379 NaN \n",
|
||||
"\n",
|
||||
"[297728 rows x 8 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" <th>Open</th>\n",
|
||||
" <th>Close</th>\n",
|
||||
" <th>Volume</th>\n",
|
||||
" <th>user_followers</th>\n",
|
||||
" <th>date</th>\n",
|
||||
" <th>text</th>\n",
|
||||
" <th>hashtags</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" <td>39308.021458</td>\n",
|
||||
" <td>39311.874917</td>\n",
|
||||
" <td>3.499237</td>\n",
|
||||
" <td>30925.0</td>\n",
|
||||
" <td>2021-02-07 11:10:55</td>\n",
|
||||
" <td>Yoh my friends, I know you all love buying and...</td>\n",
|
||||
" <td>['Crypto', 'Bitcoin', 'BZExchange']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" <td>39308.021458</td>\n",
|
||||
" <td>39311.874917</td>\n",
|
||||
" <td>3.499237</td>\n",
|
||||
" <td>59242.0</td>\n",
|
||||
" <td>2021-02-07 10:45:42</td>\n",
|
||||
" <td>Nfp #bitcoin buy for BITSTAMP:BTCUSD by Keedle...</td>\n",
|
||||
" <td>['bitcoin']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" <td>39308.021458</td>\n",
|
||||
" <td>39311.874917</td>\n",
|
||||
" <td>3.499237</td>\n",
|
||||
" <td>33.0</td>\n",
|
||||
" <td>2021-02-07 10:45:48</td>\n",
|
||||
" <td>@MMCrypto #Bitcoin WHY TODAY IS THE MOST IMPOR...</td>\n",
|
||||
" <td>['Bitcoin']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" <td>39308.021458</td>\n",
|
||||
" <td>39311.874917</td>\n",
|
||||
" <td>3.499237</td>\n",
|
||||
" <td>52.0</td>\n",
|
||||
" <td>2021-02-07 10:45:52</td>\n",
|
||||
" <td>New distribution: MooniWarPrizes: 50.000 MWar ...</td>\n",
|
||||
" <td>['Airdrop', 'Cryptocurrency', 'Free', 'Income'...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" <td>39308.021458</td>\n",
|
||||
" <td>39311.874917</td>\n",
|
||||
" <td>3.499237</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>2021-02-07 10:45:53</td>\n",
|
||||
" <td>#China #elsalvadorbitcoin #Nigeria #Bitcoin #P...</td>\n",
|
||||
" <td>['China', 'elsalvadorbitcoin', 'Nigeria', 'Bit...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>299375</th>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" <td>48301.942792</td>\n",
|
||||
" <td>48301.999500</td>\n",
|
||||
" <td>3.640812</td>\n",
|
||||
" <td>2155.0</td>\n",
|
||||
" <td>2021-12-04 16:53:52</td>\n",
|
||||
" <td>$MYST #MYST #BTC #Bitcoin</td>\n",
|
||||
" <td>['MYST', 'BTC', 'Bitcoin']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>299376</th>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" <td>48301.942792</td>\n",
|
||||
" <td>48301.999500</td>\n",
|
||||
" <td>3.640812</td>\n",
|
||||
" <td>3605.0</td>\n",
|
||||
" <td>2021-12-04 16:54:04</td>\n",
|
||||
" <td>Team Bankroller on Telegram25 BNKRX prize for ...</td>\n",
|
||||
" <td>['TRX', 'TRON']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>299377</th>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" <td>48301.942792</td>\n",
|
||||
" <td>48301.999500</td>\n",
|
||||
" <td>3.640812</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>2021-12-04 16:54:44</td>\n",
|
||||
" <td>I set up my @cryptocom account to buy $100 #bt...</td>\n",
|
||||
" <td>['btc', 'cro', 'crypto', 'bitcoin']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>299378</th>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" <td>48301.942792</td>\n",
|
||||
" <td>48301.999500</td>\n",
|
||||
" <td>3.640812</td>\n",
|
||||
" <td>169.0</td>\n",
|
||||
" <td>2021-12-04 16:51:37</td>\n",
|
||||
" <td>Riot Blockchain Bitcoin production jumps 80% o...</td>\n",
|
||||
" <td>['bitcoin']</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>299379</th>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" <td>48301.942792</td>\n",
|
||||
" <td>48301.999500</td>\n",
|
||||
" <td>3.640812</td>\n",
|
||||
" <td>531.0</td>\n",
|
||||
" <td>2021-12-04 17:00:42</td>\n",
|
||||
" <td>An ECB Board Member Attacks Bitcoin, Says It '...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>297728 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 38
|
||||
"outputs": [],
|
||||
"execution_count": 56
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T20:20:03.203053Z",
|
||||
"start_time": "2025-01-14T20:19:59.756279Z"
|
||||
"end_time": "2025-01-15T11:15:33.655160Z",
|
||||
"start_time": "2025-01-15T11:15:30.228882Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -589,140 +174,31 @@
|
||||
],
|
||||
"id": "f2d740d611c43cbc",
|
||||
"outputs": [],
|
||||
"execution_count": 65
|
||||
"execution_count": 57
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-14T20:20:13.500329Z",
|
||||
"start_time": "2025-01-14T20:20:13.493261Z"
|
||||
"end_time": "2025-01-15T11:15:34.200051Z",
|
||||
"start_time": "2025-01-15T11:15:34.194465Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "final",
|
||||
"source": [
|
||||
"from analysis import save_to_csv\n",
|
||||
"save_to_csv(final, '../data/sentiment.csv')"
|
||||
],
|
||||
"id": "9c29a057abb9a8e1",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" timestamp vader_sentiment Gain/Loss\n",
|
||||
"0 2021-02-07 12:00:00 0.159488 -3.853458\n",
|
||||
"1 2021-02-07 16:00:00 0.206750 1.406792\n",
|
||||
"2 2021-02-07 20:00:00 0.342183 -1.916208\n",
|
||||
"3 2021-02-08 00:00:00 0.392456 -0.425292\n",
|
||||
"4 2021-03-07 04:00:00 0.301357 0.421708\n",
|
||||
".. ... ... ...\n",
|
||||
"125 2021-12-03 20:00:00 0.263333 4.646417\n",
|
||||
"126 2021-12-04 00:00:00 0.135840 5.825708\n",
|
||||
"127 2021-12-04 12:00:00 0.000000 1.956125\n",
|
||||
"128 2021-12-04 16:00:00 0.000000 -3.462333\n",
|
||||
"129 2021-12-04 20:00:00 0.066533 -0.056708\n",
|
||||
"\n",
|
||||
"[130 rows x 3 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" <th>vader_sentiment</th>\n",
|
||||
" <th>Gain/Loss</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2021-02-07 12:00:00</td>\n",
|
||||
" <td>0.159488</td>\n",
|
||||
" <td>-3.853458</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2021-02-07 16:00:00</td>\n",
|
||||
" <td>0.206750</td>\n",
|
||||
" <td>1.406792</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2021-02-07 20:00:00</td>\n",
|
||||
" <td>0.342183</td>\n",
|
||||
" <td>-1.916208</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2021-02-08 00:00:00</td>\n",
|
||||
" <td>0.392456</td>\n",
|
||||
" <td>-0.425292</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2021-03-07 04:00:00</td>\n",
|
||||
" <td>0.301357</td>\n",
|
||||
" <td>0.421708</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>125</th>\n",
|
||||
" <td>2021-12-03 20:00:00</td>\n",
|
||||
" <td>0.263333</td>\n",
|
||||
" <td>4.646417</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>126</th>\n",
|
||||
" <td>2021-12-04 00:00:00</td>\n",
|
||||
" <td>0.135840</td>\n",
|
||||
" <td>5.825708</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>127</th>\n",
|
||||
" <td>2021-12-04 12:00:00</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>1.956125</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>128</th>\n",
|
||||
" <td>2021-12-04 16:00:00</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>-3.462333</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>129</th>\n",
|
||||
" <td>2021-12-04 20:00:00</td>\n",
|
||||
" <td>0.066533</td>\n",
|
||||
" <td>-0.056708</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>130 rows × 3 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Results saved to ../data/sentiment.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 66
|
||||
"execution_count": 58
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
40
src/fetch.py
40
src/fetch.py
@ -1,7 +1,5 @@
|
||||
import tweepy
|
||||
import csv
|
||||
import os
|
||||
import time
|
||||
import pandas as pd
|
||||
|
||||
# Twitter API credentials
|
||||
API_KEY = "gDLV23ofFr7xEj38SkBHJeCMl"
|
||||
@ -30,40 +28,10 @@ def fetch_tweets_with_hashtags(hashtags, max_results=10):
|
||||
"text": tweet.text
|
||||
})
|
||||
except tweepy.TooManyRequests as e:
|
||||
print("Rate limit reached. Waiting for 15 minutes...")
|
||||
time.sleep(15 * 60)
|
||||
return fetch_tweets_with_hashtags(hashtags, max_results)
|
||||
print("Rate limit reached.")
|
||||
tweets = pd.read_csv('../data/tweets_with_hashtags.csv')
|
||||
return tweets
|
||||
except Exception as e:
|
||||
print(f"Error occurred while fetching tweets: {e}")
|
||||
|
||||
return tweets_data
|
||||
|
||||
|
||||
def save_to_csv(data):
    """Append tweet records to ``tweets_with_hashtags.csv``.

    The file lives in the ``data`` directory one level above this module's
    directory; the directory is created when missing. A header row is written
    only when the file does not exist yet, so repeated calls keep appending
    rows under a single header.

    Args:
        data: Iterable of dicts with the keys ``author_id``, ``created_at``
            and ``text``.

    Any failure is reported on stdout instead of being raised (best-effort
    persistence).
    """
    columns = ["author_id", "created_at", "text"]
    try:
        target_dir = os.path.join(os.path.dirname(__file__), "..", "data")
        os.makedirs(target_dir, exist_ok=True)
        filepath = os.path.join(target_dir, "tweets_with_hashtags.csv")

        # Decide about the header before opening: append mode creates the file.
        needs_header = not os.path.isfile(filepath)

        with open(filepath, "a", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            if needs_header:
                writer.writeheader()
            writer.writerows(data)

        print(f"Data successfully appended to {filepath}")
    except Exception as e:
        print(f"Error while saving data to CSV: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: fetch up to 100 tweets for the tracked hashtags
    # and append them to the CSV archive.
    fetched = fetch_tweets_with_hashtags(["btc", "bitcoin"], 100)
    save_to_csv(fetched)
|
@ -0,0 +1,16 @@
|
||||
import pandas as pd
|
||||
from sklearn.metrics import mean_squared_error, r2_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
|
||||
def model(sentiment):
    """Predict ``Gain/Loss`` for the given sentiment scores.

    A linear regression of ``Gain/Loss`` on ``vader_sentiment`` is fitted on
    every row of ``data/sentiment.csv`` (no hold-out split) each time the
    function is called, and then evaluated on the caller's data.

    Args:
        sentiment: Table exposing a ``vader_sentiment`` column; it is selected
            with ``sentiment[['vader_sentiment']]``.

    Returns:
        The result of ``LinearRegression.predict`` for the supplied rows,
        one prediction per row.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    # Resolve the training data relative to this file instead of the current
    # working directory, so the function works regardless of where the
    # process was started from.
    history_path = Path(__file__).resolve().parent.parent / "data" / "sentiment.csv"
    history = pd.read_csv(history_path)

    # Named `regressor` so the local does not shadow this function's name.
    regressor = LinearRegression()
    regressor.fit(history[['vader_sentiment']], history['Gain/Loss'])
    return regressor.predict(sentiment[['vader_sentiment']])
|
20
src/main.py
Normal file
20
src/main.py
Normal file
@ -0,0 +1,20 @@
|
||||
from fetch import *
|
||||
from learning import *
|
||||
from analysis import *
|
||||
import os
|
||||
|
||||
if __name__ == "__main__":
    # End-to-end run: fetch tweets, score their sentiment, predict the price move.
    hashtags = ["btc", "bitcoin"]
    max_results = 10
    tmp_path = '../data/new_tweets.csv'

    tweets = fetch_tweets_with_hashtags(hashtags, max_results)

    # Round-trip the tweets through a temporary CSV so they are reloaded as a
    # DataFrame; the file is removed even when reading it fails.
    save_to_csv(tweets, tmp_path)
    try:
        tweets = pd.read_csv(tmp_path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    # NOTE(review): the return value of date_format is ignored, so it
    # presumably normalises `tweets` in place — confirm in analysis.py.
    date_format(tweets)
    sentiment = analyze_sentiment(tweets)
    daily_sentiment = sentiment.groupby('timestamp')['vader_sentiment'].mean().reset_index()
    print(f"Tweets sentiment score: {daily_sentiment['vader_sentiment'].iloc[0]}")

    prediction = model(daily_sentiment)

    # Query the price once so the printed quote and the prediction base are
    # guaranteed to be the same value.
    current_price = fetch_bitcoin_price()
    print(f"Current BTC price:{current_price}")
    # NOTE(review): the factor 4 is unexplained in this file — confirm how the
    # model's Gain/Loss unit maps to the "one hour" horizon.
    print(f"Price prediction in one hour:{current_price + 4*prediction[0]}")
|
Loading…
Reference in New Issue
Block a user