small updates

This commit is contained in:
BartoszOwczarek22 2025-01-13 13:18:26 +01:00
parent dc80bf4ae4
commit 032d14dc4a
6 changed files with 13 additions and 45 deletions

View File

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.12 virtualenv at ~/Envs/nlp-env" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.11 virtualenv at ~/Envs/nlp-en" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PackageRequirementsSettings">

View File

@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Python 3.12 virtualenv at ~/Envs/nlp-env" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 virtualenv at ~/Envs/nlp-env" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 virtualenv at ~/Envs/nlp-en" project-jdk-type="Python SDK" />
</project>

View File

@ -1,3 +1,4 @@
tweepy~=4.14.0
snscrape~=0.7.0.20230622
numpy~=2.1.3
numpy~=2.1.3
pandas~=2.2.3
vaderSentiment~=3.3.2

View File

@ -1,4 +1,5 @@
import pandas as pd
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
@ -7,6 +8,7 @@ def load_csv(file_path):
print(f"CSV loaded. Columns: {df.columns}")
return df
def get_vader_sentiment(text):
analyzer = SentimentIntensityAnalyzer()
sentiment = analyzer.polarity_scores(text)
@ -23,12 +25,8 @@ def classify_sentiment(compound_score):
def analyze_sentiment(df):
    """Annotate ``df`` with VADER sentiment columns and return it.

    Two columns are written onto the passed-in frame (it is modified in
    place, not copied):

    * ``vader_sentiment`` -- ``get_vader_sentiment`` applied to every value
      of the ``text`` column.
    * ``vader_sentiment_class`` -- ``classify_sentiment`` applied to every
      value of ``vader_sentiment``.

    Args:
        df: DataFrame that provides a ``text`` column.

    Returns:
        The same ``df`` object, now carrying the two extra columns.
    """
    per_tweet_scores = df['text'].apply(get_vader_sentiment)
    df['vader_sentiment'] = per_tweet_scores
    # Classify from the stored column so the label is derived from exactly
    # what ends up in the frame.
    per_tweet_labels = df['vader_sentiment'].apply(classify_sentiment)
    df['vader_sentiment_class'] = per_tweet_labels
    return df
@ -36,3 +34,8 @@ def save_to_csv(df, output_path):
df.to_csv(output_path, index=False)
print(f"Results saved to {output_path}")
def clean_tweet(tweet):
    """Replace newlines and URLs in a tweet with single spaces.

    Each newline character and each ``http://`` / ``https://`` URL (the
    scheme followed by a run of non-whitespace characters) is substituted
    with one space. Surrounding whitespace is left untouched, so runs of
    spaces may remain where a URL was removed.

    Args:
        tweet: The tweet text as a ``str``.

    Returns:
        The tweet text with newlines and URLs replaced by spaces.
    """
    # Raw strings keep the regex escapes (\n, \S) out of Python's own
    # string-escape processing; the previous non-raw '\/' and '\S' were
    # invalid string escapes that raise a SyntaxWarning on Python 3.12+.
    tweet = re.sub(r'\n', ' ', tweet)
    # '/' needs no escaping in Python regular expressions.
    tweet = re.sub(r'https?://\S+', ' ', tweet)
    return tweet

View File

@ -52,41 +52,6 @@
"outputs": [],
"execution_count": 65
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-12T15:21:16.836060Z",
"start_time": "2025-01-12T15:21:16.785607Z"
}
},
"cell_type": "code",
"source": [
"btc_data.info()\n",
"#btc_tweets.info()"
],
"id": "b7e9b6bb66c7b3c4",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 6742281 entries, 2012-01-01 10:01:00 to NaT\n",
"Data columns (total 5 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 Open float64\n",
" 1 High float64\n",
" 2 Low float64\n",
" 3 Close float64\n",
" 4 Volume float64\n",
"dtypes: float64(5)\n",
"memory usage: 308.6 MB\n"
]
}
],
"execution_count": 50
},
{
"metadata": {
"ExecuteTime": {
@ -115,7 +80,6 @@
" \"user_favourites\", \"user_verified\", \"source\", \"is_retweet\"\n",
"]\n",
"\n",
"# Drop columns from btc_tweets\n",
"btc_tweets_cleaned = btc_tweets.drop(columns=columns_to_drop_tweets)\n",
"\n",
"output_path = \"../data/btc_tweets_cleaned.csv\"\n",

View File

@ -41,7 +41,7 @@ def fetch_tweets_with_hashtags(hashtags, max_results=10):
def save_to_csv(data):
try:
# Dynamically construct the path to `BitSent/data/tweets_with_hashtags.csv`
current_dir = os.path.dirname(__file__)
data_dir = os.path.join(current_dir, "..", "data")
os.makedirs(data_dir, exist_ok=True)