small updates
This commit is contained in:
parent
dc80bf4ae4
commit
032d14dc4a
@ -2,7 +2,7 @@
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.12 virtualenv at ~/Envs/nlp-env" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.11 virtualenv at ~/Envs/nlp-en" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PackageRequirementsSettings">
|
||||
|
@ -3,5 +3,5 @@
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.12 virtualenv at ~/Envs/nlp-env" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 virtualenv at ~/Envs/nlp-env" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 virtualenv at ~/Envs/nlp-en" project-jdk-type="Python SDK" />
|
||||
</project>
|
@ -1,3 +1,4 @@
|
||||
tweepy~=4.14.0
|
||||
snscrape~=0.7.0.20230622
|
||||
numpy~=2.1.3
|
||||
numpy~=2.1.3
|
||||
pandas~=2.2.3
|
||||
vaderSentiment~=3.3.2
|
@ -1,4 +1,5 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
|
||||
@ -7,6 +8,7 @@ def load_csv(file_path):
|
||||
print(f"CSV loaded. Columns: {df.columns}")
|
||||
return df
|
||||
|
||||
|
||||
def get_vader_sentiment(text):
|
||||
analyzer = SentimentIntensityAnalyzer()
|
||||
sentiment = analyzer.polarity_scores(text)
|
||||
@ -23,12 +25,8 @@ def classify_sentiment(compound_score):
|
||||
|
||||
|
||||
def analyze_sentiment(df):
    """Add VADER sentiment columns to *df* and return it.

    Two columns are written to the frame that was passed in (the frame is
    modified in place, not copied):

    * ``vader_sentiment`` - ``get_vader_sentiment`` applied to every value
      of the ``text`` column.
    * ``vader_sentiment_class`` - ``classify_sentiment`` applied to every
      value of the new ``vader_sentiment`` column.
    """
    text_column = df['text']
    df['vader_sentiment'] = text_column.apply(get_vader_sentiment)

    # Classification reads back the column written just above.
    score_column = df['vader_sentiment']
    df['vader_sentiment_class'] = score_column.apply(classify_sentiment)

    return df
|
||||
|
||||
|
||||
@ -36,3 +34,8 @@ def save_to_csv(df, output_path):
|
||||
df.to_csv(output_path, index=False)
|
||||
print(f"Results saved to {output_path}")
|
||||
|
||||
|
||||
def clean_tweet(tweet):
    """Return *tweet* with line breaks and URLs replaced by spaces.

    Every newline character becomes a single space, and every ``http://``
    or ``https://`` URL (the scheme plus the following run of
    non-whitespace characters) is replaced by a single space. Surrounding
    whitespace is left untouched, so a URL in the middle of a sentence
    leaves consecutive spaces behind.
    """
    # Plain substring replacement; no regex is needed for a literal newline.
    tweet = tweet.replace('\n', ' ')
    # Raw string: '\S' (and the former '\/') are invalid escapes in a normal
    # string literal and raise a SyntaxWarning on Python 3.12+.
    tweet = re.sub(r'https?://\S+', ' ', tweet)
    return tweet
|
||||
|
@ -52,41 +52,6 @@
|
||||
"outputs": [],
|
||||
"execution_count": 65
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-12T15:21:16.836060Z",
|
||||
"start_time": "2025-01-12T15:21:16.785607Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"btc_data.info()\n",
|
||||
"#btc_tweets.info()"
|
||||
],
|
||||
"id": "b7e9b6bb66c7b3c4",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"DatetimeIndex: 6742281 entries, 2012-01-01 10:01:00 to NaT\n",
|
||||
"Data columns (total 5 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 Open float64\n",
|
||||
" 1 High float64\n",
|
||||
" 2 Low float64\n",
|
||||
" 3 Close float64\n",
|
||||
" 4 Volume float64\n",
|
||||
"dtypes: float64(5)\n",
|
||||
"memory usage: 308.6 MB\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 50
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@ -115,7 +80,6 @@
|
||||
" \"user_favourites\", \"user_verified\", \"source\", \"is_retweet\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Drop columns from btc_tweets\n",
|
||||
"btc_tweets_cleaned = btc_tweets.drop(columns=columns_to_drop_tweets)\n",
|
||||
"\n",
|
||||
"output_path = \"../data/btc_tweets_cleaned.csv\"\n",
|
||||
|
@ -41,7 +41,7 @@ def fetch_tweets_with_hashtags(hashtags, max_results=10):
|
||||
|
||||
def save_to_csv(data):
|
||||
try:
|
||||
# Dynamically construct the path to `BitSent/data/tweets_with_hashtags.csv`
|
||||
|
||||
current_dir = os.path.dirname(__file__)
|
||||
data_dir = os.path.join(current_dir, "..", "data")
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
Loading…
Reference in New Issue
Block a user