Skip to content

Syverts1/Twitter_SentimentAnalysis_News

Repository files navigation

Twitter Sentiment Analysis of Various News Sources

This project analyzed tweets by BBC, CBS, CNN, Fox, and The New York Times using the VADER Sentiment Analysis tool. The 100 most recent tweets for each source were pulled on 03/27/2018.

Sentiment Analysis of Tweets over Time

Scatter

Overall Sentiment Analysis by News Source

Bar

Observed Trends

  1. In the sample data pulled, the news sources had an overall neutral sentiment. This can be seen in the cluster of points at 0 on the scatter plot, and in the mean compound sentiments, which all fall within ±0.09.

  2. @FoxNews and @nytimes did see the lowest average compound scores (although still relatively neutral)

  3. @CBS saw the highest average compound scores (although still relatively neutral)

Jupyter Notebook Markdown

# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import tweepy
import seaborn as sns

# Twitter API Keys
from config import consumer_key, consumer_secret, access_token, access_token_secret
# Initialize Sentiment Analyzer
# A single VADER analyzer instance is created once here and reused for every
# tweet that is scored later in the notebook.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()


# Authenticate against the Twitter API with the credentials imported from config
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Use the JSON parser so responses can be indexed like plain dicts/lists
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Twitter handles of the news sources to be analyzed
target_users = ["@BBC", "@CBS", "@CNN", "@FoxNews", "@nytimes"]

# One independent, empty output list per column of the results table
compound, pos, neu, neg, user, tweets_ago, date = ([] for _ in range(7))


# Loop through target_users and record the sentiment scores of each tweet.
# NOTE(review): api.search() runs a search query for the handle, so the
# results are presumably recent tweets matching/mentioning the handle rather
# than the account's own timeline (api.user_timeline) -- confirm that this is
# the intended sample.
for item in target_users:

    # Request up to 100 of the most recent tweets for this handle
    public_tweets = api.search(item, count=100, result_type="recent")

    # Number the tweets from 1 in response order; this becomes "Tweets Ago"
    for i, tweet in enumerate(public_tweets["statuses"], start=1):

        # Score the text once and reuse the result for all four metrics
        # instead of re-running the analyzer for every metric.
        scores = analyzer.polarity_scores(tweet["text"])

        compound.append(scores["compound"])
        pos.append(scores["pos"])
        neg.append(scores["neg"])
        neu.append(scores["neu"])
        user.append(item)
        tweets_ago.append(i)
        date.append(tweet["created_at"])

print("Analysis Finished")
Analysis Finished
# Assemble the collected lists into one DataFrame with one row per tweet.
# `columns` fixes the column order at construction time.
sentiments = pd.DataFrame(
    {
        "Media Sources": user,
        "Date": date,
        "Tweet Polarity": compound,
        "Positive": pos,
        "Negative": neg,
        "Neutral": neu,
        "Tweets Ago": tweets_ago,
    },
    columns=[
        "Media Sources",
        "Tweet Polarity",
        "Positive",
        "Negative",
        "Neutral",
        "Tweets Ago",
        "Date",
    ],
)

# Preview the first rows of the table
sentiments.head()
Media Sources Tweet Polarity Positive Negative Neutral Tweets Ago Date
0 @BBC 0.6124 0.182 0.000 0.818 1 Tue Mar 27 02:26:14 +0000 2018
1 @BBC 0.0000 0.000 0.000 1.000 2 Tue Mar 27 02:25:27 +0000 2018
2 @BBC -0.5106 0.000 0.148 0.852 3 Tue Mar 27 02:24:57 +0000 2018
3 @BBC 0.0000 0.000 0.000 1.000 4 Tue Mar 27 02:24:45 +0000 2018
4 @BBC -0.6249 0.000 0.242 0.758 5 Tue Mar 27 02:24:42 +0000 2018
# Write the full sentiments table (including its index) to sentiments.csv
sentiments.to_csv("sentiments.csv")
# Create a scatter plot showing sentiment analysis by media sources:
# one point per tweet, colored by source.
sns.set_style("whitegrid")

# alpha is given as a float; matplotlib expects a numeric transparency value
# and a string such as ".7" can be rejected.
plot = sns.stripplot(x=sentiments["Tweets Ago"], y=sentiments["Tweet Polarity"],
                     hue=sentiments["Media Sources"],
                     size=10, edgecolor="gray", linewidth=.5, alpha=0.7)

plt.legend(title="Media Sources", bbox_to_anchor=(1, 1))
# Date corrected to 03/27: the sampled tweets are stamped Mar 27 (+0000),
# which also matches the bar chart title.
plt.title("Sentiment Analysis of Media Tweets (03/27/2018 UTC)")

# NOTE(review): stripplot appears to place x values at categorical positions
# 0..N-1, so these tick labels may be offset by one from the actual
# "Tweets Ago" values -- confirm.
plt.xticks(np.arange(0, 101, step=10), np.arange(0, 101, step=10))
# Reversed limits: the largest "Tweets Ago" sits on the left, the most
# recent tweet on the right.
plt.xlim(105, -5)

# Save BEFORE showing: once plt.show() has displayed the figure it may be
# released, and a later savefig would write an empty image.
plt.savefig("scatter.png")
plt.show()

png

# Mean compound score ("Tweet Polarity") per media source.
# The numeric column is selected before aggregating so the mean is never
# attempted on the string "Date" column, which raises on current pandas.
grouping = sentiments.groupby("Media Sources")["Tweet Polarity"].mean()

# Turn the group labels back into a regular "Media Sources" column
grouping = grouping.reset_index()

grouping
Media Sources Tweet Polarity
0 @BBC -0.021636
1 @CBS 0.073810
2 @CNN 0.036826
3 @FoxNews -0.085965
4 @nytimes -0.087544
# Create a bar plot to show the overall sentiments
# of the last 100 tweets of each organization

sns.set_style("darkgrid")

plt.bar(grouping["Media Sources"], grouping["Tweet Polarity"], color=sns.color_palette())

# Label every bar with its rounded mean: just below the bar end for negative
# values, just above it for non-negative values.
for index, score in enumerate(grouping["Tweet Polarity"]):
    offset = -.007 if score < 0 else .002
    plt.annotate(round(score, 3), xy=(index, score + offset), ha="center")

plt.xlabel("Media Sources")
plt.ylabel("Tweet Polarity")
plt.title("Overall Media Sentiment based on Twitter (03/27/2018 UTC)")

# Save BEFORE showing: once plt.show() has displayed the figure it may be
# released, and a later savefig would write an empty image.
plt.savefig("bar.png")
plt.show()

png

# Per-source variance of each sentiment score.
# The score columns are selected explicitly so the string "Date" column is
# never aggregated (which raises on current pandas) and "Tweets Ago" is left
# out without a separate drop step.
variance = sentiments.groupby("Media Sources")[
    ["Tweet Polarity", "Positive", "Negative", "Neutral"]
].var()

variance
Tweet Polarity Positive Negative Neutral
Media Sources
@BBC 0.194284 0.012837 0.009142 0.018791
@CBS 0.105887 0.006269 0.010673 0.012970
@CNN 0.109342 0.007920 0.008880 0.017206
@FoxNews 0.107561 0.010557 0.009402 0.022669
@nytimes 0.163092 0.013003 0.014673 0.024003