# Import neccessary tools
import pandas as pd
import numpy as np 
import seaborn as sns
import re
import matplotlib.pyplot as plt
import statsmodels.api as sm 
import warnings
warnings.filterwarnings('ignore')


# Load data into a dataframe
data = pd.read_csv("amas_combined.csv", low_memory= False)
# Take a peek at the dataframe
data.head(10)


# Drop the unnecessary columns
needed_columns = ['tweet_full_text', 'tweet_id', 'user_description', 'tweet_retweet_count']
for col_name in data.columns:
    if col_name not in needed_columns:
        data = data.drop(col_name, 1)
# Rearange order of columns
data = data[['tweet_id', 'user_description', 'tweet_full_text','tweet_retweet_count']]
data.head()


# regex for all nominees
TAYLOR_SWIFT = r"(?i)(taylor swift|#taylor|taylor|taylorswift13|#taylorswift)"
DRAKE = r"(?i)(drake|drizzy)"
IMAGINE_DRAGONS = r"(?i)(imagine dragons|dragons|imaginedragons|imagine dragon|imaginedragon)"
MIGOS = r"(?i)(migos)"
POST_MALONE = r"(?i)(post malone|posty|postmalone|malone)"
ED_SHEERAN = r"(?i)(edsheeran|ed sheeran|sheeran)"
CAMILA = r"(?i)(camila_cabello|camila|cabello)"
CARDI = r"(?i)(iamcardib|cardi|cardi b)"
DUA = r"(?i)(dualipa|dua|lipa)"
KHALID = r"(?i)(thegreatkhalid|khalid)"
TENTACION = r"(?i)(xxxtentacion|tentacion|xxx)"
BRUNO = r"(?i)(bruno)"
BEBE = r"(?i)(beberexha|bebe|rexha)"
ZEDD = r"(?i)(zedd|zed)"
ARIANA = r"(?i)(arianagrande|ariana|grande)"
DEMI = r"(?i)(ddlovato|demi|lovato)"
SHAWN = r"(?i)(shawnmendes|shawn|mendes)"
MAROON = r"(?i)(maroon5|maroon|adamlevine|levine)"
KANE_BROWN = r"(?i)(kanebrown|kane|brown)"
LUKE_BRYAN = r"(?i)(lukebryanonline|luke|bryan)"
THOMAS_RHETT = r"(?i)(thomasrhett|thomas|rhett)"
CARRIE = r"(?i)(carrieunderwood|underwood|carrie)"
KELSEA = r"(?i)(kelseaballerini|kelsea|ballerini)"
MAREN = r"(?i)(marenmorris|maren|morris)"
FLORIDA_GEORGIA = r"(?i)(flagaLine|florida georgia line|fgl|florida|georgia)"
DAN_SHAY = r"(?i)(danandshay|dan and shay|dan \+ shay)"
LANCO = r"(?i)(lancomusic|lanco)"
LIL_UZI = r"(?i)(liluzivert|lil uzi|uzi)"
RIHANNA = r"(?i)(rihanna)"
ELLA = r"(?i)(ellamai|ella|mai)"
SZA = r"(?i)(\bSZA\b|@SZA|#SZA)"
BTS = r"(?i)(\bBTS\b|@bts_bighit|#BTS)"
PINK = r"(?i)(pink|p!nk)"
PANIC_DISCO = r"(?i)(panicatthedisco|panic at the disco|patd|panic)"
PORTUGAL = r"(?i)(portugaltheman|ptm|portugal)"
DADDY_YANKEE = r"(?i)(daddy_yankee|daddy yankee|yankee)"
BALVIN = r"(?i)(jbalvin|balvin)"
OZUNA = r"(?i)(ozuna)"
LAUREN = r"(?i)(lauren_daigle|lauren daigle|daigle)"
MERCY_ME = r"(?i)(mercyme|mercy me)"
ZACH_WILL = r"(?i)(zachwilliams|zach williams|williams)"
MARSHMELLO = r"(?i)(marshmello|marshmellomusic)"
CHAIN_SMOKERS = r"(?i)(thechainsmokers|chainsmokers|chainsmoker)"


# Function that classify nominee given string
def classifier(row):
    if re.search(TAYLOR_SWIFT, row['tweet_full_text']) != None:
        return "Taylor Swift"
    elif re.search(DRAKE, row['tweet_full_text']) != None:
        return "Drake"  
    elif re.search(IMAGINE_DRAGONS, row['tweet_full_text']) != None:
        return "Imagine Dragons"
    elif re.search(MIGOS, row['tweet_full_text']) != None:
        return "Migos"
    elif re.search(POST_MALONE, row['tweet_full_text']) != None:
        return "Post Malone"
    elif re.search(ED_SHEERAN, row['tweet_full_text']) != None:
        return "Ed Sheeran"
    elif re.search(CAMILA, row['tweet_full_text']) != None:
        return "Camila Cabello"      
    elif re.search(CARDI, row['tweet_full_text']) != None:
        return "Cardi B"
    elif re.search(DUA, row['tweet_full_text']) != None:
        return "Dua Lipa"    
    elif re.search(KHALID, row['tweet_full_text']) != None:
        return "Khalid"
    elif re.search(TENTACION, row['tweet_full_text']) != None:
        return "XXXTentacion"
    elif re.search(BRUNO, row['tweet_full_text']) != None:
        return "Bruno Mars"
    elif re.search(BEBE, row['tweet_full_text']) != None:
        return "Bebe Rexha"     
    elif re.search(ZEDD, row['tweet_full_text']) != None:
        return "Zedd"
    elif re.search(ARIANA, row['tweet_full_text']) != None:
        return "Ariana Grande"
    elif re.search(DEMI, row['tweet_full_text']) != None:
        return "Demi Lavoto"
    elif re.search(SHAWN, row['tweet_full_text']) != None:
        return "Shawn Mendes"
    elif re.search(MAROON, row['tweet_full_text']) != None:
        return "Maroon 5"
    elif re.search(KANE_BROWN, row['tweet_full_text']) != None:
        return "Kane Brown"
    elif re.search(LUKE_BRYAN, row['tweet_full_text']) != None:
        return "Luke Bryan"
    elif re.search(THOMAS_RHETT, row['tweet_full_text']) != None:
        return "Thomas Rhett"   
    elif re.search(CARRIE, row['tweet_full_text']) != None:
        return "Carrie Underwood"
    elif re.search(KELSEA, row['tweet_full_text']) != None:
        return "Kelsea Ballerini"  
    elif re.search(MAREN, row['tweet_full_text']) != None:
        return "Maren Morris"
    elif re.search(FLORIDA_GEORGIA, row['tweet_full_text']) != None:
        return "Florida Georgia Line"
    elif re.search(DAN_SHAY, row['tweet_full_text']) != None:
        return "Dan + Shay"
    elif re.search(LANCO, row['tweet_full_text']) != None:
        return "LANCO"
    elif re.search(LIL_UZI, row['tweet_full_text']) != None:
        return "Lil Uzi Vert"
    elif re.search(RIHANNA, row['tweet_full_text']) != None:
        return "Rihanna"
    elif re.search(ELLA, row['tweet_full_text']) != None:
        return "Ella Mai"
    elif re.search(SZA, row['tweet_full_text']) != None:
        return "SZA"
    elif re.search(PINK, row['tweet_full_text']) != None:
        return "P!NK"
    elif re.search(PANIC_DISCO, row['tweet_full_text']) != None:
        return "Panic! At The Disco"   
    elif re.search(PORTUGAL, row['tweet_full_text']) != None:
        return "Portugal. The Man"
    elif re.search(DADDY_YANKEE, row['tweet_full_text']) != None:
        return "Daddy Yankee"
    elif re.search(BALVIN, row['tweet_full_text']) != None:
        return "J Balvin" 
    elif re.search(OZUNA, row['tweet_full_text']) != None:
        return "Ozuna"   
    elif re.search(LAUREN, row['tweet_full_text']) != None:
        return "Lauren Daigle"  
    elif re.search(MERCY_ME, row['tweet_full_text']) != None:
        return "MercyMe"
    elif re.search(ZACH_WILL, row['tweet_full_text']) != None:
        return "Zach Williams"
    elif re.search(MARSHMELLO, row['tweet_full_text']) != None:
        return "Marshmello"
    elif re.search(CHAIN_SMOKERS, row['tweet_full_text']) != None:
        return "Chainsmokers" 
    elif re.search(BTS, row['tweet_full_text']) != None:
        return "BTS"


# Now, to classify the dataframe
data['Nominee'] = data.apply(classifier, axis=1)
data


# Make another dataframe only containing the nominees and number of times they were classified
nominee_series = data['Nominee'].value_counts()
nom_df = pd.DataFrame({'nominee': nominee_series.index, 'mentions': nominee_series.values})
nom_df


# Time to make the bar graph
plt.figure(figsize = (12, 10))
sns.set_theme(style="whitegrid")
sns.barplot(x = "mentions", y="nominee", data=nom_df)
plt.title("Nominee and Mentions", fontsize=14)
plt.xlabel("# of Mentions", fontsize=14)
plt.ylabel("Nominee", fontsize=14)

Text(0, 0.5, 'Nominee')


trend_data = pd.read_csv('trends_combined.csv')
# Take a peek 
trend_data.head(10)


# Drop unnecessary columns
trend_data = trend_data.drop('query', 1)
trend_data = trend_data.drop('url', 1)
trend_data = trend_data.drop('woeid', 1)
trend_data = trend_data.drop('time', 1)
trend_data.head(10)


nominees = nom_df['nominee']
# Match trend since there is no trend of 'Camila Cabello'
nominees[0] = 'Camila'
nom_trend = trend_data.copy()
# Drop the rows with no mention of any of the nominees
for index, row in nom_trend.iterrows():
    found = False
    for n in nominees:
        if row['name'] == n:
            found = True
    if found == False:
        nom_trend.drop(index, inplace = True)
nom_trend


# Group by name
group_df = nom_trend.drop('location', 1).groupby(['name']).mean()
group_df = group_df.reset_index()
group_df = group_df.sort_values(by='tweet_volume', ascending=False)
group_df = group_df.dropna()
group_df


# Time to make the bar graph
plt.figure(figsize = (15, 12))
sns.set_theme(style="whitegrid")
sns.barplot(x = "name", y="tweet_volume", data=group_df)
plt.title("Nominee and Trends", fontsize=14)
plt.xlabel("Nominee", fontsize=14)
plt.ylabel("Tweet Volume", fontsize=14)

Text(0, 0.5, 'Tweet Volume')


# We can group by location 
loc_df = nom_trend.drop('name', 1).groupby(['location']).sum()
loc_df = loc_df.reset_index()
loc_df = loc_df.sort_values(by='tweet_volume', ascending=False)
loc_df


# Time to make the bar graph
plt.figure(figsize = (20, 15))
sns.set_theme(style="whitegrid")
sns.barplot(x = "tweet_volume", y="location", data=loc_df)
plt.title("Location and Trends", fontsize=14)
plt.xlabel("Tweet Volume (in millions)", fontsize=14)
plt.ylabel("Location", fontsize=14)

Text(0, 0.5, 'Location')


# Making the 4 Graphs 
# Array of the top 4 cities
cities = ['Portland' , 'Phoenix' , 'Long Beach' , 'Chicago']
# Make a bargraph on the top 4 Cities in tweet volume
for city in cities:
    temp_df = nom_trend[nom_trend['location'] == city]
    temp_df  = temp_df.groupby(['name']).mean()
    temp_df  = temp_df.reset_index()
    temp_df  = temp_df.sort_values(by='tweet_volume', ascending=False)
    temp_df = temp_df.dropna()
    plt.figure(figsize = (20, 15))
    sns.set_theme(style="whitegrid")
    sns.barplot(x = "name", y="tweet_volume", data=temp_df)
    plt.title("Nominee and Tweet Volume in " + city, fontsize=14)
    plt.xlabel("Nominee", fontsize=14)
    plt.ylabel("Tweet Volume", fontsize=14)
    plt.show()


#Load data into a dataframe
data_stat = pd.read_csv("categories_winners.csv", low_memory= False)
#Take a peek at the dataframe
data_stat.head(10)


# Statistical analysis - plot wins vs mentions
nom_df['Win?'] = 0
for i in range (0, len(nom_df['nominee'])):
    for j in range (0, len(data_stat['Winners'])):
        if nom_df.at[i, 'nominee'] in data_stat.at[j, 'Winners'] :
            nom_df['Win?'].loc[nom_df.nominee == nom_df.at[i, 'nominee']] = 1


nom_df.head(10)


#Regression and analysis
log_reg = sm.Logit(nom_df['Win?'], nom_df.mentions).fit()
s = sns.lmplot(x = "mentions", y="Win?", data=nom_df, logistic=True, n_boot=500, y_jitter=.03)
plt.title("Mentions and Wins", fontsize=14)
plt.xlabel("Mentions", fontsize=14)
s.fig.set_size_inches(15,5)
print(log_reg.summary())

Optimization terminated successfully.
         Current function value: 0.647075
         Iterations 6
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   Win?   No. Observations:                   42
Model:                          Logit   Df Residuals:                       41
Method:                           MLE   Df Model:                            0
Date:                Sat, 19 Dec 2020   Pseudo R-squ.:                 0.04122
Time:                        19:15:40   Log-Likelihood:                -27.177
converged:                       True   LL-Null:                       -28.346
Covariance Type:            nonrobust   LLR p-value:                       nan
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
mentions       0.0005      0.000      1.310      0.190      -0.000       0.001
==============================================================================


#add new variable
nom_df['Trend?'] = 0
group_df = group_df.dropna()
group_df = group_df.reset_index(drop=True)
print(group_df)
for i in range (0, len(nom_df['nominee'])):
    for j in range (0, len(group_df['name'])):
        if nom_df.at[i, 'nominee'] in group_df.at[j, 'name'] :
            nom_df['Trend?'].loc[nom_df.nominee == nom_df.at[i, 'nominee']] = 1


nom_df.head(10)

           name  tweet_volume
0        Camila  1.034901e+06
1  Taylor Swift  2.721614e+05
2         Drake  7.571300e+04
3       Cardi B  6.755200e+04
4  Shawn Mendes  4.029356e+04
5        Khalid  3.883995e+04
6   Post Malone  2.632017e+04


# Regression and analysis
log_reg = sm.Logit(nom_df['Win?'], nom_df[['Trend?']]).fit()
s = sns.barplot(x = "Trend?", y="Win?", data=nom_df)
plt.title("Trending vs Winning", fontsize=14)
plt.xlabel("Trend?", fontsize=14)
print(log_reg.summary())

Optimization terminated successfully.
         Current function value: 0.645975
         Iterations 6
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   Win?   No. Observations:                   42
Model:                          Logit   Df Residuals:                       41
Method:                           MLE   Df Model:                            0
Date:                Sat, 19 Dec 2020   Pseudo R-squ.:                 0.04285
Time:                        19:15:40   Log-Likelihood:                -27.131
converged:                       True   LL-Null:                       -28.346
Covariance Type:            nonrobust   LLR p-value:                       nan
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Trend?         1.7918      1.080      1.659      0.097      -0.325       3.909
==============================================================================


#for each trending artist, compare their tweet volume to the city's tweet volume, and create a logistic regression with their 
#likelihood to win based on that

for city in cities:
    temp_df = nom_trend[nom_trend['location'] == city]
    temp_df  = temp_df.groupby(['name']).mean()
    temp_df  = temp_df.reset_index()
    temp_df  = temp_df.sort_values(by='tweet_volume', ascending=False)
    temp_df = temp_df.dropna()
    temp_df = temp_df.reset_index(drop=True)
    nom_df[city] = 0.0
    for i in range (0, len(nom_df['nominee'])):
        for j in range (0, len(temp_df['tweet_volume'])):
            if nom_df.at[i, 'nominee'] in temp_df.at[j, 'name'] :
                k = loc_df[loc_df['location'] == city]['tweet_volume']
                l = temp_df.at[j, 'tweet_volume']/k.values[0]
                nom_df[city].loc[nom_df.nominee == nom_df.at[i, 'nominee']] = l
    # Regression and analysis
    log_reg = sm.Logit(nom_df['Win?'], nom_df[city]).fit()
    s = sns.lmplot(x = city, y="Win?", data=nom_df, logistic=True, n_boot=500, y_jitter=.03)
    plt.title(city, fontsize=14)
    plt.xlabel((city + " Popularity"), fontsize=14)
    s.fig.set_size_inches(15,5)
    print(log_reg.summary())

Warning: Maximum number of iterations has been exceeded.
         Current function value: 0.610630
         Iterations: 35
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   Win?   No. Observations:                   42
Model:                          Logit   Df Residuals:                       41
Method:                           MLE   Df Model:                            0
Date:                Sat, 19 Dec 2020   Pseudo R-squ.:                 0.09522
Time:                        19:15:53   Log-Likelihood:                -25.646
converged:                      False   LL-Null:                       -28.346
Covariance Type:            nonrobust   LLR p-value:                       nan
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Portland    2269.8250   4.31e+06      0.001      1.000   -8.44e+06    8.45e+06
==============================================================================

Possibly complete quasi-separation: A fraction 0.12 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.
Optimization terminated successfully.
         Current function value: 0.660961
         Iterations 9
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   Win?   No. Observations:                   42
Model:                          Logit   Df Residuals:                       41
Method:                           MLE   Df Model:                            0
Date:                Sat, 19 Dec 2020   Pseudo R-squ.:                 0.02065
Time:                        19:15:58   Log-Likelihood:                -27.760
converged:                       True   LL-Null:                       -28.346
Covariance Type:            nonrobust   LLR p-value:                       nan
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Phoenix       30.6653     37.228      0.824      0.410     -42.301     103.631
==============================================================================
Warning: Maximum number of iterations has been exceeded.
         Current function value: 0.610630
         Iterations: 35
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   Win?   No. Observations:                   42
Model:                          Logit   Df Residuals:                       41
Method:                           MLE   Df Model:                            0
Date:                Sat, 19 Dec 2020   Pseudo R-squ.:                 0.09522
Time:                        19:16:11   Log-Likelihood:                -25.646
converged:                      False   LL-Null:                       -28.346
Covariance Type:            nonrobust   LLR p-value:                       nan
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Long Beach  1445.8949    2.7e+05      0.005      0.996   -5.28e+05    5.31e+05
==============================================================================

Possibly complete quasi-separation: A fraction 0.12 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.
Warning: Maximum number of iterations has been exceeded.
         Current function value: 0.594126
         Iterations: 35
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   Win?   No. Observations:                   42
Model:                          Logit   Df Residuals:                       41
Method:                           MLE   Df Model:                            0
Date:                Sat, 19 Dec 2020   Pseudo R-squ.:                  0.1197
Time:                        19:16:24   Log-Likelihood:                -24.953
converged:                      False   LL-Null:                       -28.346
Covariance Type:            nonrobust   LLR p-value:                       nan
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Chicago     2043.1832   2.11e+07   9.69e-05      1.000   -4.13e+07    4.13e+07
==============================================================================

Possibly complete quasi-separation: A fraction 0.14 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.

	tweet_contributors	tweet_coordinates	tweet_created_at	tweet_display_text_range	tweet_entities	tweet_extended_entities	tweet_favorited	tweet_full_text	tweet_geo	...	user_profile_use_background_image	user_protected	user_screen_name	user_statuses_count	user_time_zone	user_translator_type	user_url	user_utc_offset	user_verified	tweet_source_url
0	NaN	NaN	2018-10-10 08:04:09	[0, 47]	{'hashtags': [{'text': 'AMAs', 'indices': [19,...	NaN	False	RTT marcellejoseph #AMAs #ArianaGrande #AMAsTNT	NaN	...	True	False	songscri	86608	NaN	none	NaN	NaN	False	https://ifttt.com
1	NaN	NaN	2018-10-10 08:04:08	[0, 47]	{'hashtags': [{'text': 'AMAs', 'indices': [19,...	NaN	False	RTT EvelynMarshall #AMAs #ArianaGrande #AMAsTNT	NaN	...	True	False	songscri	86608	NaN	none	NaN	NaN	False	https://ifttt.com
2	NaN	NaN	2018-10-10 08:04:07	[0, 44]	{'hashtags': [{'text': 'AMAs', 'indices': [16,...	NaN	False	RTT Outcastlive #AMAs #ArianaGrande #AMAsTNT	NaN	...	True	False	songscri	86608	NaN	none	NaN	NaN	False	https://ifttt.com
3	NaN	NaN	2018-10-10 08:04:07	[0, 45]	{'hashtags': [{'text': 'AMAs', 'indices': [17,...	NaN	False	RTT TransitFeeds #AMAs #ArianaGrande #AMAsTNT	NaN	...	True	False	songscri	86608	NaN	none	NaN	NaN	False	https://ifttt.com
4	NaN	NaN	2018-10-10 08:04:06	[0, 68]	{'hashtags': [{'text': 'AMAs2018', 'indices': ...	NaN	False	If @NICKIMINAJ isn’t at the #AMAs2018 #AMAs it...	NaN	...	True	False	barbieboyminaj	2227	NaN	none	NaN	NaN	False	http://twitter.com/download/iphone
5	NaN	NaN	2018-10-10 08:04:06	[0, 45]	{'hashtags': [{'text': 'AMAs', 'indices': [17,...	NaN	False	RTT cabematthews #AMAs #ArianaGrande #AMAsTNT	NaN	...	True	False	songscri	86608	NaN	none	NaN	NaN	False	https://ifttt.com
6	NaN	NaN	2018-10-10 08:04:05	[0, 45]	{'hashtags': [{'text': 'AMAs', 'indices': [17,...	NaN	False	RTT EyesOpen_mag #AMAs #ArianaGrande #AMAsTNT	NaN	...	True	False	songscri	86608	NaN	none	NaN	NaN	False	https://ifttt.com
7	NaN	NaN	2018-10-10 08:04:03	[0, 50]	{'hashtags': [{'text': 'AMAs', 'indices': [32,...	{'media': [{'id': 1049933549472636928, 'id_str...	False	Halsey on the red carpet at the #AMAs⁠ ⁠ 2018 ...	NaN	...	False	False	halseyph	5098	NaN	none	https://t.co/RRAvKa5hqB	NaN	False	http://twitter.com/download/iphone
8	NaN	NaN	2018-10-10 08:04:02	[0, 74]	{'hashtags': [{'text': 'AMAs', 'indices': [35,...	{'media': [{'id': 1049933560428163072, 'id_str...	False	Uitslag American Music Awards 2018\n#AMAs #AMA...	NaN	...	False	False	country_nl	5630	NaN	none	http://t.co/H14yrcsrBx	NaN	False	http://publicize.wp.com/
9	NaN	NaN	2018-10-10 08:04:02	[0, 100]	{'hashtags': [{'text': 'AMAs', 'indices': [69,...	{'media': [{'id': 1049933560293933057, 'id_str...	False	En fotos: así se presentaron los artistas en l...	NaN	...	False	False	noticias24	1531150	NaN	none	http://t.co/xJa3Jmn5nj	NaN	True	http://socialtoctoc.com/noticias24

	tweet_id	user_description	tweet_full_text
0	1049933588601479168	.	RTT marcellejoseph #AMAs #ArianaGrande #AMAsTNT
1	1049933586026115072	.	RTT EvelynMarshall #AMAs #ArianaGrande #AMAsTNT
2	1049933583056560128	.	RTT Outcastlive #AMAs #ArianaGrande #AMAsTNT
3	1049933580561014785	.	RTT TransitFeeds #AMAs #ArianaGrande #AMAsTNT
4	1049933579097001984	NICKI MINAJ is my Queen! I’m a proud American ...	If @NICKIMINAJ isn’t at the #AMAs2018 #AMAs it...

	tweet_id	user_description	tweet_full_text	tweet_retweet_count	Nominee
0	1049933588601479168	.	RTT marcellejoseph #AMAs #ArianaGrande #AMAsTNT	0	Ariana Grande
1	1049933586026115072	.	RTT EvelynMarshall #AMAs #ArianaGrande #AMAsTNT	0	Ariana Grande
2	1049933583056560128	.	RTT Outcastlive #AMAs #ArianaGrande #AMAsTNT	0	Ariana Grande
3	1049933580561014785	.	RTT TransitFeeds #AMAs #ArianaGrande #AMAsTNT	0	Ariana Grande
4	1049933579097001984	NICKI MINAJ is my Queen! I’m a proud American ...	If @NICKIMINAJ isn’t at the #AMAs2018 #AMAs it...	0	None
...	...	...	...	...	...
57459	1049559002567258112	Just for Kris Wu .花	@galaxy_nini1201 @AMAs @securitybenefit @KrisW...	1	None
57460	1049558869070860289	จงขังฉันไว้ในหัวใจ 🖤 YOUNG ONE . SON...	เริ่มลุ้นชุด #AMAs แล้ว	1	None
57461	1049558826511491072	I believe we are here to love and be loved‼️ -...	I just voted for @Camila_Cabello for New Artis...	3	Camila Cabello
57462	1049558762975985666	NaN	#KrisWu #AMAs Kris Wu - Freedom ft. Jhené Aiko...	0	None
57463	1049558643203497984	14 \| army \| sister of sue ramirez	anseeeeel 🥰\n\n@BTS_twt \nfavorite social arti...	1	None

	name	query	tweet_volume	url	location	woeid	time
0	America	America	656323.0	http://twitter.com/search?q=America	Albuquerque	2352824	2018-10-09 22:39:00
1	#AMAs	%23AMAs	439388.0	http://twitter.com/search?q=%23AMAs	Albuquerque	2352824	2018-10-09 22:39:00
2	New Mexico	%22New+Mexico%22	63126.0	http://twitter.com/search?q=%22New+Mexico%22	Albuquerque	2352824	2018-10-09 22:39:00
3	James Gunn	%22James+Gunn%22	33083.0	http://twitter.com/search?q=%22James+Gunn%22	Albuquerque	2352824	2018-10-09 22:39:00
4	Category 3	%22Category+3%22	32153.0	http://twitter.com/search?q=%22Category+3%22	Albuquerque	2352824	2018-10-09 22:39:00
5	Met Gala	%22Met+Gala%22	27064.0	http://twitter.com/search?q=%22Met+Gala%22	Albuquerque	2352824	2018-10-09 22:39:00
6	Google Pixel 3	%22Google+Pixel+3%22	26637.0	http://twitter.com/search?q=%22Google+Pixel+3%22	Albuquerque	2352824	2018-10-09 22:39:00
7	#IndigenousPeoplesDay2018	%23IndigenousPeoplesDay2018	17536.0	http://twitter.com/search?q=%23IndigenousPeopl...	Albuquerque	2352824	2018-10-09 22:39:00
8	#RegularChallenge	%23RegularChallenge	15148.0	http://twitter.com/search?q=%23RegularChallenge	Albuquerque	2352824	2018-10-09 22:39:00
9	#RegisterAndVote	%23RegisterAndVote	12268.0	http://twitter.com/search?q=%23RegisterAndVote	Albuquerque	2352824	2018-10-09 22:39:00

	name	tweet_volume	location
0	America	656323.0	Albuquerque
1	#AMAs	439388.0	Albuquerque
2	New Mexico	63126.0	Albuquerque
3	James Gunn	33083.0	Albuquerque
4	Category 3	32153.0	Albuquerque
5	Met Gala	27064.0	Albuquerque
6	Google Pixel 3	26637.0	Albuquerque
7	#IndigenousPeoplesDay2018	17536.0	Albuquerque
8	#RegularChallenge	15148.0	Albuquerque
9	#RegisterAndVote	12268.0	Albuquerque

American Music Awards Tweets¶

Introduction¶

Required Tools¶

Data Preparation¶

Data Tidying¶

Graphing the tweet volume per location¶

Graphing the Top 4 Cities¶

Statistical Analysis¶

Conclusion¶

	nominee	mentions
0	Camila Cabello	9486
1	Taylor Swift	5109
2	BTS	4829
3	Shawn Mendes	2278
4	Cardi B	2246
5	Dua Lipa	1223
6	Ella Mai	724
7	XXXTentacion	518
8	Post Malone	286
9	Ariana Grande	276
10	Bebe Rexha	215
11	Zedd	214
12	Khalid	170
13	J Balvin	157
14	Demi Lavoto	146
15	Carrie Underwood	118
16	Drake	114
17	Panic! At The Disco	106
18	Migos	105
19	P!NK	71
20	Bruno Mars	67
21	Kane Brown	51
22	Daddy Yankee	47
23	Rihanna	37
24	SZA	35
25	Ed Sheeran	35
26	Kelsea Ballerini	33
27	Imagine Dragons	31
28	Chainsmokers	21
29	Portugal. The Man	21
30	Florida Georgia Line	20
31	Lil Uzi Vert	18
32	LANCO	18
33	Thomas Rhett	16
34	Marshmello	12
35	Lauren Daigle	12
36	Luke Bryan	11
37	Maroon 5	10
38	Maren Morris	5
39	Zach Williams	2
40	Ozuna	2
41	MercyMe	1

	name	tweet_volume	location
2191	Taylor Swift	250758.0	Phoenix
2291	Taylor Swift	249522.0	Portland
3180	Camila	1020268.0	Albuquerque
3185	Khalid	37706.0	Albuquerque
3186	Shawn Mendes	35606.0	Albuquerque
...	...	...	...
20414	Taylor Swift	268699.0	Philadelphia
20463	Taylor Swift	268699.0	Phoenix
20562	Taylor Swift	267905.0	Portland
21842	Cardi B	67552.0	Chicago
22786	Taylor Swift	320635.0	Long Beach

	name	tweet_volume
0	Camila	1.034901e+06
8	Taylor Swift	2.721614e+05
3	Drake	7.571300e+04
1	Cardi B	6.755200e+04
7	Shawn Mendes	4.029356e+04
5	Khalid	3.883995e+04
6	Post Malone	2.632017e+04

	location	tweet_volume
46	Portland	2820806.0
44	Phoenix	2660417.0
27	Long Beach	2380677.0
8	Chicago	2117247.0
3	Baltimore	1838701.0
...	...	...
7	Charlotte	1254910.0
4	Baton Rouge	1254534.0
20	Honolulu	1254534.0
0	Albuquerque	1254534.0
16	El Paso	1234131.0

	Category	Winners
0	Favorite Artist - Alternative Rock	Panic! At The Disco
1	Favorite Artist - Adult Contemporary	Shawn Mendes
2	Favorite Artist - Latin	Daddy Yankee
3	Favorite Artist - Contemporary Inspirational	Lauren Daigle
4	Favorite Artist – Electronic Dance Music (EDM)	Marshmello
5	Favorite Soundtrack	“Black Panther: The Album, Music From And Insp...
6	Favorite Social Artist	BTS
7	New Artist of the Year	Camila Cabello
8	Collaboration of the Year	Camila Cabello ft. Young Thug “Havana”
9	Artist Of The Year	Taylor Swift

American Music Awards Tweets¶

Introduction¶

Required Tools¶

Data Preparation¶

Data Tidying¶

Preparing the trending data¶

Graphing the tweet volume per location¶

Graphing the Top 4 Cities¶

Statistical Analysis¶

Conclusion¶