# IMPORT HERE!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns


# Load the scraped Fandango ratings into a DataFrame.
fandango = pd.read_csv(filepath_or_buffer="fandango_scrape.csv")


# Peek at the first five rows.
fandango.head(n=5)


# Column names, dtypes and non-null counts.
fandango.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 504 entries, 0 to 503
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   FILM    504 non-null    object 
 1   STARS   504 non-null    float64
 2   RATING  504 non-null    float64
 3   VOTES   504 non-null    int64  
dtypes: float64(2), int64(1), object(1)
memory usage: 15.9+ KB


# Summary statistics of the numeric columns.
fandango.describe()


# Relationship between a film's true rating and the number of votes it received.
plt.figure(figsize=(12, 7), dpi=150)
sns.scatterplot(x='RATING', y='VOTES', data=fandango, ax=plt.gca())

<AxesSubplot:xlabel='RATING', ylabel='VOTES'>


# Pairwise correlation between the numeric columns (STARS, RATING, VOTES).
# FILM holds strings, so restrict to numeric dtypes explicitly: pandas >= 2.0 no
# longer drops non-numeric columns silently and DataFrame.corr() would raise.
fandango.select_dtypes(include='number').corr()


# Titles look like "Film Title (2015)": pull the four-digit year out of the
# trailing parentheses in a single, explicit regex step.  The previous approach
# split on the pattern '( )' (which only works because pandas interprets it as a
# regex with a capture group) and then stripped the parentheses with two
# str.replace calls whose regex/literal default differs between pandas versions.
# YEAR is kept as a string column; titles without a trailing "(YYYY)" become NaN.
fandango['YEAR'] = fandango['FILM'].str.extract(r'\((\d{4})\)\s*$', expand=False)


fandango.head()


# How many films does the data set contain for each release year?
fandango.loc[:, 'YEAR'].value_counts()

2015    478
2014     23
2012      1
1964      1
2016      1
Name: YEAR, dtype: int64


# Bar chart of the number of films per release year.
plt.figure(figsize=(7, 5))
sns.countplot(data=fandango, x='YEAR', ax=plt.gca())

<AxesSubplot:xlabel='YEAR', ylabel='count'>


# The ten films with the most votes.
# Select them by value instead of taking the first ten rows: the old code
# computed nlargest(...) but discarded it and relied on the CSV already being
# sorted by VOTES.
fandango.nlargest(10, 'VOTES')


# Number of films with zero votes.
# Summing the boolean mask counts rows directly; the previous
# `.count()['FILM']` counted non-null FILM values, which would undercount if a
# title were ever missing.
int((fandango['VOTES'] == 0).sum())

69


# Build `df`: only the films that received at least one vote.
#
# NOTE: the helper column is written onto `fandango` itself (as before) and is
# left there on purpose, because a later cell drops 'zero_votes' from `fandango`.
fandango['zero_votes'] = fandango['VOTES'] == 0

# Boolean-mask filtering replaces the set_index / drop(True) / reset_index
# sequence, which raised KeyError whenever no film had zero votes.
# The result has a fresh 0..n-1 index and no helper column.
df = (
    fandango.loc[~fandango['zero_votes']]
    .drop(columns='zero_votes')
    .reset_index(drop=True)
)


# Sanity check: the smallest vote count left should be at least 1.
df['VOTES'].min()

1


# Compare the distribution of true ratings with the stars Fandango displays.
# Plot the reviewed-only frame `df` (VOTES > 0) built above rather than the full
# `fandango` table: films without votes appear to carry 0/0 scores (see the
# describe() minimums) and would distort both density curves.
plt.figure(figsize=(12,5))
sns.kdeplot(x='RATING',data=df,clip=[0,5],label='True Rating',bw_adjust=0.5,fill=True)
sns.kdeplot(x='STARS',data=df,clip=[0,5],label='STARS Displayed',bw_adjust=0.5,fill=True)
plt.xlim(0.9,5.9)
plt.ylim(0,0.6)
# Place the legend outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.3,0.5))

<matplotlib.legend.Legend at 0x23e74978948>


# Gap between the stars shown on the site and the true average rating,
# rounded to one decimal place.
fandango['STARS_DIFF'] = (fandango['STARS'] - fandango['RATING']).round(1)


# Remove the temporary 'zero_votes' helper column from `fandango`.
# errors='ignore' makes the cell safe to re-run: without it a second execution
# raises KeyError because the column is already gone.
fandango = fandango.drop(columns='zero_votes', errors='ignore')


fandango


# How often does each displayed-vs-true rating gap occur?
# Only reviewed films (VOTES > 0) are counted: films without votes appear to
# have STARS == RATING == 0, i.e. a gap of 0.0, and would inflate the
# "no difference" bar.
plt.figure(figsize=(12,5),dpi=200)
sns.countplot(x='STARS_DIFF',data=fandango[fandango['VOTES'] > 0],palette='magma')

<AxesSubplot:xlabel='STARS_DIFF', ylabel='count'>


# Films whose displayed stars exceed the true rating by exactly one star.
fandango.loc[fandango['STARS_DIFF'].eq(1)]


# Load the aggregate scores from the other review sites.
all_sites = pd.read_csv(filepath_or_buffer="all_sites_scores.csv")


# Peek at the first five rows.
all_sites.head(n=5)


# Column names, dtypes and non-null counts.
all_sites.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 146 entries, 0 to 145
Data columns (total 8 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   FILM                        146 non-null    object 
 1   RottenTomatoes              146 non-null    int64  
 2   RottenTomatoes_User         146 non-null    int64  
 3   Metacritic                  146 non-null    int64  
 4   Metacritic_User             146 non-null    float64
 5   IMDB                        146 non-null    float64
 6   Metacritic_user_vote_count  146 non-null    int64  
 7   IMDB_user_vote_count        146 non-null    int64  
dtypes: float64(2), int64(5), object(1)
memory usage: 9.2+ KB


# Summary statistics of the numeric columns.
all_sites.describe()


# Rotten Tomatoes critic score vs. user score, both on a 0-100 axis.
plt.figure(figsize=(12, 5), dpi=200)
plt.gca().set(xlim=(0, 100), ylim=(0, 100))
sns.scatterplot(data=all_sites, x='RottenTomatoes', y='RottenTomatoes_User', ax=plt.gca())

<AxesSubplot:xlabel='RottenTomatoes', ylabel='RottenTomatoes_User'>


# Critic score minus user score: positive means critics rated the film higher.
all_sites['Rotten_Diff'] = all_sites['RottenTomatoes'].sub(all_sites['RottenTomatoes_User'])


# Mean absolute gap between critics and users.
all_sites['Rotten_Diff'].abs().mean()

15.095890410958905


# Distribution of the critic-minus-user gap, with a KDE overlay.
# sns.displot is figure-level: it ignores the current figure and opens its own,
# so the preceding plt.figure(...) only produced an empty 8500x5000 canvas.
# The axes-level histplot draws into the figure created here.  dpi is set to 200
# like the other cells; the old 500 never applied to the plot and would render
# a 42-megapixel image now that it does.
plt.figure(figsize=(17,10),dpi=200)
sns.histplot(x='Rotten_Diff',data=all_sites,kde=True)

<seaborn.axisgrid.FacetGrid at 0x23e74be4948>

<Figure size 8500x5000 with 0 Axes>


# Distribution of the absolute critic/user gap, with a KDE overlay.
# Uses the axes-level histplot so the plot lands in the figure sized here;
# the figure-level displot opened a separate figure and left this one empty.
plt.figure(figsize=(20,7),dpi=200)
val=all_sites['Rotten_Diff'].abs()
sns.histplot(x=val,bins=20,kde=True)

<seaborn.axisgrid.FacetGrid at 0x23e74bd1e88>

<Figure size 4000x1400 with 0 Axes>


# Five films users liked far more than critics (most negative gap).
f = all_sites.nsmallest(5, 'Rotten_Diff')
f.loc[:, ['FILM', 'Rotten_Diff']]


# Five films critics liked far more than users (most positive gap).
f = all_sites.nlargest(5, 'Rotten_Diff')
f.loc[:, ['FILM', 'Rotten_Diff']]


# Metacritic critic score (x axis 0-100) vs. Metacritic user score (y axis 0-10).
plt.figure(figsize=(12, 5), dpi=200)
sns.scatterplot(data=all_sites, x='Metacritic', y='Metacritic_User', ax=plt.gca())
plt.xlim(left=0, right=100)
plt.ylim(bottom=0, top=10)

(0.0, 10.0)


# Number of user votes on Metacritic vs. on IMDB for each film.
plt.figure(figsize=(12, 5), dpi=200)
sns.scatterplot(
    data=all_sites,
    x='Metacritic_user_vote_count',
    y='IMDB_user_vote_count',
    ax=plt.gca(),
)
plt.xlim(left=0, right=2000)
plt.ylim(bottom=0, top=350000)

(0.0, 350000.0)


# Film with the highest IMDB user vote count.
all_sites.nlargest(1, 'IMDB_user_vote_count')


# Film with the highest Metacritic user vote count.
all_sites.nlargest(1, 'Metacritic_user_vote_count')


# Inner-join the two tables on FILM and inspect the shape of the result.
all_sites.merge(fandango, on='FILM', how='inner').info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 145 entries, 0 to 144
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   FILM                        145 non-null    object 
 1   RottenTomatoes              145 non-null    int64  
 2   RottenTomatoes_User         145 non-null    int64  
 3   Metacritic                  145 non-null    int64  
 4   Metacritic_User             145 non-null    float64
 5   IMDB                        145 non-null    float64
 6   Metacritic_user_vote_count  145 non-null    int64  
 7   IMDB_user_vote_count        145 non-null    int64  
 8   Rotten_Diff                 145 non-null    int64  
 9   STARS                       145 non-null    float64
 10  RATING                      145 non-null    float64
 11  VOTES                       145 non-null    int64  
 12  YEAR                        145 non-null    object 
 13  STARS_DIFF                  145 non-null    float64
dtypes: float64(5), int64(7), object(2)
memory usage: 17.0+ KB


# Films present in both data sets, with the Fandango columns first.
all_tables = fandango.merge(all_sites, on='FILM', how='inner')
all_tables


# Rescale every site's score onto Fandango's 0-5 range, rounded to one decimal.
# Each entry: (source column, normalized column, divisor).
scalings = [
    ('RottenTomatoes', 'RT_Norm', 20),
    ('RottenTomatoes_User', 'RTU_Norm', 20),
    ('Metacritic', 'Meta_Norm', 20),
    ('Metacritic_User', 'Meta_U_Norm', 2),
    ('IMDB', 'IMDB_Norm', 2),
]
for source_col, norm_col, divisor in scalings:
    all_tables[norm_col] = (all_tables[source_col] / divisor).round(1)


all_tables.head()


# Collect all 0-5 scores side by side, one row per film.
#
# Every column is taken from `all_tables` (the merged frame).  The previous
# version took STARS/RATING from the full `fandango` table and concatenated
# them by index with the merged columns; the two frames have unrelated row
# numbering, so Fandango scores were paired with other films' site scores and
# hundreds of all-NaN rows were appended.  (It also passed `axis` positionally
# to pd.concat, which pandas 2 no longer accepts.)
# .copy() gives an independent frame so columns can be added to it later
# without a SettingWithCopyWarning.
score_columns = ['STARS', 'RATING', 'RT_Norm', 'RTU_Norm', 'Meta_Norm', 'Meta_U_Norm', 'IMDB_Norm']
norm_scores = all_tables[score_columns].copy()


norm_scores.head()


norm_scores


def move_legend(ax, new_loc, **kws):
    """Recreate the legend of ``ax`` at a new location.

    The handles, label texts and title of the existing legend are read and
    passed to ``ax.legend`` again with ``loc=new_loc``, which replaces the old
    legend.  (Recent seaborn versions ship ``sns.move_legend`` for the same
    purpose.)

    Parameters
    ----------
    ax : matplotlib Axes
        Axes whose legend (``ax.legend_``) should be moved.
    new_loc : str or int
        Location forwarded to ``ax.legend`` as ``loc``.
    **kws
        Extra keyword arguments forwarded to ``ax.legend``.

    Raises
    ------
    ValueError
        If ``ax`` has no legend to move.
    """
    old_legend = ax.legend_
    if old_legend is None:
        raise ValueError("ax has no legend to move")

    # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and later
    # removed the old name; support both spellings.
    handles = getattr(old_legend, "legend_handles", None)
    if handles is None:
        handles = old_legend.legendHandles

    labels = [t.get_text() for t in old_legend.get_texts()]
    title = old_legend.get_title().get_text()
    ax.legend(handles, labels, loc=new_loc, title=title, **kws)


#plt.figure(figsize=(13,11),dpi=200)
#sns.kdeplot(data=norm_scores,x='STARS',fill=True,clip=[0,5],label='STARS')
#sns.kdeplot(data=norm_scores,x='RATING',fill=True,clip=[0,5],label='RATING')
#sns.kdeplot(data=norm_scores,x='RT_Norm',fill=True,clip=[0,5],label='RT_Norm')
#sns.kdeplot(data=norm_scores,x='RTU_Norm',fill=True,clip=[0,5],label='RTU_Norm')
#sns.kdeplot(data=norm_scores,x='Meta_Norm',fill=True,clip=[0,5],label='Meta_Norm')
#sns.kdeplot(data=norm_scores,x='Meta_U_Norm',fill=True,clip=[0,5],label='Meta_U_Norm')
#sns.kdeplot(data=norm_scores,x='IMDB_Norm',fill=True,clip=[0,5],label='IMDB_Norm')

# Density curves of every score column on one set of axes, clipped to 0-5.
fig, ax = plt.subplots(figsize=(15, 6), dpi=100)
sns.kdeplot(data=norm_scores, clip=[0, 5], fill=True, ax=ax)
# The default legend position is replaced by the upper-left corner.
move_legend(ax, new_loc="upper left")


# Rotten Tomatoes critic scores vs. Fandango's displayed stars.
fig, ax = plt.subplots(figsize=(15, 6), dpi=150)
sns.kdeplot(
    data=norm_scores.loc[:, ['RT_Norm', 'STARS']],
    palette='Set1',
    clip=[0, 5],
    fill=True,
    ax=ax,
)

<AxesSubplot:ylabel='Density'>


# Histogram of every normalized score column.
# Create the axes explicitly and pass them to both calls: previously `ax` was a
# leftover from the preceding cell, so move_legend() repositioned the legend of
# that older plot instead of this histogram's.
fig, ax = plt.subplots(figsize=(15,6),dpi=100)
sns.histplot(data=norm_scores, ax=ax)
move_legend(ax, "upper left")


# Attach the film titles (aligned on the row index) to the normalized scores.
norm_scores['FILM'] = all_tables.loc[:, 'FILM']


# The ten films with the lowest normalized Rotten Tomatoes critic score.
d = norm_scores.nsmallest(10, 'RT_Norm')

d


# Score distributions across all sites for the ten worst-reviewed films.
fig, ax = plt.subplots(figsize=(15, 6), dpi=150)
sns.kdeplot(data=d, fill=True, clip=[0, 5], ax=ax)

<AxesSubplot:ylabel='Density'>


# Scores of "Taken 3 (2015)" across all sites.
# Look the film up by title rather than by row position 25, which would
# silently point at a different film if the row order ever changed.
norm_scores.loc[norm_scores['FILM'] == 'Taken 3 (2015)'].iloc[0]

STARS                     4.5
RATING                    4.2
RT_Norm                   0.4
RTU_Norm                  2.3
Meta_Norm                 1.3
Meta_U_Norm               2.3
IMDB_Norm                   3
FILM           Taken 3 (2015)
Name: 25, dtype: object


# Sum of the five normalized non-Fandango scores shown for row 25 above
# (RT_Norm, RTU_Norm, Meta_Norm, Meta_U_Norm, IMDB_Norm).
0.4+2.3+1.3+2.3+3

9.3


# Average of those five scores (the sum 9.3 computed above, divided by 5).
9.3/5

1.86

	FILM	STARS	RATING	VOTES
0	Fifty Shades of Grey (2015)	4.0	3.9	34846
1	Jurassic World (2015)	4.5	4.5	34390
2	American Sniper (2015)	5.0	4.8	34085
3	Furious 7 (2015)	5.0	4.8	33538
4	Inside Out (2015)	4.5	4.5	15749

	STARS	RATING	VOTES
count	504.000000	504.000000	504.000000
mean	3.558532	3.375794	1147.863095
std	1.563133	1.491223	3830.583136
min	0.000000	0.000000	0.000000
25%	3.500000	3.100000	3.000000
50%	4.000000	3.800000	18.500000
75%	4.500000	4.300000	189.750000
max	5.000000	5.000000	34846.000000

	STARS	RATING	VOTES
STARS	1.000000	0.994696	0.164218
RATING	0.994696	1.000000	0.163764
VOTES	0.164218	0.163764	1.000000

	FILM	STARS	RATING	VOTES	YEAR
0	Fifty Shades of Grey (2015)	4.0	3.9	34846	2015
1	Jurassic World (2015)	4.5	4.5	34390	2015
2	American Sniper (2015)	5.0	4.8	34085	2015
3	Furious 7 (2015)	5.0	4.8	33538	2015
4	Inside Out (2015)	4.5	4.5	15749	2015
5	The Hobbit: The Battle of the Five Armies (2014)	4.5	4.3	15337	2014
6	Kingsman: The Secret Service (2015)	4.5	4.2	15205	2015
7	Minions (2015)	4.0	4.0	14998	2015
8	Avengers: Age of Ultron (2015)	5.0	4.5	14846	2015
9	Into the Woods (2014)	3.5	3.4	13055	2014

	FILM	RottenTomatoes	RottenTomatoes_User	Metacritic	Metacritic_User	IMDB	Metacritic_user_vote_count	IMDB_user_vote_count
0	Avengers: Age of Ultron (2015)	74	86	66	7.1	7.8	1330	271107
1	Cinderella (2015)	85	80	67	7.5	7.1	249	65709
2	Ant-Man (2015)	80	90	64	8.1	7.8	627	103660
3	Do You Believe? (2015)	18	84	22	4.7	5.4	31	3136
4	Hot Tub Time Machine 2 (2015)	14	28	29	3.4	5.1	88	19560

Capstone Project¶

Overview¶

Goal:¶

Part One: Understanding the Background and Data¶

The Data¶

all_sites_scores.csv¶

fandango_scrape.csv¶

Part Two: Exploring Fandango Displayed Scores versus True User Ratings¶

Part Three: Comparison of Fandango Ratings to Other Sites¶

Rotten Tomatoes¶

MetaCritic¶

IMDB¶

Fandango Scores vs. All Sites¶

Normalize columns to Fandango STARS and RATINGS 0-5¶

Comparing Distribution of Scores Across Sites¶

Column	Definition
FILM	The film in question
RottenTomatoes	The Rotten Tomatoes Tomatometer score for the film
RottenTomatoes_User	The Rotten Tomatoes user score for the film
Metacritic	The Metacritic critic score for the film
Metacritic_User	The Metacritic user score for the film
IMDB	The IMDb user score for the film
Metacritic_user_vote_count	The number of user votes the film had on Metacritic
IMDB_user_vote_count	The number of user votes the film had on IMDb

Column	Definition
FILM	The movie
STARS	Number of stars presented on Fandango.com
RATING	The Fandango ratingValue for the film, as pulled from the HTML of each page. This is the actual average score the movie obtained.
VOTES	Number of people who had reviewed the film at the time we pulled it.

	RottenTomatoes	RottenTomatoes_User	Metacritic	Metacritic_User	IMDB	Metacritic_user_vote_count	IMDB_user_vote_count
count	146.000000	146.000000	146.000000	146.000000	146.000000	146.000000	146.000000
mean	60.849315	63.876712	58.808219	6.519178	6.736986	185.705479	42846.205479
std	30.168799	20.024430	19.517389	1.510712	0.958736	316.606515	67406.509171
min	5.000000	20.000000	13.000000	2.400000	4.000000	4.000000	243.000000
25%	31.250000	50.000000	43.500000	5.700000	6.300000	33.250000	5627.000000
50%	63.500000	66.500000	59.000000	6.850000	6.900000	72.500000	19103.000000
75%	89.000000	81.000000	75.000000	7.500000	7.400000	168.500000	45185.750000
max	100.000000	94.000000	94.000000	9.600000	8.600000	2375.000000	334164.000000

	FILM	Rotten_Diff
3	Do You Believe? (2015)	-66
85	Little Boy (2015)	-61
105	Hitman: Agent 47 (2015)	-42
134	The Longest Ride (2015)	-42
125	The Wedding Ringer (2015)	-39

	FILM	Rotten_Diff
69	Mr. Turner (2014)	42
112	It Follows (2015)	31
115	While We're Young (2015)	31
37	Welcome to Me (2015)	24
40	I'll See You In My Dreams (2015)	24

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm
0	4.0	3.9	1.2	2.1	2.3	1.6	2.1
1	4.5	4.5	3.6	4.0	3.0	3.5	3.6
2	5.0	4.8	3.6	4.2	3.6	3.3	3.7
3	5.0	4.8	4.0	4.2	3.4	3.4	3.7
4	4.5	4.5	4.9	4.5	4.7	4.4	4.3

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm
0	4.0	3.9	1.2	2.1	2.3	1.6	2.1
1	4.5	4.5	3.6	4.0	3.0	3.5	3.6
2	5.0	4.8	3.6	4.2	3.6	3.3	3.7
3	5.0	4.8	4.0	4.2	3.4	3.4	3.7
4	4.5	4.5	4.9	4.5	4.7	4.4	4.3
...	...	...	...	...	...	...	...
499	0.0	0.0	NaN	NaN	NaN	NaN	NaN
500	0.0	0.0	NaN	NaN	NaN	NaN	NaN
501	0.0	0.0	NaN	NaN	NaN	NaN	NaN
502	0.0	0.0	NaN	NaN	NaN	NaN	NaN
503	0.0	0.0	NaN	NaN	NaN	NaN	NaN

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm	FILM
49	4.5	4.1	0.2	1.8	0.6	1.2	2.2	Paul Blart: Mall Cop 2 (2015)
25	4.5	4.2	0.4	2.3	1.3	2.3	3.0	Taken 3 (2015)
28	4.5	4.1	0.4	1.0	1.4	1.2	2.0	Fantastic Four (2015)
54	4.5	4.1	0.4	1.8	1.6	1.8	2.4	Hot Pursuit (2015)
84	5.0	4.6	0.4	2.4	1.4	1.6	3.0	Hitman: Agent 47 (2015)
50	4.0	3.5	0.5	1.8	1.5	2.8	2.3	The Boy Next Door (2015)
77	4.0	4.0	0.6	1.8	1.5	2.0	2.8	Seventh Son (2015)
78	3.0	3.0	0.6	1.5	1.4	1.6	2.8	Mortdecai (2015)
83	3.0	2.9	0.6	1.7	1.6	2.5	2.8	Sinister 2 (2015)
87	3.5	3.3	0.6	1.4	1.6	1.9	2.7	Unfinished Business (2015)

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm
0	4.0	3.9	1.2	2.1	2.3	1.6	2.1
1	4.5	4.5	3.6	4.0	3.0	3.5	3.6
2	5.0	4.8	3.6	4.2	3.6	3.3	3.7
3	5.0	4.8	4.0	4.2	3.4	3.4	3.7
4	4.5	4.5	4.9	4.5	4.7	4.4	4.3

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm
0	4.0	3.9	1.2	2.1	2.3	1.6	2.1
1	4.5	4.5	3.6	4.0	3.0	3.5	3.6
2	5.0	4.8	3.6	4.2	3.6	3.3	3.7
3	5.0	4.8	4.0	4.2	3.4	3.4	3.7
4	4.5	4.5	4.9	4.5	4.7	4.4	4.3
...	...	...	...	...	...	...	...
499	0.0	0.0	NaN	NaN	NaN	NaN	NaN
500	0.0	0.0	NaN	NaN	NaN	NaN	NaN
501	0.0	0.0	NaN	NaN	NaN	NaN	NaN
502	0.0	0.0	NaN	NaN	NaN	NaN	NaN
503	0.0	0.0	NaN	NaN	NaN	NaN	NaN

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm
0	4.0	3.9	1.2	2.1	2.3	1.6	2.1
1	4.5	4.5	3.6	4.0	3.0	3.5	3.6
2	5.0	4.8	3.6	4.2	3.6	3.3	3.7
3	5.0	4.8	4.0	4.2	3.4	3.4	3.7
4	4.5	4.5	4.9	4.5	4.7	4.4	4.3

	STARS	RATING	RT_Norm	RTU_Norm	Meta_Norm	Meta_U_Norm	IMDB_Norm
0	4.0	3.9	1.2	2.1	2.3	1.6	2.1
1	4.5	4.5	3.6	4.0	3.0	3.5	3.6
2	5.0	4.8	3.6	4.2	3.6	3.3	3.7
3	5.0	4.8	4.0	4.2	3.4	3.4	3.7
4	4.5	4.5	4.9	4.5	4.7	4.4	4.3
...	...	...	...	...	...	...	...
499	0.0	0.0	NaN	NaN	NaN	NaN	NaN
500	0.0	0.0	NaN	NaN	NaN	NaN	NaN
501	0.0	0.0	NaN	NaN	NaN	NaN	NaN
502	0.0	0.0	NaN	NaN	NaN	NaN	NaN
503	0.0	0.0	NaN	NaN	NaN	NaN	NaN