import pandas as pd
# Load the survey export, decoding the file as ISO-8859-1.
star_wars = pd.read_csv("star_wars.csv", encoding="ISO-8859-1")


# Inspect the raw rows and column labels before any cleaning.
star_wars.head()


star_wars.columns

# The first data row is a second header line rather than a respondent:
# its RespondentID is NaN and its answer cells read "Response" or hold the
# film titles. Left in place it would be counted as having seen all six
# films and would be averaged into the rankings, so keep only rows that
# carry a real RespondentID.
# .copy() gives an independent frame so the column assignments that follow
# operate on the data itself and not on a slice of the raw frame.
star_wars = star_wars[star_wars["RespondentID"].notnull()].copy()

Index(['RespondentID',
       'Have you seen any of the 6 films in the Star Wars franchise?',
       'Do you consider yourself to be a fan of the Star Wars film franchise?',
       'Which of the following Star Wars films have you seen? Please select all that apply.',
       'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8',
       'Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.',
       'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13',
       'Unnamed: 14',
       'Please state whether you view the following characters favorably, unfavorably, or are unfamiliar with him/her.',
       'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19',
       'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23',
       'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27',
       'Unnamed: 28', 'Which character shot first?',
       'Are you familiar with the Expanded Universe?',
       'Do you consider yourself to be a fan of the Expanded Universe?ï¿½ï¿½',
       'Do you consider yourself to be a fan of the Star Trek franchise?',
       'Gender', 'Age', 'Household Income', 'Education',
       'Location (Census Region)'],
      dtype='object')


# Lookup used to turn the survey's "Yes"/"No" answers into booleans.
# Values that are not keys of this dict come out of .map() as NaN.
yes_no = {"Yes": True, "No": False}

# The two questions whose answers are plain Yes/No strings.
yes_no_questions = (
    "Have you seen any of the 6 films in the Star Wars franchise?",
    "Do you consider yourself to be a fan of the Star Wars film franchise?",
)

for question in yes_no_questions:
    answers = star_wars[question]
    star_wars[question] = answers.map(yes_no)

star_wars.head()


import numpy as np

# Titles as they appear in the checkbox columns. The spacing inside some
# titles is irregular (two spaces after the episode number for I-IV);
# presumably this mirrors the raw CSV, so keep the strings exactly as-is.
seen_titles = [
    "Star Wars: Episode I  The Phantom Menace",
    "Star Wars: Episode II  Attack of the Clones",
    "Star Wars: Episode III  Revenge of the Sith",
    "Star Wars: Episode IV  A New Hope",
    "Star Wars: Episode V The Empire Strikes Back",
    "Star Wars: Episode VI Return of the Jedi",
]

# A cell holding a title means "seen" (True); an empty cell (NaN) means
# "not seen" (False).
movie_mapping = dict.fromkeys(seen_titles, True)
movie_mapping[np.nan] = False

# Columns 3..8 are the six "which films have you seen" checkbox columns.
checkbox_columns = star_wars.columns[3:9]
for col in checkbox_columns:
    star_wars[col] = star_wars[col].map(movie_mapping)


# Current labels of the six checkbox columns, in column order. The first
# carries the question text; the rest are pandas' "Unnamed: N" placeholders.
seen_source_columns = [
    "Which of the following Star Wars films have you seen? Please select all that apply.",
    "Unnamed: 4",
    "Unnamed: 5",
    "Unnamed: 6",
    "Unnamed: 7",
    "Unnamed: 8",
]

# Map them to seen_1 .. seen_6 by position.
seen_renames = {
    old_name: f"seen_{number}"
    for number, old_name in enumerate(seen_source_columns, start=1)
}
star_wars = star_wars.rename(columns=seen_renames)

star_wars.head()


# Current labels of the six ranking columns, in column order. The first
# carries the question text; the rest are pandas' "Unnamed: N" placeholders.
ranking_source_columns = [
    "Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.",
    "Unnamed: 10",
    "Unnamed: 11",
    "Unnamed: 12",
    "Unnamed: 13",
    "Unnamed: 14",
]

# Map them to ranking_1 .. ranking_6 by position.
ranking_renames = {
    old_name: f"ranking_{number}"
    for number, old_name in enumerate(ranking_source_columns, start=1)
}
star_wars = star_wars.rename(columns=ranking_renames)

star_wars.head()


# Columns 9..14 are the six ranking columns; store them as floats so they
# can be averaged.
ranking_columns = star_wars.columns[9:15]
star_wars[ranking_columns] = star_wars[ranking_columns].astype(float)


# Mean rank per film (1 = favorite, so a lower mean is a better-liked film).
star_wars[ranking_columns].mean()

ranking_1    3.732057
ranking_2    4.084827
ranking_3    4.337321
ranking_4    3.273596
ranking_5    2.516129
ranking_6    3.051374
dtype: float64


%matplotlib inline
import matplotlib.pyplot as plt

# One bar per film (x = 1..6), height = mean rank of that film.
plt.bar(range(1, 7), star_wars.iloc[:, 9:15].mean())

<BarContainer object of 6 artists>


# Summing the boolean seen_* columns counts the rows marked True per film.
star_wars.iloc[:, 3:9].sum()

seen_1    674
seen_2    572
seen_3    551
seen_4    608
seen_5    759
seen_6    739
dtype: int64


# One bar per film (x = 1..6), height = number of rows marked as seen.
plt.bar(range(1, 7), star_wars.iloc[:, 3:9].sum())

<BarContainer object of 6 artists>


# Split the rows by the "Gender" answer; rows with any other value
# (including missing) end up in neither subset.
gender = star_wars["Gender"]
males = star_wars[gender.eq("Male")]
females = star_wars[gender.eq("Female")]


import matplotlib.style as style
style.use('ggplot')

# Side-by-side bar charts of the mean film rankings for each gender subset.
# Each entry: (rows to plot, panel title, extra keyword arguments for bar()).
# Only the male panel overrides the bar color; the female panel uses the
# style's default.
ranking_panels = [
    (males, 'Males Ranking', {'color': 'lightblue'}),
    (females, 'Females Ranking', {}),
]

plt.figure(figsize=(10, 3))
for position, (group, title, bar_kwargs) in enumerate(ranking_panels, start=1):
    plt.subplot(1, 2, position)
    plt.bar(range(1, 7), group[group.columns[9:15]].mean(), **bar_kwargs)
    # Same y-range on both panels so the bar heights are directly comparable.
    plt.ylim(2, 5)
    plt.title(title)

plt.show()


style.use('ggplot')

# Side-by-side bar charts of how many rows in each gender subset are marked
# as having seen each film.
# Each entry: (rows to plot, panel title, extra keyword arguments for bar()).
# Only the male panel overrides the bar color; the female panel uses the
# style's default.
view_panels = [
    (males, 'Males Views', {'color': 'lightblue'}),
    (females, 'Females Views', {}),
]

plt.figure(figsize=(10, 3))
for position, (group, title, bar_kwargs) in enumerate(view_panels, start=1):
    plt.subplot(1, 2, position)
    plt.bar(range(1, 7), group[group.columns[3:9]].sum(), **bar_kwargs)
    # Same y-range on both panels; note the axis starts at 200, not 0.
    plt.ylim(200, 400)
    plt.title(title)

plt.show()

	RespondentID	Have you seen any of the 6 films in the Star Wars franchise?	Do you consider yourself to be a fan of the Star Wars film franchise?	Which of the following Star Wars films have you seen? Please select all that apply.	Unnamed: 4	Unnamed: 5	Unnamed: 6	Unnamed: 7	Unnamed: 8	Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.	...	Unnamed: 28	Which character shot first?	Are you familiar with the Expanded Universe?	Do you consider yourself to be a fan of the Expanded Universe?ï¿½ï¿½	Do you consider yourself to be a fan of the Star Trek franchise?	Gender	Age	Household Income	Education	Location (Census Region)
0	NaN	Response	Response	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	Star Wars: Episode IV A New Hope	Star Wars: Episode V The Empire Strikes Back	Star Wars: Episode VI Return of the Jedi	3.0	...	Yoda	Response	Response	Response	Response	Response	Response	Response	Response	Response
1	3.292880e+09	Yes	Yes	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	Star Wars: Episode IV A New Hope	Star Wars: Episode V The Empire Strikes Back	Star Wars: Episode VI Return of the Jedi	3.0	...	Very favorably	I don't understand this question	Yes	No	No	Male	18-29	NaN	High school degree	South Atlantic
2	3.292880e+09	No	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	Yes	Male	18-29	$0 - $24,999	Bachelor degree	West South Central
3	3.292765e+09	Yes	No	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	NaN	NaN	NaN	1.0	...	Unfamiliar (N/A)	I don't understand this question	No	NaN	No	Male	18-29	$0 - $24,999	High school degree	West North Central
4	3.292763e+09	Yes	Yes	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	Star Wars: Episode IV A New Hope	Star Wars: Episode V The Empire Strikes Back	Star Wars: Episode VI Return of the Jedi	5.0	...	Very favorably	I don't understand this question	No	NaN	Yes	Male	18-29	$100,000 - $149,999	Some college or Associate degree	West North Central

	RespondentID	Have you seen any of the 6 films in the Star Wars franchise?	Do you consider yourself to be a fan of the Star Wars film franchise?	Which of the following Star Wars films have you seen? Please select all that apply.	Unnamed: 4	Unnamed: 5	Unnamed: 6	Unnamed: 7	Unnamed: 8	Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.	...	Unnamed: 28	Which character shot first?	Are you familiar with the Expanded Universe?	Do you consider yourself to be a fan of the Expanded Universe?ï¿½ï¿½	Do you consider yourself to be a fan of the Star Trek franchise?	Gender	Age	Household Income	Education	Location (Census Region)
0	NaN	NaN	NaN	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	Star Wars: Episode IV A New Hope	Star Wars: Episode V The Empire Strikes Back	Star Wars: Episode VI Return of the Jedi	3.0	...	Yoda	Response	Response	Response	Response	Response	Response	Response	Response	Response
1	3.292880e+09	True	True	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	Star Wars: Episode IV A New Hope	Star Wars: Episode V The Empire Strikes Back	Star Wars: Episode VI Return of the Jedi	3.0	...	Very favorably	I don't understand this question	Yes	No	No	Male	18-29	NaN	High school degree	South Atlantic
2	3.292880e+09	False	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	Yes	Male	18-29	$0 - $24,999	Bachelor degree	West South Central
3	3.292765e+09	True	False	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	NaN	NaN	NaN	1.0	...	Unfamiliar (N/A)	I don't understand this question	No	NaN	No	Male	18-29	$0 - $24,999	High school degree	West North Central
4	3.292763e+09	True	True	Star Wars: Episode I The Phantom Menace	Star Wars: Episode II Attack of the Clones	Star Wars: Episode III Revenge of the Sith	Star Wars: Episode IV A New Hope	Star Wars: Episode V The Empire Strikes Back	Star Wars: Episode VI Return of the Jedi	5.0	...	Very favorably	I don't understand this question	No	NaN	Yes	Male	18-29	$100,000 - $149,999	Some college or Associate degree	West North Central

Star Wars Survey¶

Overview¶

Read the Data¶

Cleaning and Mapping Yes/No Columns¶

Cleaning and Mapping Checkbox Columns¶

Cleaning the Ranking Columns¶

Finding the Highest-Ranked Movie¶

Finding the Most Viewed Movie¶

Exploring the Data by Binary Segments¶