# Read in the data.
from csv import reader

# Load every CSV row (header row included) into a list of lists.
# The `with` block closes the file as soon as the rows are materialized, and
# newline='' lets the csv module handle line endings itself, as its docs advise.
with open('hacker-news-posts.csv', encoding='utf-8', newline='') as open_file:
    read_file = reader(open_file)
    hn = list(read_file)
hn[:5]

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'],
 ['12579008',
  'You have two days to comment if you want stem cells to be classified as your own',
  'http://www.regulations.gov/document?D=FDA-2015-D-3719-0018',
  '1',
  '0',
  'altstar',
  '9/26/2016 3:26'],
 ['12579005',
  'SQLAR  the SQLite Archiver',
  'https://www.sqlite.org/sqlar/doc/trunk/README.md',
  '1',
  '0',
  'blacksqr',
  '9/26/2016 3:24'],
 ['12578997',
  'What if we just printed a flatscreen television on the side of our boxes?',
  'https://medium.com/vanmoof/our-secrets-out-f21c1f03fdc8#.ietxmez43',
  '1',
  '0',
  'pavel_lishin',
  '9/26/2016 3:19'],
 ['12578989',
  'algorithmic music',
  'http://cacm.acm.org/magazines/2011/7/109891-algorithmic-composition/fulltext',
  '1',
  '0',
  'poindontcare',
  '9/26/2016 3:16']]


# Split off the header row so that `hn` holds only post rows.
headers, hn = hn[0], hn[1:]
print(headers, hn[:5], sep="\n")

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']
[['12579008', 'You have two days to comment if you want stem cells to be classified as your own', 'http://www.regulations.gov/document?D=FDA-2015-D-3719-0018', '1', '0', 'altstar', '9/26/2016 3:26'], ['12579005', 'SQLAR  the SQLite Archiver', 'https://www.sqlite.org/sqlar/doc/trunk/README.md', '1', '0', 'blacksqr', '9/26/2016 3:24'], ['12578997', 'What if we just printed a flatscreen television on the side of our boxes?', 'https://medium.com/vanmoof/our-secrets-out-f21c1f03fdc8#.ietxmez43', '1', '0', 'pavel_lishin', '9/26/2016 3:19'], ['12578989', 'algorithmic music', 'http://cacm.acm.org/magazines/2011/7/109891-algorithmic-composition/fulltext', '1', '0', 'poindontcare', '9/26/2016 3:16'], ['12578979', 'How the Data Vault Enables the Next-Gen Data Warehouse and Data Lake', 'https://www.talend.com/blog/2016/05/12/talend-and-Â\x93the-data-vaultÂ\x94', '1', '0', 'markgainor1', '9/26/2016 3:14']]


# Bucket each post by its title prefix (case-insensitive): `Ask HN`, `Show HN`, or anything else.
ask_posts, show_posts, other_posts = [], [], []

for post in hn:
    # The title is column 1; lowercase it once and reuse it for both prefix checks.
    lowered_title = post[1].lower()
    if lowered_title.startswith("ask hn"):
        bucket = ask_posts
    elif lowered_title.startswith("show hn"):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(post)

for bucket in (ask_posts, show_posts, other_posts):
    print(len(bucket))

9139
10158
273822


# Average number of comments per `Ask HN` post.
# The comment count is column 4 and is read from the CSV as text, hence int().
total_ask_comments = sum(int(row[4]) for row in ask_posts)
avg_ask_comments = total_ask_comments / len(ask_posts)
print(avg_ask_comments)

10.393478498741656


# Average number of comments per `Show HN` post (comment count is column 4, read as text).
total_show_comments = sum(int(row[4]) for row in show_posts)
avg_show_comments = total_show_comments / len(show_posts)
print(avg_show_comments)

4.886099625910612


# Calculate the number of Ask HN posts created during each hour of the day and the number of comments they received.
import datetime as dt

# Pair each Ask HN post's creation timestamp (column 6) with its comment count (column 4).
result_list = [[post[6], int(post[4])] for post in ask_posts]

comments_by_hour = {}
counts_by_hour = {}
date_format = "%m/%d/%Y %H:%M"

# Accumulate, per hour of day, the total comments and the number of posts.
for created_at, n_comments in result_list:
    created_hour = dt.datetime.strptime(created_at, date_format).hour
    comments_by_hour[created_hour] = comments_by_hour.get(created_hour, 0) + n_comments
    counts_by_hour[created_hour] = counts_by_hour.get(created_hour, 0) + 1

comments_by_hour

{2: 2996,
 1: 2089,
 22: 3372,
 21: 4500,
 19: 3954,
 17: 5547,
 15: 18525,
 14: 4972,
 13: 7245,
 11: 2797,
 10: 3013,
 9: 1477,
 7: 1585,
 3: 2154,
 23: 2297,
 20: 4462,
 16: 4466,
 8: 2362,
 0: 2277,
 18: 4877,
 12: 4234,
 4: 2360,
 6: 1587,
 5: 1838}


# Average comments per `Ask HN` post for each hour of the day:
# total comments for the hour divided by the number of posts created in that hour.
avg_by_hour = {
    hr: total / counts_by_hour[hr]
    for hr, total in comments_by_hour.items()
}

avg_by_hour

{2: 11.137546468401487,
 1: 7.407801418439717,
 22: 8.804177545691905,
 21: 8.687258687258687,
 19: 7.163043478260869,
 17: 9.449744463373083,
 15: 28.676470588235293,
 14: 9.692007797270955,
 13: 16.31756756756757,
 11: 8.96474358974359,
 10: 10.684397163120567,
 9: 6.653153153153153,
 7: 7.013274336283186,
 3: 7.948339483394834,
 23: 6.696793002915452,
 20: 8.749019607843136,
 16: 7.713298791018998,
 8: 9.190661478599221,
 0: 7.5647840531561465,
 18: 7.94299674267101,
 12: 12.380116959064328,
 4: 9.7119341563786,
 6: 6.782051282051282,
 5: 8.794258373205741}


# Flip each (hour, average) pair to (average, hour) so the tuples compare on the average first.
inverse = list(zip(avg_by_hour.values(), avg_by_hour.keys()))
max(inverse)

(28.676470588235293, 15)


# Pick the (hour, average) pair with the largest average in a single pass.
key, value = max(avg_by_hour.items(), key=lambda pair: pair[1])
print(key, value)

15 28.676470588235293


import operator
# Same lookup via operator.itemgetter: compare (hour, average) pairs on index 1, the average.
by_average = operator.itemgetter(1)
max(avg_by_hour.items(), key=by_average)

(15, 28.676470588235293)


# Build [average, hour] pairs so a plain sort orders by average, then rank them highest first.
avg_by_hour_list = [[avg, hr] for hr, avg in avg_by_hour.items()]
sorted_list = sorted(avg_by_hour_list, reverse=True)

sorted_list

[[28.676470588235293, 15],
 [16.31756756756757, 13],
 [12.380116959064328, 12],
 [11.137546468401487, 2],
 [10.684397163120567, 10],
 [9.7119341563786, 4],
 [9.692007797270955, 14],
 [9.449744463373083, 17],
 [9.190661478599221, 8],
 [8.96474358974359, 11],
 [8.804177545691905, 22],
 [8.794258373205741, 5],
 [8.749019607843136, 20],
 [8.687258687258687, 21],
 [7.948339483394834, 3],
 [7.94299674267101, 18],
 [7.713298791018998, 16],
 [7.5647840531561465, 0],
 [7.407801418439717, 1],
 [7.163043478260869, 19],
 [7.013274336283186, 7],
 [6.782051282051282, 6],
 [6.696793002915452, 23],
 [6.653153153153153, 9]]


# Print the 5 hours with the highest average comments, taken from the already-sorted list.

print("Top 5 Hours for 'Ask HN' Comments")
line_template = "{}: {:.2f} average comments per post"
for avg, hr in sorted_list[:5]:
    # Render the integer hour as a zero-padded HH:MM label.
    hour = dt.time(hr).strftime("%H:%M")
    print(line_template.format(hour, avg))

Top 5 Hours for 'Ask HN' Comments
15:00: 28.68 average comments per post
13:00: 16.32 average comments per post
12:00: 12.38 average comments per post
02:00: 11.14 average comments per post
10:00: 10.68 average comments per post

Exploring Hacker News Posts¶

Read data¶

Removing Headers from a List of Lists¶

Extracting Ask HN and Show HN Posts¶

Calculating the Average Number of Comments for Ask HN and Show HN Posts¶

Finding the Amount of Ask Posts and Comments by Hour Created¶

Calculating the Average Number of Comments for Ask HN Posts by Hour¶

Sorting and Printing Values from a List of Lists¶

Find the max value in a Dictionary.¶

Convert the Dictionary to list and then sort¶

Conclusion¶