-
Notifications
You must be signed in to change notification settings - Fork 7
/
users.py
executable file
·112 lines (90 loc) · 3.97 KB
/
users.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import datetime, glob, json, re
from collections import Counter, defaultdict
import threads, main
###########################################################################
# Allows for the API user to examine a single User in the data set and look
# at various features of their comments
###########################################################################
class User(object):
    """Hold the scraped comment data for a single user.

    The constructor eagerly derives the "all comments" views (text, times,
    text/time pairs and thread IDs). The reply-only and top-level-only views
    start out empty and are filled in on demand by getStructure(), because
    they are not always needed.
    """

    def __init__(self, userID, COMMENTS):
        """Build the per-user views over the scraped comments.

        userID is the Facebook ID of the user, and COMMENTS is a list
        containing all scraped comments for them. Each comment is read as a
        dict with (at least) the keys 'message', 'time' and 'id'; other
        methods additionally read 'rep_index' and 'response'.
        """
        # all the data
        self.COMMENTS = COMMENTS
        # user ID
        self.userID = userID
        # list of the total text of the comments made
        self.all_text = [comment['message'] for comment in COMMENTS]
        # list of the times of the comments, as converted by
        # threads.Thread.getTime
        self.all_times = [threads.Thread.getTime(comment['time'])
                          for comment in COMMENTS]
        # list of comment/time tuples. Materialised with list() because on
        # Python 3 a bare zip() is a one-shot iterator: it would be empty
        # after the first pass and would not support len() or indexing.
        self.all_text_time = list(zip(self.all_text, self.all_times))
        # list of the threads commented in
        self.all_threads = [User.getThread(comment) for comment in COMMENTS]

        # same things but of the replies made. this won't always be
        # necessary, so they are populated by getStructure()
        self.reply_text = []
        self.reply_times = []
        self.reply_text_time = []
        self.reply_threads = []

        # now the same categories but for the top-level comments only.
        # again this won't always be needed, so getStructure() fills them
        self.top_text = []
        self.top_times = []
        self.top_text_time = []
        self.top_threads = []

    def getStructure(self):
        """Set up the top-level and reply parameters.

        A comment whose 'rep_index' is truthy is treated as a reply; any
        other comment is treated as top-level. All eight reply_*/top_*
        attributes are rebuilt from scratch on every call, so calling this
        method more than once does not duplicate entries, and every one of
        them is always a list (never a tuple), whether empty or not.
        """
        # Collect into locals first so the instance attributes are only
        # replaced once the whole pass over COMMENTS has succeeded.
        reply_pairs, reply_threads = [], []
        top_pairs, top_threads = [], []

        for comment in self.COMMENTS:
            pair = (comment['message'],
                    threads.Thread.getTime(comment['time']))
            thread = User.getThread(comment)
            if comment['rep_index']:  # this is a reply
                reply_pairs.append(pair)
                reply_threads.append(thread)
            else:  # top-level
                top_pairs.append(pair)
                top_threads.append(thread)

        self.reply_text_time = reply_pairs
        self.reply_threads = reply_threads
        self.top_text_time = top_pairs
        self.top_threads = top_threads

        # Split the pairs with comprehensions rather than zip(*pairs): this
        # handles the empty case without a guard and keeps the results as
        # lists, so e.g. reply_text + top_text works even if one is empty.
        self.reply_text = [text for text, _ in reply_pairs]
        self.reply_times = [when for _, when in reply_pairs]
        self.top_text = [text for text, _ in top_pairs]
        self.top_times = [when for _, when in top_pairs]

    def counterThreads(self, choice="All"):
        """Return the list of threads as a Counter for easier analysis.

        choice selects which comments are counted: 'All' (default), 'Top'
        (top-level comments only) or 'Reply' (replies only). 'Top' and
        'Reply' read the lists filled in by getStructure(), so they count
        nothing until that method has been called.

        Raises ValueError if choice is not one of the three accepted values.
        """
        if choice == 'All':
            selected = self.all_threads
        elif choice == 'Top':
            selected = self.top_threads
        elif choice == 'Reply':
            selected = self.reply_threads
        else:
            raise ValueError(
                "choice must be 'All', 'Top' or 'Reply', got {0!r}"
                .format(choice))
        return Counter(selected)

    def responseTimes(self, choice="All"):
        """Return a list of the comments' 'response' values.

        Per the data's convention, 'response' is the time taken from the
        previous comment in the thread till the comment was made. choice
        selects 'All' (default), 'Top' (comments with a falsy 'rep_index')
        or 'Reply' (comments with a truthy 'rep_index'). This reads COMMENTS
        directly, so getStructure() does not need to have been called.

        An unrecognised choice yields an empty list (unlike counterThreads,
        which raises); this is kept for backward compatibility.
        """
        if choice == "All":
            return [comment['response'] for comment in self.COMMENTS]
        if choice == "Top":
            return [comment['response'] for comment in self.COMMENTS
                    if not comment['rep_index']]
        if choice == "Reply":
            return [comment['response'] for comment in self.COMMENTS
                    if comment['rep_index']]
        return []

    @staticmethod
    def getThread(comment):
        """Given a _comment_ dict, return the thread ID.

        The thread ID is the part of comment['id'] before the first
        underscore (the whole ID if it contains no underscore).
        """
        return comment['id'].split('_')[0]