-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathgetindex_python.py
More file actions
56 lines (42 loc) · 1.21 KB
/
getindex_python.py
File metadata and controls
56 lines (42 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
"""
Created on Sun May 26 09:59:52 2019
@author: lliu9
"""
from mrjob.job import MRJob
import csv
class MRQnsTags(MRJob):
    """Emit ``(accepted answer id, counts)`` for tagged, answered questions.

    Each input line is parsed as a single CSV record. A record is kept when
    it is a question (post-type column equals ``QUESTION_POST_TYPE``), it
    has a non-empty accepted answer id, and its tag list contains
    ``TARGET_TAG``.

    The column positions and the tag are class attributes so that a subclass
    can retarget the job without copying the mapper.
    """

    # Positions of the fields read from a parsed post row.
    POST_TYPE_COL = 1
    ACCEPTED_ANSWER_COL = 3
    COUNTS_COL = 6  # presumably the view count, per the job description
    TAGS_COL = 14

    QUESTION_POST_TYPE = '1'
    TARGET_TAG = "python"
    QUOTE_CHAR = '|'

    def mapper(self, _, line):
        """Filter one post record and emit its accepted answer id and counts.

        Args:
            _: Input key; unused.
            line: One raw CSV line from the posts file.

        Yields:
            ``(acceptid, counts)`` string pairs for questions that carry
            ``TARGET_TAG`` and have an accepted answer. Rows that are too
            short to contain the tags column are skipped silently.
        """
        # next() with a default: a bare StopIteration escaping a generator
        # would be converted into a RuntimeError.
        row = next(csv.reader([line], quotechar=self.QUOTE_CHAR), [])

        # Short or truncated rows cannot be inspected; skip them up front
        # instead of relying on IndexError for control flow.
        if len(row) <= self.TAGS_COL:
            return
        if row[self.POST_TYPE_COL] != self.QUESTION_POST_TYPE:
            return

        acceptid = row[self.ACCEPTED_ANSWER_COL]
        if not acceptid:
            return

        # Tags are stored as "<a><b>"; dropping '<' and splitting on '>'
        # leaves the bare tag names (plus a trailing empty string).
        tags = row[self.TAGS_COL].replace('<', '').split('>')
        if self.TARGET_TAG in tags:
            yield acceptid, row[self.COUNTS_COL]

    def reducer(self, index, body):
        """Pass every mapped value through under its key, unaggregated.

        Args:
            index: The accepted answer id emitted by the mapper.
            body: Iterator over the counts values emitted for that id.

        Yields:
            ``(index, counts)`` for each value in ``body``. Accepted answer
            ids are expected to be unique, so this is normally one pair.
        """
        # Emit the values themselves rather than the iterator object, which
        # the standard json encoder cannot serialise.
        for counts in body:
            yield index, counts
# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    MRQnsTags.run()