y******u 发帖数: 804 | 7 如果想连连mapreduce算法,下面python script能模拟
MapReduce.py
import json
class MapReduce:
def __init__(self):
self.intermediate = {}
self.result = []
def emit_intermediate(self, key, value):
self.intermediate.setdefault(key, [])
self.intermediate[key].append(value)
def emit(self, value):
self.result.append(value)
def execute(self, data, mapper, reducer):
for line in data:
record = json.loads(line)
mapper(record)
for key in self.intermediate:
reducer(key, self.intermediate[key])
#jenc = json.JSONEncoder(encoding='latin-1')
jenc = json.JSONEncoder()
for item in self.result:
print jenc.encode(item)
wordcount.py
import MapReduce
import sys
"""
Word Count Example in the Simple Python MapReduce Framework
"""
mr = MapReduce.MapReduce()
# =============================
# Do not modify above this line
def mapper(record):
# key: document identifier
# value: document contents
key = record[0]
value = record[1]
words = value.split()
for w in words:
mr.emit_intermediate(w, 1)
def reducer(key, list_of_values):
# key: word
# value: list of occurrence counts
total = 0
for v in list_of_values:
total += v
mr.emit((key, total))
# Do not modify below this line
# =============================
if __name__ == '__main__':
inputdata = open(sys.argv[1])
mr.execute(inputdata, mapper, reducer) |