#!/usr/bin/env python
"""Word-count demo: a random-word generator and a streaming reducer.

The generator writes ``maxIter`` random lowercase words (1-5 letters each),
one per line, to ``word.txt``.  The reducer (originally published as
``wordcount_reducer.py``) reads ``word<TAB>count`` lines from standard input,
sums the counts per word and prints the totals sorted by word.

Intended pipeline::

    cat word.txt | ./wordcount_mapper.py | ./wordcount_reducer.py

Run with the single argument ``generate`` to (re)create ``word.txt``; with no
argument the script acts as the reducer.
"""
import random
import sys
from operator import itemgetter

# 'abc..z'
alphaStr = "".join(map(chr, range(97, 123)))

# Number of random words written by the generator.
maxIter = 100000


def random_word(min_len=1, max_len=5):
    """Return a random lowercase word.

    Args:
        min_len: Smallest allowed word length (inclusive).
        max_len: Largest allowed word length (inclusive).

    Returns:
        A string of ``min_len``..``max_len`` characters drawn from
        ``alphaStr``.
    """
    length = random.randint(min_len, max_len)
    return "".join(random.choice(alphaStr) for _ in range(length))


def generate_words(path="word.txt", iterations=maxIter):
    """Write ``iterations`` random words to ``path``, one per line.

    Args:
        path: Output file; it is truncated if it already exists.
        iterations: Number of words (lines) to write.
    """
    # The context manager closes the file even if a write fails.
    with open(path, "w") as fp:
        for _ in range(iterations):
            fp.write(random_word() + "\n")


# filename: wordcount_reducer.py
def count_words(lines):
    """Sum the counts of ``word<TAB>count`` records.

    The input is presumably what ``wordcount_mapper.py`` emits; that script
    is not part of this file, so the format is taken from the parsing here.

    Args:
        lines: Iterable of text lines, each expected to look like
            ``word<TAB>count``.

    Returns:
        A list of ``(word, total)`` tuples sorted by word.  Lines that have
        no tab separator or whose count is not an integer are skipped.
    """
    wordcount = {}
    for line in lines:
        try:
            # Both the unpacking (no tab present) and int() (bad count)
            # raise ValueError; either way the record is unusable.
            word, count = line.strip().split("\t", 1)
            count = int(count)
        except ValueError:
            continue
        wordcount[word] = wordcount.get(word, 0) + count
    return sorted(wordcount.items(), key=itemgetter(0))


def main(argv=None):
    """Entry point: generate ``word.txt`` or reduce standard input.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  ``["generate"]`` writes ``word.txt``; anything
            else reads ``word<TAB>count`` lines from stdin and prints
            ``word<TAB>total`` lines to stdout.
    """
    args = sys.argv[1:] if argv is None else argv
    if args and args[0] == "generate":
        generate_words()
        return
    for word, count in count_words(sys.stdin):
        # Format first, then print: applying % to print()'s return value
        # fails on Python 3.
        print("%s\t%s" % (word, count))


if __name__ == "__main__":
    main()
原创文章,作者:奋斗,如若转载,请注明出处:https://blog.ytso.com/tech/pnotes/8145.html