3.19.2. MapReduce Word Count Example
class WordCount {
class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable> {
// Local variables for the map function.
// Here to avoid frequent allocation.
IntWritable one = new IntWritable (1);
Text word = new Text ();
void map (Object key, Text value, Context context) {
StringTokenizer itr = new StringTokenizer (value.toString ());
while (itr.hasMoreTokens ()) {
word.set (itr.nextToken ());
context.write (word, one);
}
}
}
class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
// Local variables for the reduce function.
// Here to avoid frequent allocation.
IntWritable result = new IntWritable ();
void reduce (Text key, Iterable<IntWritable> values, Context context) {
int sum = 0;
for (IntWritable val : values) {
sum += val.get ();
}
result.set (sum);
context.write (key, result);
}
}
public static void main (String [] args) {
...
Job job = new Job (conf, "Word Count Job");
job.setJarByClass (WordCount.class);
job.setMapperClass (TokenizerMapper.class);
job.setCombinerClass (IntSumReducer.class);
job.setReducerClass (IntSumReducer.class);
job.setOutputKeyClass (Text.class);
job.setOutputValueClass (IntWritable.class);
FileInputFormat.addInputPath (job, new Path ("input"));
FileOutputFormat.setOutputPath (job, new Path ("output"));
System.exit (job.waitForCompletion (true) ? 0 : 1);
}
}
// Example adapted from external sources; see the references.
// The Apache License applies to this example.