3.19.2. MapReduce Word Count Example

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

  // Mapper: emits a (word, 1) pair for every token in the input.
  // Nested Mapper and Reducer classes must be public static so that
  // Hadoop can instantiate them by reflection.
  public static class TokenizerMapper
    extends Mapper<Object, Text, Text, IntWritable> {

    // Reusable output objects for the map function.
    // Fields rather than locals to avoid frequent allocation.
    private final static IntWritable one = new IntWritable (1);
    private Text word = new Text ();

    public void map (Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer (value.toString ());
      while (itr.hasMoreTokens ()) {
        word.set (itr.nextToken ());
        context.write (word, one);
      }
    }
  }

  // Reducer: sums the counts emitted for each word. It also serves
  // as the combiner, since integer addition is associative and
  // commutative.
  public static class IntSumReducer
    extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reusable output object for the reduce function.
    // A field rather than a local to avoid frequent allocation.
    private IntWritable result = new IntWritable ();

    public void reduce (Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get ();
      }
      result.set (sum);
      context.write (key, result);
    }
  }

  public static void main (String [] args) throws Exception {

    Configuration conf = new Configuration ();

    // The Job (Configuration, String) constructor is deprecated in
    // Hadoop 2.x and later; a variant using Job.getInstance is shown
    // after the example.
    Job job = new Job (conf, "Word Count Job");
    job.setJarByClass (WordCount.class);
    job.setMapperClass (TokenizerMapper.class);
    job.setCombinerClass (IntSumReducer.class);
    job.setReducerClass (IntSumReducer.class);
    job.setOutputKeyClass (Text.class);
    job.setOutputValueClass (IntWritable.class);

    // Read from the "input" directory and write to "output";
    // the output directory must not already exist.
    FileInputFormat.addInputPath (job, new Path ("input"));
    FileOutputFormat.setOutputPath (job, new Path ("output"));

    // Run the job and exit with 0 on success, 1 on failure.
    System.exit (job.waitForCompletion (true) ? 0 : 1);
  }
}

// Example adapted from the sources cited in the references.
// The Apache License applies to this example.
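
The Job (Configuration, String) constructor used in the driver above is deprecated in Hadoop 2.x and later in favor of the Job.getInstance factory method. The following is a minimal sketch of an alternative main method for the same WordCount class, reusing its imports; taking the input and output paths from args instead of the hard-coded "input" and "output" directories is an assumption added here for illustration, not part of the original example.

  public static void main (String [] args) throws Exception {

    Configuration conf = new Configuration ();

    // Preferred factory method in Hadoop 2.x and later.
    Job job = Job.getInstance (conf, "Word Count Job");
    job.setJarByClass (WordCount.class);
    job.setMapperClass (TokenizerMapper.class);
    job.setCombinerClass (IntSumReducer.class);
    job.setReducerClass (IntSumReducer.class);
    job.setOutputKeyClass (Text.class);
    job.setOutputValueClass (IntWritable.class);

    // Assumed invocation: WordCount <input path> <output path>
    FileInputFormat.addInputPath (job, new Path (args [0]));
    FileOutputFormat.setOutputPath (job, new Path (args [1]));

    System.exit (job.waitForCompletion (true) ? 0 : 1);
  }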