Use MRUnit for testing your MapReduce program

In the WordCount (HelloWorld) MapReduce program entry, I blogged about how you can create your WordCount (HelloWorld) MapReduce program. You can use Apache MRUnit, which is a unit-testing framework for testing your MapReduce programs. I used MRUnit to develop unit tests for my WordCount program.
  1. First, I created a unit test for my WordCountMapper class like this. The basic idea is that you set the input and the expected output on the test driver and then execute the test by calling mapDriver.runTest()
    package com.spnotes.hadoop;
    import static org.junit.Assert.*;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;
    import org.apache.hadoop.mrunit.types.Pair;
    import org.junit.Before;
    import org.junit.Test;
    /**
     * Unit tests for {@code WordCountMapper} using MRUnit's {@code MapDriver}.
     *
     * <p>Each test feeds a single input line to the mapper and verifies the
     * exact sequence of (word, 1) pairs it emits.
     */
    public class WordCountMapperTest {

        /** Driver that wraps the mapper under test; rebuilt before each test. */
        MapDriver<LongWritable, Text, Text, IntWritable> driver;

        @Before
        public void setup() {
            driver = MapDriver.newMapDriver(new WordCountMapper());
        }

        /** A single-word line produces exactly one (word, 1) pair. */
        @Test
        public void testOneWord() throws Exception {
            driver.withInput(new LongWritable(1), new Text("This"))
                  .withOutput(new Text("This"), new IntWritable(1))
                  .runTest();
        }

        /** Three distinct words each produce their own (word, 1) pair, in input order. */
        @Test
        public void testThreeWords() throws Exception {
            List<Pair<Text, IntWritable>> expected = new ArrayList<Pair<Text, IntWritable>>();
            for (String word : new String[] {"This", "is", "test"}) {
                expected.add(new Pair<Text, IntWritable>(new Text(word), new IntWritable(1)));
            }
            driver.withInput(new LongWritable(1), new Text("This is test"))
                  .withAllOutput(expected)
                  .runTest();
        }

        /** A repeated word is emitted once per occurrence — the mapper does no aggregation. */
        @Test
        public void testRepeatedWord() throws Exception {
            List<Pair<Text, IntWritable>> expected = new ArrayList<Pair<Text, IntWritable>>();
            for (int i = 0; i < 3; i++) {
                expected.add(new Pair<Text, IntWritable>(new Text("test"), new IntWritable(1)));
            }
            driver.withInput(new LongWritable(1), new Text("test test test"))
                  .withAllOutput(expected)
                  .runTest();
        }

        /** The mapper preserves input order; it does not sort keys (sorting happens in the shuffle). */
        @Test
        public void testOrderingOfKeys() throws Exception {
            List<Pair<Text, IntWritable>> expected = new ArrayList<Pair<Text, IntWritable>>();
            for (String word : new String[] {"ccc", "bbb", "aaa"}) {
                expected.add(new Pair<Text, IntWritable>(new Text(word), new IntWritable(1)));
            }
            driver.withInput(new LongWritable(1), new Text("ccc bbb aaa"))
                  .withAllOutput(expected)
                  .runTest();
        }
    }
  2. Then I created a unit test for my WordCountReducer class like this
    package com.spnotes.hadoop;
    import static org.junit.Assert.*;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
    import org.junit.Before;
    import org.junit.Test;
    /**
     * Unit test for {@code WordCountReducer} using MRUnit's {@code ReduceDriver}.
     *
     * <p>Feeds a key with a list of counts to the reducer and verifies that it
     * emits the key with the summed count.
     */
    public class WordCountReducerTest {

        /** Driver that wraps the reducer under test; rebuilt before each test. */
        private ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;

        @Before
        public void setup() {
            // BUG FIX: the original created a WordCountReducer local but then
            // passed a second `new WordCountReducer()` to the driver, leaving
            // an unused variable. Pass the single instance instead.
            WordCountReducer wordCountReducer = new WordCountReducer();
            reduceDriver = ReduceDriver.newReduceDriver(wordCountReducer);
        }

        /** Three occurrences of "test" reduce to a single ("test", 3) pair. */
        @Test
        public void testSimpleReduce() throws Exception {
            List<IntWritable> wordCountList = new ArrayList<IntWritable>();
            wordCountList.add(new IntWritable(1));
            wordCountList.add(new IntWritable(1));
            wordCountList.add(new IntWritable(1));
            reduceDriver.withInput(new Text("test"), wordCountList);
            reduceDriver.withOutput(new Text("test"), new IntWritable(3));
            // BUG FIX: the original never called runTest(), so the expectations
            // above were never checked and the test always passed vacuously.
            reduceDriver.runTest();
        }
    }
  3. The last part was to develop an end-to-end test. In this case you set up both the mapper and the reducer class that you want to test and then run the job end to end
    package com.spnotes.hadoop;
    import static org.junit.Assert.*;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;
    import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
    import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
    import org.apache.hadoop.mrunit.types.Pair;
    import org.junit.Before;
    import org.junit.Test;
    /**
     * End-to-end test for the WordCount job using MRUnit's {@code MapReduceDriver}.
     *
     * <p>Wires the mapper and reducer together and verifies the final
     * (word, count) output for a sample input line. Standalone map and reduce
     * drivers are also prepared in {@code setUp}, mirroring the original test
     * fixture, although the test here exercises only the combined driver.
     */
    public class WordCountMRTest {

        MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;
        MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
        ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;

        @Before
        public void setUp() {
            WordCountMapper mapper = new WordCountMapper();
            WordCountReducer reducer = new WordCountReducer();

            mapDriver = new MapDriver<LongWritable, Text, Text, IntWritable>();
            mapDriver.setMapper(mapper);

            reduceDriver = new ReduceDriver<Text, IntWritable, Text, IntWritable>();
            reduceDriver.setReducer(reducer);

            mapReduceDriver = new MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable>();
            mapReduceDriver.setMapper(mapper);
            mapReduceDriver.setReducer(reducer);
        }

        /**
         * "This test is simple test" should reduce to one pair per distinct
         * word, with "test" counted twice. Keys appear in the shuffle's sorted
         * order, so the duplicate word's pair comes last here.
         */
        @Test
        public void testSimple() throws Exception {
            List<Pair<Text, IntWritable>> expected = new ArrayList<Pair<Text, IntWritable>>();
            expected.add(new Pair<Text, IntWritable>(new Text("This"), new IntWritable(1)));
            expected.add(new Pair<Text, IntWritable>(new Text("is"), new IntWritable(1)));
            expected.add(new Pair<Text, IntWritable>(new Text("simple"), new IntWritable(1)));
            expected.add(new Pair<Text, IntWritable>(new Text("test"), new IntWritable(2)));

            mapReduceDriver.withInput(new LongWritable(1), new Text("This test is simple test"))
                           .withAllOutput(expected)
                           .runTest();
        }
    }