Thursday, March 29, 2012

Improvement over BigTop's TestHadoopExamples.groovy with YAML and TestRunHadoopExamples.groovy

Thanks to feedback from Wing Yew and Roman, I set out to improve

./bigtop-tests/test-artifacts/hadoop/src/main/groovy/org/apache/bigtop/itest/hadoopexamples/TestHadoopExamples.groovy

using YAML, Groovy and BigTop's existing itest infrastructure. YAML is better suited than XML for capturing a list of shell commands.

I have made improvements in the following two areas:

1. Move the commands inside TestHadoopExamples.groovy to a YAML file, so that we do NOT need to recompile the code when adding or changing test cases.

2. Introduce a comparator that compares a command's output with the expected output to verify the command's execution. For example, when calculating Pi, make sure the end result matches the known number.

The code has been tested on Ubuntu 10.04 on AWS, and a JIRA has been submitted.


Here are the test cases in TestHadoopExamples.groovy:
 
  // Example name -> argument string. Presumably each value is the argument
  // list handed to the hadoop-examples program of that name; the code that
  // consumes this map is not shown here -- TODO confirm.
  // The two aggregate* entries are commented out, i.e. currently disabled.
  static Map examples =
    [
        pi                :'20 10',
        wordcount         :"$EXAMPLES/text $EXAMPLES_OUT/wordcount",
        multifilewc       :"$EXAMPLES/text $EXAMPLES_OUT/multifilewc",
//        aggregatewordcount:"$EXAMPLES/text $EXAMPLES_OUT/aggregatewordcount 5 textinputformat",
//        aggregatewordhist :"$EXAMPLES/text $EXAMPLES_OUT/aggregatewordhist 5 textinputformat",
        grep              :"$EXAMPLES/text $EXAMPLES_OUT/grep '[Cc]uriouser'",
        sleep             :"-m 10 -r 10",
        secondarysort     :"$EXAMPLES/ints $EXAMPLES_OUT/secondarysort",
        randomtextwriter  :"-Dtest.randomtextwrite.total_bytes=1073741824 $EXAMPLES_OUT/randomtextwriter"
    ];


Here is the YAML content that has all test cases covered.
 

# Test case "calculate pi": runs the hadoop-examples `pi` program with the
# arguments `5 5`. The command's output is checked with SubstringComparator
# against the output of the comparator command `echo "Pi is 3.68"`.
- !!org.apache.bigtop.itest.hadoopexamples.BigTopIntegrationTest
  integrationTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop jar $HADOOP_HOME/hadoop-examples-*.jar pi 5 5,
    commandComparator: echo "Pi is 3.68", comparatorClass: org.apache.hadoop.cli.util.SubstringComparator}
  postTestCommandList: []
  preTestCommandList: []
  testDesc: calculate pi using hadoop MR
  testName: calculate pi
# Test case "count word in MR": word count over a page downloaded with curl.
#   preTestCommandList  - recreates ./wordcount, downloads the page, strips
#                         markup with sed, and uploads the text to HDFS /wordcount.
#   integrationTestCommandList - runs the `wordcount` example, copies the
#                         result to ./wordcount_out, then removes the HDFS dirs.
#   postTestCommandList - extracts the number on the "Roman" line of the result
#                         and compares it with `grep -c Roman` on the local input.
# NOTE(review): the keys are listed alphabetically (post before pre); the
# execution order is presumably decided by the test class, not by this file.
# NOTE(review): this case needs network access to the URL below.
- !!org.apache.bigtop.itest.hadoopexamples.BigTopIntegrationTest
  integrationTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop jar $HADOOP_HOME/hadoop-examples-*.jar wordcount /wordcount /wordcount_out,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: mkdir ./wordcount_out,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -get /wordcount_out/* ./wordcount_out,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -rmr /wordcount,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -rmr /wordcount_out/,
    commandComparator: null, comparatorClass: null}
  postTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: 'cat ./wordcount_out/*
      | grep  Roman | sed ''s/[^0-9.]*\([0-9.]*\).*/\1/''', commandComparator: cat wordcount/* | grep -c Roman,
    comparatorClass: org.apache.bigtop.itest.hadoopexamples.ExtactComparatorIgnoreWhiteSpace}
  preTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: rm -rf ./wordcount,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: rm -rf ./wordcount_out,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: mkdir ./wordcount,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: 'curl http://www.meetup.com/HandsOnProgrammingEvents/events/53837022/
      | sed -e :a -e ''s/<[^>]*>//g;/</N;//ba'' | sed ''s/&nbsp//g'' | sed ''s/^[
      \t]*//;s/[ \t]*$//''  | sed ''/^$/d'' | sed ''/"http[^"]*"/d'' > ./wordcount/content',
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -mkdir /wordcount,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -put ./wordcount/* /wordcount,
    commandComparator: null, comparatorClass: null}
  testDesc: count word in Hadoop MR
  testName: count word in MR
# Test case "countword in MR": removes any previous examples-output/wordcount,
# then runs the `wordcount` example over examples/text. The postTest command
# sums (via bc) the counts on the result lines matching "Commission" and
# compares that sum with `grep -c "Commission"` run over the input files.
- !!org.apache.bigtop.itest.hadoopexamples.BigTopIntegrationTest
  integrationTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -rmr examples-output/wordcount,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop jar $HADOOP_HOME/hadoop-examples-*.jar wordcount examples/text examples-output/wordcount,
    commandComparator: null, comparatorClass: null}
  postTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: 'hadoop
      fs -cat  examples-output/wordcount/part* | grep "Commission" | sed ''s/[^0-9]*\([0-9]\+\).*/\1/''
      | tr ''\n'' '' '' | sed "s/\(^.*$\)/\1\n/" | sed ''s/^[[:space:]]*//;s/[[:space:]]*$//''
      | sed -e ''s/ /+/g'' | bc', commandComparator: hadoop fs -cat  examples/text/* | grep -c "Commission",
    comparatorClass: org.apache.bigtop.itest.hadoopexamples.ExtactComparatorIgnoreWhiteSpace}
  preTestCommandList: []
  testDesc: countword in TestHadoopExamples.groovy
  testName: countword in MR
# Test case "multifilewc test in MR": removes any previous
# examples-output/multifilewc, then runs the `multifilewc` example over
# examples/text. The postTest command sums (via bc) the counts on the result
# lines matching "Ambassadors" and compares that sum with
# `grep -c "Ambassadors"` run over the input files.
- !!org.apache.bigtop.itest.hadoopexamples.BigTopIntegrationTest
  integrationTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -rmr examples-output/multifilewc,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop jar $HADOOP_HOME/hadoop-examples-*.jar multifilewc examples/text examples-output/multifilewc,
    commandComparator: null, comparatorClass: null}
  postTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: 'hadoop
      fs -cat  examples-output/multifilewc/part* | grep "Ambassadors" | sed ''s/[^0-9]*\([0-9]\+\).*/\1/''
      | tr ''\n'' '' '' | sed "s/\(^.*$\)/\1\n/" | sed ''s/^[[:space:]]*//;s/[[:space:]]*$//''
      | sed -e ''s/ /+/g'' | bc', commandComparator: hadoop fs -cat  examples/text/* | grep -c "Ambassadors",
    comparatorClass: org.apache.bigtop.itest.hadoopexamples.ExtactComparatorIgnoreWhiteSpace}
  preTestCommandList: []
  testDesc: multifilewcin TestHadoopExamples.groovy
  testName: multifilewc test in MR
# Test case "grep in MR": removes any previous examples-output/grep, then runs
# the `grep` example over examples/text with the pattern '[Cc]uriouser'.
# The postTest command strips the digits from the result, rewrites
# "Curiouser" as "curiouser", and compares what is left with the output of
# `echo "curiousercuriouser"`.
- !!org.apache.bigtop.itest.hadoopexamples.BigTopIntegrationTest
  integrationTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop fs -rmr examples-output/grep,
    commandComparator: null, comparatorClass: null}
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: 'hadoop
      jar $HADOOP_HOME/hadoop-examples-*.jar grep examples/text examples-output/grep   ''[Cc]uriouser''',
    commandComparator: null, comparatorClass: null}
  postTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: 'hadoop
      fs -cat examples-output/grep/part* | sed ''s/[0-9]*//g'' | sed ''s/Curiouser/curiouser/g''',
    commandComparator: echo "curiousercuriouser", comparatorClass: org.apache.bigtop.itest.hadoopexamples.ExtactComparatorIgnoreWhiteSpace}
  preTestCommandList: []
  testDesc: grep in TestHadoopExamples.groovy
  testName: grep in MR
# Test case "sleep in MR": runs the `sleep` example with `-m 10 -r 10`.
# No comparator is configured, so there is no expected output to compare with.
- !!org.apache.bigtop.itest.hadoopexamples.BigTopIntegrationTest
  integrationTestCommandList:
  - !!org.apache.bigtop.itest.hadoopexamples.BigTopTestCommand {command: hadoop jar $HADOOP_HOME/hadoop-examples-*.jar sleep -m 10 -r 10,
    commandComparator: null, comparatorClass: null}
  postTestCommandList: []
  preTestCommandList: []
  testDesc: sleep in TestHadoopExamples.groovy
  testName: sleep in MR





Here is the content of TestRunHadoopExamples.groovy:
 


/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.bigtop.itest.hadoopexamples

import java.util.Map;

import org.junit.Ignore
import org.junit.Test
import org.junit.runner.RunWith;
import org.junit.BeforeClass
import org.junit.runners.Parameterized.Parameters;

import static org.junit.Assert.assertTrue
import static org.junit.Assert.assertNotNull


import org.apache.bigtop.itest.junit.OrderedParameterized;
import org.apache.bigtop.itest.shell.Shell;


import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log

import org.apache.hadoop.cli.util.ComparatorBase

import org.apache.bigtop.itest.junit.OrderedParameterized
import org.junit.runners.Parameterized.Parameters
import org.junit.runner.RunWith

import org.yaml.snakeyaml.Yaml

import org.apache.hadoop.conf.Configuration
import org.apache.bigtop.itest.JarContent



/**
 * Data-driven JUnit test for the Hadoop example jobs.
 *
 * Test cases are read from the YAML file named by TEST_CASE_FILE_NAME; every
 * entry becomes one parameterized run of testHadoopMapReduceExample(). A test
 * case is a list of shell commands. A command may carry a comparator: a second
 * shell command whose output is taken as the expected output, plus the name of
 * the ComparatorBase implementation used to compare actual and expected.
 */
@RunWith(OrderedParameterized.class)
class TestRunHadoopExamples {

 static private Log LOG = LogFactory.getLog(TestRunHadoopExamples.class);
 // Relative path: resolved against the working directory of the test run.
 static private String TEST_CASE_FILE_NAME = "./bigtop-testcases.yaml";
 //static private String TEST_CASE_FILE_NAME = "/home/ubuntu/bigtop/bigtop-tests/test-execution/smokes/hadoop/bigtop-testcases.yaml";
 static private String TEST_CASE_NAME_PREFIX = 'TestRunHadoopExamples';

 // One shell shared by setUp() and every test case.
 static private Shell sh = new Shell("/bin/bash -s");

 private static final String HADOOP_HOME = System.getenv('HADOOP_HOME');
 private static final String HADOOP_CONF_DIR = System.getenv('HADOOP_CONF_DIR');
 private static String hadoopExamplesJar;
 static {
  assertNotNull("HADOOP_HOME has to be set to run this test", HADOOP_HOME);
  assertNotNull("HADOOP_CONF_DIR has to be set to run this test", HADOOP_CONF_DIR);
  // Looked up only after HADOOP_HOME has been checked, so that a missing
  // variable is reported by the assertion above instead of by the lookup.
  hadoopExamplesJar = JarContent.getJarName(HADOOP_HOME, 'hadoop.*examples.*.jar');
  assertNotNull("Can't find hadoop-examples.jar file", hadoopExamplesJar);
 }
 private static Configuration conf;
 private static String HADOOP_OPTIONS;
 private static final String EXAMPLES = "examples";
 private static final String EXAMPLES_OUT = "examples-output";

 // Generated name of this parameterized run and the YAML dump of its test case.
 private String testName;
 private String testCaseString;

 /**
  * Removes one leading "[" and one trailing "]" when both are present.
  *
  * @param str text to strip; may be null
  * @return str without the surrounding brackets, or str unchanged when it is
  *         null, shorter than two characters, or not bracketed on both ends
  */
 private String stripOutLeadingBracket (String str) {
  if (str == null || str.length() < 2) return str;
  boolean bracketed = str.startsWith("[") && str.endsWith("]");
  return bracketed ? str.substring(1, str.length() - 1) : str;
 }

 /**
  * Reads the YAML test case file.
  *
  * @param fileName path of the YAML file
  * @return whatever the YAML document deserializes to; expected to be the
  *         list of test cases
  */
 private static List<BigTopIntegrationTest> loadBigTopIntegrationTestCases (String fileName) {
  String fileContents = new File(fileName).text
  // NOTE(review): the test case file uses "!!" class tags, which Yaml.load()
  // turns into object instantiations -- only load files from a trusted source.
  List<BigTopIntegrationTest> testCases = new Yaml().load(fileContents)
  return testCases
 }

 /**
  * Prepares HDFS for the examples: removes previous $EXAMPLES and
  * $EXAMPLES_OUT directories if they exist, uploads the local $EXAMPLES
  * directory and creates the output directory.
  *
  * Skipped entirely when the system property bigtop.itest.skip.setup is set
  * to a non-empty value.
  */
 @BeforeClass
 static void setUp() {
  String skipSetup = System.getProperty('bigtop.itest.skip.setup');
  if (skipSetup!=null && skipSetup.length()>0)
   return;

  LOG.info("Start setUp")
  conf = new Configuration();
  conf.addResource('mapred-site.xml');
  HADOOP_OPTIONS = "-fs ${conf.get('fs.default.name')} -jt ${conf.get('mapred.job.tracker')}";
  // Unpack resource
  JarContent.unpackJarContainer(TestRunHadoopExamples.class, '.' , null)

  sh.exec("hadoop fs $HADOOP_OPTIONS -test -e $EXAMPLES");
  if (sh.getRet() == 0) {
   sh.exec("hadoop fs $HADOOP_OPTIONS -rmr -skipTrash $EXAMPLES");
   assertTrue("Deletion of previous $EXAMPLES from HDFS failed", sh.getRet() == 0);
  }
  sh.exec("hadoop fs $HADOOP_OPTIONS -test -e $EXAMPLES_OUT");
  if (sh.getRet() == 0) {
   sh.exec("hadoop fs $HADOOP_OPTIONS -rmr -skipTrash $EXAMPLES_OUT");
   assertTrue("Deletion of previous examples output from HDFS failed", sh.getRet() == 0);
  }

  // copy test files to HDFS
  sh.exec("hadoop fs $HADOOP_OPTIONS -put $EXAMPLES $EXAMPLES",
    "hadoop fs $HADOOP_OPTIONS -mkdir $EXAMPLES_OUT");
  assertTrue("Could not create output directory", sh.getRet() == 0);
 }

 /**
  * Builds the parameter sets for the parameterized runner, one per test case
  * in the YAML file.
  *
  * @return map from generated case name to the constructor arguments
  *         [case name, YAML dump of the test case]
  */
 @Parameters
 public static Map<String, Object[]> generateTests() {
  Map<String, Object[]> res = [:];
  List<BigTopIntegrationTest> testList = loadBigTopIntegrationTestCases (TEST_CASE_FILE_NAME);
  int count=1;

  for (BigTopIntegrationTest test : testList) {
   def nowCal = Calendar.instance
   // The running counter keeps names unique even within the same second.
   String casename = "$TEST_CASE_NAME_PREFIX-$nowCal.time-$count"
   // The test case travels to the constructor as a YAML string and is
   // parsed again in testHadoopMapReduceExample().
   Object[] args = [ casename, new Yaml().dump(test) ]
   res.put( casename, args)
   count++;
  }
  return res;
 }

 /**
  * @param name       generated test case name
  * @param testDetail YAML dump of the BigTopIntegrationTest to run
  */
 public TestRunHadoopExamples (String name, String testDetail ) {
  testName = name;
  testCaseString = testDetail;
  displayMessage (["Test case name - $testName, args - $testCaseString"], false)
 }

 /**
  * Logs every element of message.
  *
  * @param message collection of messages; null is ignored
  * @param error   true to log at ERROR level, false to log at INFO level
  */
 private void displayMessage (def message, boolean error) {
  if (message == null) return;
  if (error)
   message.each() { LOG.error "${it}" };
  else
   message.each() { LOG.info "${it}" };
 }

 /**
  * Runs every command of a test case in order.
  *
  * A command without a comparator is executed and its result only logged; a
  * non-zero return code does not fail the test case. A command with a
  * comparator fails the test case when the comparator class cannot be
  * obtained, when there is no output to compare, or when the comparison
  * itself fails. Remaining commands are still executed after a failure.
  *
  * @param test the test case to execute
  * @return true when no comparator check failed
  */
 public boolean runExample(BigTopIntegrationTest test) {
  boolean success = true

  for ( BigTopTestCommand command: test.getCommandList() ) {
   displayMessage (["Shell command ["  + command.getCommand() + "]"], false);
   sh.exec(command.getCommand());
   // NOTE(review): the String assignment presumably coerces a list of output
   // lines into its bracketed toString() form, which is why the brackets are
   // stripped before comparing -- confirm against Shell.getOut().
   String stdout = sh.getOut();
   def commandRet = sh.getRet()
   String shStdErr = sh.getErr();

   boolean hasComparator = command.getComparatorClass() != null &&
     command.getCommandComparator() != null &&
     command.getCommandComparator().trim().length() > 0

   if (!hasComparator) {
    // Nothing to compare against: report the outcome and move on.
    def resultDisplay = (commandRet==0) ? ["Command return code - $commandRet, Output - $stdout" ] : ["Command return code - $commandRet,  Output - $stdout, Error output is $shStdErr" ]
    displayMessage (resultDisplay, false)
    continue
   }

   displayMessage (["ComparatorClass - " + command.getComparatorClass(),
     "CommandComparator - " + command.getCommandComparator(),
     "Shell CommandComparator ["  + command.getCommandComparator() + "]"], false);

   // The comparator command's output is the expected output.
   sh.exec(command.getCommandComparator());
   String expectedOutput = sh.getOut();
   // Read the return code again: it now belongs to the comparator command.
   def comparatorRet = sh.getRet()
   displayMessage (["CommandComparator return code is $comparatorRet, Output is $expectedOutput"], false);

   String comparatorClassName = command.getComparatorClass();
   ComparatorBase compare = BigTopIntegrationTestFacade.getComparatorClass(comparatorClassName);

   boolean commandPassed = false
   def resultDisplay = []
   if (compare == null) {
    resultDisplay.add("Error! No such ComparatorClass - $comparatorClassName");
   } else if (stdout.length() < 2 || expectedOutput.length() < 2) {
    resultDisplay.add("Error! No output to compare. ");
   } else {
    commandPassed = compare.compare( stripOutLeadingBracket (stdout) , stripOutLeadingBracket(expectedOutput) );
    resultDisplay.add(commandPassed ?
      "SUCCESS! actual output - $stdout, expected - $expectedOutput, compare class - $comparatorClassName" :
      "FAIL! actual output - $stdout,  expected - $expectedOutput, compare class - $comparatorClassName")
   }

   if (!commandPassed) success = false
   // Log at ERROR level only when this particular command failed.
   displayMessage (resultDisplay, !commandPassed);
  }

  return success
 }

 /**
  * Parses this run's test case from its YAML string, logs it, executes it and
  * fails when any comparator check failed.
  */
 @Test
 public void testHadoopMapReduceExample() {
  LOG.info( "testHadoopMapReduceExample() - " + testName);

  BigTopIntegrationTest test = new Yaml().load(testCaseString)

  LOG.info("Test case name [" + test.getTestName() + "]");
  LOG.info("Test case description [" + test.getTestDesc() + "]");
  LOG.info("Test case details - " + test.toString());

  // The message is only shown when the assertion fails.
  assertTrue("Test case [" + test.getTestName() + "] failed, see the log for the failing command", runExample(test));
 }

 /** Intentionally empty entry point. */
 public static void main(String[] args) {

 }

}

Enjoy the journey!

No comments:

Post a Comment