Previous detail | Up | Next detail
Step 2.1 | Step 2 overview | Step 2.3

Step 2.2: Compute value-frequency statistics (Optional)

This step is optional, but useful. It describes how to compute some statistics for the value-frequency files that were just created. The statistics are relatively primitive, but they are the basis for JUnit test cases that can catch some errors that would be hard to detect otherwise. Statistics for value-frequency files can be generated with a script named create_file_statistics.sh. This script is located in the scripts directory of the adatagenerator-project project. Usage of the script is explained in the body of the script file itself.

For example, in the case of the LastName.csv file, the following file is generated:

package net.sf.adatagenerator.ex.cdc1.bean.resources;

import net.sf.adatagenerator.util.FrequencyBasedListStatistics;

/**
 * Statistics for the LastName.csv value-frequency file, as produced by the
 * create_file_statistics.sh script.
 *
 * The class only declares constants and hands them to the
 * FrequencyBasedListStatistics constructor. It is shown here exactly as
 * generated, i.e. with a raw (unparameterized) superclass type.
 */
public class LastName extends FrequencyBasedListStatistics {

  // Presumably the first and last values of the file in sort order -- confirm
  // against FrequencyBasedListStatistics.
  public static final String MIN_VALUE = "ABBOTT" ;
  public static final String MAX_VALUE = "YUNG" ;
  // Presumably: how many distinct values share the lowest count, plus one
  // example of such a value -- confirm against the generator script.
  public static final int COUNT_LEAST_FREQUENT_VALUES = 122 ;
  public static final String A_LEAST_FREQUENT_VALUE = "HOLMES WILKES" ;
  // Presumably: how many distinct values share the highest count, plus one
  // example of such a value -- confirm against the generator script.
  public static final int COUNT_MOST_FREQUENT_VALUES = 1 ;
  public static final String A_MOST_FREQUENT_VALUE = "KIM" ;
  // Presumably the number of distinct values in the file -- confirm.
  public static final int NUMBER_OF_VALUES = 264 ;
  // Presumably the smallest and largest per-value counts in the file -- confirm.
  public static final int LOWEST_COUNT = 1 ;
  public static final int HIGHEST_COUNT = 16 ;
  // Presumably the total of all per-value counts in the file -- confirm.
  public static final int SUM_OF_COUNTS = 550 ;

  /** Passes the generated constants, in declaration order, to the base class. */
  public LastName() {
    super( MIN_VALUE, MAX_VALUE, COUNT_LEAST_FREQUENT_VALUES,
      A_LEAST_FREQUENT_VALUE, COUNT_MOST_FREQUENT_VALUES, A_MOST_FREQUENT_VALUE,
      NUMBER_OF_VALUES, LOWEST_COUNT, HIGHEST_COUNT, SUM_OF_COUNTS);
  }
}

This Java file and the statistics files for the other value-frequency files should be placed in the src/main/java/net/sf/adatagenerator/ex/cdc1/bean/resources directory. In order to compile at all, the maven configuration file, pom.xml, for the example project needs to have a dependency added on the adatagenerator-project:

adg-cdc-example/pom.xml
=======================
<?xml version="1.0"?>
 ...
    <!-- new dependency -->
    <dependency>
        <groupId>net.sf.adatagenerator</groupId>
        <artifactId>adatagenerator-project</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <type>pom</type>
    </dependency>
    <!-- END new dependency -->
  </dependencies>
</project>

After this change to the pom.xml file, the generated files will cause compiler warnings about unparameterized raw types. To eliminate these warnings, the generated Java classes should be changed by adding the String type as a parameter to the base class FrequencyBasedListStatistics. Just one line needs to be changed:

    public class LastName extends FrequencyBasedListStatistics {

changes to

   public class LastName extends FrequencyBasedListStatistics<String> {

With this change to each statistics class, the statistics classes can be used to create very concise JUnit tests:

package net.sf.adatagenerator.ex.cdc1.bean.resources;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import junit.framework.TestCase;
import net.sf.adatagenerator.api.CreationException;
import net.sf.adatagenerator.ex.cdc1.util.Util;
import net.sf.adatagenerator.util.FrequencyBasedList;
import net.sf.adatagenerator.util.FrequencyBasedListFactory;
import net.sf.adatagenerator.util.FrequencyBasedListStatistics;
import net.sf.adatagenerator.util.StringFrequencyFile;

public class FrequencyBasedList_Statistics_Test extends TestCase {

  @SuppressWarnings("serial")
  Map<String, Class<? extends FrequencyBasedListStatistics<String>>> tests = new HashMap<String, Class<? extends FrequencyBasedListStatistics<String>>>() {
    {
      put("DOB.csv", DOB.class);
      put("FirstName.csv", FirstName.class);
      put("LastName.csv", LastName.class);
      put("MiddleName.csv", MiddleName.class);
      put("MomFirst.csv", MomFirst.class);
      put("MomLast.csv", MomLast.class);
      put("MomMaiden.csv", MomMaiden.class);
      put("MomMiddle.csv", MomMiddle.class);
      put("Sex.csv", Sex.class);
      put("Suffix.csv", Suffix.class);
      put("VacCode.csv", VacCode.class);
      put("VacDate.csv", VacDate.class);
      put("VacMfr.csv", VacMfr.class);
      put("VacName.csv", VacName.class);
    }
  };

  public void testStatistics() {
    List<String> failedTests = new ArrayList<String>();
    List<Exception> exceptions = new ArrayList<Exception>();
    for (String resourceBaseName : tests.keySet()) {
      try {
        Class<? extends FrequencyBasedListStatistics<String>> clz = tests.get(resourceBaseName);
        testStatistics(resourceBaseName, clz);
      } catch (Exception x) {
        failedTests.add(resourceBaseName);
        exceptions.add(x);
      }
    }
    if (!exceptions.isEmpty()) {
      StringWriter sw = new StringWriter();
      PrintWriter pw = new PrintWriter(sw);
      pw.println("Failed tests: " + failedTests.toString());
      for (Exception e : exceptions) {
        pw.println(e.toString());
        e.printStackTrace(pw);
        pw.println();
      }
      fail(sw.toString());
    }
  }

  public void testStatistics(String resourceBaseName,
      Class<? extends FrequencyBasedListStatistics<String>> clz) {

    FrequencyBasedListStatistics<String> fbls = null;
    try {
      fbls = clz.newInstance();
    } catch (Exception e1) {
      fail(e1.toString());
    }
    assertTrue(fbls != null);

    FrequencyBasedList<String> fbl = null;
    try {
      ClassLoader cl = Util.class.getClassLoader();
      String fqrn = Util.getFullyQualifiedDataName(resourceBaseName);
      FrequencyBasedListFactory<String> factory = new StringFrequencyFile(cl, fqrn);
      fbl = factory.createFrequencyBasedList();
    } catch (CreationException e) {
      fail(e.toString());
    }
    assertTrue(fbl != null);

    assertTrue(fbl.getLeastFrequentValues().contains(fbls.getaLeastFrequentValue()));
    assertTrue(fbl.getMostFrequentValues().contains(fbls.getaMostFrequentValue()));
    assertTrue((fbls.getMaxValue() == null && fbl.getMaximumValue() == null)
        || fbls.getMaxValue().equals(fbl.getMaximumValue()));
    assertTrue((fbls.getMinValue() == null && fbl.getMinimumValue() == null)
        || fbls.getMinValue().equals(fbl.getMinimumValue()));
    assertTrue(fbls.getCountLeastFrequentValues() == fbl.getLeastFrequentValues().size());
    assertTrue(fbls.getCountMostFrequentValues() == fbl.getMostFrequentValues().size());
    assertTrue(fbls.getSumOfCounts() == fbl.size());
    Map<String, Integer> relativeValues = fbl.getRelativeFrequencies();
    assertTrue(fbls.getLowestCount() == relativeValues.get(fbls.getaLeastFrequentValue()));
    assertTrue(fbls.getHighestCount() == relativeValues.get(fbls.getaMostFrequentValue()));

  }

}

Previous detail | Up | Next detail
Step 2.1 | Step 2 overview | Step 2.3