File | Line |
---|
net/sf/adatagenerator/febrl/mutators/OCRTransformer.java | 63 |
net/sf/adatagenerator/febrl/mutators/PhoneticTransformer.java | 62 |
for (String[] line : ocrDict) {
position = line[0];
orgPattern = line[1];
newPattern = line[2];
if (line.length > 3) {
preConditions = line[3];
}
if (line.length > 4) {
postConditions = line[4];
}
if (line.length > 5) {
necessoryConditions = line[5];
}
if (line.length > 6) {
startConditions = line[6];
}
if (matchPattern((String) value, position.trim(),
orgPattern.trim(), preConditions.trim(),
postConditions.trim(), necessoryConditions.trim(),
startConditions.trim())) {
modifiedValue = ((String) value).replaceAll(
orgPattern.trim(), newPattern.trim());
modifiedValue = modifiedValue.replaceAll("@", " ");
logger.info("Phonetically Transforming " + (String) value
+ " to " + modifiedValue);
}
}
return modifiedValue;
} else {
return (String) value;
}
}
/**
 * Decides whether a transformation rule applies to {@code subject}.
 *
 * <p>Three pre-checks run first, in this order and short-circuiting on the
 * first failure: {@code orgPattern} must occur somewhere in the subject,
 * {@link #verifyNecessoryConditions} must hold, and
 * {@link #verifyStartCondition} must hold. Only then is the full regular
 * expression built by {@code formPattern} and matched against the whole
 * subject.
 *
 * @param subject the value being tested
 * @param position position keyword handed to {@code formPattern}
 * @param orgPattern regular expression fragment to look for
 * @param preConditions pre-condition spec, trimmed before use
 * @param postConditions post-condition spec, trimmed before use
 * @param necessoryConditions necessary-condition spec, trimmed before use
 * @param startConditions start-condition spec, trimmed before use
 * @return {@code true} only if every pre-check passes and the assembled
 *         pattern matches the entire subject
 */
public boolean matchPattern(String subject, String position,
        String orgPattern, String preConditions, String postConditions,
        String necessoryConditions, String startConditions) {
    // Cheap filters; && keeps the evaluation order and short-circuiting.
    boolean candidate = Pattern.matches(".*" + orgPattern + ".*", subject)
            && verifyNecessoryConditions(subject, necessoryConditions.trim())
            && verifyStartCondition(subject, startConditions.trim());
    if (!candidate) {
        return false;
    }
    String assembled = formPattern(subject, position, orgPattern,
            preConditions.trim(), postConditions.trim());
    return Pattern.matches(assembled, subject);
}
/**
 * Checks that {@code subject} satisfies every necessary condition.
 *
 * <p>{@code necessoryConditions} is either the literal {@code "None"}
 * (case-insensitive, surrounding whitespace ignored), meaning there is
 * nothing to check, or a list of regular expressions separated by
 * {@code '|'}. Each expression must match the entire subject.
 *
 * @param subject the value being tested
 * @param necessoryConditions {@code "None"} or a {@code '|'}-separated
 *        list of regular expressions
 * @return {@code true} if there are no conditions or all of them match;
 *         {@code false} as soon as one does not
 */
public boolean verifyNecessoryConditions(String subject,
        String necessoryConditions) {
    if (necessoryConditions.trim().equalsIgnoreCase("None")) {
        return true;
    }
    // String.split takes a regex: a bare "|" is an alternation of two empty
    // patterns and splits between every character, so the pipe is escaped.
    for (String condition : necessoryConditions.split("\\|")) {
        if (!Pattern.matches(condition, subject)) {
            return false;
        }
    }
    return true;
}
/**
 * Evaluates a rule's start conditions against {@code subject}.
 *
 * <p>{@code "None"} (case-insensitive, surrounding whitespace ignored)
 * means no conditions. Otherwise the first character is a flag, the
 * character at index 1 is skipped, and the remainder is a
 * {@code ';'}-separated list of regular expressions, each matched against
 * the entire subject. With flag {@code 'y'} every expression must match;
 * with any other flag none of them may match.
 *
 * @param subject the value being tested
 * @param startConditions {@code "None"} or a flag-prefixed condition list
 * @return {@code true} if the conditions are absent or all satisfied
 */
public boolean verifyStartCondition(String subject, String startConditions) {
    if (startConditions.trim().equalsIgnoreCase("None")) {
        return true;
    }
    final boolean mustMatch = startConditions.charAt(0) == 'y';
    final String[] expressions = startConditions.substring(2).split(";");
    for (String expression : expressions) {
        // A condition passes only when "matches" agrees with "must match".
        if (Pattern.matches(expression, subject) != mustMatch) {
            return false;
        }
    }
    return true;
}
public String formPattern(String subject, String position,
String orgPattern, String preConditions, String postConditions) {
if (postConditions.trim().equalsIgnoreCase("V")) {
postConditions = "y;1;a;e;i;o;u";
} else if (postConditions.trim().equalsIgnoreCase("C")) {
postConditions = "n;1;a;e;i;o;u";
}
LinkedList<String> patternList = new LinkedList<String>();
patternList.add(orgPattern);
if (!postConditions.trim().equalsIgnoreCase("None")) {
String[] postCondition = postConditions.trim().split("\\|");
for (String subPostCondition : postCondition) {
if (subPostCondition.length() > 0) {
String[] tokens = subPostCondition.split(";");
int pos = Integer.parseInt(tokens[1]);
for (int i = 1; i <= pos; i++) {
if (i == pos) {
String matchCharacters = "[";
for (String token : Arrays.asList(tokens).subList(
2, tokens.length)) {
matchCharacters += token;
}
matchCharacters += "]";
if (tokens[0].equalsIgnoreCase("n")) {
matchCharacters.replace("[", "[^");
}
patternList.addLast(matchCharacters);
} else {
patternList.addLast(".");
}
}
}
}
if (preConditions.trim().equalsIgnoreCase("V")) {
preConditions = "y;-1;a;e;i;o;u";
} else if (preConditions.trim().equalsIgnoreCase("C")) {
preConditions = "n;-1;a;e;i;o;u";
}
if (!preConditions.trim().equalsIgnoreCase("None")) {
String[] preRequisites = preConditions.split("\\|");
for (String preRequisite : preRequisites) {
String[] tokens = preRequisite.split(";");
int pos = Integer.parseInt(tokens[1]);
for (int i = -1; i >= pos; i--) {
if (i == pos) {
String matchCharacters = "[";
for (String token : Arrays.asList(tokens).subList(
2, tokens.length)) {
matchCharacters += token;
}
matchCharacters += "]";
if (tokens[0].equalsIgnoreCase("n")) {
matchCharacters.replace("[", "[^");
}
patternList.addFirst(matchCharacters);
} else {
patternList.addFirst(".");
}
}
}
}
}
if (position.equals("start")) {
patternList.addFirst("^");
} else if (position.equalsIgnoreCase("end")) {
patternList.addLast("$");
} else if (position.equalsIgnoreCase("all")) {
patternList.addFirst(".*");
patternList.addLast(".*");
}
ListIterator<String> itr = patternList.listIterator();
String pattern = "";
while (itr.hasNext())
{
pattern += itr.next();
}
logger.info("using pattern:" + pattern); |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/OCRTransformer.java | 81 |
net/sf/adatagenerator/febrl/modifiers/PhoneticTransformer.java | 83 |
+ "' to '" + retVal + "'");
}
}
}
return retVal;
}
/**
 * Decides whether a transformation rule applies to {@code subject}.
 *
 * <p>Three pre-checks run first, in this order and short-circuiting on the
 * first failure: {@code orgPattern} must occur somewhere in the subject,
 * {@link #verifyNecessoryConditions} must hold, and
 * {@link #verifyStartCondition} must hold. Only then is the full regular
 * expression built by {@code formPattern} and matched against the whole
 * subject.
 *
 * @param subject the value being tested
 * @param position position keyword handed to {@code formPattern}
 * @param orgPattern regular expression fragment to look for
 * @param preConditions pre-condition spec, trimmed before use
 * @param postConditions post-condition spec, trimmed before use
 * @param necessoryConditions necessary-condition spec, trimmed before use
 * @param startConditions start-condition spec, trimmed before use
 * @return {@code true} only if every pre-check passes and the assembled
 *         pattern matches the entire subject
 */
public boolean matchPattern(String subject, String position,
        String orgPattern, String preConditions, String postConditions,
        String necessoryConditions, String startConditions) {
    // Cheap filters; && keeps the evaluation order and short-circuiting.
    boolean candidate = Pattern.matches(".*" + orgPattern + ".*", subject)
            && verifyNecessoryConditions(subject, necessoryConditions.trim())
            && verifyStartCondition(subject, startConditions.trim());
    if (!candidate) {
        return false;
    }
    String assembled = formPattern(subject, position, orgPattern,
            preConditions.trim(), postConditions.trim());
    return Pattern.matches(assembled, subject);
}
/**
 * Checks that {@code subject} satisfies every necessary condition.
 *
 * <p>{@code necessoryConditions} is either the literal {@code "None"}
 * (case-insensitive, surrounding whitespace ignored), meaning there is
 * nothing to check, or a list of regular expressions separated by
 * {@code '|'}. Each expression must match the entire subject.
 *
 * @param subject the value being tested
 * @param necessoryConditions {@code "None"} or a {@code '|'}-separated
 *        list of regular expressions
 * @return {@code true} if there are no conditions or all of them match;
 *         {@code false} as soon as one does not
 */
public boolean verifyNecessoryConditions(String subject,
        String necessoryConditions) {
    if (necessoryConditions.trim().equalsIgnoreCase("None")) {
        return true;
    }
    // String.split takes a regex: a bare "|" is an alternation of two empty
    // patterns and splits between every character, so the pipe is escaped.
    for (String condition : necessoryConditions.split("\\|")) {
        if (!Pattern.matches(condition, subject)) {
            return false;
        }
    }
    return true;
}
/**
 * Evaluates a rule's start conditions against {@code subject}.
 *
 * <p>{@code "None"} (case-insensitive, surrounding whitespace ignored)
 * means no conditions. Otherwise the first character is a flag, the
 * character at index 1 is skipped, and the remainder is a
 * {@code ';'}-separated list of regular expressions, each matched against
 * the entire subject. With flag {@code 'y'} every expression must match;
 * with any other flag none of them may match.
 *
 * @param subject the value being tested
 * @param startConditions {@code "None"} or a flag-prefixed condition list
 * @return {@code true} if the conditions are absent or all satisfied
 */
public boolean verifyStartCondition(String subject, String startConditions) {
    if (startConditions.trim().equalsIgnoreCase("None")) {
        return true;
    }
    final boolean mustMatch = startConditions.charAt(0) == 'y';
    final String[] expressions = startConditions.substring(2).split(";");
    for (String expression : expressions) {
        // A condition passes only when "matches" agrees with "must match".
        if (Pattern.matches(expression, subject) != mustMatch) {
            return false;
        }
    }
    return true;
}
public String formPattern(String subject, String position,
String orgPattern, String preConditions, String postConditions) {
if (postConditions.trim().equalsIgnoreCase("V")) {
postConditions = "y;1;a;e;i;o;u";
} else if (postConditions.trim().equalsIgnoreCase("C")) {
postConditions = "n;1;a;e;i;o;u";
}
LinkedList<String> patternList = new LinkedList<String>();
patternList.add(orgPattern);
if (!postConditions.trim().equalsIgnoreCase("None")) {
String[] postCondition = postConditions.trim().split("\\|");
for (String subPostCondition : postCondition) {
if (subPostCondition.length() > 0) {
String[] tokens = subPostCondition.split(";");
int pos = Integer.parseInt(tokens[1]);
for (int i = 1; i <= pos; i++) {
if (i == pos) {
String matchCharacters = "[";
for (String token : Arrays.asList(tokens).subList(
2, tokens.length)) {
matchCharacters += token;
}
matchCharacters += "]";
if (tokens[0].equalsIgnoreCase("n")) {
matchCharacters.replace("[", "[^");
}
patternList.addLast(matchCharacters);
} else {
patternList.addLast(".");
}
}
}
}
if (preConditions.trim().equalsIgnoreCase("V")) {
preConditions = "y;-1;a;e;i;o;u";
} else if (preConditions.trim().equalsIgnoreCase("C")) {
preConditions = "n;-1;a;e;i;o;u";
}
if (!preConditions.trim().equalsIgnoreCase("None")) {
String[] preRequisites = preConditions.split("\\|");
for (String preRequisite : preRequisites) {
String[] tokens = preRequisite.split(";");
int pos = Integer.parseInt(tokens[1]);
for (int i = -1; i >= pos; i--) {
if (i == pos) {
String matchCharacters = "[";
for (String token : Arrays.asList(tokens).subList(
2, tokens.length)) {
matchCharacters += token;
}
matchCharacters += "]";
if (tokens[0].equalsIgnoreCase("n")) {
matchCharacters.replace("[", "[^");
}
patternList.addFirst(matchCharacters);
} else {
patternList.addFirst(".");
}
}
}
}
}
if (position.equals("start")) {
patternList.addFirst("^");
} else if (position.equalsIgnoreCase("end")) {
patternList.addLast("$");
} else if (position.equalsIgnoreCase("all")) {
patternList.addFirst(".*");
patternList.addLast(".*");
}
ListIterator<String> itr = patternList.listIterator();
String pattern = "";
while (itr.hasNext())
{
pattern += itr.next();
}
logger.finest("using pattern:" + pattern); |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/OCRTransformer.java | 86 |
net/sf/adatagenerator/febrl/modifiers/PhoneticTransformer.java | 88 |
net/sf/adatagenerator/febrl/mutators/OCRTransformer.java | 96 |
net/sf/adatagenerator/febrl/mutators/PhoneticTransformer.java | 96 |
}
/**
 * Decides whether a transformation rule applies to {@code subject}.
 *
 * <p>Three pre-checks run first, in this order and short-circuiting on the
 * first failure: {@code orgPattern} must occur somewhere in the subject,
 * {@link #verifyNecessoryConditions} must hold, and
 * {@link #verifyStartCondition} must hold. Only then is the full regular
 * expression built by {@code formPattern} and matched against the whole
 * subject.
 *
 * @param subject the value being tested
 * @param position position keyword handed to {@code formPattern}
 * @param orgPattern regular expression fragment to look for
 * @param preConditions pre-condition spec, trimmed before use
 * @param postConditions post-condition spec, trimmed before use
 * @param necessoryConditions necessary-condition spec, trimmed before use
 * @param startConditions start-condition spec, trimmed before use
 * @return {@code true} only if every pre-check passes and the assembled
 *         pattern matches the entire subject
 */
public boolean matchPattern(String subject, String position,
        String orgPattern, String preConditions, String postConditions,
        String necessoryConditions, String startConditions) {
    // Cheap filters; && keeps the evaluation order and short-circuiting.
    boolean candidate = Pattern.matches(".*" + orgPattern + ".*", subject)
            && verifyNecessoryConditions(subject, necessoryConditions.trim())
            && verifyStartCondition(subject, startConditions.trim());
    if (!candidate) {
        return false;
    }
    String assembled = formPattern(subject, position, orgPattern,
            preConditions.trim(), postConditions.trim());
    return Pattern.matches(assembled, subject);
}
/**
 * Checks that {@code subject} satisfies every necessary condition.
 *
 * <p>{@code necessoryConditions} is either the literal {@code "None"}
 * (case-insensitive, surrounding whitespace ignored), meaning there is
 * nothing to check, or a list of regular expressions separated by
 * {@code '|'}. Each expression must match the entire subject.
 *
 * @param subject the value being tested
 * @param necessoryConditions {@code "None"} or a {@code '|'}-separated
 *        list of regular expressions
 * @return {@code true} if there are no conditions or all of them match;
 *         {@code false} as soon as one does not
 */
public boolean verifyNecessoryConditions(String subject,
        String necessoryConditions) {
    if (necessoryConditions.trim().equalsIgnoreCase("None")) {
        return true;
    }
    // String.split takes a regex: a bare "|" is an alternation of two empty
    // patterns and splits between every character, so the pipe is escaped.
    for (String condition : necessoryConditions.split("\\|")) {
        if (!Pattern.matches(condition, subject)) {
            return false;
        }
    }
    return true;
}
/**
 * Evaluates a rule's start conditions against {@code subject}.
 *
 * <p>{@code "None"} (case-insensitive, surrounding whitespace ignored)
 * means no conditions. Otherwise the first character is a flag, the
 * character at index 1 is skipped, and the remainder is a
 * {@code ';'}-separated list of regular expressions, each matched against
 * the entire subject. With flag {@code 'y'} every expression must match;
 * with any other flag none of them may match.
 *
 * @param subject the value being tested
 * @param startConditions {@code "None"} or a flag-prefixed condition list
 * @return {@code true} if the conditions are absent or all satisfied
 */
public boolean verifyStartCondition(String subject, String startConditions) {
    if (startConditions.trim().equalsIgnoreCase("None")) {
        return true;
    }
    final boolean mustMatch = startConditions.charAt(0) == 'y';
    final String[] expressions = startConditions.substring(2).split(";");
    for (String expression : expressions) {
        // A condition passes only when "matches" agrees with "must match".
        if (Pattern.matches(expression, subject) != mustMatch) {
            return false;
        }
    }
    return true;
}
public String formPattern(String subject, String position,
String orgPattern, String preConditions, String postConditions) {
if (postConditions.trim().equalsIgnoreCase("V")) {
postConditions = "y;1;a;e;i;o;u";
} else if (postConditions.trim().equalsIgnoreCase("C")) {
postConditions = "n;1;a;e;i;o;u";
}
LinkedList<String> patternList = new LinkedList<String>();
patternList.add(orgPattern);
if (!postConditions.trim().equalsIgnoreCase("None")) {
String[] postCondition = postConditions.trim().split("\\|");
for (String subPostCondition : postCondition) {
if (subPostCondition.length() > 0) {
String[] tokens = subPostCondition.split(";");
int pos = Integer.parseInt(tokens[1]);
for (int i = 1; i <= pos; i++) {
if (i == pos) {
String matchCharacters = "[";
for (String token : Arrays.asList(tokens).subList(
2, tokens.length)) {
matchCharacters += token;
}
matchCharacters += "]";
if (tokens[0].equalsIgnoreCase("n")) {
matchCharacters.replace("[", "[^");
}
patternList.addLast(matchCharacters);
} else {
patternList.addLast(".");
}
}
}
}
if (preConditions.trim().equalsIgnoreCase("V")) {
preConditions = "y;-1;a;e;i;o;u";
} else if (preConditions.trim().equalsIgnoreCase("C")) {
preConditions = "n;-1;a;e;i;o;u";
}
if (!preConditions.trim().equalsIgnoreCase("None")) {
String[] preRequisites = preConditions.split("\\|");
for (String preRequisite : preRequisites) {
String[] tokens = preRequisite.split(";");
int pos = Integer.parseInt(tokens[1]);
for (int i = -1; i >= pos; i--) {
if (i == pos) {
String matchCharacters = "[";
for (String token : Arrays.asList(tokens).subList(
2, tokens.length)) {
matchCharacters += token;
}
matchCharacters += "]";
if (tokens[0].equalsIgnoreCase("n")) {
matchCharacters.replace("[", "[^");
}
patternList.addFirst(matchCharacters);
} else {
patternList.addFirst(".");
}
}
}
}
}
if (position.equals("start")) {
patternList.addFirst("^");
} else if (position.equalsIgnoreCase("end")) {
patternList.addLast("$");
} else if (position.equalsIgnoreCase("all")) {
patternList.addFirst(".*");
patternList.addLast(".*");
}
ListIterator<String> itr = patternList.listIterator();
String pattern = "";
while (itr.hasNext())
{
pattern += itr.next();
}
logger.finest("using pattern:" + pattern); |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/GivenNameMisspeller.java | 24 |
net/sf/adatagenerator/febrl/mutators/GivenNameMisspeller.java | 24 |
}
/**
 * Rebuilds {@code misSpelDict} from the reader obtained for
 * {@code RESOURCE_BASE_NAME}.
 *
 * <p>Empty lines and lines starting with {@code '#'} are skipped. Every
 * other line is split on {@code ':'}; lines yielding fewer than two parts
 * are ignored. The first part, trimmed and lower-cased, becomes the key;
 * the second part, trimmed and split on {@code ','}, becomes the value
 * set. A later line with the same key replaces the earlier entry.
 *
 * @throws IOException if reading the resource fails
 */
protected void loadData() throws IOException {
    misSpelDict = new HashMap<String, Set<String>>();
    BufferedReader reader = null;
    try {
        reader = FebrlGeneratorUtilities.getFebrlDataReader(RESOURCE_BASE_NAME);
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            // Skip blank lines and '#' comment lines.
            if (row.length() == 0 || row.charAt(0) == '#') {
                continue;
            }
            String[] parts = row.split(":");
            if (parts.length <= 1) {
                continue;
            }
            String correctSpelling = parts[0].trim().toLowerCase();
            String[] alternatives = parts[1].trim().split(",");
            misSpelDict.put(correctSpelling,
                    new HashSet<String>(Arrays.asList(alternatives)));
        }
    } finally {
        ResourceUtil.closeReader(reader);
    }
}
public String modifyValue(String value) { |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/OCRTransformer.java | 56 |
net/sf/adatagenerator/febrl/mutators/OCRTransformer.java | 63 |
for (String[] line : ocrDict) {
position = line[0];
orgPattern = line[1];
newPattern = line[2];
if (line.length > 3) {
preConditions = line[3];
}
if (line.length > 4) {
postConditions = line[4];
}
if (line.length > 5) {
necessoryConditions = line[5];
}
if (line.length > 6) {
startConditions = line[6];
}
if (matchPattern((String) value, position.trim(),
orgPattern.trim(), preConditions.trim(),
postConditions.trim(), necessoryConditions.trim(),
startConditions.trim())) { |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/OCRTransformer.java | 56 |
net/sf/adatagenerator/febrl/mutators/PhoneticTransformer.java | 62 |
for (String[] line : ocrDict) {
position = line[0];
orgPattern = line[1];
newPattern = line[2];
if (line.length > 3) {
preConditions = line[3];
}
if (line.length > 4) {
postConditions = line[4];
}
if (line.length > 5) {
necessoryConditions = line[5];
}
if (line.length > 6) {
startConditions = line[6];
}
if (matchPattern((String) value, position.trim(),
orgPattern.trim(), preConditions.trim(),
postConditions.trim(), necessoryConditions.trim(),
startConditions.trim())) { |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/PhoneticTransformer.java | 24 |
net/sf/adatagenerator/febrl/mutators/PhoneticTransformer.java | 29 |
}
/**
 * Rebuilds {@code phoneticDict} from the reader obtained for
 * {@code RESOURCE_BASE_NAME}.
 *
 * <p>Empty lines and lines starting with {@code '#'} are skipped; every
 * other line is split on {@code ','} and the resulting array is appended
 * as one rule.
 *
 * <p>Failures are propagated to the caller rather than printed and
 * swallowed, so a load error cannot silently leave a partially filled
 * rule list behind.
 *
 * @throws IOException if reading the resource fails
 */
protected void loadData() throws IOException {
    phoneticDict = new ArrayList<String[]>();
    BufferedReader br = null;
    try {
        br = FebrlModifierUtilities.getFebrlRuleReader(RESOURCE_BASE_NAME);
        String strLine;
        while ((strLine = br.readLine()) != null) {
            // Skip blank lines and '#' comment lines.
            if (strLine.length() > 0 && strLine.charAt(0) != '#') {
                phoneticDict.add(strLine.split(","));
            }
        }
    } finally {
        ResourceUtil.closeReader(br);
    }
}
public String modifyValue(String value) { |
File | Line |
---|
net/sf/adatagenerator/febrl/modifiers/OCRTransformer.java | 24 |
net/sf/adatagenerator/febrl/mutators/OCRTransformer.java | 29 |
}
/**
 * Rebuilds {@code ocrDict} from the reader obtained for
 * {@code RESOURCE_BASE_NAME}.
 *
 * <p>Empty lines and lines starting with {@code '#'} are skipped; every
 * other line is split on {@code ','} and the resulting array is appended
 * as one rule. The reader is handed to {@code ResourceUtil.closeReader}
 * whether or not reading succeeds.
 *
 * @throws IOException if reading the resource fails
 */
protected void loadData() throws IOException {
    ocrDict = new ArrayList<String[]>();
    BufferedReader reader = null;
    try {
        reader = FebrlModifierUtilities.getFebrlRuleReader(RESOURCE_BASE_NAME);
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            // Skip blank lines and '#' comment lines.
            if (row.length() == 0 || row.charAt(0) == '#') {
                continue;
            }
            String[] fields = row.split(",");
            ocrDict.add(fields);
        }
    } finally {
        ResourceUtil.closeReader(reader);
    }
}
public String modifyValue(String value) { |