Commit 47b6b6cd authored by JR's avatar JR

Translation and 3 decimal places for similarity.

parent 35c46ea2
......@@ -7,10 +7,12 @@ package br.ufpr.inf.junior.imgazetteer;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.RoundingMode;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Logger;
import java.util.Locale;
import gate.creole.ExecutionException;
......@@ -20,150 +22,145 @@ import gate.creole.ExecutionException;
*/
public class AuxiliarStringSimilarity {
protected Logger logger;
private void publi() {
    // Binds the logger to the runtime class name of this instance.
    // NOTE(review): nothing visible in this excerpt calls this method; confirm the
    // logger is initialised before secondMatch's catch block calls logger.severe(...).
    final String loggerName = getClass().getName();
    this.logger = Logger.getLogger(loggerName);
}
/**
* Define the similarity between entries and text chunks that were
* associated by Edit Distance search.
* associated by Edit Distance search.
* The number of better similarities accepted for each chunk is defined by
* the value of acceptSimilaritiesNumber parameter.
*
* @param editDistanceResult
* @param metodoComparacao
* @param obj
* @param similaridadeMinima
* @param qtdeSimilaridadesAceitas
* @param comparationMethod
* @param comparationObject
* @param minimumSimilarity
* @param acceptSimilaritiesNumber
* @param caseSensitive
* @return ArrayList
* @throws ExecutionException
*/
public ArrayList<SimilarityResult> secondMatch(EditDistanceResult editDistanceResult, Method metodoComparacao,
Object obj, float similaridadeMinima, int qtdeSimilaridadesAceitas, boolean caseSensitive)
public ArrayList<SimilarityResult> secondMatch(EditDistanceResult editDistanceResult, Method comparationMethod,
Object comparationObject, float minimumSimilarity, int acceptSimilaritiesNumber, boolean caseSensitive)
throws ExecutionException {
List<NodeFT> listActivateLeaves = editDistanceResult.getListActivateLeaves();
InputString stringRead = editDistanceResult.getStringRead();
float similaridade;
float[] similaridadesValidas = new float[qtdeSimilaridadesAceitas];
HashMap<Float, ArrayList<SimilarityResult>> resultadosPorSimilaridade = new HashMap<>();
InputString chunkRead = editDistanceResult.getChunkRead();
float similarityValue;
float[] validSimilarities = new float[acceptSimilaritiesNumber];
HashMap<Float, ArrayList<SimilarityResult>> resultsBySimilarity = new HashMap<>();
ArrayList<SimilarityResult> similarityResultsReturn = new ArrayList<>();
NumberFormat formatter = NumberFormat.getInstance(Locale.US);
formatter.setMaximumFractionDigits(3);
formatter.setMinimumFractionDigits(3);
formatter.setRoundingMode(RoundingMode.HALF_UP);
// Define a menor similaridade aceita para o limite de similaridades
// diferentes definido.
// IF que seleciona os melhores resultados.
float menorSimilaridadeAtual;
int qtdeSimilaridadesDiferentes;
if (qtdeSimilaridadesAceitas > 1) {
menorSimilaridadeAtual = 2;
qtdeSimilaridadesDiferentes = 0;
float lowestCurrentSimilarity;
int differentSimilaritiesFound;
if (acceptSimilaritiesNumber > 1) {
lowestCurrentSimilarity = 2;
differentSimilaritiesFound = 0;
} else {
menorSimilaridadeAtual = 0;
qtdeSimilaridadesDiferentes = 1;
lowestCurrentSimilarity = 0;
differentSimilaritiesFound = 1;
}
try {
if (listActivateLeaves != null && !listActivateLeaves.isEmpty()) {
// Percorre a lista de folhas ativas.
// Iterate over every active node that represents the end of at least one entry.
for (NodeFT activateLeaf : listActivateLeaves) {
// Percorre a lista de entradas de uma determinada folha.
// All entries from each node
for (Entry e : activateLeaf.getEntries()) {
// Verifica se a similaridade deve ser case-sensitive.
if (caseSensitive) {
similaridade = secondSimilarityMetric(e.getEntry(), stringRead.getStringBuffer().toString(),
metodoComparacao, obj);
similarityValue = secondSimilarityMetric(e.getEntry(), chunkRead.getStringBuffer().toString(),
comparationMethod, comparationObject);
} else {
similaridade = secondSimilarityMetric(e.getEntry().toUpperCase(),
stringRead.getStringBuffer().toString().toUpperCase(), metodoComparacao, obj);
similarityValue = secondSimilarityMetric(e.getEntry().toUpperCase(),
chunkRead.getStringBuffer().toString().toUpperCase(), comparationMethod, comparationObject);
}
// Se a similaridade for maior que a mínima definida ela
// é aceita.
if (similaridade > similaridadeMinima) {
if (qtdeSimilaridadesAceitas > 1) {
// Similaridade menor que a menor similaridade
// atualmente aceita.
if (qtdeSimilaridadesDiferentes < qtdeSimilaridadesAceitas
&& similaridade < menorSimilaridadeAtual) {
// Format float value.
Float formatedFloat = new Float(formatter.format(similarityValue));
similarityValue = formatedFloat.floatValue();
// A similarity strictly higher than the minimum accepted value is considered.
if (similarityValue > minimumSimilarity) {
if (acceptSimilaritiesNumber > 1) {
// Similarity is lower than the current lowest accepted similarity and
// this chunk can still accept at least one more distinct similarity.
if (differentSimilaritiesFound < acceptSimilaritiesNumber
&& similarityValue < lowestCurrentSimilarity) {
ArrayList<SimilarityResult> srTemp = new ArrayList<>();
srTemp.add(new SimilarityResult(e, similaridade, activateLeaf.getEDValue()));
similaridadesValidas[qtdeSimilaridadesDiferentes] = similaridade;
resultadosPorSimilaridade.put(similaridade, srTemp);
menorSimilaridadeAtual = similaridade;
qtdeSimilaridadesDiferentes++;
// Similaridade maior que a menor
// similaridade atualmente aceita.
} else if (similaridade > menorSimilaridadeAtual) {
// Similaridade já presente entre as
// similaridades aceitas.
if (resultadosPorSimilaridade.containsKey(similaridade)) {
srTemp.add(new SimilarityResult(e, similarityValue, activateLeaf.getEDValue()));
validSimilarities[differentSimilaritiesFound] = similarityValue;
resultsBySimilarity.put(similarityValue, srTemp);
lowestCurrentSimilarity = similarityValue;
differentSimilaritiesFound++;
// Similarity is higher than the current lowest accepted similarity.
} else if (similarityValue > lowestCurrentSimilarity) {
// Similarity value already exists in accepted similarity set.
if (resultsBySimilarity.containsKey(similarityValue)) {
ArrayList<SimilarityResult> srTemp = new ArrayList<>();
srTemp = resultadosPorSimilaridade.get(similaridade);
srTemp.add(new SimilarityResult(e, similaridade, activateLeaf.getEDValue()));
resultadosPorSimilaridade.put(similaridade, srTemp);
} else {// Similaridade não presente entre
// as similaridades aceitas.
if (qtdeSimilaridadesDiferentes == qtdeSimilaridadesAceitas) {
resultadosPorSimilaridade
.remove(similaridadesValidas[qtdeSimilaridadesAceitas - 1]);
for (int i = 0; i < qtdeSimilaridadesAceitas; i++) {
if (similaridade > similaridadesValidas[i]) {
for (int j = (qtdeSimilaridadesAceitas - 2); j >= i; j--) {
similaridadesValidas[j + 1] = similaridadesValidas[j];
srTemp = resultsBySimilarity.get(similarityValue);
srTemp.add(new SimilarityResult(e, similarityValue, activateLeaf.getEDValue()));
resultsBySimilarity.put(similarityValue, srTemp);
} else {
if (differentSimilaritiesFound == acceptSimilaritiesNumber) {
resultsBySimilarity
.remove(validSimilarities[acceptSimilaritiesNumber - 1]);
for (int i = 0; i < acceptSimilaritiesNumber; i++) {
if (similarityValue > validSimilarities[i]) {
for (int j = (acceptSimilaritiesNumber - 2); j >= i; j--) {
validSimilarities[j + 1] = validSimilarities[j];
}
similaridadesValidas[i] = similaridade;
validSimilarities[i] = similarityValue;
break;
}
}
} else {
qtdeSimilaridadesDiferentes++;
for (int i = 0; i < qtdeSimilaridadesDiferentes; i++) {
if (similaridade > similaridadesValidas[i]) {
for (int j = (qtdeSimilaridadesDiferentes - 2); j >= i; j--) {
similaridadesValidas[j + 1] = similaridadesValidas[j];
} else {//New similarity is placed in correct order by value.
differentSimilaritiesFound++;
for (int i = 0; i < differentSimilaritiesFound; i++) {
if (similarityValue > validSimilarities[i]) {
for (int j = (differentSimilaritiesFound - 2); j >= i; j--) {
validSimilarities[j + 1] = validSimilarities[j];
}
similaridadesValidas[i] = similaridade;
validSimilarities[i] = similarityValue;
break;
}
}
}
ArrayList<SimilarityResult> srTemp = new ArrayList<>();
srTemp.add(new SimilarityResult(e, similaridade, activateLeaf.getEDValue()));
resultadosPorSimilaridade.put(similaridade, srTemp);
menorSimilaridadeAtual = similaridadesValidas[qtdeSimilaridadesDiferentes - 1];
srTemp.add(new SimilarityResult(e, similarityValue, activateLeaf.getEDValue()));
resultsBySimilarity.put(similarityValue, srTemp);
lowestCurrentSimilarity = validSimilarities[differentSimilaritiesFound - 1];
}
} else if (similaridade == menorSimilaridadeAtual) {
} else if (similarityValue == lowestCurrentSimilarity) {
ArrayList<SimilarityResult> srTemp = new ArrayList<>();
srTemp = resultadosPorSimilaridade.get(similaridade);
srTemp.add(new SimilarityResult(e, similaridade, activateLeaf.getEDValue()));
resultadosPorSimilaridade.put(similaridade, srTemp);
srTemp = resultsBySimilarity.get(similarityValue);
srTemp.add(new SimilarityResult(e, similarityValue, activateLeaf.getEDValue()));
resultsBySimilarity.put(similarityValue, srTemp);
}
} else {// Se for aceita apenas a melhor associao
// para cada parte do texto.
if (similaridade < menorSimilaridadeAtual) {
} else {// Only highest similarity for each chunk.
if (similarityValue < lowestCurrentSimilarity) {
// Intentionally empty: skips evaluating the other two branches.
} else if (similaridade > menorSimilaridadeAtual) {
resultadosPorSimilaridade.remove(menorSimilaridadeAtual);
} else if (similarityValue > lowestCurrentSimilarity) {
resultsBySimilarity.remove(lowestCurrentSimilarity);
ArrayList<SimilarityResult> srTemp = new ArrayList<>();
srTemp.add(new SimilarityResult(e, similaridade, activateLeaf.getEDValue()));
resultadosPorSimilaridade.put(similaridade, srTemp);
similaridadesValidas[0] = similaridade;
menorSimilaridadeAtual = similaridade;
} else if (similaridade == menorSimilaridadeAtual) {
srTemp.add(new SimilarityResult(e, similarityValue, activateLeaf.getEDValue()));
resultsBySimilarity.put(similarityValue, srTemp);
validSimilarities[0] = similarityValue;
lowestCurrentSimilarity = similarityValue;
} else {
ArrayList<SimilarityResult> srTemp = new ArrayList<>();
srTemp = resultadosPorSimilaridade.get(similaridade);
srTemp.add(new SimilarityResult(e, similaridade, activateLeaf.getEDValue()));
resultadosPorSimilaridade.put(similaridade, srTemp);
srTemp = resultsBySimilarity.get(similarityValue);
srTemp.add(new SimilarityResult(e, similarityValue, activateLeaf.getEDValue()));
resultsBySimilarity.put(similarityValue, srTemp);
}
}
}
}
}
if (!resultadosPorSimilaridade.isEmpty()) {
for (int i = 0; i < qtdeSimilaridadesDiferentes; i++) {
for (SimilarityResult sr : resultadosPorSimilaridade.get(similaridadesValidas[i])) {
if (!resultsBySimilarity.isEmpty()) {
for (int i = 0; i < differentSimilaritiesFound; i++) {
for (SimilarityResult sr : resultsBySimilarity.get(validSimilarities[i])) {
similarityResultsReturn.add(sr);
}
}
......@@ -172,7 +169,6 @@ public class AuxiliarStringSimilarity {
return similarityResultsReturn;
} catch (Exception e) {
logger.severe(e.getMessage());
throw new ExecutionException("Error in similarity definition. " + this.getClass().getName());
}
}
......@@ -183,16 +179,16 @@ public class AuxiliarStringSimilarity {
*
* @param string1
* @param string2
* @param metodoComparacao
* @param comparationMethod
* @param obj
* @return float
*/
private float secondSimilarityMetric(String string1, String string2, Method metodoComparacao, Object obj)
private float secondSimilarityMetric(String string1, String string2, Method comparationMethod, Object obj)
throws ExecutionException {
float similarity = 0;
try {
// Execute method to obtain similarity.
similarity = (float) metodoComparacao.invoke(obj, string1, string2);
similarity = (float) comparationMethod.invoke(obj, string1, string2);
} catch (SecurityException | InvocationTargetException | IllegalAccessException ex) {
throw new ExecutionException("Error in similarity definition. " + this.getClass().getName());
......
......@@ -13,19 +13,19 @@ import java.util.List;
*/
public class EditDistanceResult {
private List<NodeFT> listActivateLeaves;
private InputString stringRead;
private InputString chunkRead;
public EditDistanceResult(List<NodeFT> listActivateLeaves, InputString stringRead) {
public EditDistanceResult(List<NodeFT> listActivateLeaves, InputString chunkRead) {
this.listActivateLeaves = listActivateLeaves;
this.stringRead = stringRead;
this.chunkRead = chunkRead;
}
public List<NodeFT> getListActivateLeaves() {
    // Returns the stored list reference as-is (no copy is made).
    return this.listActivateLeaves;
}
public InputString getStringRead() {
return stringRead;
public InputString getChunkRead() {
    // Returns the chunk that was passed to the constructor.
    return this.chunkRead;
}
}
......@@ -47,7 +47,7 @@ import gate.util.OffsetComparator;
* @author Junior
*
*/
@CreoleResource(name = "IMGazetteer", comment = "Cria anotaes em gazetteers baseado em associao aproximada.", helpURL = "http://www.prppg.ufpr.br/ppginformatica/")
@CreoleResource(name = "IMGazetteer", comment = "Approximate string matching gazetteer.", helpURL = "http://www.prppg.ufpr.br/ppginformatica/")
@SuppressWarnings("javadoc")
public class InexactGazetteer extends AbstractLanguageAnalyser implements ActionsPublisher {
......@@ -146,7 +146,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome do tipo da anotao que ser criada.", defaultValue = "Lookup")
@CreoleParameter(comment = "Annotation name that will be used", defaultValue = "IM_Lookup")
public void setAnnotationType(String at) {
    // Stores the annotation name supplied through the CREOLE parameter.
    this.annotationType = at;
}
......@@ -158,7 +158,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private String annotationType;
@RunTime
@CreoleParameter(comment = "ED mximo aceito na busca.")
@CreoleParameter(comment = "Maximum Edit Distance accepted.")
public void setMaxED(Integer ed) {
    // Stores the maximum accepted Edit Distance supplied through the CREOLE parameter.
    this.maxED = ed;
}
......@@ -170,7 +170,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private Integer maxED;
@RunTime
@CreoleParameter(comment = "Nmero mximo de melhores similaridades que sero aceitas para cada pedao do texto.")
@CreoleParameter(comment = "Number of higher similarities that will be returned for each chunk.")
public void setNumberBetterSimilarities(Integer nbsString) {
    // Stores how many of the higher similarities are returned for each chunk.
    this.numberBetterSimilarities = nbsString;
}
......@@ -182,7 +182,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private Integer numberBetterSimilarities;
@RunTime
@CreoleParameter(comment = "Similaridade mnima que deve ser obtida entre o pedao do texto e o gazetteer.")
@CreoleParameter(comment = "Minimum accepteed value for similarity between entrie and chunk.")
public void setMinAcceptedSimilarity(Float nbsString) {
    // Stores the minimum accepted similarity between an entry and a chunk.
    this.minAcceptedSimilarity = nbsString;
}
......@@ -195,7 +195,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome da feature que vai idicar o valor para EditDistance.", defaultValue = "ED")
@CreoleParameter(comment = "Name of feature used for Edit Distance value within annotations.", defaultValue = "ED")
public void setEditDistanceFeatureName(String edfn) {
    // Stores the feature name used for the Edit Distance value within annotations.
    this.editDistanceFeatureName = edfn;
}
......@@ -208,7 +208,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caminho completo para a Classe que contm a implementao do algoritmo de similaridade.", defaultValue = "")
@CreoleParameter(comment = "Path of class that contains the method to calculate string similarity.", defaultValue = "")
public void setSimilarityClass(String sc) {
    // Stores the path of the class that contains the string-similarity method.
    this.similarityClass = sc;
}
......@@ -221,7 +221,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome do mtodo que contm a implementao do algoritmo de similaridade.", defaultValue = "")
@CreoleParameter(comment = "Name of method that calculates string similarity.", defaultValue = "")
public void setSimilarityMethod(String sm) {
    // Stores the name of the method that calculates string similarity.
    this.similarityMethod = sm;
}
......@@ -234,7 +234,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caminho completo para a Classe que contm a implementao do algoritmo de transformao.")
@CreoleParameter(comment = "Path of class that contains the method that performs string transformation.")
public void setTransformationClass(String cc) {
    // Stores the path of the class that contains the string-transformation method.
    this.transformationClass = cc;
}
......@@ -247,7 +247,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome do mtodo que contm a implementao do algoritmo de transformao.")
@CreoleParameter(comment = "Name of method that performs string transformation.")
public void setTransformationMethod(String cm) {
    // Stores the name of the method that performs string transformation.
    this.transformationMethod = cm;
}
......@@ -260,7 +260,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome da feature que vai idicar o valor da similaridade.", defaultValue = "Similarity")
@CreoleParameter(comment = "Name of feature used for similarity value.", defaultValue = "Similarity")
public void setSimilarityFeatureName(String sfn) {
    // Stores the feature name used for the similarity value.
    this.similarityFeatureName = sfn;
}
......@@ -273,7 +273,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caractere utilizado para separar as features.", defaultValue = ";")
@CreoleParameter(comment = "Character used to indicate the of a feature value and the start of another feature name.", defaultValue = ";")
public void setFeaturesSeparator(String fs) {
    // Stores the separator placed between one feature and the next.
    this.featuresSeparator = fs;
}
......@@ -286,7 +286,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caractere utilizado para separar o nome do valor em cada feature.", defaultValue = ":")
@CreoleParameter(comment = "Character used between name and value of each feature.", defaultValue = ":")
public void setFeatureNameValueSeparator(String fnvs) {
    // Stores the separator used between the name and the value of each feature.
    this.featureNameValueSeparator = fnvs;
}
......@@ -299,7 +299,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caractere utilizado para marcar o final do texto que representa cada gazetteer.", defaultValue = "#")
@CreoleParameter(comment = "Character that indicates the end of each entrie.", defaultValue = "#")
public void setGazetterDelimiter(String ed) {
    // Stores the character that marks the end of each entry.
    // Note: the setter is named "GazetterDelimiter" but writes the entrieDelimiter field.
    this.entrieDelimiter = ed;
}
......@@ -311,7 +311,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private String entrieDelimiter;
@RunTime
@CreoleParameter(comment = "Indica se devem ser aplicado algum algoritmo de transformao.", defaultValue = "false")
@CreoleParameter(comment = "Indicate if a string transformation is used.", defaultValue = "false")
public void setUseTransformation(Boolean yn) {
    // Stores the flag that indicates whether a string transformation is used.
    this.useTransformation = yn;
}
......@@ -339,8 +339,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@Override
public Resource init() throws ResourceInstantiationException {
// precompile the pattern used to replace all unicode whitespace in
// gazetteer
// entries with a single space.
// gazetteer entries with a single space.
ws_pattern = Pattern.compile(ws_patternstring);
createTrie();
......@@ -586,7 +585,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
for (SimilarityResult sr : similarityResults) {
FeatureMap fm = Factory.newFeatureMap();
fm.put(this.similarityFeatureName, sr.similarity);
fm.put("String", edr.getStringRead().getStringBuffer().toString());
fm.put("String", edr.getChunkRead().getStringBuffer().toString());
fm.put("ED", sr.editDistance);
fm.put("Option", sr.entry.getEntry());
fm.putAll(features4ALL);
......@@ -594,7 +593,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
for (GazetteerFeature gf : gazetteerFeaturesList) {
fm.put(gf.getName(), gf.getValue());
}
lookupid = addAnAnnotation(edr.getStringRead().getStartOffset(), edr.getStringRead().getEndOffset(),
lookupid = addAnAnnotation(edr.getChunkRead().getStartOffset(), edr.getChunkRead().getEndOffset(),
this.annotationType, fm);
}
}
......
......@@ -10,9 +10,9 @@ package br.ufpr.inf.junior.imgazetteer;
* @author Junior
*/
public class InputString{
private StringBuffer stringBuffer;//String lida do texto
private Integer startOffset;//Posição inicial no texto
private Integer endOffset;//Posição final no texto
private StringBuffer stringBuffer;//Text chunk
private Integer startOffset;
private Integer endOffset;
private Character nextCharacter;
public InputString() {
......@@ -62,11 +62,4 @@ public class InputString{
this.nextCharacter = nextCharacter;
}
// public void addCharacter2StringBuffer(InputString inputString, Character c){
// inputString.getStringBuffer().append(c);
// }
//
// public void addString2StringBuffer(InputString inputString, String s){
// inputString.getStringBuffer().append(s);
// }
}
/*
* Contém a estrutura utilizada para montar cada nó da Trie.
*
*/
package br.ufpr.inf.junior.imgazetteer;
......@@ -12,14 +12,14 @@ import java.util.List;
* @author Junior
*/
public class NodeFT implements Serializable{
private Integer id;//Identificardor unico de cada nó.
private Character symbol;//Armazena o caracter que representa aquele nó.
private Integer id;//Unique ID
private Character symbol;//Character that represents the node.
private List<NodeFT> childs;//Child nodes.
private Boolean endEntrie;//Define se o nó representa o fim da entrada (true = nó folha).
private boolean validNode;//quando TRUE significa que o nó está a uma distância menor que a máxima aceita do último caracter lido.
private Boolean endEntrie;//Indicate if this node is the end of an entrie.
private boolean validNode;//Used in EditDistance search to indicate that this node ED is lower than Maximum ED.
private int EDValue;
private int level;//Indica o nível de profundidade do nó na árvore. O nó RAIZ tem level 0 (zero).
private List<Entry> entries;
private int level;//Indicate the level of this node in trie. Root node is level zero.
private List<Entry> entries;// Nodes that represents the end of an entrie, store that entrie.
public NodeFT() {
......
......@@ -44,22 +44,22 @@ public class Search {
String convertedToken = new String();
List<EditDistanceResult> editDistanceResultList = new ArrayList<>();
// Cria o array de nós ativos com tamanho igual ao nível
// mais profundo da árvore. Em cada posição um Array é inserido,
// assim temos um array para guardar os nós ativos de cada nível.
List<ArrayList<NodeFT>> activNodesArray = new ArrayList<>();
// Create a list with one slot per trie level (0 through deepestLevel).
// Each slot holds a list that stores the
// active nodes of that level.
List<ArrayList<NodeFT>> activeNodesArray = new ArrayList<>();
for (int i = 0; i <= deepestLevel; i++) {
activNodesArray.add(i, new ArrayList<>());
activeNodesArray.add(i, new ArrayList<>());
}
// Adiciona o nó raiz na lista de nós ativos.
(activNodesArray.get(this.trie.getLevel())).add(this.trie);
// Add root node, based on its level, to the list of active nodes.
(activeNodesArray.get(this.trie.getLevel())).add(this.trie);
this.trie.setValidNode(true);
List<NodeFT> activLeavesList = new ArrayList<>();
InputString stringRead = new InputString();
stringRead.setStartOffset(0);
stringRead.setEndOffset(0);
InputString chunkRead = new InputString();
chunkRead.setStartOffset(0);
chunkRead.setEndOffset(0);
boolean nextStartDefined = false;
int tempIndex = 0;
......@@ -67,12 +67,11 @@ public class Search {
List<Character> convertedTokenArray = new ArrayList<>();
while (i < tokenList.size()) {
Annotation tokenAnnotation = tokenList.get(i);
// Toda anotação do tipo space_token é considerada como um
// espaço simples.
// Space token annotations are considered single blank space.
if (tokenAnnotation.getType().equals(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE)) {
convertedToken += " ";
stringRead.setEndOffset(tokenAnnotation.getEndNode().getOffset().intValue());
stringRead.setStringBuffer(stringRead.getStringBuffer().append(" "));
chunkRead.setEndOffset(tokenAnnotation.getEndNode().getOffset().intValue());
chunkRead.setStringBuffer(chunkRead.getStringBuffer().append(" "));
i++;
} else {
if (tokenAnnotation.getFeatures().get(ANNIEConstants.TOKEN_KIND_FEATURE_NAME).equals("word")
......@@ -85,82 +84,73 @@ public class Search {
.get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
}
stringRead.setEndOffset(tokenAnnotation.getEndNode().getOffset().intValue());
stringRead.setStringBuffer(stringRead.getStringBuffer()
chunkRead.setEndOffset(tokenAnnotation.getEndNode().getOffset().intValue());
chunkRead.setStringBuffer(chunkRead.getStringBuffer()
.append((String) tokenAnnotation.getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME)));