Commit 47b6b6cd authored by JR's avatar JR

Translation and 3 decimal places for similarity.

parent 35c46ea2
......@@ -13,19 +13,19 @@ import java.util.List;
*/
public class EditDistanceResult {
private List<NodeFT> listActivateLeaves;
private InputString stringRead;
private InputString chunkRead;
public EditDistanceResult(List<NodeFT> listActivateLeaves, InputString stringRead) {
public EditDistanceResult(List<NodeFT> listActivateLeaves, InputString chunkRead) {
this.listActivateLeaves = listActivateLeaves;
this.stringRead = stringRead;
this.chunkRead = chunkRead;
}
public List<NodeFT> getListActivateLeaves() {
return listActivateLeaves;
}
public InputString getStringRead() {
return stringRead;
public InputString getChunkRead() {
return chunkRead;
}
}
......@@ -47,7 +47,7 @@ import gate.util.OffsetComparator;
* @author Junior
*
*/
@CreoleResource(name = "IMGazetteer", comment = "Cria anotaes em gazetteers baseado em associao aproximada.", helpURL = "http://www.prppg.ufpr.br/ppginformatica/")
@CreoleResource(name = "IMGazetteer", comment = "Approximate string matching gazetteer.", helpURL = "http://www.prppg.ufpr.br/ppginformatica/")
@SuppressWarnings("javadoc")
public class InexactGazetteer extends AbstractLanguageAnalyser implements ActionsPublisher {
......@@ -146,7 +146,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome do tipo da anotao que ser criada.", defaultValue = "Lookup")
@CreoleParameter(comment = "Annotation name that will be used", defaultValue = "IM_Lookup")
public void setAnnotationType(String at) {
    // Runtime parameter: the annotation type name used when this resource adds annotations.
    this.annotationType = at;
}
......@@ -158,7 +158,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private String annotationType;
@RunTime
@CreoleParameter(comment = "ED mximo aceito na busca.")
@CreoleParameter(comment = "Maximum Edit Distance accepted.")
public void setMaxED(Integer ed) {
    // Runtime parameter: the maximum edit distance accepted.
    this.maxED = ed;
}
......@@ -170,7 +170,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private Integer maxED;
@RunTime
@CreoleParameter(comment = "Nmero mximo de melhores similaridades que sero aceitas para cada pedao do texto.")
@CreoleParameter(comment = "Number of higher similarities that will be returned for each chunk.")
public void setNumberBetterSimilarities(Integer nbsString) {
    // Runtime parameter: how many of the highest similarities are returned per text chunk.
    // Despite its name, the argument is an Integer count, not a string.
    this.numberBetterSimilarities = nbsString;
}
......@@ -182,7 +182,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private Integer numberBetterSimilarities;
@RunTime
@CreoleParameter(comment = "Similaridade mnima que deve ser obtida entre o pedao do texto e o gazetteer.")
@CreoleParameter(comment = "Minimum accepteed value for similarity between entrie and chunk.")
public void setMinAcceptedSimilarity(Float nbsString) {
    // Runtime parameter: the minimum accepted similarity between a gazetteer entry and a chunk.
    // Despite its name, the argument is the Float threshold itself, not a string.
    this.minAcceptedSimilarity = nbsString;
}
......@@ -195,7 +195,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome da feature que vai idicar o valor para EditDistance.", defaultValue = "ED")
@CreoleParameter(comment = "Name of feature used for Edit Distance value within annotations.", defaultValue = "ED")
public void setEditDistanceFeatureName(String edfn) {
    // Optional runtime parameter: name of the annotation feature for the edit-distance value.
    this.editDistanceFeatureName = edfn;
}
......@@ -208,7 +208,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caminho completo para a Classe que contm a implementao do algoritmo de similaridade.", defaultValue = "")
@CreoleParameter(comment = "Path of class that contains the method to calculate string similarity.", defaultValue = "")
public void setSimilarityClass(String sc) {
    // Optional runtime parameter: path of the class that holds the string-similarity method.
    this.similarityClass = sc;
}
......@@ -221,7 +221,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome do mtodo que contm a implementao do algoritmo de similaridade.", defaultValue = "")
@CreoleParameter(comment = "Name of method that calculates string similarity.", defaultValue = "")
public void setSimilarityMethod(String sm) {
    // Optional runtime parameter: name of the method that calculates string similarity.
    this.similarityMethod = sm;
}
......@@ -234,7 +234,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caminho completo para a Classe que contm a implementao do algoritmo de transformao.")
@CreoleParameter(comment = "Path of class that contains the method that performs string transformation.")
public void setTransformationClass(String cc) {
    // Optional runtime parameter: path of the class that holds the string-transformation method.
    this.transformationClass = cc;
}
......@@ -247,7 +247,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome do mtodo que contm a implementao do algoritmo de transformao.")
@CreoleParameter(comment = "Name of method that performs string transformation.")
public void setTransformationMethod(String cm) {
    // Optional runtime parameter: name of the method that performs the string transformation.
    this.transformationMethod = cm;
}
......@@ -260,7 +260,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Nome da feature que vai idicar o valor da similaridade.", defaultValue = "Similarity")
@CreoleParameter(comment = "Name of feature used for similarity value.", defaultValue = "Similarity")
public void setSimilarityFeatureName(String sfn) {
    // Optional runtime parameter: name of the annotation feature that holds the similarity value.
    this.similarityFeatureName = sfn;
}
......@@ -273,7 +273,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caractere utilizado para separar as features.", defaultValue = ";")
@CreoleParameter(comment = "Character used to indicate the of a feature value and the start of another feature name.", defaultValue = ";")
public void setFeaturesSeparator(String fs) {
    // Optional runtime parameter: separator placed between one feature and the next.
    this.featuresSeparator = fs;
}
......@@ -286,7 +286,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caractere utilizado para separar o nome do valor em cada feature.", defaultValue = ":")
@CreoleParameter(comment = "Character used between name and value of each feature.", defaultValue = ":")
public void setFeatureNameValueSeparator(String fnvs) {
    // Optional runtime parameter: separator between the name and the value of each feature.
    this.featureNameValueSeparator = fnvs;
}
......@@ -299,7 +299,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@RunTime
@Optional
@CreoleParameter(comment = "Caractere utilizado para marcar o final do texto que representa cada gazetteer.", defaultValue = "#")
@CreoleParameter(comment = "Character that indicates the end of each entrie.", defaultValue = "#")
public void setGazetterDelimiter(String ed) {
    // Optional runtime parameter: character that marks the end of each gazetteer entry.
    // The setter name and the backing field name (entrieDelimiter) differ; both are kept
    // as-is because external configuration may depend on the setter name.
    this.entrieDelimiter = ed;
}
......@@ -311,7 +311,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
private String entrieDelimiter;
@RunTime
@CreoleParameter(comment = "Indica se devem ser aplicado algum algoritmo de transformao.", defaultValue = "false")
@CreoleParameter(comment = "Indicate if a string transformation is used.", defaultValue = "false")
public void setUseTransformation(Boolean yn) {
    // Runtime parameter: whether a string transformation is used.
    this.useTransformation = yn;
}
......@@ -339,8 +339,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@Override
public Resource init() throws ResourceInstantiationException {
// precompile the pattern used to replace all unicode whitespace in
// gazetteer
// entries with a single space.
// gazetteer entries with a single space.
ws_pattern = Pattern.compile(ws_patternstring);
createTrie();
......@@ -586,7 +585,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
for (SimilarityResult sr : similarityResults) {
FeatureMap fm = Factory.newFeatureMap();
fm.put(this.similarityFeatureName, sr.similarity);
fm.put("String", edr.getStringRead().getStringBuffer().toString());
fm.put("String", edr.getChunkRead().getStringBuffer().toString());
fm.put("ED", sr.editDistance);
fm.put("Option", sr.entry.getEntry());
fm.putAll(features4ALL);
......@@ -594,7 +593,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
for (GazetteerFeature gf : gazetteerFeaturesList) {
fm.put(gf.getName(), gf.getValue());
}
lookupid = addAnAnnotation(edr.getStringRead().getStartOffset(), edr.getStringRead().getEndOffset(),
lookupid = addAnAnnotation(edr.getChunkRead().getStartOffset(), edr.getChunkRead().getEndOffset(),
this.annotationType, fm);
}
}
......
......@@ -10,9 +10,9 @@ package br.ufpr.inf.junior.imgazetteer;
* @author Junior
*/
public class InputString{
private StringBuffer stringBuffer;//String lida do texto
private Integer startOffset;//Posição inicial no texto
private Integer endOffset;//Posição final no texto
private StringBuffer stringBuffer;//Text chunk
private Integer startOffset;
private Integer endOffset;
private Character nextCharacter;
public InputString() {
......@@ -62,11 +62,4 @@ public class InputString{
this.nextCharacter = nextCharacter;
}
// public void addCharacter2StringBuffer(InputString inputString, Character c){
// inputString.getStringBuffer().append(c);
// }
//
// public void addString2StringBuffer(InputString inputString, String s){
// inputString.getStringBuffer().append(s);
// }
}
/*
* Contains the structure used to build each node of the Trie.
*
*/
package br.ufpr.inf.junior.imgazetteer;
......@@ -12,14 +12,14 @@ import java.util.List;
* @author Junior
*/
public class NodeFT implements Serializable{
private Integer id;//Identificardor unico de cada nó.
private Character symbol;//Armazena o caracter que representa aquele nó.
private Integer id;//Unique ID
private Character symbol;//Character that represents the node.
private List<NodeFT> childs;//Child nodes.
private Boolean endEntrie;//Define se o nó representa o fim da entrada (true = nó folha).
private boolean validNode;//quando TRUE significa que o nó está a uma distância menor que a máxima aceita do último caracter lido.
private Boolean endEntrie;//Indicate if this node is the end of an entrie.
private boolean validNode;//Used in EditDistance search to indicate that this node ED is lower than Maximum ED.
private int EDValue;
private int level;//Indica o nível de profundidade do nó na árvore. O nó RAIZ tem level 0 (zero).
private List<Entry> entries;
private int level;//Indicate the level of this node in trie. Root node is level zero.
private List<Entry> entries;// Nodes that represent the end of an entry store that entry here.
public NodeFT() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment