From cf10b11352ef5152536bd7bac3d24f0b72004423 Mon Sep 17 00:00:00 2001 From: JR Date: Mon, 30 Oct 2017 16:34:49 -0200 Subject: [PATCH] Changes for tokenizer in trie building. --- .../inf/junior/imgazetteer/InexactGazetteer.java | 12 ++++++------ .../src/br/ufpr/inf/junior/utils/TrieUtil.java | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java b/IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java index d9a0da0..e21cf4d 100644 --- a/IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java +++ b/IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java @@ -406,7 +406,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action protected void loadDataFromDictionary() throws Exception { if (getConfigFileURL() != null) { - List gazetteerEntriesList = new ArrayList<>(); +// List gazetteerEntriesList = new ArrayList<>(); logger.info("Creating trie from " + getConfigFileURL()); BufferedReader dictionaryBuffReader = new BomStrippingInputStreamReader((configFileURL).openStream(), encoding); @@ -503,7 +503,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action this.gazetteerFeaturesMap = makeTrie.loadEntriesAndFeatures(configFileURL, this.entrieDelimiter, this.featureNameValueSeparator, this.featuresSeparator, encoding); - HashMap tamanhoLinhaMap = new HashMap<>(); + HashMap entrieLineLengthMap = new HashMap<>(); Integer linhaCount = 0; File configFile = gate.util.Files.fileFromURL(configFileURL); String entriesFileName = configFile.getAbsolutePath().replaceAll("\\.txt$", "\\_GAZETTEERS.txt"); @@ -512,16 +512,16 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action linhaCount++; String entrieNormalized = element.getKey().trim(); entrieNormalized = ws_pattern.matcher(entrieNormalized).replaceAll(" "); - gazetteerEntriesList.add(entrieNormalized); - tamanhoLinhaMap.put(linhaCount, entrieNormalized.length()); +// gazetteerEntriesList.add(entrieNormalized); + entrieLineLengthMap.put(linhaCount, entrieNormalized.length()); writer.println(entrieNormalized); } writer.close(); // Build trie this.trie = new NodeFT(null, Boolean.FALSE, new ArrayList<>(), 0); - this.trie = new TrieUtil().buildTrie(gazetteerEntriesList, trie, this.useTransformation, encoding, - this.transformationClass, this.transformationMethod, entriesFileName, tamanhoLinhaMap, + this.trie = new TrieUtil().buildTrie(trie, this.useTransformation, encoding, + this.transformationClass, this.transformationMethod, entriesFileName, entrieLineLengthMap, this.tokeniserGatePR); this.deepestLevel = trie.getLevel(); this.trie.setLevel(0); diff --git a/IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java b/IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java index 2b32bb0..c4df437 100644 --- a/IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java +++ b/IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java @@ -72,7 +72,7 @@ public class TrieUtil { * @return NodeFT * @throws Exception */ - public NodeFT buildTrie(List gazetteerList, NodeFT root, boolean useTransformation, String encoding, + public NodeFT buildTrie(NodeFT root, boolean useTransformation, String encoding, String transformationClass, String transformationMethod, String entriesFilePath, HashMap tamanhoLinhaMap, String tokeniserPR) throws Exception { int deepestLevel = 0; @@ -208,7 +208,7 @@ public class TrieUtil { } root.setLevel(deepestLevel); - logger.info("Number of entries: " + gazetteerList.size()); + logger.info("Number of entries: " + lineCount); logger.info("Number of characters: " + numberChars); logger.info("Number of nodes: " + totalNos); -- GitLab