Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
asm
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
faes
asm
Commits
6bb269ab
Commit
6bb269ab
authored
Aug 11, 2017
by
JR
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Alterações na aplicação de transformação em tokens.
parent
8c35264c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
148 additions
and
202 deletions
+148
-202
IMGazetteer/.classpath
IMGazetteer/.classpath
+1
-0
IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java
.../src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java
+23
-30
IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/Search.java
IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/Search.java
+9
-9
IMGazetteer/src/br/ufpr/inf/junior/utils/StringUtil.java
IMGazetteer/src/br/ufpr/inf/junior/utils/StringUtil.java
+85
-95
IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java
IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java
+30
-68
No files found.
IMGazetteer/.classpath
View file @
6bb269ab
...
...
@@ -5,5 +5,6 @@
<classpathentry
kind=
"con"
path=
"org.eclipse.jdt.USER_LIBRARY/GATE"
/>
<classpathentry
kind=
"lib"
path=
"E:/workspace/libraries/apache_log4j_1_2_17/log4j-1.2.17.jar"
/>
<classpathentry
kind=
"lib"
path=
"E:/Programas/GATE/plugins/InexactGazetteer/lib/lucene-suggest-5.2.1.jar"
/>
<classpathentry
kind=
"lib"
path=
"E:/Programas/GATE/plugins/InexactGazetteer/lib/StringSim.jar"
/>
<classpathentry
kind=
"output"
path=
"bin"
/>
</classpath>
IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/InexactGazetteer.java
View file @
6bb269ab
...
...
@@ -338,10 +338,11 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
@Override
public
Resource
init
()
throws
ResourceInstantiationException
{
// precompile the pattern used to replace all unicode whitespace in gazetteer
// entries with a single space.
ws_pattern
=
Pattern
.
compile
(
ws_patternstring
);
// precompile the pattern used to replace all unicode whitespace in
// gazetteer
// entries with a single space.
ws_pattern
=
Pattern
.
compile
(
ws_patternstring
);
createTrie
();
return
this
;
}
...
...
@@ -365,6 +366,7 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
}
/**
* Use dictionary file to configure PR and to build the trie.
*
* @throws Exception
*/
...
...
@@ -450,13 +452,13 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
setGazetterDelimiter
(
listConfigFromDictionary
.
get
(
"_entrieDelimiter"
));
else
throw
new
ExecutionException
(
"Your gazetteer miss a configuration: _entrieDelimiter"
);
if
(
listConfigFromDictionary
.
containsKey
(
"_caseSensitive"
))
setCaseSensitive
(
listConfigFromDictionary
.
get
(
"_caseSensitive"
).
equalsIgnoreCase
(
"YES"
)
?
true
:
false
);
TrieUtil
makeTrie
=
new
TrieUtil
();
// C
arrega um map com as features de cada gazetteer
.
this
.
gazetteerFeaturesMap
=
makeTrie
.
load
Gazetteer
sAndFeatures
(
configFileURL
,
this
.
entrieDelimiter
,
// C
reate a MAP with entries and their features
.
this
.
gazetteerFeaturesMap
=
makeTrie
.
load
Entrie
sAndFeatures
(
configFileURL
,
this
.
entrieDelimiter
,
this
.
featureNameValueSeparator
,
this
.
featuresSeparator
,
encoding
);
for
(
Map
.
Entry
<
String
,
List
<
GazetteerFeature
>>
element
:
gazetteerFeaturesMap
.
entrySet
())
{
...
...
@@ -466,9 +468,9 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
gazetteerEntriesList
.
add
(
entrieNormalized
);
}
//
Create
trie
//
Build
trie
this
.
trie
=
new
NodeFT
(
null
,
Boolean
.
FALSE
,
new
ArrayList
<>(),
0
);
this
.
trie
=
new
TrieUtil
().
fill
Trie
(
gazetteerEntriesList
,
trie
,
this
.
useTransformation
,
encoding
,
this
.
trie
=
new
TrieUtil
().
build
Trie
(
gazetteerEntriesList
,
trie
,
this
.
useTransformation
,
encoding
,
this
.
transformationClass
,
this
.
transformationMethod
);
this
.
deepestLevel
=
trie
.
getLevel
();
this
.
trie
.
setLevel
(
0
);
...
...
@@ -546,30 +548,27 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
this
.
transformationClass
,
this
.
transformationMethod
,
this
.
caseSensitive
);
if
(
editDistanceResultList
!=
null
&&
!
editDistanceResultList
.
isEmpty
())
{
String
nomeClasseSimilaridade
=
getSimilarityClass
();
String
nomeMetodoSimilaridade
=
getSimilarityMethod
();
// Create similarity class
Class
c
=
Class
.
forName
(
nomeClasseSimilaridade
);
Class
c
=
Class
.
forName
(
getSimilarityClass
()
);
// New Object.
Object
obj
=
c
.
newInstance
();
// Create similarity method.
Method
m
etodoComparacao
=
c
.
getMethod
(
nomeMetodoSimilaridade
,
String
.
class
,
String
.
class
);
Method
m
=
c
.
getMethod
(
getSimilarityMethod
()
,
String
.
class
,
String
.
class
);
// Annotate the document.
ArrayList
<
SimilarityResult
>
similarityResults
Return
=
new
ArrayList
<>();
ArrayList
<
SimilarityResult
>
similarityResults
=
new
ArrayList
<>();
for
(
EditDistanceResult
edr
:
editDistanceResultList
)
{
similarityResults
Return
=
auxiliarStringSimilarity
.
secondMatch
(
edr
,
metodoComparacao
,
obj
,
similarityResults
=
auxiliarStringSimilarity
.
secondMatch
(
edr
,
m
,
obj
,
this
.
minAcceptedSimilarity
,
this
.
numberBetterSimilarities
,
caseSensitive
);
if
(
similarityResults
Return
!=
null
&&
!
similarityResultsReturn
.
isEmpty
())
{
createLookups
(
edr
,
similarityResults
Return
);
similarityResults
Return
.
clear
();
if
(
similarityResults
!=
null
&&
!
similarityResults
.
isEmpty
())
{
createLookups
(
edr
,
similarityResults
);
similarityResults
.
clear
();
}
}
}
}
catch
(
Exception
e
)
{
logger
.
info
(
e
.
getMessage
());
e
.
printStackTrace
();
throw
new
ExecutionException
(
e
.
getMessage
()
+
e
.
getCause
().
getMessage
());
}
...
...
@@ -580,11 +579,11 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
/**
*
* @param edr
* @param similarityResults
Return
* @param similarityResults
*/
protected
void
createLookups
(
EditDistanceResult
edr
,
ArrayList
<
SimilarityResult
>
similarityResults
Return
)
{
protected
void
createLookups
(
EditDistanceResult
edr
,
ArrayList
<
SimilarityResult
>
similarityResults
)
{
Integer
lookupid
;
for
(
SimilarityResult
sr
:
similarityResults
Return
)
{
for
(
SimilarityResult
sr
:
similarityResults
)
{
FeatureMap
fm
=
Factory
.
newFeatureMap
();
fm
.
put
(
this
.
similarityFeatureName
,
sr
.
similarity
);
fm
.
put
(
"String"
,
edr
.
getStringRead
().
getStringBuffer
().
toString
());
...
...
@@ -630,26 +629,20 @@ public class InexactGazetteer extends AbstractLanguageAnalyser implements Action
}
private
List
<
Action
>
actions
;
@Override
public
List
<
Action
>
getActions
()
{
if
(
actions
==
null
)
{
actions
=
new
ArrayList
<
Action
>();
// Action 1: remove the gazbin file and re-initialize the gazetteer
actions
.
add
(
new
AbstractAction
(
"Initialize"
)
{
private
static
final
long
serialVersionUID
=
1L
;
@Override
public
void
actionPerformed
(
ActionEvent
evt
)
{
File
configFile
=
gate
.
util
.
Files
.
fileFromURL
(
getConfigFileURL
());
String
configFileName
=
configFile
.
getAbsolutePath
();
try
{
// reInit();
init
();
}
catch
(
ResourceInstantiationException
e
x
)
{
throw
new
GateRuntimeException
(
"
Re-initialization failed"
,
ex
);
}
catch
(
ResourceInstantiationException
e
)
{
throw
new
GateRuntimeException
(
"
Initialization failed"
,
e
);
}
}
});
...
...
IMGazetteer/src/br/ufpr/inf/junior/imgazetteer/Search.java
View file @
6bb269ab
...
...
@@ -75,10 +75,11 @@ public class Search {
stringRead
.
setStringBuffer
(
stringRead
.
getStringBuffer
().
append
(
" "
));
i
++;
}
else
{
if
(
tokenAnnotation
.
getFeatures
().
get
(
ANNIEConstants
.
TOKEN_KIND_FEATURE_NAME
).
equals
(
"word"
))
{
convertedToken
+=
StringUtil
.
convert
(
if
(
tokenAnnotation
.
getFeatures
().
get
(
ANNIEConstants
.
TOKEN_KIND_FEATURE_NAME
).
equals
(
"word"
)
&&
useTransformation
)
{
convertedToken
+=
StringUtil
.
stringTransformation
(
(
String
)
tokenAnnotation
.
getFeatures
().
get
(
ANNIEConstants
.
TOKEN_STRING_FEATURE_NAME
),
useTransformation
,
convertionClass
,
convertionMethod
);
convertionClass
,
convertionMethod
);
}
else
{
convertedToken
+=
(
String
)
tokenAnnotation
.
getFeatures
()
.
get
(
ANNIEConstants
.
TOKEN_STRING_FEATURE_NAME
);
...
...
@@ -180,8 +181,8 @@ public class Search {
* @return
* @throws Exception
*/
private
List
<
ArrayList
<
NodeFT
>>
runSearchOnTrieCaseSensitive
(
Character
currentChar
,
List
<
ArrayList
<
NodeFT
>>
activNodesArray
,
int
deepestLevel
)
throws
Exception
{
private
List
<
ArrayList
<
NodeFT
>>
runSearchOnTrieCaseSensitive
(
Character
currentChar
,
List
<
ArrayList
<
NodeFT
>>
activNodesArray
,
int
deepestLevel
)
throws
Exception
{
List
<
ArrayList
<
NodeFT
>>
activNodesArrayTemp
=
new
ArrayList
<
ArrayList
<
NodeFT
>>();
for
(
int
i
=
0
;
i
<=
deepestLevel
;
i
++)
{
...
...
@@ -282,7 +283,7 @@ public class Search {
}
// O n que j estava ativo antes de iniciar as
// associaes para o caractere atual tem o
// associaes para o caractere atual tem o
// EDValue incrementado.
if
(
rootNode
.
getEDValue
()
+
1
<=
this
.
maxEd
)
{
rootNode
.
setEDValue
(
rootNode
.
getEDValue
()
+
1
);
...
...
@@ -405,7 +406,7 @@ public class Search {
}
// O nó que j estava ativo antes de iniciar as
// associaes para o caractere atual tem o
// associaes para o caractere atual tem o
// EDValue incrementado.
if
(
rootNode
.
getEDValue
()
+
1
<=
this
.
maxEd
)
{
rootNode
.
setEDValue
(
rootNode
.
getEDValue
()
+
1
);
...
...
@@ -421,8 +422,7 @@ public class Search {
}
/**
* Return a list of activate nodes that indicate the end of an
* entry.
* Return a list of activate nodes that indicate the end of an entry.
*
* @param list
* @return List<NodeFT>
...
...
IMGazetteer/src/br/ufpr/inf/junior/utils/StringUtil.java
View file @
6bb269ab
...
...
@@ -20,7 +20,6 @@ import br.ufpr.inf.junior.imgazetteer.InputString;
*/
public
class
StringUtil
{
/**
* Invoke transformation method.
*
...
...
@@ -31,106 +30,97 @@ public class StringUtil {
* @throws IllegalArgumentException
* @return String
*/
public
static
String
convert
(
String
originalString
,
boolean
useTransformation
,
String
convertionClass
,
String
convertionMethod
)
throws
IllegalArgumentException
{
if
(
useTransformation
)
{
String
convertedString
=
new
String
();
try
{
// Create class
Class
c
=
Class
.
forName
(
convertionClass
);
// Instantiate class
Object
obj
=
c
.
newInstance
();
// Create method
Method
metodoConversor
=
c
.
getMethod
(
convertionMethod
,
String
.
class
);
// Invoke method
convertedString
=
(
String
)
metodoConversor
.
invoke
(
obj
,
originalString
);
}
catch
(
NoSuchMethodException
|
SecurityException
|
InvocationTargetException
|
ClassNotFoundException
|
IllegalAccessException
|
InstantiationException
ex
)
{
Logger
.
getLogger
(
StringUtil
.
class
.
getName
()).
log
(
Level
.
SEVERE
,
ex
.
getMessage
(),
ex
);
}
public
static
String
stringTransformation
(
String
originalString
,
String
convertionClass
,
String
convertionMethod
)
throws
IllegalArgumentException
{
String
convertedString
=
new
String
();
try
{
// Create class
Class
c
=
Class
.
forName
(
convertionClass
);
// Instantiate class
Object
obj
=
c
.
newInstance
();
// Create method
Method
m
=
c
.
getMethod
(
convertionMethod
,
String
.
class
);
// Invoke method
convertedString
=
(
String
)
m
.
invoke
(
obj
,
originalString
);
return
convertedString
;
}
else
{
return
originalString
;
}
catch
(
NoSuchMethodException
|
SecurityException
|
InvocationTargetException
|
ClassNotFoundException
|
IllegalAccessException
|
InstantiationException
ex
)
{
Logger
.
getLogger
(
StringUtil
.
class
.
getName
()).
log
(
Level
.
SEVERE
,
ex
.
getMessage
(),
ex
);
}
return
convertedString
;
}
/**
*
* Return a list containing tokens from received string.
*
* @param stringIn
* @return List
*/
public
static
List
<
InputString
>
string2TokenList
(
String
stringIn
)
{
InputString
readString
=
new
InputString
();
boolean
isNewToken
=
false
;
List
<
InputString
>
tokenList
=
new
ArrayList
<>();
int
index
=
0
;
StringBuffer
stringBufferTemp
=
new
StringBuffer
();
try
{
if
(
stringIn
!=
null
&&
stringIn
.
length
()
>
0
)
{
stringBufferTemp
.
append
(
stringIn
.
charAt
(
index
));
index
=
1
;
while
(
index
<
stringIn
.
length
())
{
isNewToken
=
isEndOfToken
(
stringIn
.
charAt
(
index
-
1
),
stringIn
.
charAt
(
index
));
if
(
isNewToken
)
{
readString
.
setStringBuffer
(
stringBufferTemp
);
readString
.
setEndOffset
(
index
-
1
);
readString
.
setStartOffset
((
index
)
-
(
stringBufferTemp
.
length
()));
tokenList
.
add
(
readString
);
readString
=
new
InputString
();
stringBufferTemp
=
new
StringBuffer
();
}
stringBufferTemp
.
append
(
stringIn
.
charAt
(
index
));
index
++;
}
//Adiciona o ltimo token do texto na lista.
readString
.
setStringBuffer
(
stringBufferTemp
);
readString
.
setStartOffset
((
index
)
-
(
stringBufferTemp
.
length
()));
readString
.
setEndOffset
(
index
-
1
);
tokenList
.
add
(
readString
);
}
}
catch
(
Exception
e
)
{
System
.
err
.
println
(
"text2TokenList "
+
e
.
getMessage
());
}
/**
*
* Return a list containing tokens from received string.
*
* @param stringIn
* @return List
*/
public
static
List
<
InputString
>
string2TokenList
(
String
stringIn
)
{
InputString
readString
=
new
InputString
();
boolean
isNewToken
=
false
;
List
<
InputString
>
tokenList
=
new
ArrayList
<>();
int
index
=
0
;
StringBuffer
stringBufferTemp
=
new
StringBuffer
();
return
tokenList
;
}
try
{
if
(
stringIn
!=
null
&&
stringIn
.
length
()
>
0
)
{
stringBufferTemp
.
append
(
stringIn
.
charAt
(
index
));
index
=
1
;
while
(
index
<
stringIn
.
length
())
{
isNewToken
=
isEndOfToken
(
stringIn
.
charAt
(
index
-
1
),
stringIn
.
charAt
(
index
));
if
(
isNewToken
)
{
readString
.
setStringBuffer
(
stringBufferTemp
);
readString
.
setEndOffset
(
index
-
1
);
readString
.
setStartOffset
((
index
)
-
(
stringBufferTemp
.
length
()));
tokenList
.
add
(
readString
);
readString
=
new
InputString
();
stringBufferTemp
=
new
StringBuffer
();
}
stringBufferTemp
.
append
(
stringIn
.
charAt
(
index
));
index
++;
}
// Add text's last token to the list.
readString
.
setStringBuffer
(
stringBufferTemp
);
readString
.
setStartOffset
((
index
)
-
(
stringBufferTemp
.
length
()));
readString
.
setEndOffset
(
index
-
1
);
tokenList
.
add
(
readString
);
}
}
catch
(
Exception
e
)
{
System
.
err
.
println
(
"text2TokenList "
+
e
.
getMessage
());
}
return
tokenList
;
}
/**
* Verify if the current character represents the end of a token.
*
* @param currentChar
* @param nextChar
* @return boolean
*/
public
static
boolean
isEndOfToken
(
Character
currentChar
,
Character
nextChar
)
{
//Current char is NOT blank space and the next is blank space.
if
(!
currentChar
.
equals
(
" "
.
charAt
(
0
))
&&
nextChar
.
equals
(
" "
.
charAt
(
0
)))
{
return
true
;
//Current char is blank space and the next is NOT blank space.
}
else
if
(
currentChar
.
equals
(
" "
.
charAt
(
0
))
&&
!
nextChar
.
equals
(
" "
.
charAt
(
0
)))
{
return
true
;
//Current char is NOT letter and NOT number and NOT blank space.
}
else
if
(
currentChar
.
toString
().
matches
(
"\\W"
)
&&
!
currentChar
.
toString
().
matches
(
"\\s"
))
{
return
true
;
//Current char is letter and the next is NOT letter.
}
else
if
(
Character
.
isLetter
(
currentChar
)
&&
!
Character
.
isLetter
(
nextChar
))
{
return
true
;
//Current char is number and the next is NOT number.
}
else
if
(
Character
.
isDigit
(
currentChar
)
&&
!
Character
.
isDigit
(
nextChar
))
{
return
true
;
}
else
{
return
false
;
}
}
/**
* Verify if current character represents the end of a token.
*
* @param currentChar
* @param nextChar
* @return boolean
*/
public
static
boolean
isEndOfToken
(
Character
currentChar
,
Character
nextChar
)
{
// Current char is NOT blank space and the next is blank space.
if
(!
currentChar
.
equals
(
" "
.
charAt
(
0
))
&&
nextChar
.
equals
(
" "
.
charAt
(
0
)))
{
return
true
;
// Current char is blank space and the next is NOT blank space.
}
else
if
(
currentChar
.
equals
(
" "
.
charAt
(
0
))
&&
!
nextChar
.
equals
(
" "
.
charAt
(
0
)))
{
return
true
;
// Current char is NOT letter and NOT number and NOT blank space.
}
else
if
(
currentChar
.
toString
().
matches
(
"\\W"
)
&&
!
currentChar
.
toString
().
matches
(
"\\s"
))
{
return
true
;
// Current char is letter and the next is NOT letter.
}
else
if
(
Character
.
isLetter
(
currentChar
)
&&
!
Character
.
isLetter
(
nextChar
))
{
return
true
;
// Current char is number and the next is NOT number.
}
else
if
(
Character
.
isDigit
(
currentChar
)
&&
!
Character
.
isDigit
(
nextChar
))
{
return
true
;
}
else
{
return
false
;
}
}
}
IMGazetteer/src/br/ufpr/inf/junior/utils/TrieUtil.java
View file @
6bb269ab
...
...
@@ -45,8 +45,8 @@ public class TrieUtil {
* @return
* @throws Exception
*/
public
NodeFT
fill
Trie
(
List
<
String
>
gazetteerList
,
NodeFT
root
,
boolean
useTransformation
,
String
encoding
,
String
convertionClass
,
String
conver
tionMethod
)
throws
Exception
{
public
NodeFT
build
Trie
(
List
<
String
>
gazetteerList
,
NodeFT
root
,
boolean
useTransformation
,
String
encoding
,
String
transformationClass
,
String
transforma
tionMethod
)
throws
Exception
{
int
deepestLevel
=
0
;
int
numberChars
=
0
;
...
...
@@ -56,55 +56,56 @@ public class TrieUtil {
int
length
=
0
;
int
levelInTrie
=
0
;
Integer
nodeId
=
0
;
String
convertedStr
=
new
String
();
List
<
InputString
>
tokenList
=
new
ArrayList
();
String
convertedStr
ing
=
new
String
();
List
<
InputString
>
tokenList
=
new
ArrayList
();
List
<
String
>
convertedTokenList
=
new
ArrayList
();
String
convertedToken
=
new
String
();
for
(
String
s
:
gazetteerList
)
{
if
(!
useTransformation
)
{
convertedStr
=
s
;
}
else
{
//
A t
ransformation algorithm will be
convertedStr
ing
=
s
;
}
else
{
//
T
ransformation algorithm will be
// used.
/*
*
Utiliza um mtodo prprio para dividir as entradas e
m
*
tokens
.
*
Entries are divided in tokens using an algorith
m
*
developed in this project. Do not use annie gazetteer
.
*/
convertedTokenList
.
clear
();
tokenList
.
clear
();
//
Carrega todos os tokens de uma entrada
.
//
Get tokens from an entrie
.
tokenList
=
StringUtil
.
string2TokenList
(
s
);
//
Converte os tokens
.
//
Tokens are transformed
.
for
(
InputString
tempToken
:
tokenList
)
{
numberChars
+=
tempToken
.
getStringBuffer
().
length
();
convertedToken
=
StringUtil
.
convert
(
tempToken
.
getStringBuffer
().
toString
(),
useTransformation
,
convertionClass
,
convertionMethod
);
// Se o resultado da funo de converso for NULL ou
// nada, mantm a string original.
if
(
convertedToken
!=
null
&&
!
convertedToken
.
equals
(
""
))
{
convertedTokenList
.
add
(
convertedToken
);
// ONLY apply transformation on tokens that start with a
// letter.
if
(
Character
.
isLetter
(
tempToken
.
getStringBuffer
().
charAt
(
0
)))
{
convertedToken
=
StringUtil
.
stringTransformation
(
tempToken
.
getStringBuffer
().
toString
(),
transformationClass
,
transformationMethod
);
}
else
{
convertedToken
List
.
add
(
tempToken
.
getStringBuffer
().
toString
()
);
convertedToken
=
tempToken
.
getStringBuffer
().
toString
(
);
}
convertedTokenList
.
add
(
convertedToken
);
}
//Junta os tokens de uma entrada em uma string nica.
// Create a single string using transformed tokens from an
// entrie.
StringBuffer
stringBufferTemp
=
new
StringBuffer
();
for
(
String
stringTemp
:
convertedTokenList
)
{
stringBufferTemp
.
append
(
stringTemp
);
}
convertedStr
=
stringBufferTemp
.
toString
();
convertedStr
ing
=
stringBufferTemp
.
toString
();
}
currentlyNodeFT
=
root
;
boolean
foundChild
=
false
;
currentPosition
=
0
;
length
=
convertedStr
.
length
();
length
=
convertedStr
ing
.
length
();
// Use each
string
character to buil the trie.
// Use each
entrie
character to buil the trie.
while
(
currentPosition
<
length
)
{
Character
c
=
convertedStr
.
charAt
(
currentPosition
);
Character
c
=
convertedStr
ing
.
charAt
(
currentPosition
);
levelInTrie
=
currentlyNodeFT
.
getLevel
()
+
1
;
foundChild
=
false
;
// If currently node doesn't has any child.
...
...
@@ -154,7 +155,7 @@ public class TrieUtil {
}
/**
* Create a node
* Create a node
*
* @param c
* @param levelInTrie
...
...
@@ -168,48 +169,8 @@ public class TrieUtil {
}
/**
* Identify and return entries from dictionary file.
*
* @param dictionaryBuffReader
* @return linesList
*/
public
List
<
String
>
extractGazettersFromList
(
BufferedReader
dictionaryBuffReader
)
{
String
line
;
logger
.
info
(
"Linhas no metodo extractGazettersFromList: "
+
dictionaryBuffReader
.
lines
().
count
());
try
{
line
=
dictionaryBuffReader
.
readLine
();
logger
.
info
(
"Linha 1: "
+
line
);
logger
.
info
(
"Linha 1 tamanho: "
+
line
.
length
());
List
<
String
>
linesList
=
new
ArrayList
<
String
>();
// Carrega em uma lista todas as linhas com gazetteers (abaixo do
// marcador
// "|ENTRIES|")
while
(
null
!=
line
)
{
logger
.
info
(
"Linha: "
+
line
);
// if (line.equals("|ENTRIES|")) {
logger
.
info
(
"Linha lida: "
+
line
);
if
(
line
.
trim
().
compareTo
(
"|ENTRIES|"
)
==
0
)
{
logger
.
info
(
"Passou no IF do compareTo"
);
line
=
dictionaryBuffReader
.
readLine
();
while
(
line
!=
null
)
{
linesList
.
add
(
line
);
// Get next line.
line
=
dictionaryBuffReader
.
readLine
();
}
}
else
{
line
=
dictionaryBuffReader
.
readLine
();
}
}
return
linesList
;
}
catch
(
Exception
e
)
{
logger
.
error
(
"extractGazettersFromList: "
+
e
.
toString
());
return
null
;
}
}
/**
* Separate the entries from its features. Both are defined in the same line.
* Separate the entries from its features. Both are defined in the same
* line.
*
* @param configFileURL
* @param gazetterDelimiter
...
...
@@ -217,7 +178,7 @@ public class TrieUtil {
* @param featuresSeparator
* @param encoding
*/
public
HashMap
<
String
,
List
<
GazetteerFeature
>>
load
Gazetteer
sAndFeatures
(
java
.
net
.
URL
configFileURL
,
public
HashMap
<
String
,
List
<
GazetteerFeature
>>
load
Entrie
sAndFeatures
(
java
.
net
.
URL
configFileURL
,
String
gazetterDelimiter
,
String
featureNameValueSeparator
,
String
featuresSeparator
,
String
encoding
)
throws
Exception
{
HashMap
<
String
,
List
<
GazetteerFeature
>>
gazetteerFeaturesMap
=
new
HashMap
<>();
...
...
@@ -225,7 +186,8 @@ public class TrieUtil {
BufferedReader
dictionaryBuffReader
=
new
BomStrippingInputStreamReader
((
configFileURL
).
openStream
(),
encoding
);
String
line
=
dictionaryBuffReader
.
readLine
();
// Put all entries in a list. Entries are every line bellow the tag "|ENTRIES|".
// Put all entries in a list. Entries are every line bellow the tag
// "|ENTRIES|".
while
(
null
!=
line
)
{
if
(
line
.
equals
(
"|ENTRIES|"
))
{
String
linesArray
[]
=
new
String
[
2
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment