Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
cleaning-portalmec
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Richard Fernando Heise Ferreira
cleaning-portalmec
Commits
4a76d518
Commit
4a76d518
authored
7 years ago
by
bfs15
Browse files
Options
Downloads
Patches
Plain Diff
Added new way of finding LOs in the same cluster & changed names
parent
39702fa5
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
app/services/tag_search_service.rb
+102
-21
102 additions, 21 deletions
app/services/tag_search_service.rb
lib/tasks/tag_clustering.rake
+2
-4
2 additions, 4 deletions
lib/tasks/tag_clustering.rake
with
104 additions
and
25 deletions
app/services/tag_search_service.rb
+
102
−
21
View file @
4a76d518
...
...
@@ -19,7 +19,8 @@
module
TagSearchService
# DEBUG = true
CACHE_KEY
=
"tag_clusters"
.
freeze
FTREE_CACHE_KEY
=
"ftree"
.
freeze
LO_CLUSTER_CACHE_KEY
=
"lo_cluster"
.
freeze
PAD
=
1
PADM
=
0.05
...
...
@@ -31,19 +32,32 @@ module TagSearchService
Rails
.
root
.
join
(
root_dir
,
"tags"
).
to_s
end
def
tags_cluster
Rails
.
cache
.
fetch
(
CACHE_KEY
)
do
parseFtree
(
file_path
)
def
parse_ftree_cache
Rails
.
cache
.
fetch
(
FTREE_CACHE_KEY
)
do
parse_ftree
(
file_path
)
end
end
# Returns the learning-object clusters for +tag_clusters+, going through
# Rails.cache under LO_CLUSTER_CACHE_KEY so the clusters are only rebuilt
# when the cache has no entry for that key.
#
# tag_clusters - the cluster list handed to the builder (in this file it is
#                the :tag_clusters entry returned by parse_ftree).
#
# Returns whatever lo_clusters returns: a hash keyed by tag id whose values
# are arrays of learning object ids.
def lo_cluster_cache(tag_clusters)
  Rails.cache.fetch(LO_CLUSTER_CACHE_KEY) do
    # The builder defined in this module is `lo_clusters` (plural); the
    # singular name used previously does not exist and raised NoMethodError
    # as soon as the cache was cold.
    lo_clusters(tag_clusters)
  end
end
def
search
(
tag
,
limit
=
-
1
)
# Create clusters from ftree
tags
=
tags_cluster
()
ftree
=
parse_ftree_cache
()
tag_cluster_hash
=
ftree
[
:tag_cluster_hash
]
# NEW: Uncomment to test this, see if it takes too long for its benefits
# if it's good, put it in the tag_clustering.rake
# lo_cluster = lo_cluster_cache(ftree[:tag_clusters])
# puts "\nStarted Calculating relevant results for tag #{tag.name}\n" if DEBUG
relevant_los
=
find_relevant_results
(
tag
.
id
,
tags
,
limit
)
relevant_los
=
find_relevant_results
(
tag
.
id
,
tag_cluster_hash
,
limit
)
# relevant_los = find_relevant_results_v2(tag.id, tag_cluster_hash, lo_cluster, limit) ##
# NEW: Uncomment to test this, see if it takes too long for its benefits
# puts "\n============ Learning Objects - Cluster Search ===============\n"
# puts "ID: Ranking | Name | Tags\n\n"
...
...
@@ -138,12 +152,12 @@ module TagSearchService
return
merged_los
end
def
find_relevant_results
(
tagId
,
tag
s
,
limit
)
def
find_relevant_results
(
tagId
,
tag
_cluster_hash
,
limit
)
los_ranked
=
{}
# puts "\nGetting tags from the same cluster\n" if DEBUG
# puts "Normalization with padding = #{PAD}\n" if DEBUG
close_tags
=
ranked_close_tags
(
tagId
,
tag
s
)
close_tags
=
ranked_close_tags
(
tagId
,
tag
_cluster_hash
)
# Uncomment the next line if you want to sort by global tag frequency
# freq_cluster = cluster_frequency(close_tags)
...
...
@@ -154,7 +168,7 @@ module TagSearchService
# puts "Name | ID | Normalized Ranking\n\n" if DEBUG
close_tags
.
each
do
|
ct
|
tag
=
Tag
.
find
(
ct
[
:id
])
# puts tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tag
s
[ct[:id]][:rank].to_s if DEBUG
# puts tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tag
_cluster_hash
[ct[:id]][:rank].to_s if DEBUG
tag
.
taggings
.
where
(
taggable_type:
"LearningObject"
).
each
do
|
tagging
|
lo
=
tagging
.
taggable
if
los_ranked
[
lo
].
nil?
...
...
@@ -179,15 +193,15 @@ module TagSearchService
end
# ranking #
def
ranked_close_tags
(
tagId
,
tag
s
)
def
ranked_close_tags
(
tagId
,
tag
_cluster_hash
)
# puts "Rank close tags" if DEBUG
close_tags
=
[]
tag
s
[
tagId
][
:parent
][
:childs
].
each
do
|
t
|
tag
_cluster_hash
[
tagId
][
:parent
][
:childs
].
each
do
|
t
|
# calculate logarithmic distance between tag flows
# lower value, closer, more relevant
# the tag you are searching for will be at distance 0 of itself
lg_dist
=
(
Math
.
log2
(
tag
s
[
tagId
][
:flow
])
-
Math
.
log2
(
t
[
:flow
])
).
abs
lg_dist
=
(
Math
.
log2
(
tag
_cluster_hash
[
tagId
][
:flow
])
-
Math
.
log2
(
t
[
:flow
])
).
abs
close_tags
<<
{
id:
t
[
:id
],
rank:
lg_dist
}
# puts "Rank for tag_id=#{close_tags[close_tags.length - 1][:id]}: #{close_tags[close_tags.length - 1][:rank]}" if DEBUG
...
...
@@ -281,15 +295,15 @@ module TagSearchService
return
rank_sum
end
def
parse
F
tree
(
path
)
def
parse
_f
tree
(
path
)
# parse .ftree into a 'tree', leaves are tags with flow number, branches are the clusters
# create tags list, tags[tagId] == the tag leaf inside the cluster tree
# puts "\nParsing .ftree output into a 'tree'\n" if DEBUG
clusters
=
{
childs:
[],
parent:
nil
}
tag
s
=
{}
countC
lusters
=
{}
root
=
{
childs:
[],
parent:
nil
}
tag
_cluster_hash
=
{}
tag_c
lusters
=
[]
File
.
open
(
path
+
".ftree"
,
"r"
)
do
|
f
|
f
.
gets
...
...
@@ -313,7 +327,7 @@ module TagSearchService
name
=
tmp
[
2
..-
2
].
join
(
' '
)[
1
..-
2
]
# iterate through the levels of the tree
it
=
clusters
# start at the root
it
=
root
#
re
start at the root
ftree
.
each
do
|
clusterId
|
clusterId
=
clusterId
.
to_i
-
1
# on the file they start at 1, we want 0
if
it
[
:childs
][
clusterId
].
nil?
# if this id doesn't exist, create it as child of 'it'
...
...
@@ -323,16 +337,83 @@ module TagSearchService
# go down a level
it
=
it
[
:childs
][
clusterId
]
end
countClusters
[
it
]
=
1
# set this cluster in this hash, for counting purposes
tag_clusters
<<
it
# 'it' is the cluster leafId is a child of, so add it
it
[
:childs
][
leafId
-
1
]
=
{
id:
tagId
,
flow:
flow
,
name:
name
,
parent:
it
}
# put the leaf on this hash for easy access by the tagId
tag
s
[
tagId
]
=
it
[
:childs
][
leafId
-
1
]
tag
_cluster_hash
[
tagId
]
=
it
[
:childs
][
leafId
-
1
]
end
end
# puts "\nNumber of clusters found: #{
countC
lusters.size}\n" if DEBUG
# puts "\nNumber of clusters found: #{
tag_c
lusters.size}\n" if DEBUG
return
tags
return
{
tag_cluster_hash:
tag_cluster_hash
,
tag_clusters:
tag_clusters
}
end
# NEW: test this, see if it takes too long for its benefits
#
# Ranks the learning objects that belong to the same cluster as +tagId+.
#
# tagId            - id of the tag being searched.
# tag_cluster_hash - tag id => leaf hash, passed through to ranked_close_tags.
# lo_cluster       - tag id => array of learning object ids (see lo_clusters).
# limit            - maximum number of results; a value <= 0 means "no limit".
#
# Returns an array of [learning_object, rank] pairs, highest rank first.
def find_relevant_results_v2(tagId, tag_cluster_hash, lo_cluster, limit)
  close_tags = ranked_close_tags(tagId, tag_cluster_hash)

  # Uncomment the next line if you want to sort by global tag frequency
  # freq_cluster = cluster_frequency(close_tags)

  los_ranked = {}

  # A tag can be missing from lo_cluster (for example when the cached LO
  # clusters are older than the cached ftree); treat that as an empty
  # cluster instead of calling #each on nil.
  Array(lo_cluster[tagId]).each do |lo_id|
    lo = LearningObject.find(lo_id)

    # Uncomment the next line if you want to sort by local tag frequency
    # los_ranked[lo.id] = relevance_frequency_rank(lo, close_tags)

    # Uncomment the next line if you want to sort by global tag frequency
    # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster)

    # Current choice: sort by tag cluster rank
    los_ranked[lo] = relevance_raw_rank(lo, close_tags)
  end

  # highest ranks first
  ranked = los_ranked.sort_by { |_lo, rank| -rank }

  limit > 0 ? ranked.first(limit) : ranked
end
# Builds the learning-object membership of every tag cluster.
#
# tag_clusters - array of cluster hashes; each cluster exposes its members
#                under :childs, and each member (leaf) carries the tag id
#                under :id.
#
# Returns a hash keyed by tag id. Each value is the array of learning object
# ids tagged by any tag of that tag's cluster; all tags of one cluster share
# the very same array instance.
def lo_clusters(tag_clusters)
  clusters_by_tag = {}

  # parse_ftree pushes a cluster once per leaf it reads, so the same cluster
  # object shows up many times. Processing it again gives the same result,
  # so only visit each distinct object once (compared by identity, because
  # the hashes reference each other through :parent / :childs).
  tag_clusters.uniq(&:object_id).each do |tag_cluster|
    lo_ids = []   # ids of every LO reachable from this cluster, in discovery order
    seen = {}     # lo id => true, keeps lo_ids free of duplicates

    tag_cluster[:childs].each do |leaf|
      # :childs is filled by index, so it may contain gaps; entries without
      # an :id are not tag leaves. NOTE(review): presumably sub-clusters,
      # confirm against parse_ftree.
      next if leaf.nil? || leaf[:id].nil?

      tag_id = leaf[:id]
      # save the (shared) reference under this tag id
      clusters_by_tag[tag_id] = lo_ids

      # find LOs with this tag
      Tag.find(tag_id).taggings.where(taggable_type: "LearningObject").each do |tagging|
        lo_id = tagging.taggable.id
        next if seen[lo_id]

        seen[lo_id] = true
        lo_ids << lo_id
      end
    end
  end

  clusters_by_tag
end
end
This diff is collapsed.
Click to expand it.
lib/tasks/tag_clustering.rake
+
2
−
4
View file @
4a76d518
...
...
@@ -33,10 +33,8 @@ namespace :tag do
infomap_ftree
(
graph_path
,
TagSearchService
.
root_dir
)
# Cluster needs to be read from disk again, so clear cache of TagSearchService
Rails
.
cache
.
delete
(
TagSearchService
::
CACHE_KEY
)
Rails
.
cache
.
fetch
(
TagSearchService
::
CACHE_KEY
)
do
parseFtree
(
file_path
)
end
Rails
.
cache
.
delete
(
TagSearchService
::
FTREE_CACHE_KEY
)
TagSearchService
::
parse_ftree_cache
()
end
# task
private
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment