From 6590821aa58b10ee99aeb0e3dac3a978ca628bfb Mon Sep 17 00:00:00 2001 From: bfs15 <bruno.serbena@gmail.com> Date: Mon, 10 Jul 2017 10:00:17 -0300 Subject: [PATCH] Added comments --- lib/tasks/tag_clustering.rake | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lib/tasks/tag_clustering.rake b/lib/tasks/tag_clustering.rake index efe13270..88d8f0c5 100644 --- a/lib/tasks/tag_clustering.rake +++ b/lib/tasks/tag_clustering.rake @@ -22,12 +22,14 @@ namespace :tag do task :generate_clusters => [:environment] do # DEBUG = true graph_path = TagSearchService.file_path+".net" + # Create hash of tag co-occurrence counts hash = create_hash() - + # Create a graph that Infomap can read create_pajek_net_graph(hash, graph_path) infomap_ftree(graph_path, TagSearchService.root_dir) + # Clusters need to be read from disk again, so clear the TagSearchService cache Rails.cache.delete(TagSearchService::CACHE_KEY) end # task @@ -42,28 +44,25 @@ namespace :tag do # for each lo, count tags and tag pairs and add to hash # if id1 <= id2 lo.tags.each.with_index do |t, i| - # initialize value + # initialize values hash[t.id] = {} if hash[t.id].nil? hash[t.id][t.id] = 0 if hash[t.id][t.id].nil? - hash[t.id][t.id] += 1 + hash[t.id][t.id] += 1 # add self-occurrence - # for each next tags (with higher index) + # for each following tag (with higher index than t) lo.tags.drop(i+1).each do |t2| - # [t1][t2], t1 should always be lower + # swap if necessary ([t1][t2], t1 should always have the lower id) if t.id > t2.id - # swaps + # swaps t with t2 t, t2 = t2, t # check nil hash[t.id] = {} if hash[t.id].nil? end - # initialize value - if hash[t.id][t2.id].nil? - hash[t.id][t2.id] = 0 - end + hash[t.id][t2.id] = 0 if hash[t.id][t2.id].nil? - hash[t.id][t2.id] += 1 + hash[t.id][t2.id] += 1 # add co-occurrence end end end -- GitLab