Implemented functions to get objects from tag cluster

e841c770 · Israel Barreto Sant'Anna · 5343b99a · e841c770
Commit e841c770 authored 7 years ago by Israel Barreto Sant'Anna
--- a/lib/tasks/tag_clustering.rake
+++ b/lib/tasks/tag_clustering.rake
@@ -7,63 +7,63 @@ namespace :tag do

    hash = {}
    edges_total = 0
-    graphPath = Rails.root.join(outDir, fileName + ".net")
-
-    def swap(a, b)
-      tmp = a
-      a = b
-      b = tmp
-    end
-
-    LearningObject.all.each do |lo|
-      # for each lo, count tags and tag pairs and add to hash
-      # if id1 <= id2
-      # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
-      lo.tags.each.with_index do |t, i|
-        hash[t.id] = {} if hash[t.id].nil?
-        hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
-        hash[t.id][t.id] += 1
-        lo.tags.drop(i+1).each do |t2|
-          if t.id > t2.id
-            swap(t, t2)
-            hash[t.id] = {} if hash[t.id].nil?
-          end
-          if hash[t.id][t2.id].nil?
-            hash[t.id][t2.id] = 0
-          end
-          hash[t2.id] = {} if hash[t2.id].nil?
-
-          # hash[t2.id][t2.id] = 0 if hash[t2.id][t2.id].nil?
-          # hash[t2.id][t2.id] += 1
-          # hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
-          # hash[t.id][t.id] += 1
-
-          hash[t.id][t2.id] += 1
-        end
-      end
-    end
-
-    File.open(graphPath, "w+") do |f|
-      f << "*Vertices #{Tag.all.size}\n"
-      # tags = Tag.all.to_ary
-      tag_index = {}
-      Tag.all.each_with_index do |t,i|
-        f << "#{i+1} \"#{t.name}\"\n"
-        tag_index[t.id] = i+1
-      end
-
-      f << "*Edges #{edges_total}\n"
-
-      hash.each do |id1, ids2Hash|
-        ids2Hash.each do |id2, value|
-          if id1 != id2
-            f << "#{tag_index[id1]} #{tag_index[id2]} #{hash[id1][id2].to_f/(Math.sqrt(hash[id1][id1]*hash[id2][id2]))}\n"
-          end
-        end
-      end
-    end
-
-    system("infomap --ftree #{graphPath} #{Rails.root.join(outDIR)}")
+    graphPath = Rails.root.join(outDIR, fileName + ".net")
+    #
+    # def swap(a, b)
+    #   tmp = a
+    #   a = b
+    #   b = tmp
+    # end
+    #
+    # LearningObject.all.each do |lo|
+    #   # for each lo, count tags and tag pairs and add to hash
+    #   # if id1 <= id2
+    #   # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
+    #   lo.tags.each.with_index do |t, i|
+    #     hash[t.id] = {} if hash[t.id].nil?
+    #     hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
+    #     hash[t.id][t.id] += 1
+    #     lo.tags.drop(i+1).each do |t2|
+    #       if t.id > t2.id
+    #         swap(t, t2)
+    #         hash[t.id] = {} if hash[t.id].nil?
+    #       end
+    #       if hash[t.id][t2.id].nil?
+    #         hash[t.id][t2.id] = 0
+    #       end
+    #       hash[t2.id] = {} if hash[t2.id].nil?
+    #
+    #       # hash[t2.id][t2.id] = 0 if hash[t2.id][t2.id].nil?
+    #       # hash[t2.id][t2.id] += 1
+    #       # hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
+    #       # hash[t.id][t.id] += 1
+    #
+    #       hash[t.id][t2.id] += 1
+    #     end
+    #   end
+    # end
+    #
+    # File.open(graphPath, "w+") do |f|
+    #   f << "*Vertices #{Tag.all.size}\n"
+    #   # tags = Tag.all.to_ary
+    #   tag_index = {}
+    #   Tag.all.each_with_index do |t,i|
+    #     f << "#{i+1} \"#{t.name}\"\n"
+    #     tag_index[t.id] = i+1
+    #   end
+    #
+    #   f << "*Edges #{edges_total}\n"
+    #
+    #   hash.each do |id1, ids2Hash|
+    #     ids2Hash.each do |id2, value|
+    #       if id1 != id2
+    #         f << "#{tag_index[id1]} #{tag_index[id2]} #{hash[id1][id2].to_f/(Math.sqrt(hash[id1][id1]*hash[id2][id2]))}\n"
+    #       end
+    #     end
+    #   end
+    # end
+    #
+    # system("infomap --ftree #{graphPath} #{Rails.root.join(outDIR)}")

    clusters = {childs: [], parent: nil}
    tags = {}
@@ -84,8 +84,7 @@ namespace :tag do

        it = clusters
        ftree.each do |clusterId|
-          # p it
-          clusterId = clusterId.to_i
+          clusterId = clusterId.to_i - 1
          if it[:childs][clusterId].nil?
            it[:childs][clusterId] = {childs: [], parent: nil}
            it[:childs][clusterId][:parent] = it
@@ -93,9 +92,127 @@ namespace :tag do
          it = it[:childs][clusterId]
        end

-        it[:childs][leafId] = {id: tagId, rank: rank, name: name, parent: it}
-        tags[tagId] = it
+        it[:childs][leafId-1] = {id: tagId, rank: rank, name: name, parent: it}
+        tags[tagId] = it[:childs][leafId-1]
      end
    end
+
+    # Scores a learning object against a list of close tags.
+    #
+    # lo         - object responding to #tags; each tag must respond to #id.
+    # close_tags - array of hashes carrying :id and :rank.
+    #
+    # Returns the sum of :rank over every (tag, close tag) pair whose ids
+    # match, or 0 when nothing matches.
+    def calculate_relevance(lo, close_tags)
+      lo.tags.reduce(0) do |total, tag|
+        close_tags.reduce(total) do |running, candidate|
+          candidate[:id] == tag.id ? running + candidate[:rank] : running
+        end
+      end
+    end
+
+    # Collects the tags that share a cluster with tagId and have a similar rank.
+    #
+    # tagId        - id of the reference tag.
+    # tags         - hash mapping tag id to its leaf node; a leaf carries :id,
+    #                :rank and :parent, and the parent lists its children
+    #                under :childs.
+    # max_distance - exclusive upper bound for |log2(rank) - log2(sibling rank)|
+    #                (defaults to 4).
+    #
+    # Each kept sibling becomes { id:, rank: } where :rank is that log2
+    # distance; the list is then handed to normalize.
+    #
+    # Returns whatever normalize returns for the collected list.
+    # Raises ArgumentError when tagId is not a key of tags.
+    def closest(tagId, tags, max_distance = 4)
+      target = tags[tagId]
+      raise ArgumentError, "tag #{tagId} is not present in the cluster tree" if target.nil?
+
+      # Loop-invariant: the reference tag's rank does not change per sibling.
+      target_log = Math.log2(target[:rank])
+
+      clos = []
+      target[:parent][:childs].each do |sibling|
+        # :childs is filled by index, so it may hold nil gaps, and nested
+        # cluster nodes carry no :rank; neither can be compared as a tag.
+        next if sibling.nil? || sibling[:rank].nil?
+
+        distance = (target_log - Math.log2(sibling[:rank])).abs
+        clos << { id: sibling[:id], rank: distance } if distance < max_distance
+      end
+      normalize(clos)
+    end
+
+    # Converts distances into closeness weights, in place.
+    #
+    # tags - array of hashes whose :rank holds a non-negative distance.
+    #
+    # Every :rank is replaced by 1 - rank / (max * 1.05), where max is the
+    # largest distance in the list, so a distance of 0 maps to 1 and the
+    # largest distance maps to a small positive weight rather than 0.
+    #
+    # Returns the same array that was passed in.
+    def normalize(tags)
+      max = tags.reduce(0) { |highest, t| t[:rank] > highest ? t[:rank] : highest }
+      scale = max * 1.05
+
+      tags.each do |t|
+        # With max == 0 every distance is 0; dividing would produce NaN, so
+        # assign the weight a zero distance gets in the general formula.
+        t[:rank] = scale.zero? ? 1.0 : 1 - (t[:rank] / scale)
+      end
+      tags
+    end
+
+    # Ranks every learning object against the tags clustered near tagId.
+    #
+    # tagId - id of the tag to search around; it is looked up in tags.
+    # tags  - hash mapping tag id to its node in the cluster tree.
+    #
+    # Prints the close tags and the winning scores to stdout as a debugging
+    # aid.
+    #
+    # Returns up to 25 [learning_object_id, relevance] pairs, highest first.
+    def find_relevant_results(tagId, tags)
+      close_tags = closest(tagId, tags)
+      banner = "==============="
+
+      p banner
+      p "close_tags"
+      close_tags.each do |ct|
+        p Tag.find(ct[:id]).name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s
+      end
+      p banner
+
+      # Only the commented-out frequency_rank_global scorer consumes this value.
+      freq = cluster_frequency(close_tags)
+      scores = LearningObject.all.each_with_object({}) do |lo, acc|
+        acc[lo.id] = calculate_relevance(lo, close_tags)
+        # acc[lo.id] = frequency_rank_global(lo, close_tags, freq)
+        # acc[lo.id] = frequency_rank(lo, close_tags)
+      end
+
+      # Ascending sort, keep the tail, then flip so the best score comes first.
+      top = scores.sort_by { |_id, rel| rel }.last(25).reverse
+      top.each { |key, value| puts "#{key}: #{value}" }
+      top
+      # top.map {|lo| lo[0]}
+    end
+
+    # Frequency-weighted score of a learning object against close tags.
+    #
+    # lo         - object responding to #tags; each tag responds to #id and
+    #              #taggings.
+    # close_tags - array of hashes carrying :id and :rank.
+    #
+    # The result is wdf * itf_sum, where
+    #   wdf     = 1 / (log2(number of tags on lo) + 1)
+    #   itf_sum = sum over matching tags of
+    #             rank * (log2(total tags / taggings of that tag) + 1)
+    #
+    # Returns 0 when lo has no tags.
+    def frequency_rank(lo, close_tags)
+      tag_count = lo.tags.size
+      return 0 if tag_count.zero?
+
+      wdf = 1 / (Math.log2(tag_count) + 1)
+      # Queried once instead of per match; kept as Float so the ratio below is
+      # not truncated by integer division before the logarithm.
+      total_tags = Tag.all.size.to_f
+
+      itf_sum = 0
+      lo.tags.each do |t|
+        close_tags.each do |cloT|
+          next unless cloT[:id] == t.id
+
+          usage = t.taggings.size
+          # A tag without taggings has no defined ratio; skip it.
+          next if usage.zero?
+
+          itf_sum += cloT[:rank] * (Math.log2(total_tags / usage) + 1)
+        end
+      end
+
+      wdf * itf_sum
+    end
+
+    # Total number of taggings across the tags of a cluster.
+    #
+    # cluster - array of hashes carrying :id; each id is looked up with
+    #           Tag.find.
+    #
+    # Returns the summed taggings.size, or 0 for an empty cluster.
+    def cluster_frequency(cluster)
+      cluster.reduce(0) do |total, entry|
+        total + Tag.find(entry[:id]).taggings.size
+      end
+    end
+
+    # Scores a learning object using the usage frequency of a whole cluster.
+    #
+    # lo           - object responding to #tags; each tag responds to #id.
+    # close_tags   - array of hashes carrying :id.
+    # freq_cluster - total taggings of the cluster (see cluster_frequency).
+    #
+    # The result is wdf * itf, where
+    #   wdf = log2(matches + 1) / (log2(number of tags on lo) + 1)
+    #   itf = log2(total tags / freq_cluster) + 1
+    #
+    # Returns 0 when freq_cluster is 0 or lo has no tags.
+    def frequency_rank_global(lo, close_tags, freq_cluster)
+      # Without any recorded usage the ratio below is undefined.
+      return 0 if freq_cluster.zero?
+
+      freq = 0
+      lo.tags.each do |t|
+        close_tags.each do |cloT|
+          freq += 1 if cloT[:id] == t.id
+        end
+      end
+
+      tag_count = lo.tags.size
+      wdf = tag_count.zero? ? 0 : Math.log2(freq + 1) / (Math.log2(tag_count) + 1)
+      # Float division: integer division would truncate the ratio, down to 0
+      # (and log2(0) = -Infinity) whenever freq_cluster exceeds the tag count.
+      itf = Math.log2(Tag.all.size.to_f / freq_cluster) + 1
+      wdf * itf
+    end
+
+    # Ad-hoc report: ranks learning objects around the hard-coded tag id 22794
+    # and prints each result followed by its tags and their cluster ranks.
+    lol = find_relevant_results(22794, tags)
+
+    lol.each do |id, rank|
+      lo = LearningObject.find(id)
+      puts "-----"
+      p lo.id.to_s+": "+rank.to_s+" | "+lo.name
+      # A tag missing from the parsed cluster map prints an empty rank instead
+      # of aborting the report with NoMethodError on nil.
+      lo.tags.each {|t| print t.name+" - "+(tags[t.id] || {})[:rank].to_s+" | "}
+      puts ""
+    end
  end
 end