# lib/tasks/dbInfo.rake
#
# Diagnostic tasks that sweep every LearningObject stored in OrientDB
# (fetched in fixed-size batches) and print a report:
#   rake dbinfo:bitstreams_formats      - tally bitstream file extensions
#   rake dbinfo:verify_duplicated_items - find LOs with identical attributes
#   rake dbinfo:verify_dead_bitstreams  - find unreachable bitstream URLs
namespace :dbinfo do
  # NOTE: `desc` must come *before* its `task` for Rake to register the
  # description; the previous version placed `desc` inside the task bodies,
  # which left every task undocumented in `rake -T`.
  desc "List bitstreams formats"
  task bitstreams_formats: :environment do
    include RepositoriesProxy
    include Thumbnail::Formats

    # Tally of bitstream file extensions; unseen keys default to 0,
    # so no nil-check is needed before incrementing.
    formats_count = Hash.new(0)

    each_learning_object_batch do |lo|
      bitstream_filename = lo.get_bitstream_filename_of "ORIGINAL"
      formats_count[File.extname(bitstream_filename)] += 1 unless bitstream_filename.nil?
    end

    # Ascending sort by number of occurrences.
    bitstreams_formats = formats_count.sort_by { |_ext, total| total }

    puts "\n\n"
    puts "---------------------------------"
    puts "---- BITSTREAMS FORMATS LIST ----"
    puts "---------------------------------"
    puts "Ext\tTotal\tAccepts Thumbnail"
    puts "---------------------------------"
    bitstreams_formats.each do |ext, total|
      accepts_thumbnail = accepted_formats.include? ext
      puts "#{ext}\t#{total}\t#{accepts_thumbnail}"
    end
    puts "---------------------------------"
    puts "Ext\tTotal\tAccepts Thumbnail"
    puts "---------------------------------"
  end

  desc "Verify duplicated items"
  task verify_duplicated_items: :environment do
    include RepositoriesProxy

    # SHA1 fingerprint of a LO's unique attributes => ids sharing it.
    # Block form gives each key its own array (Hash.new([]) would share one).
    lo_hash = Hash.new { |hash, key| hash[key] = [] }

    each_learning_object_batch do |lo|
      # LOs with identical unique attributes collapse onto the same key.
      lo_hash[encode_hash_from(learning_object_unique_attributes(lo))] << lo.id
    end

    duplicated_lo = lo_hash.select { |_key, ids| ids.length > 1 }

    puts "\n\n"
    puts "---------------------------------"
    puts "----- DUPLICATED ITEMS LIST -----"
    puts "---------------------------------"
    puts "TOTAL CASES: #{duplicated_lo.length}"
    puts "---------------------------------"
    duplicated_lo.each_value do |ids|
      puts "#{ids.to_s}"
    end
    puts "---------------------------------"
  end

  desc "Verify dead bitstreams"
  task verify_dead_bitstreams: :environment do
    include RepositoriesProxy

    # Collected as {id:, url:} pairs for the report below.
    dead_bitstreams = []

    each_learning_object_batch do |lo|
      bitstream_url = lo.get_bitstream_retrievelink_of "ORIGINAL"
      next if bitstream_url.nil?
      unless bitstream_url_is_alive bitstream_url
        dead_bitstreams << { id: lo.id, url: bitstream_url }
      end
    end

    puts "\n\n"
    puts "---------------------------------"
    puts "------ DEAD BITSTREAMS LIST -----"
    puts "---------------------------------"
    puts "TOTAL CASES: #{dead_bitstreams.length}"
    puts "---------------------------------"
    dead_bitstreams.each do |dead_bitstream|
      puts "#{dead_bitstream[:id]} -> #{dead_bitstream[:url]}"
    end
    puts "---------------------------------"
  end

  private

  # Yields every LearningObject stored in OrientDB, fetched in batches of
  # +limit+ starting at offset 0. A failed fetch sleeps +retry_wait+ seconds
  # and retries, but at most +max_retries+ consecutive times before
  # re-raising — the previous version retried forever, spinning endlessly
  # when the database never recovered.
  def each_learning_object_batch(limit: 1000, retry_wait: 30, max_retries: 10)
    offset = 0
    failures = 0

    loop do
      print "\r -> Analysing LearningObjects from #{offset} to #{offset + limit}"

      begin
        # Get LearningObjects from OrientDB (from offset to offset+limit).
        batch = learning_object_repository.all_from_offset_to_limit(offset, limit)
      rescue StandardError
        # Bounded retry: give the database time to recover, then try again.
        failures += 1
        raise if failures > max_retries
        sleep(retry_wait.seconds)
        next
      end

      failures = 0
      # Terminate once there are no more LearningObjects.
      break if batch.empty?

      batch.each { |lo| yield lo }
      offset += limit
    end
  end

  # HEAD-requests +url+ (following redirects, skipping SSL verification) and
  # returns true iff it answered HTTP 200. Comparing the numeric
  # response_code is more robust than string-matching the status line
  # ("200 OK" vs "200" depending on server/protocol).
  def bitstream_url_is_alive(url)
    c = Curl::Easy.http_head(url) do |easy|
      easy.follow_location = true
      easy.ssl_verify_peer = false
      easy.ssl_verify_host = false
    end
    c.response_code == 200
  end

  # Serialized list of the attributes that define a LearningObject's
  # identity; used as the duplicate-detection fingerprint input.
  def learning_object_unique_attributes(lo)
    [lo.name, lo.author, lo.description, lo.type, lo.metadata].to_s
  end

  # SHA1 hex digest of +object+ (a String here).
  def encode_hash_from(object)
    Digest::SHA1.hexdigest object
  end
end