Skip to content
Snippets Groups Projects
Commit dcd665b2 authored by Mauricio Giacomini Girardello
Browse files

Merge branch 'add-dbinfo-tasks' into 'master'

Add DBInfo tasks



See merge request !61
parents 3c175e05 270e64bd
No related branches found
No related tags found
No related merge requests found
namespace :dbinfo do
desc "Database Information Tasks"
# Counts the file extensions of the "ORIGINAL" bitstream of every
# LearningObject and prints one row per extension (ascending by total),
# together with whether the extension is in `accepted_formats`.
#
# Rake only attaches a description declared *before* the task definition,
# so `desc` lives here rather than inside the task body.
desc "List bitstreams formats"
task bitstreams_formats: :environment do
  include RepositoriesProxy
  include Thumbnail::Formats

  # Quantity of LearningObjects fetched on each iteration
  limit = 1000
  # Starting point from where LearningObjects will be fetched
  offset = 0
  # extension => number of occurrences; unseen extensions start at 0
  bitstreams_formats_hash = Hash.new(0)

  loop do
    print "\r -> Analysing LearningObjects from #{offset} to #{offset+limit}"

    begin
      # Get LearningObjects from OrientDB (from offset to offset+limit)
      learning_objects = learning_object_repository.all_from_offset_to_limit(offset, limit)
    rescue => e
      # Report why the fetch failed so an endless retry is not silent,
      # then wait for the database's recovery and retry the same page.
      warn "\n -> #{e.class}: #{e.message} (retrying in 30 seconds)"
      sleep(30.seconds)
      next
    end

    # Terminate loop if there are no more LearningObjects
    break if learning_objects.empty?

    learning_objects.each do |lo|
      bitstream_filename = lo.get_bitstream_filename_of "ORIGINAL"
      next if bitstream_filename.nil?

      bitstreams_formats_hash[File.extname(bitstream_filename)] += 1
    end

    offset += limit
  end

  # Array of [extension, total] pairs, least frequent first
  bitstreams_formats = bitstreams_formats_hash.sort_by { |_ext, total| total }

  puts "\n\n"
  puts "---------------------------------"
  puts "---- BITSTREAMS FORMATS LIST ----"
  puts "---------------------------------"
  puts "Ext\tTotal\tAccepts Thumbnail"
  puts "---------------------------------"
  bitstreams_formats.each do |ext, total|
    accepts_thumbnail = accepted_formats.include? ext
    puts "#{ext}\t#{total}\t#{accepts_thumbnail}"
  end
  puts "---------------------------------"
  # Header is repeated so it is still visible at the bottom of a long list
  puts "Ext\tTotal\tAccepts Thumbnail"
  puts "---------------------------------"
end
# Groups every LearningObject by a SHA1 digest of its unique attributes and
# prints the id lists of the groups that contain more than one object.
#
# Rake only attaches a description declared *before* the task definition,
# so `desc` lives here rather than inside the task body.
desc "Verify duplicated items"
task verify_duplicated_items: :environment do
  include RepositoriesProxy

  # Quantity of LearningObjects fetched on each iteration
  limit = 1000
  # Starting point from where LearningObjects will be fetched
  offset = 0
  # digest of unique attributes => ids of the LearningObjects sharing it.
  # The block form gives every key its own array.
  lo_hash = Hash.new { |hash, key| hash[key] = [] }

  loop do
    print "\r -> Analysing LearningObjects from #{offset} to #{offset+limit}"

    begin
      # Get LearningObjects from OrientDB (from offset to offset+limit)
      learning_objects = learning_object_repository.all_from_offset_to_limit(offset, limit)
    rescue => e
      # Report why the fetch failed so an endless retry is not silent,
      # then wait for the database's recovery and retry the same page.
      warn "\n -> #{e.class}: #{e.message} (retrying in 30 seconds)"
      sleep(30.seconds)
      next
    end

    # Terminate loop if there are no more LearningObjects
    break if learning_objects.empty?

    learning_objects.each do |lo|
      # Verify duplicity using LO's unique attributes
      lo_key = encode_hash_from learning_object_unique_attributes(lo)
      lo_hash[lo_key] << lo.id
    end

    offset += limit
  end

  # Keep only the digests shared by two or more LearningObjects
  duplicated_lo = lo_hash.select { |_key, ids| ids.length > 1 }

  puts "\n\n"
  puts "---------------------------------"
  puts "----- DUPLICATED ITEMS LIST -----"
  puts "---------------------------------"
  puts "TOTAL CASES: #{duplicated_lo.length}"
  puts "---------------------------------"
  # `to_s` keeps each group on a single line (plain `puts ids` would print
  # one id per line).
  duplicated_lo.each_value { |ids| puts ids.to_s }
  puts "---------------------------------"
end
# Checks the "ORIGINAL" bitstream retrieve link of every LearningObject and
# prints the id and URL of those for which `bitstream_url_is_alive` is false.
#
# Rake only attaches a description declared *before* the task definition,
# so `desc` lives here rather than inside the task body.
desc "Verify dead bitstreams"
task verify_dead_bitstreams: :environment do
  include RepositoriesProxy

  # Quantity of LearningObjects fetched on each iteration
  limit = 1000
  # Starting point from where LearningObjects will be fetched
  offset = 0
  # One {id:, url:} entry per LearningObject whose bitstream is not alive
  dead_bitstreams = []

  loop do
    print "\r -> Analysing LearningObjects from #{offset} to #{offset+limit}"

    begin
      # Get LearningObjects from OrientDB (from offset to offset+limit)
      learning_objects = learning_object_repository.all_from_offset_to_limit(offset, limit)
    rescue => e
      # Report why the fetch failed so an endless retry is not silent,
      # then wait for the database's recovery and retry the same page.
      warn "\n -> #{e.class}: #{e.message} (retrying in 30 seconds)"
      sleep(30.seconds)
      next
    end

    # Terminate loop if there are no more LearningObjects
    break if learning_objects.empty?

    learning_objects.each do |lo|
      bitstream_url = lo.get_bitstream_retrievelink_of "ORIGINAL"
      # LearningObjects without a retrieve link are skipped, not reported
      next if bitstream_url.nil?

      dead_bitstreams << { id: lo.id, url: bitstream_url } unless bitstream_url_is_alive(bitstream_url)
    end

    offset += limit
  end

  puts "\n\n"
  puts "---------------------------------"
  puts "------ DEAD BITSTREAMS LIST -----"
  puts "---------------------------------"
  puts "TOTAL CASES: #{dead_bitstreams.length}"
  puts "---------------------------------"
  dead_bitstreams.each do |dead_bitstream|
    puts "#{dead_bitstream[:id]} -> #{dead_bitstream[:url]}"
  end
  puts "---------------------------------"
end
private
# Tells whether a bitstream URL answers an HTTP HEAD request with code 200.
#
# Redirects are followed, and SSL peer and host verification are turned off
# for the request. The numeric response code is compared instead of the
# status line text, because the reason phrase ("OK") is optional and is
# absent from HTTP/2 responses.
#
# @param url [String] address of the bitstream to probe
# @return [Boolean] true when the final response code is 200; false for any
#   other code or when the transfer itself fails (e.g. unresolvable host,
#   connection refused, timeout)
def bitstream_url_is_alive(url)
  response = Curl::Easy.http_head(url) do |easy|
    easy.follow_location = true
    easy.ssl_verify_peer = false
    easy.ssl_verify_host = false
  end
  response.response_code == 200
rescue Curl::Err::CurlError
  # A URL that cannot even be reached counts as dead, rather than raising
  # out of the caller's scan.
  false
end
# Builds the string used to identify a LearningObject when looking for
# duplicates: the textual form of an array holding its name, author,
# description, type and metadata, in that order.
#
# @param lo [#name, #author, #description, #type, #metadata]
# @return [String] result of Array#to_s over the five attributes
def learning_object_unique_attributes(lo)
  [lo.name, lo.author, lo.description, lo.type, lo.metadata].to_s
end
# Returns the hexadecimal SHA1 digest of the given string.
#
# @param object [String] data to be digested
# @return [String] 40-character lowercase hexadecimal digest
def encode_hash_from(object)
  sha1 = Digest::SHA1.new
  sha1.update(object)
  sha1.hexdigest
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment