Skip to content
Snippets Groups Projects
Commit bd1b9a43 authored by Bruno Nocera Zanette's avatar Bruno Nocera Zanette
Browse files

Merge branch 'sidekiq-workers' into 'master'

Sidekiq workers

Tasks:
* dspace:import
* thumbnail:generate
* fakedata:generate_users
* fakedata:generate_data

Sidekiq Workers:
* MassiveLikesCreatorWorker
* MassiveViewsCreatorWorker
* ThumbnailGeneratorWorker

*** Importante ***
Para o "dspace:import" funcionar, é preciso usar o branch "add-limit_offset-query-parameter" da gem "dspace_rest_client". Uma maneira de fazer isso é fazer o pull desse branch e usar o parâmetro ":path => '/dspace-rest-client'" na definição da gem no arquivo Gemfile.


See merge request !40
parents 12447f5b ef2a5917
No related branches found
No related tags found
No related merge requests found
...@@ -112,3 +112,13 @@ group :test do ...@@ -112,3 +112,13 @@ group :test do
gem 'shoulda' gem 'shoulda'
gem 'shoulda-callback-matchers', '~> 1.1.1' gem 'shoulda-callback-matchers', '~> 1.1.1'
end end
gem 'streamio-ffmpeg', '~> 1.0.0'
# sidekiq
gem 'sidekiq'
gem 'sinatra', require: false
gem 'slim'
# CUrl
gem 'curb', '~> 0.8.8'
...@@ -38,6 +38,20 @@ class LearningObject ...@@ -38,6 +38,20 @@ class LearningObject
values values
end end
# Builds the absolute retrieve URL of the first bitstream that belongs to
# the given bundle.
#
# name - bundle name, compared against each bitstream's "bundleName".
#
# Returns Dspace::Config.rest_url followed by the bitstream's "retrieveLink",
# or nil when no bitstream belongs to that bundle.
def get_bitstream_retrievelink_of name
  bitstream = @bitstreams.find { |candidate| candidate["bundleName"] == name }
  return nil if bitstream.nil?

  Dspace::Config.rest_url + bitstream["retrieveLink"]
end
# Looks up the file name of the first bitstream that belongs to the given
# bundle.
#
# name - bundle name, compared against each bitstream's "bundleName".
#
# Returns the bitstream's "name" entry, or nil when no bitstream belongs to
# that bundle.
def get_bitstream_filename_of name
  bitstream = @bitstreams.find { |candidate| candidate["bundleName"] == name }
  return nil if bitstream.nil?

  bitstream["name"]
end
private private
def defaults def defaults
......
...@@ -15,16 +15,6 @@ module OrientDb ...@@ -15,16 +15,6 @@ module OrientDb
create_edge "Likes", user.rid, learning_object.id create_edge "Likes", user.rid, learning_object.id
end end
# Fetches every record of the LearningObject class.
#
# Example:
#   repository.for(:learning_objects).all.each do |learning_object|
#     learning_object.inspect   # <LearningObject model>
#   end
#
# Returns the value produced by build_objects for the query result, or an
# empty array when build_objects returns nil/false.
def all
  rows = connection.query("SELECT FROM LearningObject")
  objects = build_objects(rows)
  objects || []
end
# Usage: # Usage:
# learning_object = repository.for(:learning_objects).get_by_dspace_id 123 # learning_object = repository.for(:learning_objects).get_by_dspace_id 123
# #
...@@ -49,6 +39,16 @@ module OrientDb ...@@ -49,6 +39,16 @@ module OrientDb
edges.flatten edges.flatten
end end
# Inserts the given learning object as a new LearningObject record.
#
# learning_object - object whose #to_json output becomes the record content.
#
# Returns whatever connection.command returns for the INSERT statement.
def save(learning_object)
  database = connection
  database.command("INSERT INTO LearningObject CONTENT #{learning_object.to_json}")
end
# Updates a single whitelisted property of a stored LearningObject.
#
# learning_object - object whose #id is used as the record's @rid.
# property        - property name; ignored unless accepted_properties lists it.
# value           - new value, written as a single-quoted SQL string literal
#                   (nil is written as an empty string).
#
# Returns the result of the UPDATE command, or nil when the property is not
# accepted (no command is sent in that case).
def update_property(learning_object,property,value)
  return unless accepted_properties.include? property

  # The value ends up inside a quoted SQL literal, so backslashes and single
  # quotes are backslash-escaped; otherwise a quote in the value would close
  # the literal early and corrupt (or inject into) the statement.
  escaped_value = value.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
  connection.command "UPDATE LearningObject SET #{property}='#{escaped_value}' WHERE @rid = #{learning_object.id}"
end
## ##
# To create "index:learningobject_search" on OrientDB, use the following command: # To create "index:learningobject_search" on OrientDB, use the following command:
# CREATE INDEX learningobject_search # CREATE INDEX learningobject_search
...@@ -95,6 +95,20 @@ module OrientDb ...@@ -95,6 +95,20 @@ module OrientDb
private private
# Property names that update_property is allowed to write.
def accepted_properties
  %w[thumbnail]
end
# Creates one edge of class edge_class from `id` to every element of `array`.
#
# edge_class - edge class name handed to create_edge / edge_exists?.
# id         - id of the record the edges start from.
# array      - objects responding to #id; each one is an edge target.
# unique     - when true, targets for which edge_exists? answers true are
#              skipped (checked right before each creation).
#
# Returns an array with the results of the create_edge calls, in input order.
def create_edges_from_array(edge_class, id, array, unique=false)
  array.each_with_object([]) do |target, created|
    next if unique && edge_exists?(edge_class, id, target.id)

    created << create_edge(edge_class, id, target.id)
  end
end
def odb_class def odb_class
"LearningObject" "LearningObject"
end end
......
# Sidekiq worker that registers a "like" on one learning object on behalf of
# each of the given users.
class MassiveLikesCreatorWorker
  include Sidekiq::Worker
  include RepositoriesProxy

  # item_id   - id handed to learning_object_repository.find.
  # users_ids - ids of the users that like the item; each is loaded with
  #             User.find (a failing lookup is not rescued).
  def perform(item_id, users_ids)
    learning_object = learning_object_repository.find(item_id)

    users_ids.each do |user_id|
      liker = User.find(user_id)
      begin
        learning_object_repository.like(liker, learning_object)
      rescue StandardError
        # Best effort: a like that fails is skipped and the remaining users
        # are still processed.
        next
      end
    end
  end
end
# Sidekiq worker that registers a view of one learning object on behalf of
# each of the given users.
class MassiveViewsCreatorWorker
  include Sidekiq::Worker
  include RepositoriesProxy

  # item_id   - id handed to learning_object_repository.find.
  # users_ids - ids of the users that viewed the item; each is loaded with
  #             User.find (a failing lookup is not rescued).
  def perform(item_id, users_ids)
    learning_object = learning_object_repository.find(item_id)

    users_ids.each do |user_id|
      viewer = User.find(user_id)
      begin
        learning_object_repository.increment_views(viewer, learning_object)
      rescue StandardError
        # Best effort: a view that cannot be recorded is skipped and the
        # remaining users are still processed.
        next
      end
    end
  end
end
# Sidekiq worker that builds the thumbnail of a learning object from its
# "ORIGINAL" bitstream and stores the resulting path in the repository.
class ThumbnailGeneratorWorker
  include Sidekiq::Worker
  include RepositoriesProxy
  include Thumbnail::Creation
  include Thumbnail::Formats

  # learning_object_id - id handed to learning_object_repository.find.
  #
  # Files whose extension is not in accepted_formats get default_thumbnail.
  # When the download fails, the item's current thumbnail value is written
  # back unchanged.
  def perform(learning_object_id)
    item = learning_object_repository.find(learning_object_id)
    filename = item.get_bitstream_filename_of "ORIGINAL"
    size = "530x298"

    if accepted_formats.include? file_format(filename)
      file = fetch_original(item, filename)
      if file
        begin
          item.thumbnail = generate_thumbnail(file, filename, size)
        ensure
          # Always remove the temporary download, even if generation raises.
          delete_downloaded_bitstream(file)
        end
      end
    else
      item.thumbnail = default_thumbnail
    end

    learning_object_repository.update_property(item,"thumbnail",item.thumbnail)
  end

  private

  # Downloads the item's "ORIGINAL" bitstream.
  #
  # Returns the local file path, or nil (after printing an error line) when
  # the link lookup or the download raises.
  def fetch_original(item, filename)
    retrieve_link = item.get_bitstream_retrievelink_of "ORIGINAL"
    download_bitstream(retrieve_link,filename)
  rescue
    puts "ERROR!!! Some error occurred during file download."
    nil
  end

  # Removes a downloaded file if it is still present.
  def delete_downloaded_bitstream(file)
    File.unlink(file) if File.exist?(file)
  end

  # Streams the content found at `url` into a file under /tmp.
  #
  # url      - address of the bitstream content.
  # filename - name reported for the bitstream; only its base name is used.
  #
  # Returns the path of the written file. Any error is re-raised after the
  # partially written file has been removed.
  def download_bitstream(url, filename)
    output_dir = "/tmp"
    # The name comes from repository metadata: keep only the base name so
    # directory components ("../", "a/b") cannot move the file out of
    # output_dir.
    output_file = File.join(output_dir, File.basename(filename))

    c = Curl::Easy.new(url)
    # NOTE(review): TLS peer/host verification is switched off here; confirm
    # this is still required for the DSpace endpoint.
    c.ssl_verify_peer = false
    c.ssl_verify_host = false

    begin
      File.open(output_file, 'wb') do |f|
        c.on_body {|data| f << data; data.size }
        c.perform
      end
    rescue
      # Do not leave a truncated download behind.
      delete_downloaded_bitstream(output_file)
      raise
    end

    output_file
  end
end
Rails.application.routes.draw do Rails.application.routes.draw do
require 'sidekiq/web'
devise_for :users devise_for :users
namespace :auth do namespace :auth do
...@@ -18,7 +20,10 @@ Rails.application.routes.draw do ...@@ -18,7 +20,10 @@ Rails.application.routes.draw do
end end
resources :users resources :users
resources :highlights
resources :carousels resources :carousels
mount Sidekiq::Web, at: '/sidekiq'
end end
root 'welcome#index' root 'welcome#index'
......
class OrientDb::Client class Dspace::Client
@@client = nil
# Returns the shared DSpace client, creating it on first use from
# Dspace::Config.rest_url.
#
# Raises RuntimeError when the client cannot be created. The message names
# DSpace (it previously blamed OrientDB credentials) and carries the original
# error text so the real cause is not lost.
def self.instance
  @@client ||= DspaceClient.new(Dspace::Config.rest_url)
rescue StandardError => e
  raise "Could not create the DSpace client: #{e.message}"
end
end end
require 'dspace_rest_client'
namespace :dspace do
  # The description must precede `task`; a `desc` placed inside the task body
  # only runs when the task executes and never describes it.
  desc "Import DSpace items to LearningObject Repositoy"
  task import: :environment do
    include RepositoriesProxy

    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0

    loop do
      puts " --> importing items from #{offset} to #{offset+limit}"

      begin
        # Get items from dspace (from offset to offset+limit)
        dspace_items = get_items_from_dspace(limit,offset)
      rescue => e
        # Report the failure instead of swallowing it, so a permanent error
        # (wrong gem branch, bad URL) is visible while the task keeps retrying.
        puts " --> failed to fetch items (#{e.class}: #{e.message}), retrying in 10 seconds"
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry the same offset
        next
      end

      # Terminate loop if there are no more items to import
      break if dspace_items.empty?

      # Increment offset, to get new items on next iteration
      offset += limit

      # Saves a new LearningObject for every item that is not yet in the
      # repository (looked up by its DSpace id).
      dspace_items.each do |item|
        next unless learning_object_repository.get_by_dspace_id(item.id).nil?

        learning_object_repository.save initialize_learning_object(item)
      end
    end
  end

  private

  # Item repository of the shared DSpace client (memoized).
  def dspace_repository
    @dspace_repository ||= Dspace::Client.instance.repository.item_repository
  end

  # Fetches `limit` items starting at `offset`, with metadata and bitstreams
  # expanded.
  def get_items_from_dspace(limit, offset)
    dspace_repository.get_all_items(
      expand: ['metadata','bitstreams'],
      limit: limit, offset: offset
    )
  end

  # Builds an unsaved LearningObject from a DSpace item, using the default
  # thumbnail and the current time for both timestamps.
  def initialize_learning_object item
    metadata = build_array_of(item.metadata)
    bitstreams = build_array_of(item.bit_streams)
    current_date = Time.new

    LearningObject.new(
      :name => item.name,
      :description => select_value_of(metadata, "dc.description"),
      :thumbnail => '/thumbnails/default_thumbnail.jpg',
      :created_at => current_date,
      :last_modified => current_date,
      :id_dspace => item.id,
      :type => item.type,
      :bitstreams => bitstreams,
      :metadata => metadata
    )
  end

  # Converts every element of item_content with #to_h.
  #
  # Returns an array of the converted elements; nil content yields an empty
  # array (it used to yield nil, which made select_value_of raise).
  def build_array_of(item_content=[])
    (item_content || []).map(&:to_h)
  end

  # Returns the :value of the first hash in `array` whose :key equals `key`,
  # or nil when there is none.
  def select_value_of(array, key)
    entry = array.find { |candidate| candidate[:key] == key }
    entry && entry[:value]
  end
end
namespace :fakedata do
  # Each description precedes its own task; a `desc` inside a task body only
  # runs when that task executes and never describes it.
  desc "Generate fake Users"
  task generate_users: :environment do
    include RepositoriesProxy

    total_new_users = 50

    (1..total_new_users).each do |i|
      user = User.new(
        :name => "User#{i}",
        :email => "user#{i}@inf.ufpr.br",
        :password => "1234567890",
        :password_confirmation => "1234567890"
      )
      # NOTE(review): the result of save is ignored, so users that fail
      # validation are skipped silently.
      user.save
    end
  end

  desc "Generate fake user Likes/Views data"
  task generate_data: :environment do
    include RepositoriesProxy

    # Quantity of items fetched on each iteration
    limit = 100
    # Start point from where items will be fetched
    offset = 0

    users = User.all

    loop do
      begin
        # Get learning objects from the repository (from offset to offset+limit)
        learning_objects = learning_object_repository.all_from_offset_to_limit(limit,offset)
      rescue => e
        # Report the failure instead of swallowing it, so a permanent error is
        # visible while the task keeps retrying.
        puts " --> failed to fetch learning objects (#{e.class}: #{e.message}), retrying in 30 seconds"
        # Sleeps for a while to wait database's recovery
        sleep(30.seconds)
        # Goes to next iteration to retry the same offset
        next
      end

      # Terminate loop if there are no more items to process
      break if learning_objects.empty?

      # Increment offset, to get new items on next iteration
      offset += limit

      learning_objects.each do |lo|
        users_views = []
        users_likes = []

        users.each do |user|
          # Randomize "Views"
          next unless random_check(30)

          users_views << user.id
          # Randomize "Likes" (only users that viewed the object can like it)
          users_likes << user.id if random_check(70)
        end

        MassiveViewsCreatorWorker.perform_async(lo.id, users_views)
        MassiveLikesCreatorWorker.perform_async(lo.id, users_likes)
      end
    end
  end

  private

  # Draws a random integer between 1 and 100.
  #
  # Returns true when the draw is less than or equal to `threshold`,
  # false otherwise.
  def random_check(threshold)
    rand(1..100) <= threshold
  end
end
namespace :thumbnail do
  desc "Generate Thumbnails"
  task :generate => :environment do
    include RepositoriesProxy

    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0

    loop do
      begin
        # Get learning objects from the repository (from offset to offset+limit)
        items = learning_object_repository.all_from_offset_to_limit(limit,offset)
      rescue => e
        # Report the failure instead of swallowing it, so a permanent error is
        # visible while the task keeps retrying.
        puts " --> failed to fetch learning objects (#{e.class}: #{e.message}), retrying in 30 seconds"
        # Sleeps for a while to wait database's recovery
        sleep(30.seconds)
        # Goes to next iteration to retry the same offset
        next
      end

      # Terminate loop if there are no more items to process
      break if items.empty?

      # Increment offset, to get new items on next iteration
      offset += limit

      # Queue one thumbnail job per learning object
      items.each do |item|
        ThumbnailGeneratorWorker.perform_async(item.id)
      end
    end
  end
end
module Thumbnail
  # Mixin that renders a thumbnail file for a video or image and returns the
  # thumbnail's path relative to the public directory.
  module Creation
    include Formats

    # Renders the thumbnail of `input`.
    #
    # input    - path of the local file to read.
    # filename - name of the file; its extension decides whether the format
    #            is accepted and its SHA1 digest names the thumbnail.
    # size     - geometry string such as "530x298".
    #
    # Returns the thumbnail path (under thumbnails_dir), or default_thumbnail
    # when the format is not accepted or rendering raises.
    def generate_thumbnail(input, filename, size)
      return default_thumbnail unless accepted_formats.include? file_format(filename)

      thumbnail = thumbnail_path(filename, size)
      output = "#{root_dir}#{thumbnail}"

      begin
        if accepted_video_formats.include? File.extname(input)
          generate_video_thumbnail(input, output, size)
        else
          generate_image_thumbnail(input, output, size)
        end
      rescue
        return default_thumbnail
      end

      thumbnail
    end

    # Path of the placeholder used when no thumbnail can be rendered.
    #
    # This used to be memoized as nil, which cleared the stored thumbnail of
    # every object that fell back to it.
    # NOTE(review): the file name mirrors the default the DSpace import task
    # assigns ('/thumbnails/default_thumbnail.jpg') - confirm the asset exists.
    def default_thumbnail
      @default_thumbnail ||= "#{thumbnails_dir}/default_thumbnail.jpg"
    end

    private

    # Takes a screenshot at 25% of the movie's duration.
    def generate_video_thumbnail(input,output,size)
      movie = FFMPEG::Movie.new(input)
      seek_seconds = (movie.duration * 25/100).floor
      movie.screenshot(output,
        { seek_time: seek_seconds, resolution: size },
        preserve_aspect_ratio: :width)
    end

    def generate_image_thumbnail(input,output,size)
      # Read the image and resize it. The `change_geometry' method
      # computes the new image geometry and yields to a block. The
      # return value of the block is the return value of the method.
      img = Magick::Image.read(input)[0]
      img.change_geometry!(size) { |cols, rows| img.thumbnail! cols, rows }
      img.write(output)
    end

    # SHA1 hex digest of the given string.
    def encode_hash_from(object)
      Digest::SHA1.hexdigest object
    end

    # Thumbnail path for a file name and size:
    # "<thumbnails_dir>/<sha1 of filename>_<size>.jpg".
    def thumbnail_path(filename, size)
      thumbnail_name = encode_hash_from filename
      "#{thumbnails_dir}/#{thumbnail_name}_#{size}.jpg"
    end

    # Directory (relative to root_dir) where thumbnails are written.
    def thumbnails_dir
      @thumbnails_dir ||= "/thumbnails"
    end

    # The application's public directory.
    def root_dir
      @root_dir ||= Rails.root.join('public')
    end
  end
end
module Thumbnail
  # Lists the file extensions accepted for thumbnail generation. Every list
  # holds the lowercase extensions followed by their uppercase counterparts.
  module Formats
    # Returns the extension of `file` (including the leading dot), or an
    # empty string when `file` is nil.
    def file_format file
      file.nil? ? "" : File.extname(file)
    end

    # Video extensions.
    def accepted_video_formats
      lowercase = %w[.mp4 .wmv .3gp .asf .avi .flv .mov .mpg .mpeg .rmvb .vob .webm]
      lowercase + lowercase.map(&:upcase)
    end

    # Image extensions.
    def accepted_image_formats
      lowercase = %w[.jpg .jpeg .gif .png .bmp .tif]
      lowercase + lowercase.map(&:upcase)
    end

    # Document/presentation extensions.
    def accepted_other_formats
      lowercase = %w[.pdf .pps]
      lowercase + lowercase.map(&:upcase)
    end

    # Every accepted extension: video, then image, then other.
    def accepted_formats
      [accepted_video_formats, accepted_image_formats, accepted_other_formats].reduce(:+)
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment