Commit 4aaa3a80 authored by Erik Alexandre Pucci

database: Replanning the database and its data marts 31

Signed-off-by: Erik Alexandre Pucci <eap08@c3sl.ufpr.br>
parent bce71a5c
......@@ -23,60 +23,93 @@
/* Enumerations */
comment on type category_enum is 'Enum type for the components category';
comment on type project_enum is 'Enum type for the MEC project which the machines belong to';
comment on type os_type_enum is 'Enum type for the OS type, which can be "linux" or "windows"';
comment on type class_enum is 'Enum type for the classification of the machines according to their availability : green if communication occurred at most 10 days ago, yellow if it occurred between 10 and 30, and red if it has not occurred for more than 30 days';
comment on type project_enum is 'Enum type for the MEC project which the '
'machines belong to';
comment on type os_type_enum is 'Enum type for the OS type, which can be '
'"linux" or "windows"';
comment on type class_enum is 'Enum type for the classification of the '
'machines according to their availability : green if communication occurred '
'at most 10 days ago, yellow if it occurred between 10 and 30, and red if '
'it has not occurred for more than 30 days';
/* -------------------------------------------------------------------------- */
/* Staging Areas */
comment on table sa_inventory is 'Transition table with temporary machines inventory data';
comment on table sa_inventory is 'Transition table with temporary machines '
'inventory data';
comment on column sa_inventory.inep is 'Longest INEP has 10 characters';
comment on column sa_inventory.hd_model is 'Longest HD model has 47 characters';
comment on column sa_inventory.processor is 'Longest processor model has 46 chars';
comment on column sa_inventory.os_distro is 'Longest OS distro has 35 characters';
comment on column sa_inventory.kernel is 'Longest kernel version has 24 chars';
comment on table sa_net_usage is 'Transition table with temporary network usage data per machine';
comment on column sa_inventory.processor is 'Longest processor model has 46 '
'characters';
comment on column sa_inventory.os_distro is 'Longest OS distro has 35 '
'characters';
comment on column sa_inventory.kernel is 'Longest kernel version has 24 '
'characters';
comment on table sa_net_usage is 'Transition table with temporary network '
'usage data per machine';
/* -------------------------------------------------------------------------- */
/* Data Warehouse */
comment on table dim_date is 'Dimension table with dates representing each load done';
comment on table dim_school is 'Dimension table with data about the schools. Since this table is loaded apart from the rest of the tables, this is the only one who does not have nor need the load_date field';
comment on table dim_date is 'Dimension table with dates representing each '
'load done';
comment on table dim_school is 'Dimension table with data about the schools. '
'Since this table is loaded apart from the rest of the tables, this is the '
'only one who does not have nor need the load_date field';
comment on column dim_school.city is 'Longest city name has 32 characters';
comment on column dim_school.school is 'MEC database limit for school names is 100 characters';
comment on column dim_school.school is 'MEC database limit for school names is '
'100 characters';
comment on column dim_school.address is 'Longest address has 70 characters';
comment on column dim_school.fts_school is 'Full text search field for schools';
comment on table dim_component is 'Dimension table of software and hardware components';
comment on column dim_component.description is 'HD model, OS type, processor model and memory size';
comment on table dim_component is 'Dimension table of software and hardware '
'components';
comment on column dim_component.description is 'HD model, OS type, processor '
'model and memory size';
comment on column dim_component.detail is 'HD size and OS distro';
comment on table dim_machine is 'Dimension table with machines inventory';
comment on table fact_availability is 'Fact table with the history of machines connections';
comment on table fact_availability is 'Fact table with the history of machines '
'connections';
comment on table fact_inventory is 'Fact table with all the machines inventory';
comment on table fact_alert is 'Fact table with the history of machines modifications';
comment on table fact_net_usage is 'Fact table with the history of network usage per school and 5 minutes sample';
comment on table fact_net_usage_avail is 'Fact table of the list of schools with network usage data collected (i.e. at least one machine sent the net usage data)';
comment on table control is 'Control table with the starting and ending times of each load process';
comment on table fact_alert is 'Fact table with the history of machines '
'modifications';
comment on table fact_net_usage is 'Fact table with the history of network '
'usage per school and 5 minutes sample';
comment on table fact_net_usage_avail is 'Fact table of the list of schools '
'with network usage data collected (i.e. at least one machine sent the net '
'usage data)';
comment on table control is 'Control table with the starting and ending times '
'of each load process';
/* -------------------------------------------------------------------------- */
/* Data Marts */
comment on table dm_avail_state is 'Data mart table of availability per state';
comment on table dm_avail_city is 'Data mart table of availability per city';
comment on table dm_avail_school is 'Data mart table of availability per school';
comment on table dm_avail_machine is 'Data mart table with availability per machine';
comment on table dm_avail_school is 'Data mart table of availability per '
'school';
comment on table dm_avail_machine is 'Data mart table with availability per '
'machine';
comment on column dm_avail_machine.total_contacts is '89 years seems enough';
comment on column dm_avail_machine.days_last_contact is '89 years seems enough';
comment on table dm_avail_state_history is 'Data mart table with the history of availability per state and month';
comment on table dm_avail_city_history is 'Data mart table with the history of availability per city and month';
comment on table dm_invent_machine is 'Data mart table with the machines inventory';
comment on table dm_audit_zm_school is 'Data mart table with schools without any machine communicating';
comment on table dm_avail_state_history is 'Data mart table with the history '
'of availability per state and month';
comment on table dm_avail_city_history is 'Data mart table with the history of '
'availability per city and month';
comment on table dm_invent_machine is 'Data mart table with the machines '
'inventory';
comment on table dm_audit_zm_school is 'Data mart table with schools without '
'any machine communicating';
comment on table dm_alert_state is 'Data mart table with alerts per state';
comment on table dm_alert_city is 'Data mart table with alerts per city';
comment on table dm_alert_school is 'Data mart table with alerts per school';
comment on table dm_alert_machine is 'Data mart table with machines alerts';
comment on table dm_alert_state_history is 'Data mart table with the history of alerts per state and month';
comment on table dm_alert_city_history is 'Data mart table with the history of alerts per city and month';
comment on table dm_net_usage_monthly is 'Data mart table with network usage per school with 4 hours sample';
comment on table dm_net_usage_weekly is 'Data mart table with network usage per school with 1 hour sample';
comment on table dm_net_usage_daily is 'Data mart table with network usage per school with 5 minutes sample';
comment on table dm_alert_state_history is 'Data mart table with the history '
'of alerts per state and month';
comment on table dm_alert_city_history is 'Data mart table with the history of '
'alerts per city and month';
comment on table dm_net_usage_monthly is 'Data mart table with network usage '
'per school with 4 hours sample';
comment on table dm_net_usage_weekly is 'Data mart table with network usage '
'per school with 1 hour sample';
comment on table dm_net_usage_daily is 'Data mart table with network usage per '
'school with 5 minutes sample';
......@@ -27,8 +27,11 @@ declare
count integer := 1;
tmp integer;
start_time timestamp;
load_date_id integer;
/* -------------------------------------------------------------------------- */
begin
raise log 'Migration start point';
-- current_time is a time-of-day value (time with time zone) and cannot be
-- stored in the timestamp variable start_time. clock_timestamp() returns the
-- actual wall-clock instant, so the elapsed time of the load is measurable.
-- NOTE(review): the end time written to "control" at the end of this function
-- should be taken from clock_timestamp() as well.
start_time := clock_timestamp();
/* ---------------------------------------------------------------------- */
......@@ -38,11 +41,11 @@ begin
insert into sa_inventory (contact_date, project, inep, machine, os_type,
os_distro, kernel, processor, memory_size, hd_model, hd_size, hd_used,
hd2_model, hd2_size, hd2_used, extra_hds)
select sa_data, sa_projeto, sa_inep, sa_mac, sa_so_nome,
sa_so_distribuicao, sa_so_kernel, sa_processador, sa_memoria,
sa_disco1_modelo, sa_disco1_capacidade, sa_disco1_usado,
sa_disco2_modelo, sa_disco2_capacidade, sa_disco2_usado, sa_discos_ad
from mectb00_staging_area;
select sa_data, sa_projeto, sa_inep, sa_mac, sa_so_nome,
sa_so_distribuicao, sa_so_kernel, sa_processador, sa_memoria,
sa_disco1_modelo, sa_disco1_capacidade, sa_disco1_usado,
sa_disco2_modelo, sa_disco2_capacidade, sa_disco2_usado,
sa_discos_ad from mectb00_staging_area;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
......@@ -51,42 +54,74 @@ begin
/* Migrate dimension tables */
raise log 'Inserting "current_date" into "dim_date"...';
insert into dim_date (id, load_date)
values (default, current_date);
insert into dim_date (load_date) values (current_date) returning id into
load_date_id;
raise log 'Done';
raise log 'Migrating "mectb01_escola_dim" data to "dim_school"...';
insert into dim_school (inep, region, state, city, school, code, address,
fts_school)
select esc_inep, esc_regiao, esc_uf, esc_municipio, esc_nome, esc_cep,
esc_endereco, to_tsvector(esc_nome)
from mectb01_escola_dim;
insert into dim_school (id, inep, region, state, city, school, code,
address, fts_school)
select esc_id, esc_inep, esc_regiao, esc_uf,esc_municipio, esc_nome,
esc_cep, esc_endereco, to_tsvector(esc_nome) from
mectb01_escola_dim;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
raise log 'Migrating "" data to "dim_component"...';
raise log 'Migrating "mectb02_catalogo_dim" data to "dim_component"...';
insert into dim_component (load_date, category, description, detail)
select cat_componente, cat_modelo, cat_descricao, cat_descricao2
from mectb02_catalogo_dim;
select load_date_id,
    -- Map the legacy component codes onto category_enum labels. The CASE
    -- resolves to text, which is not implicitly converted to an enum in
    -- INSERT ... SELECT, hence the explicit cast. Codes not listed here
    -- yield NULL and are rejected by the not-null constraint on category.
    (case cat_componente
        when 'SO' then 'os'
        when 'KERNEL' then 'kernel'
        when 'PROC' then 'processor'
        when 'HD' then 'hd'
    end)::category_enum,
    cat_modelo,
    cat_descricao
from mectb02_catalogo_dim
where cat_componente <> 'MEM';
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
raise log 'Migrating "" data to "dim_machine"...';
raise log 'Migrating "mectb04_maquina_dim" data to "dim_machine"...';
insert into dim_machine (load_date, school_id, machine, project)
select maq_identificador, maq_dat_id, maq_esc_id, maq_mac,
maq_inventario, maq_data_remocao, maq_origem, maq_conexao,
case when maq_projeto = '0'
from mectb04_maquina_dim;
select load_date_id,
    maq_esc_id,
    maq_mac,
    -- Map the legacy numeric project code onto project_enum labels. The CASE
    -- resolves to text, which is not implicitly converted to an enum in
    -- INSERT ... SELECT, hence the explicit cast.
    (case maq_projeto
        when 0 then 'proinfo'
        when 1 then 'uca_classmate'
        when 2 then 'uca_server'
        when 3 then 'projector'
    end)::project_enum
from mectb04_maquina_dim;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
/* ---------------------------------------------------------------------- */
/* Migrate fact tables */
raise log 'Migrating "" data to ""...';
raise log 'Migrating "mectb05_disponibilidade_fact" data to '
'"fact_availability"...';
-- Copy every legacy availability row, resolving its date key to the calendar
-- date stored in the legacy date dimension.
insert into fact_availability (load_date, contact_date, machine_id)
    select load_date_id, dat_data, dis_maq_id
    from mectb05_disponibilidade_fact
    inner join mectb03_data_dim on dis_dat_id = dat_id;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
raise log 'Migrating "" data to ""...';
-- Check old load functions to obtain the data from old hash
raise log 'Migrating "mectb07_inventario_fact" data to '
'"fact_inventory"...';
-- NOTE(review): this statement is unfinished and will not parse as written:
-- the select list supplies 3 of the 12 target columns and ends with a
-- dangling comma.
-- NOTE(review): so_id and memory do not match the os_id and memory_id columns
-- of the fact_inventory definition shown in this file -- confirm the intended
-- names before completing the select list.
insert into fact_inventory (load_date, contact_date, machine_id, so_id,
kernel_id, processor_id, memory, hd_id, hd_used, hd2_id, hd2_used,
extra_hds)
select load_date_id, dat_data, inv_maq_id,
from mectb07_inventario_fact, mectb03_data_dim where inv_dat_id =
dat_id;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
raise log 'Migrating "" data to ""...';
raise log 'Migrating "mectb07_inventario_fact" data to "fact_alert"...';
-- NOTE(review): placeholder only -- the select list and the from clause are
-- still empty, so this statement will not parse as written.
insert into fact_alert (load_date, old_inventory, inventory, memory_alert,
hd_alert)
select
from ;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
/* ---------------------------------------------------------------------- */
......@@ -94,8 +129,8 @@ begin
raise log 'Migrating "mectb99_control" data to "control"...';
insert into control (name, start_time, end_time, total)
select ctr_bd, ctr_inicio::timestamp, ctr_fim::timestamp, ctr_total
from mectb99_control;
select ctr_bd, ctr_inicio::timestamp, ctr_fim::timestamp, ctr_total from
mectb99_control;
get diagnostics tmp = ROW_COUNT;
count := count + tmp;
raise log 'Done';
......@@ -105,97 +140,6 @@ begin
start_time, current_time, count);
raise log 'Done';
raise log 'Migration end point';
end;
$$ language plpgsql;
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
-- Categories a catalogued component can belong to.
create type category_enum as enum
    ('os', 'kernel', 'processor', 'memory', 'hd');
/* -------------------------------------------------------------------------- */
/* Dimension tables */
-- Dimension: catalogue of software and hardware components.
create table dim_component (
    id          serial        primary key,
    load_date   integer       not null references dim_date,
    category    category_enum not null,
    -- HD model, OS type, processor model and memory size
    description text          not null,
    -- HD size and OS distro
    detail      text          not null
);
-- Dimension: inventory of machines; a MAC address is unique within a school.
create table dim_machine (
    id        serial       primary key,
    load_date integer      not null references dim_date,
    school_id integer      not null references dim_school,
    machine   macaddr      not null,
    project   project_enum not null default 'proinfo',
    unique (school_id, machine)
);
/* -------------------------------------------------------------------------- */
/* Fact tables */
-- Fact: history of machine connections (one row per recorded contact).
create table fact_availability (
    load_date    integer not null references dim_date,
    contact_date date    not null,
    machine_id   integer not null references dim_machine
);
-- Fact: full inventory of every machine. The second disk (hd2_id, hd2_used)
-- is the only nullable part of a row.
create table fact_inventory (
    id           serial  primary key,
    load_date    integer not null references dim_date,
    contact_date date    not null,
    machine_id   integer not null references dim_machine,
    os_id        integer not null references dim_component,
    kernel_id    integer not null references dim_component,
    processor_id integer not null references dim_component,
    memory_id    integer not null references dim_component,
    hd_id        integer not null references dim_component,
    hd_used      integer not null,
    hd2_id       integer references dim_component,
    hd2_used     integer,
    extra_hds    integer not null default 0
);
-- Fact: history of machine modifications, linking the previous inventory row
-- to the current one and flagging which kind of change was detected.
create table fact_alert (
    load_date     integer not null references dim_date,
    old_inventory integer not null references fact_inventory,
    inventory     integer not null references fact_inventory,
    memory_alert  boolean not null default false,
    hd_alert      boolean not null default false
);
-- Fact: schools for which network usage data was collected, i.e. at least one
-- machine sent the net usage data.
create table fact_net_usage_avail (
    load_date      integer not null references dim_date,
    school_id      integer not null references dim_school,
    net_data_avail boolean not null default false
);
-- Fact: history of network usage per school, in 5 minutes samples.
create table fact_net_usage (
    load_date     integer   not null references dim_date,
    collect_date  date      not null,
    school_id     integer   not null references dim_school,
    collect_time  timestamp not null,
    down_bytes    bigint    not null,
    down_packages integer   not null,
    up_bytes      bigint    not null,
    up_packages   integer   not null
);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment