Commit bce71a5c authored by Erik Alexandre Pucci

database: Replanning the database and its data marts 30

Signed-off-by: Erik Alexandre Pucci <eap08@c3sl.ufpr.br>
parent e0a827b6
......@@ -24,7 +24,7 @@
/* Catalog comments for the enum types. os_type_enum is commented only once,
 * with the lowercase labels; a second comment on the same type would simply
 * overwrite the first. */
comment on type category_enum is 'Enum type for the components category';
comment on type project_enum is 'Enum type for the MEC project which the machines belong to';
comment on type os_type_enum is 'Enum type for the OS type, which can be "linux" or "windows"';
comment on type class_enum is 'Enum type for the classification of the machines according to their availability: green if communication occurred at most 10 days ago, yellow if it occurred between 10 and 30 days ago, and red if it has not occurred for more than 30 days';
/* -------------------------------------------------------------------------- */
......
......@@ -24,7 +24,6 @@ create type category_enum as enum (
'os',
'kernel',
'processor',
'memory',
'hd'
);
......@@ -57,8 +56,8 @@ create table dim_component (
id serial primary key,
load_date integer references dim_date not null,
category category_enum not null,
description text not null, /* HD model, OS type, processor model
* and memory size */
description text not null, /* HD model, OS type and processor
* model */
detail text not null /* HD size and OS distro */
);
......@@ -91,12 +90,13 @@ create table fact_inventory (
os_id integer references dim_component not null,
kernel_id integer references dim_component not null,
processor_id integer references dim_component not null,
memory_id integer references dim_component not null,
memory integer not null,
hd_id integer references dim_component not null,
hd_used integer not null,
hd2_id integer references dim_component,
hd2_used integer,
extra_hds integer default 0 not null
extra_hds integer default 0 not null,
hash text
);
/* Fact table with the history of machines modifications */
......
......@@ -19,7 +19,8 @@
* USA.
*/
/* Full text search index on the schools' tsvector column. Only one index may
 * be named dim_s_index, so the former (region, state, city) btree variant is
 * kept below as a disabled line only. */
create index dim_s_index on dim_school using gin (fts_school);
--create index dim_s_index on dim_school (region, state, city);
--create index on fact_net_usage (school_id, date_id);
/* Newest contacts first */
create index dm_ash_index on dm_avail_state_history (contact_date desc);
......
......@@ -27,10 +27,10 @@ create type project_enum as enum (
'projector'
);
/* Enum type for the OS type, which can be "linux" or "windows".
 * Labels are lowercase only. Every label must be followed by a comma except
 * the last one: two string constants separated just by a newline are
 * concatenated by PostgreSQL into a single label. */
create type os_type_enum as enum (
    'linux',
    'windows'
);
/* -------------------------------------------------------------------------- */
......
......@@ -27,7 +27,11 @@ elif test "$1" = "$2"; then
exit 2
fi
# Directory that holds this script, used to locate migrate_data.sql
PREFIX="$(dirname "$(readlink -f "$0")")"
old_database=$1
new_database=$2

# Copy the legacy mectb* tables (large objects included) from the old database
# into the new one. The table pattern is quoted so that the shell cannot
# expand it against file names in the current directory.
pg_dump -Fc -b -t 'mectb*' "${old_database}" | pg_restore -d "${new_database}"

# Load the migration function into the new database and run it there
psql -d "${new_database}" -f "${PREFIX}/migrate_data.sql"
psql -d "${new_database}" -c 'select migrate_data()'
......@@ -19,21 +19,92 @@
* USA.
*/
/* NOTE: Network usage data is going to be discarded, as previously agreed by
 * the team */
/* Migrates the legacy "mectb*" tables into the new schema.
 *
 * Order of work: staging area, dimension tables, fact tables (placeholders
 * only, nothing is migrated yet) and finally the control table, which also
 * receives one extra row named 'migration' holding the wall-clock start and
 * end of this run and the number of rows counted along the way.
 *
 * Takes no arguments and returns nothing; progress is reported through
 * "raise log" messages.
 */
create or replace function migrate_data() returns void as $$
declare
    /* NOTE(review): the counter starts at 1, presumably to account for the
     * single dim_date row inserted below - confirm. Only the staging area and
     * control inserts are added to it. */
    row_total integer := 1;
    tmp integer;
    started_at timestamp;
/* -------------------------------------------------------------------------- */
begin
    /* clock_timestamp() is used because current_time is a "time with time
     * zone" value, which cannot be stored in a timestamp, and because the
     * transaction-scoped time functions would return the same instant for
     * both the start and the end of the run. */
    started_at := clock_timestamp()::timestamp;

    /* ---------------------------------------------------------------------- */
    /* Migrate staging area */
    raise log 'Migrating "mectb00_staging_area" data to "sa_inventory"...';
    insert into sa_inventory (contact_date, project, inep, machine, os_type,
        os_distro, kernel, processor, memory_size, hd_model, hd_size, hd_used,
        hd2_model, hd2_size, hd2_used, extra_hds)
    select sa_data, sa_projeto, sa_inep, sa_mac, sa_so_nome,
        sa_so_distribuicao, sa_so_kernel, sa_processador, sa_memoria,
        sa_disco1_modelo, sa_disco1_capacidade, sa_disco1_usado,
        sa_disco2_modelo, sa_disco2_capacidade, sa_disco2_usado, sa_discos_ad
    from mectb00_staging_area;
    get diagnostics tmp = ROW_COUNT;
    row_total := row_total + tmp;
    raise log 'Done';

    /* ---------------------------------------------------------------------- */
    /* Migrate dimension tables */
    raise log 'Inserting "current_date" into "dim_date"...';
    insert into dim_date (id, load_date)
    values (default, current_date);
    raise log 'Done';

    raise log 'Migrating "mectb01_escola_dim" data to "dim_school"...';
    insert into dim_school (inep, region, state, city, school, code, address,
        fts_school)
    select esc_inep, esc_regiao, esc_uf, esc_municipio, esc_nome, esc_cep,
        esc_endereco, to_tsvector(esc_nome)
    from mectb01_escola_dim;
    raise log 'Done';

    raise log 'Migrating "mectb02_catalogo_dim" data to "dim_component"...';
    /* NOTE(review): the select list does not obviously line up with the target
     * columns (load_date is expected to reference dim_date, yet it receives
     * cat_componente) - confirm the mapping against the legacy schema. */
    insert into dim_component (load_date, category, description, detail)
    select cat_componente, cat_modelo, cat_descricao, cat_descricao2
    from mectb02_catalogo_dim;
    raise log 'Done';

    /* TODO: the "mectb04_maquina_dim" to "dim_machine" mapping is unfinished.
     * The draft below selects eight expressions for four target columns and
     * ends in an unterminated CASE, which prevents the whole function from
     * being parsed, so it is kept for reference only and is not executed:
     *
     *   insert into dim_machine (load_date, school_id, machine, project)
     *   select maq_identificador, maq_dat_id, maq_esc_id, maq_mac,
     *       maq_inventario, maq_data_remocao, maq_origem, maq_conexao,
     *       case when maq_projeto = '0'
     *   from mectb04_maquina_dim;
     */
    raise warning 'Skipping "mectb04_maquina_dim" data to "dim_machine": migration not implemented yet';

    /* ---------------------------------------------------------------------- */
    /* Migrate fact tables */
    /* TODO: placeholders only - no fact table is migrated yet */
    raise log 'Migrating "" data to ""...';
    raise log 'Done';
    raise log 'Migrating "" data to ""...';
    raise log 'Done';
    raise log 'Migrating "" data to ""...';
    raise log 'Done';

    /* ---------------------------------------------------------------------- */
    /* Migrate and update control table */
    raise log 'Migrating "mectb99_control" data to "control"...';
    insert into control (name, start_time, end_time, total)
    select ctr_bd, ctr_inicio::timestamp, ctr_fim::timestamp, ctr_total
    from mectb99_control;
    get diagnostics tmp = ROW_COUNT;
    row_total := row_total + tmp;
    raise log 'Done';

    raise log 'Updating "control" table with current migration process...';
    insert into control (name, start_time, end_time, total)
    values ('migration', started_at, clock_timestamp()::timestamp, row_total);
    raise log 'Done';

    return;
end;
$$ language plpgsql;
......@@ -52,27 +123,6 @@ create type category_enum as enum (
/* -------------------------------------------------------------------------- */
/* Dimension tables */
/* Date dimension: one row for each load that was done */
create table dim_date (
    id          serial,
    load_date   date not null,
    primary key (id),
    unique (load_date)
);
/* School dimension. It is loaded separately and "manually" from the other
 * tables, which is why it is the only table that neither has nor needs a
 * load_date column */
create table dim_school (
    id          integer,
    inep        text not null,
    region      text not null,
    state       text not null,
    city        text not null,      /* longest city name: 32 characters */
    school      text not null,      /* MEC database caps school names at 100 */
    code        text,
    address     text,               /* longest address: 70 characters */
    fts_school  tsvector not null,  /* full text search field for schools */
    primary key (id),
    unique (inep)
);
/* Dimension table of software and hardware components */
create table dim_component (
id serial primary key,
......@@ -149,15 +199,3 @@ create table fact_net_usage (
up_bytes bigint not null,
up_packages integer not null
);
/* -------------------------------------------------------------------------- */
/* Control table */
/* Bookkeeping table: start and end time of every load process */
create table control (
    id          serial,
    name        text not null,
    start_time  timestamp not null,
    end_time    timestamp not null,
    total       integer not null,
    primary key (id)
);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment