Commit ee61788e authored by Erik Alexandre Pucci

database: Finish network usage data warehouse and data marts load

The last thing that needs to be done is the load of the history data marts.
Signed-off-by: Erik Alexandre Pucci <eap08@c3sl.ufpr.br>
parent 56c900bf
......@@ -94,9 +94,9 @@ comment on table dm_avail_machine is 'Data mart with availability per machine';
comment on column dm_avail_machine.total_contacts is '89 years seems enough';
comment on column dm_avail_machine.days_last_contact is '89 years seems enough';
comment on table dm_avail_state_history is 'Data mart with the history '
'of availability per state and month';
'of availability per state for the last 6 months';
comment on table dm_avail_city_history is 'Data mart with the history of '
'availability per city and month';
'availability per city for the last 6 months';
comment on table dm_invent_machine is 'Data mart with the machines inventory';
comment on table dm_audit_zm_school is 'Data mart with schools without any '
'machine communicating';
......@@ -105,9 +105,9 @@ comment on table dm_alert_city is 'Data mart with alerts per city';
comment on table dm_alert_school is 'Data mart with alerts per school';
comment on table dm_alert_machine is 'Data mart with machines alerts';
comment on table dm_alert_state_history is 'Data mart with the history of '
'alerts per state and month';
'alerts per state for the last 6 months';
comment on table dm_alert_city_history is 'Data mart with the history of '
'alerts per city and month';
'alerts per city for the last 6 months';
comment on table dm_net_usage_monthly is 'Data mart with network usage '
'per school with 4 hours sample';
comment on table dm_net_usage_weekly is 'Data mart with network usage '
......
......@@ -10,7 +10,7 @@
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPosE. See the
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
......@@ -78,7 +78,8 @@ create table dm_avail_machine (
/* -------------------------------------------------------------------------- */
/* History availability tables */
/* Data mart table with the history of availability per state and month */
/* Data mart table with the history of availability per state for the last 6
* months */
create table dm_avail_state_history (
project project_enum default 'proinfo' not null,
contact_date date not null,
......@@ -90,7 +91,8 @@ create table dm_avail_state_history (
red integer not null
);
/* Data mart table with the history of availability per city and month */
/* Data mart table with the history of availability per city for the last 6
* months */
create table dm_avail_city_history (
project project_enum default 'proinfo' not null,
contact_date date not null,
......@@ -191,7 +193,7 @@ create table dm_alert_machine (
/* -------------------------------------------------------------------------- */
/* History alert tables */
/* Data mart table with the history of alerts per state and month */
/* Data mart table with the history of alerts per state for the last 6 months */
create table dm_alert_state_history (
project project_enum default 'proinfo' not null,
contact_date date not null,
......@@ -201,7 +203,7 @@ create table dm_alert_state_history (
hd_amount integer not null
);
/* Data mart table with the history of alerts per city and month */
/* Data mart table with the history of alerts per city for the last 6 months */
create table dm_alert_city_history (
project project_enum default 'proinfo' not null,
contact_date date not null,
......@@ -217,40 +219,33 @@ create table dm_alert_city_history (
/* Data mart table with network usage per school with 4 hours sample */
create table dm_net_usage_monthly (
region text not null,
state text not null,
city text not null,
inep text not null,
school text not null,
collect_time timestamp not null,
down_bytes bigint not null,
down_packages integer not null,
up_bytes bigint not null,
up_packages integer not null
state text not null,
city text not null,
inep text not null,
school text not null,
collect_time timestamp not null,
down_kbits bigint not null,
up_kbits bigint not null
);
/* Data mart table with network usage per school with 1 hour sample */
create table dm_net_usage_weekly (
state text not null,
city text not null,
inep text not null,
school text not null,
collect_time timestamp not null,
down_bytes bigint not null,
down_packages integer not null,
up_bytes bigint not null,
up_packages integer not null
state text not null,
city text not null,
inep text not null,
school text not null,
collect_time timestamp not null,
down_kbits bigint not null,
up_kbits bigint not null
);
/* Data mart table with network usage per school with 5 minutes sample */
create table dm_net_usage_daily (
state text not null,
city text not null,
inep text not null,
school text not null,
collect_time timestamp not null,
down_bytes bigint not null,
down_packages integer not null,
up_bytes bigint not null,
up_packages integer not null
state text not null,
city text not null,
inep text not null,
school text not null,
collect_time timestamp not null,
down_kbits bigint not null,
up_kbits bigint not null
);
......@@ -110,9 +110,9 @@ create table fact_net_usage (
load_date integer references dim_date not null,
machine_id integer references dim_machine not null,
collect_time timestamp not null,
down_bytes bigint not null,
down_kbits bigint not null,
down_packages integer not null,
up_bytes bigint not null,
up_kbits bigint not null,
up_packages integer not null
);
......@@ -122,9 +122,9 @@ create table fact_net_usage_school (
load_date integer references dim_date not null,
school_id integer references dim_school not null,
collect_time timestamp not null,
down_bytes bigint not null,
down_kbits bigint not null,
down_packages integer not null,
up_bytes bigint not null,
up_kbits bigint not null,
up_packages integer not null
);
......
......@@ -48,8 +48,8 @@ create table sa_net_usage (
inep text not null,
machine macaddr not null,
collect_time timestamp not null,
down_bytes bigint not null,
down_kbits bigint not null,
down_packages integer not null,
up_bytes bigint not null,
up_kbits bigint not null,
up_packages integer not null
);
......@@ -19,6 +19,8 @@
* USA.
*/
/* Create function that updates the data marts using the data from the data
* warehouse tables */
create or replace function load_data_marts() returns void as $$
declare
count_total integer := 0;
......@@ -27,6 +29,14 @@ declare
begin
raise log 'Data marts load start point: %', current_timestamp;
/* Empty every data mart before it is loaded again. "restart identity" resets
 * the sequences owned by the truncated tables and "cascade" extends the
 * truncate to any table referencing them through foreign keys */
truncate table
    dm_avail_state,
    dm_avail_city,
    dm_avail_school,
    dm_avail_machine,
    dm_avail_state_history,
    dm_avail_city_history,
    dm_invent_machine,
    dm_audit_zm_school,
    dm_alert_state,
    dm_alert_city,
    dm_alert_school,
    dm_alert_machine,
    dm_alert_state_history,
    dm_alert_city_history,
    dm_net_usage_monthly,
    dm_net_usage_weekly,
    dm_net_usage_daily
restart identity cascade;
/* ---------------------------------------------------------------------- */
/* Update availability data marts */
......@@ -46,7 +56,7 @@ begin
sum(case when contact_date >= date_trunc('month', current_date) then
1 else 0 end) as month_contacts from fact_inventory group by
machine_id) u where i.machine_id = t.machine_id and i.machine_id =
u.machine_id and i.machine_id = m.id and m.school_id = s.id
u.machine_id and i.machine_id = m.id and m.school_id = s.id;
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
......@@ -90,6 +100,27 @@ begin
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* ---------------------------------------------------------------------- */
/* Update availability history data marts */
/* Load the availability history per city. The log message now names the city
 * history, matching the table that is actually loaded */
raise log 'Inserting city availability history into '
'"dm_avail_city_history"...';
/* TODO(review): the query feeding this insert is unfinished -- only the
 * "select" keyword is present, so the statement cannot run until the query is
 * written */
insert into dm_avail_city_history (project, contact_date, region, state,
city, total, green, yellow, red)
select
/* Add the number of rows affected by the insert to the running total */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* Load the availability history per state */
raise log 'Inserting state availability history into '
'"dm_avail_state_history"...';
/* TODO(review): the query feeding this insert is unfinished -- only the
 * "select" keyword is present, so the statement cannot run until the query is
 * written */
insert into dm_avail_state_history (project, contact_date, region, state,
total, green, yellow, red)
select
/* Add the number of rows affected by the insert to the running total */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* ---------------------------------------------------------------------- */
/* Update inventory data mart */
......@@ -178,6 +209,68 @@ begin
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* ---------------------------------------------------------------------- */
/* Update alert history data marts */
/* Load the alert history per state. The log message now names the alert
 * history table that is actually loaded instead of "dm_avail_state" */
raise log 'Inserting state alert history into "dm_alert_state_history"...';
/* TODO(review): the query feeding this insert is unfinished -- only the
 * "select" keyword is present, so the statement cannot run until the query is
 * written */
insert into dm_alert_state_history (project, contact_date, region, state,
memory_amount, hd_amount)
select
/* Add the number of rows affected by the insert to the running total */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* Load the alert history per city. The log message now names the alert
 * history table that is actually loaded instead of "dm_avail_state" */
raise log 'Inserting city alert history into "dm_alert_city_history"...';
/* TODO(review): the query feeding this insert is unfinished -- only the
 * "select" keyword is present, so the statement cannot run until the query is
 * written */
insert into dm_alert_city_history (project, contact_date, region, state,
city, memory_amount, hd_amount)
select
/* Add the number of rows affected by the insert to the running total */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* ---------------------------------------------------------------------- */
/* Update network usage data marts */
/* Copy every per-school network usage fact into the daily data mart, pairing
 * each fact with its row in dim_school */
raise log 'Inserting network usage data per day into '
'"dm_net_usage_daily"...';
insert into dm_net_usage_daily (
    state,
    city,
    inep,
    school,
    collect_time,
    down_kbits,
    up_kbits
)
select
    state,
    city,
    inep,
    school,
    collect_time,
    down_kbits,
    up_kbits
from fact_net_usage_school
inner join dim_school on school_id = id;
/* Account for the rows just inserted */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* Build the weekly data mart from the daily one, with one row per school
 * (inep) and hour. Each bucket is labelled "w": collect_time truncated to the
 * hour plus 00:30. The window sums add up down_kbits and up_kbits over all rows
 * sharing the same inep and bucket, and "distinct on (inep, w)" keeps a single
 * row per bucket.
 * NOTE(review): there is no "order by", so state, city and school come from an
 * arbitrary row of the bucket -- presumably they are constant per inep; confirm.
 * NOTE(review): the samples are summed, not averaged -- confirm this is the
 * intended aggregation for the kbits columns */
raise log 'Inserting network usage data per week using 1 hour sample into '
'"dm_net_usage_weekly"...';
insert into dm_net_usage_weekly (state, city, inep, school, collect_time,
down_kbits, up_kbits)
select distinct on (inep, w) state, city, inep, school,
date_trunc('hour', collect_time) + '00:30'::time as w,
sum(down_kbits) over (partition by inep, date_trunc('hour',
collect_time) + '00:30'::time), sum(up_kbits) over (partition by
inep, date_trunc('hour', collect_time) + '00:30'::time) from
dm_net_usage_daily;
/* Add the number of rows affected by the insert to the running total */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* Build the monthly data mart from the weekly one, with one row per school
 * (inep) and 4-hour bucket. The bucket label "w" is the date of collect_time
 * plus the hour divided by 4 and truncated, times 04:00, plus 02:00. The
 * window sums add up down_kbits and up_kbits over all rows sharing the same
 * inep and bucket, and "distinct on (inep, w)" keeps a single row per bucket.
 * NOTE(review): there is no "order by", so state, city and school come from an
 * arbitrary row of the bucket -- presumably they are constant per inep; confirm.
 * NOTE(review): this reads dm_net_usage_weekly, so it depends on the weekly
 * load having run first */
raise log 'Inserting network usage data per month using 4 hours sample '
'into "dm_net_usage_monthly"...';
insert into dm_net_usage_monthly (state, city, inep, school, collect_time,
down_kbits, up_kbits)
select distinct on (inep, w) state, city, inep, school,
collect_time::date + trunc(extract(hour from collect_time)/4) *
'04:00'::time + '02:00'::time as w, sum(down_kbits) over (partition
by inep, collect_time::date + trunc(extract(hour from
collect_time)/4) * '04:00'::time + '02:00'::time), sum(up_kbits)
over (partition by inep, collect_time::date + trunc(extract(hour
from collect_time)/4) * '04:00'::time + '02:00'::time) from
dm_net_usage_weekly;
/* Add the number of rows affected by the insert to the running total */
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* ---------------------------------------------------------------------- */
/* Update control table */
......
......@@ -21,12 +21,17 @@
/* Create function that updates the data warehouse using the new data in the
* staging area, trimming white spaces from both ends of the values of the text
* types */
* types
*
* NOTE: The load of the network usage fact tables must be tested with large
* amounts of data, up to 288 * 1 million entries per day. At that volume, a
* where clause restricting the load to a minimum date will probably start to
* shorten the load duration and will therefore need to be added */
create or replace function load_data_warehouse() returns void as $$
declare
count_total integer := 0;
tmp integer;
load_date_id integer;
1 integer;
min_date date;
/* -------------------------------------------------------------------------- */
begin
......@@ -45,10 +50,10 @@ begin
/* Update dimension tables */
raise log 'Inserting "current_date" into "dim_date"...';
select id into load_date_id from dim_date where load_date = current_date;
if load_date_id is null then
select id into 1 from dim_date where load_date = current_date;
if 1 is null then
insert into dim_date (load_date) values (current_date) returning id into
load_date_id;
1;
raise log 'Done - 1 row inserted';
count_total := 1;
else
......@@ -56,7 +61,7 @@ begin
end if;
raise log 'Updating nud_avail field inside "dim_school"...';
update dim_school s set load_date = load_date_id, nud_avail = true
update dim_school s set load_date = 1, nud_avail = true
from tmp_net_usage n where s.inep = n.inep and nud_avail is false;
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
......@@ -64,22 +69,22 @@ begin
raise log 'Inserting new component data into "dim_component"...';
insert into dim_component (load_date, category, description, detail)
select distinct load_date_id, 'os'::category_enum, trim(both from
select distinct 1, 'os'::category_enum, trim(both from
os_type), trim(both from os_distro) from tmp_inventory
union all select distinct load_date_id, 'kernel'::category_enum,
union all select distinct 1, 'kernel'::category_enum,
trim(both from kernel), null from tmp_inventory
union all select distinct load_date_id, 'processor'::category_enum,
union all select distinct 1, 'processor'::category_enum,
trim(both from processor), null from tmp_inventory
union all select distinct load_date_id, 'hd'::category_enum, trim(both
union all select distinct 1, 'hd'::category_enum, trim(both
from hd_model), hd_size::text from tmp_inventory
except select load_date_id, category, description, detail from
except select 1, category, description, detail from
dim_component;
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
raise log 'Updating project field of old machines inside "dim_machine"...';
update dim_machine m set load_date = load_date_id, project = i.project
update dim_machine m set load_date = 1, project = i.project
from tmp_inventory i, dim_school s where i.machine = m.machine and
i.inep = s.inep and s.id = m.school_id and i.project <> m.project;
get diagnostics tmp = ROW_COUNT;
......@@ -88,9 +93,9 @@ begin
raise log 'Inserting new machine data into "dim_machine"...';
insert into dim_machine (load_date, school_id, machine, project)
select load_date_id, id, machine, project from tmp_inventory i,
select 1, id, machine, project from tmp_inventory i,
dim_school s where i.inep = s.inep
except select load_date_id, school_id, machine, project from
except select 1, school_id, machine, project from
dim_machine;
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
......@@ -162,7 +167,7 @@ begin
insert into fact_inventory (load_date, contact_date, machine_id, os_id,
kernel_id, processor_id, memory_size, hd_id, hd_used, hd2_id, hd2_used,
extra_hds, hash)
select distinct on (contact_date, m.id, hash) load_date_id,
select distinct on (contact_date, m.id, hash) 1,
contact_date, m.id, o.id, k.id, p.id, memory_size, h.id, hd_used,
i.id, hd2_used, extra_hds, md5(o.id::text || k.id::text ||
p.id::text || memory_size::text || h.id::text || hd_used::text ||
......@@ -195,23 +200,24 @@ begin
fact_inventory where contact_date >= min_date) s group by machine_id
having count(*) > 1) t where i.machine_id = t.machine_id and i.hd_id
= c.id) u
) select load_date_id, o.id, n.id, case when n.memory_size <
) select 1, o.id, n.id, case when n.memory_size <
o.memory_size * 0.9 then true else false end, case when n.hd_size <
o.hd_size * 0.9 then true else false end from tmp_alert o, tmp_alert
n where o.machine_id = n.machine_id and o.row_n = n.row_n - 1
except select load_date_id, old_inventory, inventory, memory_alert,
except select 1, old_inventory, inventory, memory_alert,
hd_alert from fact_alert;
get diagnostics tmp = ROW_COUNT;
count_total := count_total + tmp;
raise log 'Done - % rows inserted', tmp;
/* ---------------------------------------------------------------------- */
/* Update network usage fact tables */
raise log 'Inserting new network usage data into "fact_net_usage"...';
insert into fact_net_usage (load_date, machine_id, collect_time, down_bytes,
down_packages, up_bytes, up_packages)
select load_date_id, m.id, collect_time, down_bytes, down_packages,
up_bytes, up_packages from tmp_net_usage n, dim_machine m,
insert into fact_net_usage (load_date, machine_id, collect_time, down_kbits,
down_packages, up_kbits, up_packages)
select 1, m.id, collect_time, down_kbits, down_packages,
up_kbits, up_packages from tmp_net_usage n, dim_machine m,
dim_school s where n.machine = m.machine and n.inep = s.inep and
m.school_id = s.id;
get diagnostics tmp = ROW_COUNT;
......@@ -220,10 +226,10 @@ begin
raise log 'Updating existing rows with new network usage data per school '
'in "fact_net_usage_school"...';
update fact_net_usage_school f set load_date = load_date_id, down_bytes =
sdb, down_packages = sdp, up_bytes = sub, up_packages = sup
from (select t.id, t.collect_time, sum(down_bytes) as sdb,
sum(down_packages) as sdp, sum(up_bytes) as sub, sum(up_packages) as
update fact_net_usage_school f set load_date = 1, down_kbits =
sdb, down_packages = sdp, up_kbits = sub, up_packages = sup
from (select t.id, t.collect_time, sum(down_kbits) as sdb,
sum(down_packages) as sdp, sum(up_kbits) as sub, sum(up_packages) as
sup from (select id, collect_time from tmp_net_usage n, dim_school s
where n.inep = s.inep
intersect select school_id, collect_time from fact_net_usage_school)
......@@ -238,9 +244,9 @@ begin
raise log 'Inserting new network usage data per school into '
'"fact_net_usage_school"...';
insert into fact_net_usage_school (load_date, school_id, collect_time,
down_bytes, down_packages, up_bytes, up_packages)
select load_date_id, t.id, t.collect_time, sum(down_bytes),
sum(down_packages), sum(up_bytes), sum(up_packages) from (select id,
down_kbits, down_packages, up_kbits, up_packages)
select 1, t.id, t.collect_time, sum(down_kbits),
sum(down_packages), sum(up_kbits), sum(up_packages) from (select id,
collect_time from tmp_net_usage n, dim_school s where n.inep =
s.inep
except select school_id, collect_time from fact_net_usage_school) t,
......
......@@ -22,15 +22,15 @@
# This script migrates the data from mectb00_net_staging_area to sa_net_usage
if test $# -ne 1; then
printf "Usage: ./load_sa_net_usage.sh <database>\n"
printf "Usage: ./migrate_net_sa.sh <database>\n"
exit 1
fi
database=$1
psql -d ${database} -c "
insert into sa_net_usage (insertion_date, inep, machine, collect_time,
down_bytes, down_packages, up_bytes, up_packages)
select current_date - 1, net_inep, net_mac::macaddr, net_data + net_hora,
net_bytes_in, net_pacotes_in, net_bytes_out, net_pacotes_out from
psql -d ${database} -c "insert into sa_net_usage (insertion_date, inep, machine,
collect_time, down_kbits, down_packages, up_kbits, up_packages)
select current_date - 1, net_inep, net_mac::macaddr, net_data + net_hora -
'00:02:30'::time, net_bytes_in * 8 / 300 / 1024, net_pacotes_in,
net_bytes_out * 8 / 300 / 1024, net_pacotes_out from
mectb00_net_staging_area;"
......@@ -29,10 +29,10 @@ fi
database=$1
psql -d ${database} -c "truncate sa_inventory, sa_net_usage, dim_date,
dim_school, dim_component, dim_machine, fact_inventory, fact_alert,
fact_net_usage, fact_net_usage_school, control, dm_avail_state, dm_avail_city,
dm_avail_school, dm_avail_machine, dm_avail_state_history,
dm_avail_city_history, dm_invent_machine, dm_audit_zm_school, dm_alert_state,
dm_alert_city, dm_alert_school, dm_alert_machine, dm_alert_state_history,
dm_alert_city_history, dm_net_usage_monthly, dm_net_usage_weekly,
dm_net_usage_daily restart identity cascade;"
dim_school, dim_component, dim_machine, fact_inventory, fact_alert,
fact_net_usage, fact_net_usage_school, control, dm_avail_state,
dm_avail_city, dm_avail_school, dm_avail_machine, dm_avail_state_history,
dm_avail_city_history, dm_invent_machine, dm_audit_zm_school,
dm_alert_state, dm_alert_city, dm_alert_school, dm_alert_machine,
dm_alert_state_history, dm_alert_city_history, dm_net_usage_monthly,
dm_net_usage_weekly, dm_net_usage_daily restart identity cascade;"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment