Commit 004c1612 authored by Eduardo L. Buratti's avatar Eduardo L. Buratti

Merge branch 'db-msl09' into charts

parents d8d957c6 2be0c887
......@@ -24,8 +24,8 @@ DB_LINK_LOCATION=/usr/share/postgresql/8.4/contrib/dblink.sql
# checking parameters
if [[ -z $DB_NAME ]]; then
echo "usage: $0 [database_name]"
exit 1
echo "usage: $0 [database_name]"
exit 1
fi
......@@ -34,22 +34,22 @@ psql $DB_NAME -f $DB_LINK_LOCATION
# creating database tables and structure
for file in $(ls -B create/); do
psql $DB_NAME -f create/$file
psql $DB_NAME -f create/$file
done
# importing database functions
for file in $(ls -B load/); do
psql $DB_NAME -f load/$file
psql $DB_NAME -f load/$file
done
# restore database data
for file in $(ls -B data/); do
psql $DB_NAME -f data/$file
psql $DB_NAME -f data/$file
done
# importing pre-defined queries
for file in $(ls -B query/); do
psql $DB_NAME -f query/$file
psql $DB_NAME -f query/$file
done
# installing crontab
......
/* types */
CREATE TYPE project_enum AS enum (
'proinfo',
'uca_classmate',
'uca_server',
'projector'
'proinfo',
'uca_classmate',
'uca_server',
'projector'
);
/* OLTP operational database tables */
CREATE TABLE proinfo_inventory (
contact_date DATE DEFAULT CURRENT_DATE NOT NULL,
project INTEGER DEFAULT 0 NOT NULL,
inep CHARACTER VARYING(18) not null,
macaddr CHARACTER VARYING(18) not null,
os_type TEXT,
os_distro TEXT,
os_kernel TEXT,
processor TEXT,
memory INTEGER,
disk1_model TEXT,
disk1_size INTEGER,
disk1_used INTEGER,
disk2_model TEXT,
disk2_size INTEGER,
disk2_used INTEGER,
extra_hds SMALLINT DEFAULT 0 NOT NULL
contact_date DATE DEFAULT CURRENT_DATE NOT NULL,
project INTEGER DEFAULT 0 NOT NULL,
inep CHARACTER VARYING(18) NOT NULL,
macaddr CHARACTER VARYING(18) NOT NULL,
os_type TEXT,
os_distro TEXT,
os_kernel TEXT,
processor TEXT,
memory INTEGER,
disk1_model TEXT,
disk1_size INTEGER,
disk1_used INTEGER,
disk2_model TEXT,
disk2_size INTEGER,
disk2_used INTEGER,
extra_hds SMALLINT DEFAULT 0 NOT NULL
);
CREATE TABLE proinfo_net_usage (
contact_date date default CURRENT_DATE not null,
inep CHARACTER VARYING(18) not null,
macaddr CHARACTER VARYING(18) not null,
collect_time TIME WITHOUT TIME ZONE,
down_kbits BIGINT,
down_packages INTEGER,
up_kbits BIGINT,
up_packages INTEGER
contact_date DATE DEFAULT CURRENT_DATE NOT NULL,
inep CHARACTER VARYING(18) NOT NULL,
macaddr CHARACTER VARYING(18) NOT NULL,
collect_time TIME WITHOUT TIME ZONE,
down_kbits BIGINT,
down_packages INTEGER,
up_kbits BIGINT,
up_packages INTEGER
);
CREATE TABLE rejected_inventory (
id INTEGER,
sch_id INTEGER,
cit_id INTEGER,
contact_date DATE NOT NULL,
project INTEGER,
inep CHARACTER VARYING(18) NOT NULL,
macaddr TEXT NOT NULL,
os_type TEXT,
os_distro TEXT,
os_kernel TEXT,
processor TEXT,
memory INTEGER,
disk1_model TEXT,
disk1_size INTEGER,
disk1_used INTEGER,
disk2_model TEXT,
disk2_size INTEGER,
disk2_used INTEGER,
extra_hds SMALLINT
id INTEGER,
sch_id INTEGER,
cit_id INTEGER,
contact_date DATE NOT NULL,
project INTEGER,
inep CHARACTER VARYING(18) NOT NULL,
macaddr TEXT NOT NULL,
os_type TEXT,
os_distro TEXT,
os_kernel TEXT,
processor TEXT,
memory INTEGER,
disk1_model TEXT,
disk1_size INTEGER,
disk1_used INTEGER,
disk2_model TEXT,
disk2_size INTEGER,
disk2_used INTEGER,
extra_hds SMALLINT
);
CREATE TABLE rejected_net_usage (
sch_id INTEGER,
cit_id INTEGER,
contact_date date default CURRENT_DATE not null,
inep CHARACTER VARYING(18) not null,
macaddr CHARACTER VARYING(18) not null,
collect_time TIME WITHOUT TIME ZONE,
down_kbits BIGINT,
down_packages INTEGER,
up_kbits BIGINT,
up_packages INTEGER
sch_id INTEGER,
cit_id INTEGER,
contact_date DATE DEFAULT CURRENT_DATE NOT NULL,
inep CHARACTER VARYING(18) NOT NULL,
macaddr CHARACTER VARYING(18) NOT NULL,
collect_time TIME WITHOUT TIME ZONE,
down_kbits BIGINT,
down_packages INTEGER,
up_kbits BIGINT,
up_packages INTEGER
);
-- dimension city
CREATE TABLE dim_city (
id SERIAL PRIMARY KEY,
name CHARACTER VARYING(100) NOT NULL,
state CHARACTER VARYING(2) NOT NULL,
region CHARACTER VARYING(12) NOT NULL,
CHECK (name <> ''),
CHECK (state <> ''),
CHECK (region <> '')
id SERIAL PRIMARY KEY,
name CHARACTER VARYING(100) NOT NULL,
state CHARACTER VARYING(2) NOT NULL,
region CHARACTER VARYING(12) NOT NULL,
CHECK (name <> ''),
CHECK (state <> ''),
CHECK (region <> '')
);
-- dimension school
CREATE TABLE dim_school (
id SERIAL PRIMARY KEY,
cit_id INTEGER NOT NULL REFERENCES dim_city,
inep CHARACTER VARYING(12) NOT NULL,
name CHARACTER VARYING(150) NOT NULL,
address CHARACTER VARYING(100),
cep CHARACTER VARYING(10)
CHECK (inep <> ''),
CHECK (name <> '')
id SERIAL PRIMARY KEY,
cit_id INTEGER NOT NULL REFERENCES dim_city,
inep CHARACTER VARYING(12) NOT NULL,
name CHARACTER VARYING(150) NOT NULL,
address CHARACTER VARYING(100),
cep CHARACTER VARYING(10)
CHECK (inep <> ''),
CHECK (name <> '')
);
-- dimension date
CREATE TABLE dim_date (
id DATE PRIMARY KEY,
day SMALLINT NOT NULL,
month SMALLINT NOT NULL,
year SMALLINT NOT NULL,
CHECK (TO_DATE(year || '-' || month || '-' || day, 'YYYY-MM-DD') = id)
id DATE PRIMARY KEY,
day SMALLINT NOT NULL,
month SMALLINT NOT NULL,
year SMALLINT NOT NULL,
CHECK (TO_DATE(year || '-' || month || '-' || day, 'YYYY-MM-DD') = id)
);
-- dimension inventory
CREATE TABLE dim_inventory (
id SERIAL PRIMARY KEY,
sch_id INTEGER NOT NULL REFERENCES dim_school,
macaddr MACADDR NOT NULL,
load_date DATE NOT NULL DEFAULT CURRENT_DATE REFERENCES dim_date,
project project_enum DEFAULT 'proinfo' NOT NULL,
id SERIAL PRIMARY KEY,
sch_id INTEGER NOT NULL REFERENCES dim_school,
macaddr MACADDR NOT NULL,
load_date DATE NOT NULL DEFAULT CURRENT_DATE REFERENCES dim_date,
project project_enum DEFAULT 'proinfo' NOT NULL,
-- inventory info
disk1_model TEXT NOT NULL,
disk1_size INTEGER NOT NULL,
disk1_used INTEGER NOT NULL,
disk2_model TEXT,
disk2_size INTEGER,
disk2_used INTEGER,
-- inventory info
disk1_model TEXT NOT NULL,
disk1_size INTEGER NOT NULL,
disk1_used INTEGER NOT NULL,
disk2_model TEXT,
disk2_size INTEGER,
disk2_used INTEGER,
memory INTEGER NOT NULL,
processor TEXT NOT NULL,
os_type TEXT NOT NULL,
os_distro TEXT NOT NULL,
os_kernel TEXT NOT NULL,
memory INTEGER NOT NULL,
processor TEXT NOT NULL,
os_type TEXT NOT NULL,
os_distro TEXT NOT NULL,
os_kernel TEXT NOT NULL,
is_first BIT NOT NULL DEFAULT '0',
is_current BIT NOT NULL DEFAULT '1',
is_first BIT NOT NULL DEFAULT '0',
is_current BIT NOT NULL DEFAULT '1',
CHECK (macaddr <> '00:00:00:00:00:00')
CHECK (macaddr <> '00:00:00:00:00:00')
);
-- fact tables
CREATE TABLE fact_contact (
id SERIAL PRIMARY KEY,
sch_id INTEGER NOT NULL REFERENCES dim_school,
cit_id INTEGER NOT NULL REFERENCES dim_city,
dat_id DATE NOT NULL REFERENCES dim_date,
macaddr MACADDR NOT NULL,
project project_enum DEFAULT 'proinfo' NOT NULL,
UNIQUE (sch_id, cit_id, dat_id, macaddr)
id SERIAL PRIMARY KEY,
sch_id INTEGER NOT NULL REFERENCES dim_school,
cit_id INTEGER NOT NULL REFERENCES dim_city,
dat_id DATE NOT NULL REFERENCES dim_date,
macaddr MACADDR NOT NULL,
project project_enum DEFAULT 'proinfo' NOT NULL,
UNIQUE (sch_id, cit_id, dat_id, macaddr)
);
CREATE TABLE fact_net_usage (
id SERIAL PRIMARY KEY,
sch_id INTEGER NOT NULL REFERENCES dim_school,
cit_id INTEGER NOT NULL REFERENCES dim_city,
dat_id DATE NOT NULL REFERENCES dim_date,
macaddr MACADDR NOT NULL,
id SERIAL PRIMARY KEY,
sch_id INTEGER NOT NULL REFERENCES dim_school,
cit_id INTEGER NOT NULL REFERENCES dim_city,
dat_id DATE NOT NULL REFERENCES dim_date,
macaddr MACADDR NOT NULL,
collect_time TIME WITHOUT TIME ZONE NOT NULL,
down_kbits BIGINT NOT NULL DEFAULT 0,
down_packages INTEGER NOT NULL DEFAULT 0,
up_kbits BIGINT NOT NULL DEFAULT 0,
up_packages INTEGER NOT NULL DEFAULT 0
collect_time TIME WITHOUT TIME ZONE NOT NULL,
down_kbits BIGINT NOT NULL DEFAULT 0,
down_packages INTEGER NOT NULL DEFAULT 0,
up_kbits BIGINT NOT NULL DEFAULT 0,
up_packages INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE aggr_availability (
base_date DATE NOT NULL,
macaddr MACADDR NOT NULL,
project project_enum DEFAULT 'proinfo' NOT NULL,
sch_inep CHARACTER VARYING(12) NOT NULL,
sch_name CHARACTER VARYING(150) NOT NULL,
city CHARACTER VARYING(100) NOT NULL,
state CHARACTER VARYING(2) NOT NULL,
region CHARACTER VARYING(12) NOT NULL,
is_green SMALLINT NOT NULL DEFAULT 0,
is_yellow SMALLINT NOT NULL DEFAULT 0,
is_red SMALLINT NOT NULL DEFAULT 0
sch_id INTEGER NOT NULL REFERENCES dim_school,
cit_id INTEGER NOT NULL REFERENCES dim_city,
dat_id DATE NOT NULL REFERENCES dim_date,
mac_count BIGINT NOT NULL DEFAULT 0,
mac_count_red BIGINT NOT NULL DEFAULT 0,
mac_count_yellow BIGINT NOT NULL DEFAULT 0,
mac_count_green BIGINT NOT NULL DEFAULT 0,
project project_enum DEFAULT 'proinfo' NOT NULL,
UNIQUE (sch_id, cit_id, dat_id, project)
);
-- Control table with the starting and ending times of each load process
CREATE TABLE control (
id serial primary key,
name text not null,
start_time timestamp not null,
end_time timestamp not null,
total integer not null
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
start_time TIMESTAMP NOT NULL,
end_time TIMESTAMP NOT NULL,
total INTEGER NOT NULL
);
This diff is collapsed.
-- function to populate dim_date table. The function
-- function to populate dim_date table. The function
-- should be called in a cron script to ensure that the
-- desired date is always loaded into the database
CREATE OR REPLACE FUNCTION populate_dim_date(year_limit integer) returns void as $$
DECLARE
date_iter DATE;
date_iter DATE;
BEGIN
-- starting date
SELECT max(dim_date.id) + 1 INTO date_iter FROM dim_date;
-- starting date
SELECT max(dim_date.id) + 1 INTO date_iter FROM dim_date;
-- if the table was empty
IF date_iter IS null THEN
date_iter := TO_DATE(extract(year from CURRENT_DATE) || '-01-01', 'YYYY-MM-DD');
END IF;
-- if the table was empty
IF date_iter IS null THEN
date_iter := TO_DATE(extract(year from CURRENT_DATE) || '-01-01', 'YYYY-MM-DD');
END IF;
-- iterating each day
LOOP
EXIT WHEN extract(year from date_iter) > year_limit;
-- iterating each day
LOOP
EXIT WHEN extract(year from date_iter) > year_limit;
INSERT INTO dim_date values(date_iter,
extract(day from date_iter),
extract(month from date_iter),
extract(year from date_iter)
);
date_iter = date_iter + 1;
END LOOP;
INSERT INTO dim_date values(date_iter,
extract(day from date_iter),
extract(month from date_iter),
extract(year from date_iter)
);
date_iter = date_iter + 1;
END LOOP;
END;
$$ language plpgsql;
This diff is collapsed.
......@@ -2,30 +2,30 @@
-- area tables. This function should NOT be called directly.
CREATE OR REPLACE FUNCTION load_fact_contact() returns void as $$
DECLARE
result RECORD;
start_ts TIMESTAMP;
total_rows INTEGER;
result RECORD;
start_ts TIMESTAMP;
total_rows INTEGER;
BEGIN
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
-- getting the date limits
SELECT max(contact_date) as max_date, min(contact_date) as min_date
INTO result from sa_inventory;
-- getting the date limits
SELECT max(contact_date) as max_date, min(contact_date) as min_date
INTO result from sa_inventory;
--
INSERT INTO fact_contact (sch_id, cit_id, dat_id, macaddr, project)
(SELECT sch_id, cit_id, contact_date, macaddr::macaddr, to_project_enum(project) FROM sa_inventory WHERE valid = '1')
EXCEPT
(SELECT sch_id, cit_id, dat_id, macaddr, project FROM fact_contact WHERE dat_id >= result.min_date AND dat_id <= result.max_date);
--
INSERT INTO fact_contact (sch_id, cit_id, dat_id, macaddr, project)
(SELECT sch_id, cit_id, contact_date, macaddr::macaddr, to_project_enum(project) FROM sa_inventory WHERE valid = '1')
EXCEPT
(SELECT sch_id, cit_id, dat_id, macaddr, project FROM fact_contact WHERE dat_id >= result.min_date AND dat_id <= result.max_date);
-- how many rows were inserted?
GET DIAGNOSTICS total_rows = ROW_COUNT;
-- how many rows were inserted?
GET DIAGNOSTICS total_rows = ROW_COUNT;
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading fact contact', start_ts, CLOCK_TIMESTAMP(), total_rows);
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading fact contact', start_ts, CLOCK_TIMESTAMP(), total_rows);
END;
$$ language plpgsql;
......@@ -2,35 +2,35 @@
-- area tables. This function should NOT be called directly.
CREATE OR REPLACE FUNCTION load_fact_net_usage() returns void as $$
DECLARE
result RECORD;
start_ts TIMESTAMP;
total_rows INTEGER;
result RECORD;
start_ts TIMESTAMP;
total_rows INTEGER;
BEGIN
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
-- getting the date limits
SELECT max(contact_date) as max_date, min(contact_date) as min_date
INTO result from sa_net_usage;
-- getting the date limits
SELECT max(contact_date) as max_date, min(contact_date) as min_date
INTO result from sa_net_usage;
--
INSERT INTO fact_net_usage (sch_id, cit_id, dat_id, macaddr,
collect_time, down_kbits, down_packages, up_kbits, up_packages)
(SELECT sch_id, cit_id, contact_date, macaddr, collect_time, down_kbits,
down_packages, up_kbits, up_packages FROM sa_net_usage
WHERE valid = '1')
EXCEPT
(SELECT sch_id, cit_id, dat_id, macaddr, collect_time, down_kbits,
down_packages, up_kbits, up_packages FROM fact_net_usage WHERE
dat_id >= result.min_date AND dat_id <= result.max_date);
--
INSERT INTO fact_net_usage (sch_id, cit_id, dat_id, macaddr,
collect_time, down_kbits, down_packages, up_kbits, up_packages)
(SELECT sch_id, cit_id, contact_date, macaddr, collect_time, down_kbits,
down_packages, up_kbits, up_packages FROM sa_net_usage
WHERE valid = '1')
EXCEPT
(SELECT sch_id, cit_id, dat_id, macaddr, collect_time, down_kbits,
down_packages, up_kbits, up_packages FROM fact_net_usage WHERE
dat_id >= result.min_date AND dat_id <= result.max_date);
-- how many rows were inserted?
GET DIAGNOSTICS total_rows = ROW_COUNT;
-- how many rows were inserted?
GET DIAGNOSTICS total_rows = ROW_COUNT;
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading net usage contact', start_ts, CLOCK_TIMESTAMP(), total_rows);
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading net usage contact', start_ts, CLOCK_TIMESTAMP(), total_rows);
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION load_aggr_availability() returns void as $$
DECLARE
date_it date;
i INTEGER;
start_ts TIMESTAMP;
total_rows INTEGER;
cur_rows INTEGER;
date_it date;
start_ts TIMESTAMP;
total_rows INTEGER;
cur_rows INTEGER;
BEGIN
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
total_rows := 0;
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
total_rows := 0;
-- remove the index - later we create it again
DROP INDEX IF EXISTS aggr_availability_base_date_idx;
-- remove old data
TRUNCATE TABLE aggr_availability;
-- remove old data
TRUNCATE TABLE aggr_availability;
-- iterate over past months
FOR date_it IN SELECT (date_trunc('month', generate_series(
current_date - interval '4 months',
current_date,
interval '1 month')) - interval '1 day')::date UNION select current_date LOOP
-- iterate over past months
FOR i IN 0..5 LOOP
date_it := CURRENT_DATE - (i * interval '1 month');
INSERT INTO aggr_availability
SELECT
date_it,
f.macaddr,
f.project,
s.inep,
s.name,
c.name,
c.state,
c.region,
CASE WHEN date_it - max_dat_id <= 10 THEN 1 ELSE 0 END as is_green,
CASE WHEN date_it - max_dat_id > 10 AND date_it - max_dat_id <= 30 THEN 1 ELSE 0 END as is_yellow,
CASE WHEN date_it - max_dat_id > 30 THEN 1 ELSE 0 END as is_red
FROM
(SELECT
max(dat_id) as max_dat_id,
f.sch_id,
f.cit_id,
f.macaddr,
f.project
FROM
fact_contact f
WHERE
dat_id <= date_it
GROUP BY
f.cit_id, f.sch_id, f.macaddr, f.project) as f
INNER JOIN
dim_school s
ON
f.sch_id = s.id
INNER JOIN
dim_city c
ON
f.cit_id = c.id;
INSERT INTO aggr_availability
SELECT
date_it,
f.macaddr,
f.project,
s.inep,
s.name,
c.name,
c.state,
c.region,
CASE WHEN date_it - max_dat_id <= 10 THEN 1 ELSE 0 END as is_green,
CASE WHEN date_it - max_dat_id > 10 AND date_it - max_dat_id <= 30 THEN 1 ELSE 0 END as is_yellow,
CASE WHEN date_it - max_dat_id > 30 THEN 1 ELSE 0 END as is_red
FROM
(SELECT
max(dat_id) as max_dat_id,
f.sch_id,
f.cit_id,
f.macaddr,
f.project
FROM
fact_contact f
WHERE
dat_id <= date_it
GROUP BY
f.cit_id, f.sch_id, f.macaddr, f.project) as f
INNER JOIN
dim_school s
ON
f.sch_id = s.id
INNER JOIN
dim_city c
ON
f.cit_id = c.id;
-- how many rows were inserted?
GET DIAGNOSTICS cur_rows := ROW_COUNT;
total_rows := total_rows + cur_rows;
-- how many rows were inserted?
GET DIAGNOSTICS cur_rows := ROW_COUNT;
total_rows := total_rows + cur_rows;
END LOOP;
END LOOP;
-- creating index to speed-up per-date queries, and it speeds up a lot..
CREATE INDEX aggr_availability_base_date_idx ON aggr_availability(base_date);
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading aggr_availability', start_ts, CLOCK_TIMESTAMP(), total_rows);
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading aggr_availability', start_ts, CLOCK_TIMESTAMP(), total_rows);
END;
$$ language plpgsql;
-- copy inventory data directly from the production database
-- copy inventory data directly from the production database
CREATE OR REPLACE FUNCTION copy_inventory_from_seed() returns void as $$
BEGIN
-- inserting rows from production staging area into sa_inventory
INSERT INTO proinfo_inventory
(contact_date, project, inep, macaddr, os_type, os_distro,
os_kernel, processor, memory, disk1_model, disk1_size,
disk1_used, disk2_model, disk2_size, disk2_used)
(SELECT
*
FROM
dblink('dbname=db
host=host
user=user
password=passwd',
'SELECT
sa_data,
sa_projeto,
sa_inep,
sa_mac,
sa_so_nome,
sa_so_distribuicao,
sa_so_kernel,
sa_processador,
sa_memoria,
sa_disco1_modelo,
sa_disco1_capacidade,
sa_disco1_usado,
sa_disco2_modelo,
sa_disco2_capacidade,
sa_disco2_usado
FROM mectb00_staging_area'
)
AS (
contact_date date,
project integer,
inep varchar(18),
macaddr varchar(18),
os_type text,
os_distro text,
os_kernel text,
processor text,
memory integer,
disk1_model text,
disk1_size integer,
disk1_used integer,
disk2_model text,
disk2_size integer,
disk2_used integer
));