...
 
Commits (2)
#!/bin/bash #!/bin/bash
# Copyright (C) 2007-2013 Centro de Computacao Cientifica e Software Livre
# Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
#
# This file is part of database
#
# database is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
DB_NAME=$1 export DB_NAME=$1
DB_LINK_LOCATION=/usr/share/postgresql/8.4/contrib/dblink.sql DB_LINK_LOCATION=/usr/share/postgresql/8.4/contrib/dblink.sql
# checking parameters # checking parameters
...@@ -9,6 +28,10 @@ if [[ -z $DB_NAME ]]; then ...@@ -9,6 +28,10 @@ if [[ -z $DB_NAME ]]; then
exit 1 exit 1
fi fi
# importing db_link functions
psql $DB_NAME -f $DB_LINK_LOCATION
# creating database tables and structure # creating database tables and structure
for file in $(ls -B create/); do for file in $(ls -B create/); do
psql $DB_NAME -f create/$file psql $DB_NAME -f create/$file
...@@ -29,5 +52,5 @@ for file in $(ls -B query/); do ...@@ -29,5 +52,5 @@ for file in $(ls -B query/); do
psql $DB_NAME -f query/$file psql $DB_NAME -f query/$file
done done
# importing db_link functions # installing crontab
psql $DB_NAMR -f $DB_LINK_LOCATION ./cron.sh
...@@ -37,21 +37,22 @@ CREATE TABLE dim_inventory ( ...@@ -37,21 +37,22 @@ CREATE TABLE dim_inventory (
macaddr MACADDR NOT NULL, macaddr MACADDR NOT NULL,
load_date DATE NOT NULL DEFAULT CURRENT_DATE REFERENCES dim_date, load_date DATE NOT NULL DEFAULT CURRENT_DATE REFERENCES dim_date,
project project_enum DEFAULT 'proinfo' NOT NULL, project project_enum DEFAULT 'proinfo' NOT NULL,
-- inventory info -- inventory info
disk1_model TEXT, disk1_model TEXT NOT NULL,
disk1_size INTEGER, disk1_size INTEGER NOT NULL,
disk1_used INTEGER, disk1_used INTEGER NOT NULL,
disk2_model TEXT, disk2_model TEXT,
disk2_size INTEGER, disk2_size INTEGER,
disk2_used INTEGER, disk2_used INTEGER,
memory INTEGER, memory INTEGER NOT NULL,
processor TEXT, processor TEXT NOT NULL,
os_type TEXT, os_type TEXT NOT NULL,
os_distro TEXT, os_distro TEXT NOT NULL,
os_kernel TEXT, os_kernel TEXT NOT NULL,
is_first BIT NOT NULL DEFAULT '0',
is_current BIT NOT NULL DEFAULT '1', is_current BIT NOT NULL DEFAULT '1',
CHECK (macaddr <> '00:00:00:00:00:00') CHECK (macaddr <> '00:00:00:00:00:00')
......
...@@ -5,6 +5,7 @@ CREATE TABLE fact_contact ( ...@@ -5,6 +5,7 @@ CREATE TABLE fact_contact (
cit_id INTEGER NOT NULL REFERENCES dim_city, cit_id INTEGER NOT NULL REFERENCES dim_city,
dat_id DATE NOT NULL REFERENCES dim_date, dat_id DATE NOT NULL REFERENCES dim_date,
macaddr MACADDR NOT NULL, macaddr MACADDR NOT NULL,
project project_enum DEFAULT 'proinfo' NOT NULL,
UNIQUE (sch_id, cit_id, dat_id, macaddr) UNIQUE (sch_id, cit_id, dat_id, macaddr)
); );
......
CREATE TABLE aggr_availability (
base_date DATE NOT NULL,
macaddr MACADDR NOT NULL,
project project_enum DEFAULT 'proinfo' NOT NULL,
sch_inep CHARACTER VARYING(12) NOT NULL,
sch_name CHARACTER VARYING(150) NOT NULL,
city CHARACTER VARYING(100) NOT NULL,
state CHARACTER VARYING(2) NOT NULL,
region CHARACTER VARYING(12) NOT NULL,
is_green SMALLINT NOT NULL DEFAULT 0,
is_yellow SMALLINT NOT NULL DEFAULT 0,
is_red SMALLINT NOT NULL DEFAULT 0
);
#!/bin/bash #!/bin/bash
# Copyright (C) 2004-2010 Centro de Computacao Cientifica e Software Livre # Copyright (C) 2007-2013 Centro de Computacao Cientifica e Software Livre
# Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR # Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
# #
# This file is part of database # This file is part of database
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA. # USA.
LOADFILE='/var/lib/postgresql/database/load/load.sh' COMMAND="psql -d $DB_NAME -c 'SELECT load_dw();'"
LOGFILE="/var/lib/postgresql/database/log/$(date +%d_%m_%y).log"
echo "59 23 * * * $LOADFILE > $LOGFILE" | crontab - echo "59 23 * * * $COMMAND" | crontab -
...@@ -49,7 +49,7 @@ BEGIN ...@@ -49,7 +49,7 @@ BEGIN
i.disk2_size, i.disk2_size,
i.disk2_used, i.disk2_used,
i.extra_hds, i.extra_hds,
'1'::boolean as valid '0'::boolean as valid
FROM FROM
proinfo_inventory i proinfo_inventory i
LEFT JOIN LEFT JOIN
...@@ -61,6 +61,9 @@ BEGIN ...@@ -61,6 +61,9 @@ BEGIN
-- how many rows were inserted? -- how many rows were inserted?
GET DIAGNOSTICS total_rows = ROW_COUNT; GET DIAGNOSTICS total_rows = ROW_COUNT;
-- creating primary key (and btree index)
ALTER TABLE sa_inventory ADD PRIMARY KEY ("id");
-- logging -- logging
INSERT INTO control (name, start_time, end_time, total) INSERT INTO control (name, start_time, end_time, total)
VALUES ('create sa inventory', start_ts, CLOCK_TIMESTAMP(), total_rows); VALUES ('create sa inventory', start_ts, CLOCK_TIMESTAMP(), total_rows);
...@@ -125,7 +128,7 @@ BEGIN ...@@ -125,7 +128,7 @@ BEGIN
-- truncating OLTP table -- truncating OLTP table
--TRUNCATE TABLE proinfo_net_usage; TRUNCATE TABLE proinfo_net_usage;
END; END;
$$ language plpgsql; $$ language plpgsql;
...@@ -147,15 +150,27 @@ CREATE OR REPLACE FUNCTION sa_sanitize_inventory() returns void as $$ ...@@ -147,15 +150,27 @@ CREATE OR REPLACE FUNCTION sa_sanitize_inventory() returns void as $$
DECLARE DECLARE
start_ts TIMESTAMP; start_ts TIMESTAMP;
total_rows INTEGER; total_rows INTEGER;
new_inv record;
BEGIN BEGIN
-- getting initial timestamp -- getting initial timestamp
start_ts = CLOCK_TIMESTAMP(); start_ts = CLOCK_TIMESTAMP();
-- updating the 'valid' column on invalid records
-- we create this table marking as rows as invalid.
-- when a computer sends more than one inventory per day, we get
-- only the last one. Validating the newers:
FOR new_inv IN
SELECT max(id) AS id FROM sa_inventory GROUP BY sch_id, macaddr, contact_date
LOOP
UPDATE sa_inventory set valid = '1' WHERE id = new_inv.id;
END LOOP;
-- Checking whether valid records have the required information
UPDATE sa_inventory UPDATE sa_inventory
SET valid = '0' SET valid = '0'
WHERE WHERE
sch_id is NULL OR valid = '1' AND
(sch_id is NULL OR
cit_id is NULL OR cit_id is NULL OR
contact_date is NULL OR contact_date is NULL OR
os_type is NULL OR os_type is NULL OR
...@@ -168,7 +183,8 @@ BEGIN ...@@ -168,7 +183,8 @@ BEGIN
disk1_used is NULL OR disk1_used is NULL OR
NOT valid_macaddress(macaddr) OR NOT valid_macaddress(macaddr) OR
project < 0 OR project > 3 OR project < 0 OR project > 3 OR
inep is NULL; inep is NULL);
-- log the invalid information into rejected table -- log the invalid information into rejected table
INSERT INTO rejected_inventory INSERT INTO rejected_inventory
......
...@@ -9,118 +9,151 @@ SELECT CASE WHEN $1 = 0 THEN 'proinfo'::project_enum ...@@ -9,118 +9,151 @@ SELECT CASE WHEN $1 = 0 THEN 'proinfo'::project_enum
END as project; END as project;
$$ LANGUAGE SQL; $$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION load_dim_inventory() returns void as $$ CREATE OR REPLACE FUNCTION load_dim_inventory() returns void as $$
DECLARE DECLARE
inv_row record; inv_row record;
date_row record;
start_ts TIMESTAMP;
total_rows INTEGER;
is_new BOOLEAN;
BEGIN BEGIN
-- I think the better way to compare the current inventory and the new -- getting initial timestamp
-- one is to iterate over each record. Using one batch insert will start_ts = CLOCK_TIMESTAMP();
-- certainly perform way faster, but here we have more flexibility. total_rows := 0;
-- Also, we are inside one trasaction and it will be commited just once
-- at the end of the function. -- since we removed multiple inventories on the same date on sa_sanitize,
FOR inv_row IN SELECT * FROM sa_inventory s LEFT JOIN (SELECT * FROM dim_inventory -- looping one day at a time will ensure that we won't have more than one
WHERE is_current = '1') d ON s.sch_id = d.sch_id AND -- inventory from the same computer.
s.macaddr::macaddr = d.macaddr AND NOT FOR date_row IN SELECT DISTINCT contact_date FROM sa_inventory WHERE valid = '1' ORDER BY contact_date
(s.memory >= d.memory * 0.9 AND LOOP
s.memory <= d.memory * 1.1 AND
s.processor = d.processor AND -- I think the better way to compare the current inventory and the new
s.os_type = d.os_type AND -- one is to iterate over each record. Using one batch insert will
s.os_distro = d.os_distro AND -- certainly perform way faster, but here we have more flexibility.
s.os_kernel = d.os_kernel AND FOR inv_row IN
s.disk1_model = d.disk1_model AND SELECT
s.disk1_size >= d.disk1_size * 0.9 AND s.sch_id as new_sch_id,
s.disk1_size <= d.disk1_size * 1.1 AND s.macaddr as new_macaddr,
s.disk1_used = d.disk1_used AND s.contact_date as new_contact_date,
s.disk2_model = d.disk2_model AND s.project as new_project,
s.disk2_size >= d.disk2_size * 0.9 AND s.disk1_model as new_disk1_model,
s.disk2_size <= d.disk2_size * 1.1 AND s.disk1_size as new_disk1_size,
s.disk2_used = d.disk2_used) s.disk1_used as new_disk1_used,
WHERE valid = '1' LOOP s.disk2_model as new_disk2_model,
s.disk2_size as new_disk2_size,
s.disk2_used as new_disk2_used,
-- IF .... THEN s.memory as new_memory,
-- CONTINUE s.processor as new_processor,
-- END IF; s.os_type as new_os_type,
s.os_distro as new_os_distro,
-- If we get here, we decided to update the record. First insert s.os_kernel as new_os_kernel,
-- the new inventory
INSERT INTO dim_inventory (sch_id, macaddr, load_date, project, d.id as cur_id,
disk1_model, disk1_size, disk1_used, disk2_model, d.sch_id as cur_sch_id,
disk2_size, disk2_used, memory, processor, os_type, d.load_date as cur_load_date,
os_distro, os_kernel, is_current) VALUES d.disk1_model as cur_disk1_model,
(inv_row.sch_id, inv_row.macaddr::macaddr, d.disk1_size as cur_disk1_size,
inv_row.contact_date, to_project_enum(inv_row.project), d.disk1_used as cur_disk1_used,
inv_row.disk1_model, inv_row.disk1_size, d.disk2_model as cur_disk2_model,
inv_row.disk1_used, inv_row.disk2_model, d.disk2_size as cur_disk2_size,
inv_row.disk2_size, inv_row.disk2_used, d.disk2_used as cur_disk2_used,
inv_row.memory, inv_row.processor, d.memory as cur_memory,
inv_row.os_type, inv_row.os_distro, d.processor as cur_processor,
inv_row.os_kernel, '1'); d.os_type as cur_os_type,
d.os_distro as cur_os_distro,
-- Then update the 'is_current' flag of the last one. d.os_kernel as cur_os_kernel
UPDATE dim_inventory SET is_current = '0' WHERE FROM sa_inventory s LEFT JOIN
id = inv_row.id; (SELECT * FROM dim_inventory WHERE is_current = '1') as d
ON s.sch_id = d.sch_id AND s.macaddr::macaddr = d.macaddr
WHERE s.contact_date = date_row.contact_date AND s.valid = '1'
LOOP
-- in which cases we shouldn't update the table
-- Note that since disk2_* can be null, the logic is a little
-- more complicated... The other columns are NOT NULL.
IF (inv_row.cur_id IS NOT NULL AND
inv_row.new_memory >= inv_row.cur_memory * 0.9 AND
inv_row.new_memory <= inv_row.cur_memory * 1.1 AND
inv_row.new_processor = inv_row.cur_processor AND
inv_row.new_os_type = inv_row.cur_os_type AND
inv_row.new_os_distro = inv_row.cur_os_distro AND
inv_row.new_os_kernel = inv_row.cur_os_kernel AND
inv_row.new_disk1_model = inv_row.cur_disk1_model AND
inv_row.new_disk1_size >= inv_row.cur_disk1_size * 0.9 AND
inv_row.new_disk1_size <= inv_row.cur_disk1_size * 1.1 AND
inv_row.new_disk1_used >= inv_row.cur_disk1_used * 0.9 AND
inv_row.new_disk1_used <= inv_row.cur_disk1_used * 1.1 AND
(
(inv_row.new_disk2_model IS NULL AND inv_row.cur_disk2_model IS NULL)
OR
inv_row.new_disk2_model = inv_row.cur_disk2_model
) AND
(
(inv_row.new_disk2_size IS NULL AND inv_row.cur_disk2_size IS NULL)
OR
(inv_row.new_disk2_size >= inv_row.cur_disk2_size * 0.9 AND
inv_row.new_disk2_size <= inv_row.cur_disk2_size * 1.1)
) AND
(
(inv_row.new_disk2_used IS NULL AND inv_row.cur_disk2_used IS NULL)
OR
(inv_row.new_disk2_used >= inv_row.cur_disk2_used * 0.9 AND
inv_row.new_disk2_used <= inv_row.cur_disk2_used * 1.1)
))
THEN
CONTINUE;
END IF;
-- if this inventory is older than the current, ignore it. Probably
-- the nicest way here would be to insert the historical data, but
-- that would complicate the loading function too much and slow all
-- things down.
IF (inv_row.new_contact_date <= inv_row.cur_load_date) THEN
CONTINUE;
END IF;
-- if cur_id is NULL then it's a new record. Marking it accordingly..
IF (inv_row.cur_id IS NULL) THEN
is_new := '1';
ELSE
is_new := '0';
END IF;
-- If we get here, we decided to update the record. First insert
-- the new inventory
INSERT INTO dim_inventory (sch_id, macaddr, load_date, project,
disk1_model, disk1_size, disk1_used, disk2_model,
disk2_size, disk2_used, memory, processor, os_type,
os_distro, os_kernel, is_first, is_current) VALUES
(inv_row.new_sch_id, inv_row.new_macaddr::macaddr,
inv_row.new_contact_date, to_project_enum(inv_row.new_project),
inv_row.new_disk1_model, inv_row.new_disk1_size,
inv_row.new_disk1_used, inv_row.new_disk2_model,
inv_row.new_disk2_size, inv_row.new_disk2_used,
inv_row.new_memory, inv_row.new_processor,
inv_row.new_os_type, inv_row.new_os_distro,
inv_row.new_os_kernel, (SELECT CASE WHEN is_new THEN B'1' ELSE B'0' END), '1');
-- only for logging purposes
total_rows := total_rows + 1;
-- Since we only insert newer inventories, we must update the
-- 'is_current' flag of the last inventory. Note that maybe we
-- don't have a previous inventory to update
IF (inv_row.cur_id IS NOT NULL) THEN
UPDATE dim_inventory SET is_current = '0' WHERE
id = inv_row.cur_id;
END IF;
END LOOP;
END LOOP; END LOOP;
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading dim_inventory', start_ts, CLOCK_TIMESTAMP(), total_rows);
END; END;
$$ language plpgsql; $$ language plpgsql;
--CREATE OR REPLACE FUNCTION load_dim_inventory() returns void as $$
--BEGIN
-- creating a temporary table containing the inventories
-- we will need to insert/update - i.e. new inventories
-- EXCEPT current inventories
-- CREATE TEMP TABLE new_inventories ON COMMIT DROP AS
-- SELECT sch_id, macaddr FROM
-- (
-- -- new computer information
-- (SELECT
-- sch_id, macaddr, project, memory,
-- disk1_model, disk1_size,
-- disk2_model, disk2_size,
-- os_type, os_distro, os_kernel
-- FROM sa_inventory
-- )
-- EXCEPT
-- list of current inventory for each computer
-- (SELECT
-- sch_id, macaddr, project, memory,
-- disk1_model, disk1_size,
-- disk2_model, disk2_size,
-- os_type, os_distro, os_kernel
-- FROM dim_inventory
-- WHERE is_current = '1'
-- )
-- ) as A;
-- setting is_current to '0' on old records
-- UPDATE dim_inventory SET is_current = '0'
-- WHERE
-- is_current = '1' and (sch_id, macaddr) IN (SELECT * FROM new_inventories);
-- inserting new records
-- INSERT INTO dim_inventory
-- (sch_id, macaddr, load_date, project,
-- disk1_model, disk1_size, disk1_used,
-- disk2_model, disk2_size, disk2_used,
-- memory, processor,
-- os_type, os_distro, os_kernel, is_current)
-- SELECT
-- s.sch_id, s.macaddr, s.contact_date, s.project,
-- s.disk1_model, s.disk1_size, s.disk1_used,
-- s.disk2_model, s.disk2_size, s.disk2_used,
-- s.memory, s.processor,
-- s.os_type, s.os_distro, s.os_kernel, '1'
-- FROM new_inventories n INNER JOIN sa_inventory s
-- ON n.sch_id = s.sch_id and n.macaddr = s.macaddr;
-- when the transaction commits, the temporary table
-- 'new_inventories' will be removed
--END;
--$$ language plpgsql;
...@@ -15,10 +15,10 @@ BEGIN ...@@ -15,10 +15,10 @@ BEGIN
INTO result from sa_inventory; INTO result from sa_inventory;
-- --
INSERT INTO fact_contact (sch_id, cit_id, dat_id, macaddr) INSERT INTO fact_contact (sch_id, cit_id, dat_id, macaddr, project)
(SELECT sch_id, cit_id, contact_date, macaddr::macaddr FROM sa_inventory WHERE valid = '1') (SELECT sch_id, cit_id, contact_date, macaddr::macaddr, to_project_enum(project) FROM sa_inventory WHERE valid = '1')
EXCEPT EXCEPT
(SELECT sch_id, cit_id, dat_id, macaddr FROM fact_contact WHERE dat_id >= result.min_date AND dat_id <= result.max_date); (SELECT sch_id, cit_id, dat_id, macaddr, project FROM fact_contact WHERE dat_id >= result.min_date AND dat_id <= result.max_date);
-- how many rows were inserted? -- how many rows were inserted?
GET DIAGNOSTICS total_rows = ROW_COUNT; GET DIAGNOSTICS total_rows = ROW_COUNT;
......
...@@ -18,7 +18,8 @@ BEGIN ...@@ -18,7 +18,8 @@ BEGIN
INSERT INTO fact_net_usage (sch_id, cit_id, dat_id, macaddr, INSERT INTO fact_net_usage (sch_id, cit_id, dat_id, macaddr,
collect_time, down_kbits, down_packages, up_kbits, up_packages) collect_time, down_kbits, down_packages, up_kbits, up_packages)
(SELECT sch_id, cit_id, contact_date, macaddr, collect_time, down_kbits, (SELECT sch_id, cit_id, contact_date, macaddr, collect_time, down_kbits,
down_packages, up_kbits, up_packages FROM sa_net_usage) down_packages, up_kbits, up_packages FROM sa_net_usage
WHERE valid = '1')
EXCEPT EXCEPT
(SELECT sch_id, cit_id, dat_id, macaddr, collect_time, down_kbits, (SELECT sch_id, cit_id, dat_id, macaddr, collect_time, down_kbits,
down_packages, up_kbits, up_packages FROM fact_net_usage WHERE down_packages, up_kbits, up_packages FROM fact_net_usage WHERE
......
CREATE OR REPLACE FUNCTION load_aggr_availability() returns void as $$
DECLARE
date_it date;
i INTEGER;
start_ts TIMESTAMP;
total_rows INTEGER;
cur_rows INTEGER;
BEGIN
-- getting initial timestamp
start_ts = CLOCK_TIMESTAMP();
total_rows := 0;
-- remove the index - later we create it again
DROP INDEX IF EXISTS aggr_availability_base_date_idx;
-- remove old data
TRUNCATE TABLE aggr_availability;
-- iterate over past months
FOR i IN 0..5 LOOP
date_it := CURRENT_DATE - (i * interval '1 month');
INSERT INTO aggr_availability
SELECT
date_it,
f.macaddr,
f.project,
s.inep,
s.name,
c.name,
c.state,
c.region,
CASE WHEN date_it - max_dat_id <= 10 THEN 1 ELSE 0 END as is_green,
CASE WHEN date_it - max_dat_id > 10 AND date_it - max_dat_id <= 30 THEN 1 ELSE 0 END as is_yellow,
CASE WHEN date_it - max_dat_id > 30 THEN 1 ELSE 0 END as is_red
FROM
(SELECT
max(dat_id) as max_dat_id,
f.sch_id,
f.cit_id,
f.macaddr,
f.project
FROM
fact_contact f
WHERE
dat_id <= date_it
GROUP BY
f.cit_id, f.sch_id, f.macaddr, f.project) as f
INNER JOIN
dim_school s
ON
f.sch_id = s.id
INNER JOIN
dim_city c
ON
f.cit_id = c.id;
-- how many rows were inserted?
GET DIAGNOSTICS cur_rows := ROW_COUNT;
total_rows := total_rows + cur_rows;
END LOOP;
-- creating index to speed-up per-date queries, and it speeds up a lot..
CREATE INDEX aggr_availability_base_date_idx ON aggr_availability(base_date);
-- logging
INSERT INTO control (name, start_time, end_time, total)
VALUES ('loading aggr_availability', start_ts, CLOCK_TIMESTAMP(), total_rows);
END;
$$ language plpgsql;
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
-- only function that should be called directly. Run it once a day. -- only function that should be called directly. Run it once a day.
CREATE OR REPLACE FUNCTION load_dw() returns void as $$ CREATE OR REPLACE FUNCTION load_dw() returns void as $$
BEGIN BEGIN
-- populating dim_date with next 10 years -- populating dim_date with next 10 years
PERFORM populate_dim_date(CAST(extract(year FROM CURRENT_DATE) AS INTEGER) + 10); PERFORM populate_dim_date(CAST(extract(year FROM CURRENT_DATE) AS INTEGER) + 10);
...@@ -17,7 +17,8 @@ BEGIN ...@@ -17,7 +17,8 @@ BEGIN
-- ET'L' -> Load -- ET'L' -> Load
PERFORM load_dim_inventory(); PERFORM load_dim_inventory();
PERFORM load_fact_contact(); PERFORM load_fact_contact();
--SELECT load_fact_net_usage(); PERFORM load_fact_net_usage();
PERFORM load_aggr_availability();
-- dropping staging area tables -- dropping staging area tables
PERFORM sa_drop(); PERFORM sa_drop();
......
CREATE OR REPLACE FUNCTION alert_hd_and_mem_brazil(proj project_enum) RETURNS
TABLE ("hd" bigint, "memory" bigint) AS $$
BEGIN
RETURN QUERY SELECT
SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END),
SUM(
CASE WHEN
i1.disk1_model != i2.disk1_model
OR
i1.disk1_size != i2.disk1_size
OR
(i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL))
OR
(i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL))
THEN 1
ELSE 0 END
)
FROM
dim_inventory i1
INNER JOIN
dim_inventory i2
ON
i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND
i1.is_first = '1' AND i2.is_current = '1'
WHERE
i1.project = proj;
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION alert_hd_and_mem_region(proj project_enum) RETURNS
TABLE ("region" varchar, "hd" bigint, "memory" bigint) AS $$
BEGIN
RETURN QUERY SELECT
c.region,
SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END),
SUM(
CASE WHEN
i1.disk1_model != i2.disk1_model
OR
i1.disk1_size != i2.disk1_size
OR
(i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL))
OR
(i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL))
THEN 1
ELSE 0 END
)
FROM
dim_inventory i1
INNER JOIN
dim_inventory i2
ON
i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND
i1.is_first = '1' AND i2.is_current = '1'
INNER JOIN
dim_school s
ON
i1.sch_id = s.id
INNER JOIN
dim_city c
ON
s.cit_id = c.id
WHERE
i1.project = proj
GROUP BY
c.region;
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION alert_hd_and_mem_state(proj project_enum, region_in text) RETURNS
TABLE ("state" varchar, "hd" bigint, "memory" bigint) AS $$
BEGIN
RETURN QUERY SELECT
c.state,
SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END),
SUM(
CASE WHEN
i1.disk1_model != i2.disk1_model
OR
i1.disk1_size != i2.disk1_size
OR
(i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL))
OR
(i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL))
THEN 1
ELSE 0 END
)
FROM
dim_inventory i1
INNER JOIN
dim_inventory i2
ON
i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND
i1.is_first = '1' AND i2.is_current = '1'
INNER JOIN
dim_school s
ON
i1.sch_id = s.id
INNER JOIN
dim_city c
ON
s.cit_id = c.id
WHERE
i1.project = proj AND c.region = region_in
GROUP BY
c.region, c.state;
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION alert_hd_and_mem_city(proj project_enum, state_in text) RETURNS
TABLE ("city" varchar, "hd" bigint, "memory" bigint) AS $$
BEGIN
RETURN QUERY SELECT
c.name,
SUM(CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END),
SUM(
CASE WHEN
i1.disk1_model != i2.disk1_model
OR
i1.disk1_size != i2.disk1_size
OR
(i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL))
OR
(i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL))
THEN 1
ELSE 0 END
)
FROM
dim_inventory i1
INNER JOIN
dim_inventory i2
ON
i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND
i1.is_first = '1' AND i2.is_current = '1'
INNER JOIN
dim_school s
ON
i1.sch_id = s.id
INNER JOIN
dim_city c
ON
s.cit_id = c.id
WHERE
i1.project = proj AND c.state = state_in
GROUP BY
c.name;
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION alert_hd_and_mem_report(proj project_enum, region_in text, state_in text, city_in text) RETURNS
TABLE ("load_date" text, "region" text, "state" varchar, "city" text, "school" text, "machine" macaddr, "old_date" date, "new_date" date, "os_distro" text, "processor" text, "old_memory_size" integer, "new_memory_size" integer, "old_hd_size" integer, "new_hd_size" integer, "memory_alert" integer, "hd_alert" integer) AS $$
BEGIN
RETURN QUERY SELECT
to_char(current_date, 'DD/MM/YYYY'),
initcap(c.region),
c.state,
initcap(c.name),
initcap(s.name),
i1.macaddr,
i1.load_date, i2.load_date,
i1.os_distro, i1.processor,
i1.memory, i2.memory,
i1.disk1_size, i2.disk1_size,
CASE WHEN i1.memory != i2.memory THEN 1 ELSE 0 END,
CASE WHEN
i1.disk1_model != i2.disk1_model
OR
i1.disk1_size != i2.disk1_size
OR
(i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL))
OR
(i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL))
THEN 1 ELSE 0 END
FROM
dim_inventory i1
INNER JOIN
dim_inventory i2
ON
i1.sch_id = i2.sch_id AND i1.macaddr = i2.macaddr AND
i1.is_first = '1' AND i2.is_current = '1'
INNER JOIN
dim_school s
ON
i1.sch_id = s.id
INNER JOIN
dim_city c
ON
s.cit_id = c.id
WHERE
i1.project = proj AND c.region = region_in AND c.state = state_in AND c.name = city_in
AND
((i1.memory != i2.memory) OR (i1.disk1_model != i2.disk1_model OR i1.disk1_size != i2.disk1_size OR
(i1.disk2_model != i2.disk2_model AND (i1.disk2_model IS NOT NULL OR i2.disk2_model IS NOT NULL))
OR
(i1.disk2_size != i2.disk2_size AND (i1.disk2_size IS NOT NULL OR i2.disk2_size IS NOT NULL))))
ORDER BY
s.name, i1.macaddr, i1.load_date;
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION audit_city(proj project_enum, region_in text, state_in text, city_in text, base_date DATE DEFAULT CURRENT_DATE) RETURNS
TABLE ("data_atual" text, "regiao" text, "aud_estado" varchar, "cidade" text, "escola" text, "instaladas" bigint, "nao_instaladas" integer) AS $$
BEGIN
RETURN QUERY SELECT
to_char(base_date, 'DD/MM/YYYY'),
initcap(region),
state,
initcap(city),
initcap(sch_name),
COUNT(*),
0
FROM
(SELECT
DISTINCT
f.sch_id as sch_id,
s.name as sch_name,
f.macaddr as macaddr,
c.region as region,
c.state as state,
c.name as city
FROM
fact_contact f
INNER JOIN
dim_city c
ON
f.cit_id = c.id
INNER JOIN
dim_school s
ON
f.sch_id = s.id
WHERE
project = proj AND c.region = upper(region_in) AND c.state = upper(state_in) AND c.name = upper(city_in) AND f.dat_id <= base_date
) AS a
GROUP BY
region, state, city, sch_name;
END;
$$ language plpgsql;
CREATE OR REPLACE FUNCTION audit_no_agent_city(proj project_enum, region_in text, state_in text, city_in text, base_date DATE DEFAULT CURRENT_DATE) RETURNS
TABLE ("data_atual" text, "regiao" text, "aud_estado" varchar, "cidade" text, "escola" text) AS $$
BEGIN
RETURN QUERY SELECT
to_char(base_date, 'DD/MM/YYYY'),
initcap(c.region),
c.state,
initcap(c.name),
initcap(s.name)
FROM
dim_school s
INNER JOIN
dim_city c
ON
s.cit_id = c.id
LEFT JOIN
(SELECT * FROM fact_contact WHERE dat_id <= base_date AND project = proj) AS f
ON
c.id = f.cit_id
WHERE
c.region = upper(region_in) AND c.state = upper(state_in) AND c.name = upper(city_in) AND f.cit_id is NULL;
END;
$$ language plpgsql;
\ No newline at end of file
This diff is collapsed.
/* Function to get the name of the month
*
* Receives as input the number of month.
*
* Returns the name of the month.
*/
CREATE OR REPLACE FUNCTION get_month_name(integer) RETURNS varchar(9) as $$
DECLARE
array_months VARCHAR[] := ARRAY['Janeiro', 'Fevereiro', 'Março','Abril',
'Maio', 'Junho', 'Julho', 'Agosto', 'Setembro', 'Outubro', 'Novembro',
'Dezembro'];
BEGIN
RETURN array_months[$1];
END;
$$ LANGUAGE 'plpgsql';
\ No newline at end of file
/**
* The functions on this file return the number of computers of a given
* 'project' that contacted the server in the last 10 days (green), 10
* to 30 days (yellow), or more than 30 days (red). You can also provide
* 'base_date' to refer to the situation in a particular date.
*/
/**
* Returns the total amount of computers of the given 'project' and their
* last contact date status.
*
* @param in project_enum proj The type of computer/machine
* @param in DATE base_date The particular date until which we're
* interested. Default value is today.
* @return TABLE(bigint, bigint, bigint)
*/
DROP FUNCTION inventory_brazil(project_enum, DATE);
CREATE OR REPLACE FUNCTION inventory_brazil(proj project_enum, b_date DATE DEFAULT CURRENT_DATE) RETURNS
TABLE ("Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
COUNT(*)
FROM
(SELECT
DISTINCT macaddr, sch_inep
FROM
aggr_availability
WHERE
project = proj
AND
base_date <= b_date
) AS a;
END;
$$ language plpgsql;
/**
* Returns the total amount of computers of the given 'project' in the
* current date and the last three months.
*
* TODO: generalize the number of months and create the 'dates' temp table
* in a more elegant manner.
*
* @param in project_enum proj The type of computer/machine
* @return TABLE(date, bigint, bigint, bigint)
*/
DROP FUNCTION inventory_brazil_monthly_history(project_enum);
CREATE OR REPLACE FUNCTION inventory_brazil_monthly_history(proj project_enum) RETURNS
TABLE ("Data" text, "Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
get_month_name(extract(month from base_date)::integer)::text,
count(*)
FROM
aggr_availability
WHERE
project = proj
GROUP BY
base_date
ORDER BY base_date;
END;
$$ language plpgsql;
/**
* Returns the total amount of computers of the given 'project' and their
* last contact date status grouped by region.
*
* @param in project_enum proj The type of computer/machine
* @param in DATE base_date The particular date until which we're
* interested. Default value is today.
* @return TABLE(varchar, bigint, bigint, bigint)
*/
DROP FUNCTION inventory_region(project_enum, DATE);
CREATE OR REPLACE FUNCTION inventory_region(proj project_enum, b_date DATE DEFAULT CURRENT_DATE) RETURNS
TABLE ("Região" varchar, "Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
region,
COUNT(*)
FROM
(SELECT macaddr, sch_inep, region
FROM aggr_availability
WHERE base_date <= b_date AND
project = proj
GROUP BY macaddr, sch_inep, region) AS a
GROUP BY region;
END;
$$ language plpgsql;
DROP FUNCTION inventory_region_monthly_history(project_enum, text);
CREATE OR REPLACE FUNCTION inventory_region_monthly_history(proj project_enum, region_in text) RETURNS
TABLE ("Data" text, "Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
get_month_name(extract(month from base_date)::integer)::text,
COUNT(*)
FROM
(SELECT
macaddr, sch_inep, base_date
FROM
aggr_availability
WHERE
project = proj AND region = region_in
GROUP BY
macaddr, sch_inep, base_date) AS a
GROUP BY base_date
ORDER BY base_date;
END;
$$ language plpgsql;
DROP FUNCTION inventory_state(project_enum, text, date);
CREATE OR REPLACE FUNCTION inventory_state(proj project_enum, region_in text, b_date DATE DEFAULT CURRENT_DATE) RETURNS
TABLE ("Estado" varchar, "Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
state,
COUNT(*)
FROM
(SELECT
state, macaddr, sch_inep
FROM
aggr_availability
WHERE
project = proj AND region = region_in AND base_date <= b_date
GROUP BY
sch_inep, macaddr, state) AS a
GROUP BY state
ORDER BY state;
END;
$$ language plpgsql;
drop function inventory_state_monthly_history(proj project_enum, state_in text);
CREATE OR REPLACE FUNCTION inventory_state_monthly_history(proj project_enum, state_in text) RETURNS
TABLE ("Data" text, "Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
get_month_name(extract(month from base_date)::integer)::text,
COUNT(*)
FROM
(SELECT
macaddr,
sch_inep,
base_date
FROM
aggr_availability
WHERE
state = state_in AND
project = proj
GROUP BY
macaddr, sch_inep, base_date) AS a
GROUP BY base_date ORDER BY base_date;
END;
$$ language plpgsql;
DROP FUNCTION inventory_city(project_enum, text, DATE);
CREATE OR REPLACE FUNCTION inventory_city(proj project_enum, state_in text, b_date DATE DEFAULT CURRENT_DATE) RETURNS
TABLE ("Cidade" varchar, "Total de máquinas" bigint) AS $$
BEGIN
RETURN QUERY SELECT
city,
count(*)
FROM
(SELECT city
FROM
aggr_availability
WHERE
state = state_in AND
base_date <= b_date
GROUP BY city, macaddr, sch_inep) as a
GROUP BY city
ORDER BY city;
END;
$$ language plpgsql;
DROP FUNCTION inventory_report(proj project_enum, region_in text, state_in text, city_in text);
CREATE OR REPLACE FUNCTION inventory_report(proj project_enum, region_in text, state_in text, city_in text) RETURNS
TABLE (data_atual text, regiao text, inv_estado character varying, cidade text,
escola text, maquina macaddr, data date, SO text, processor text,
memoria text, HD text) AS $$
BEGIN
RETURN QUERY SELECT to_char((SELECT max(base_date) FROM aggr_availability), 'DD/MM/YYYY'),
initcap(c.region),
c.state,
initcap(c.name),
initcap(s.name),
d.macaddr,
d.load_date,
d.os_distro,
d.processor,
simplify_memory(d.memory),
simplify_hd(d.disk1_size)
FROM
(SELECT
macaddr,
sch_id,
max(load_date) AS load_date
FROM dim_inventory
WHERE project = proj
GROUP BY macaddr, sch_id) AS i
JOIN dim_school s
ON i.sch_id = s.id
JOIN dim_city c
ON
s.cit_id = c.id AND
region = UPPER(region_in) AND
state = UPPER(state_in) AND
c.name = UPPER(city_in)
JOIN dim_inventory d
ON
d.sch_id = i.sch_id AND
d.macaddr = i.macaddr AND
d.load_date = i.load_date
ORDER BY s.name, macaddr;
END;
$$ LANGUAGE plpgsql;
\ No newline at end of file
CREATE OR REPLACE FUNCTION simplify_memory(integer) RETURNS text AS $$
select case
when $1 < 1024 then $1 || ' KiB'
when $1 < 1048576 then round($1 / 1024, 2) || ' MiB'
when $1 < 1073741824 then round($1 / 1048576, 2) || ' GiB'
else round($1 / 1073741824, 2) || ' TiB'
end;
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION simplify_hd(integer) RETURNS text AS $$
select case
when $1 < 1000 then $1 || ' GB'
else round($1 / 1000, 2) || ' TB' end;
$$ LANGUAGE SQL;
\ No newline at end of file