Commit 247adaa2 authored by Lucas Fernandes de Oliveira's avatar Lucas Fernandes de Oliveira
Browse files

Issue #56: Add monet adapter


Signed-off-by: Lucas Fernandes de Oliveira's avatarLucas Fernandes de Oliveira <lfo14@inf.ufpr.br>
parent b3310950
Pipeline #14258 passed with stages
in 2 minutes and 30 seconds
......@@ -211,9 +211,3 @@ dimensions:
parent: "dim:0"
relation: "year"
description: "A dimension of Blendb. Has 1 possible value."
-
name: "dim:12"
dataType: "integer"
parent: "dim:0"
relation: "dayofweek"
description: "A dimension of Blendb. Has 7 possible values."
/*
* Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
* Departamento de Informatica - Universidade Federal do Parana
*
* This file is part of blend.
*
* blend is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* blend is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with blend. If not, see <http://www.gnu.org/licenses/>.
*/
import { SQLAdapter } from "./sql";
import { View } from "../core/view";
import { FilterOperator } from "../core/filter";
const MDB = require("monetdb")();
/**
 * Connection settings for a MonetDB server.
 *
 * MonetAdapter stores this object and hands it, unchanged, to the
 * monetdb driver each time a connection is opened.
 */
export interface MonetConfig {
    /** Name of the database to open. */
    dbname: string;

    /** Host name or address of the server. */
    host: string;
    /** Port the server listens on. */
    port: number;

    /** Credentials used to authenticate. */
    user: string;
    password: string;
}
/**
 * Shape of the value a monetdb query resolves with, as consumed by
 * MonetAdapter.getDataFromView.
 */
interface MonetResult {
    /** Result rows; each row is read positionally through `structure[i].index`. */
    data: Array<any>;
    /**
     * One descriptor per result column: its type name (compared against
     * "timestamp" by the adapter), its column name and its position in a row.
     */
    structure: Array<{
        type: string;
        column: string;
        index: number;
    }>;
}
/**
 * SQL adapter that executes view queries against a MonetDB server.
 *
 * The query text comes from `getQueryFromView`, which is not defined in this
 * class (presumably inherited from SQLAdapter); this class supplies the
 * MonetDB specific pieces: connection handling, result conversion, type
 * casting and filter operators.
 */
export class MonetAdapter extends SQLAdapter {
    /** Connection settings handed to the monetdb driver on every query. */
    private config: MonetConfig;

    /**
     * @param conf Connection settings used for every connection opened later.
     */
    constructor (conf: MonetConfig) {
        super();
        this.config = conf;
    }

    /**
     * Runs the query generated for `view` and reports the rows to `cb`.
     *
     * Each row is converted into an object keyed by column name. Columns whose
     * type is "timestamp" are converted to `Date`; NULL timestamps are kept as
     * they are instead of becoming `new Date(null)` (1970-01-01).
     *
     * @param view View whose data is requested.
     * @param cb Called once with `(null, rows)` on success, `(null, null)`
     *           when the driver resolves with an empty result, or
     *           `(error, null)` when the query or the row conversion fails.
     */
    public getDataFromView(view: View, cb: (error: Error, result?: any[]) => void): void {
        const query = this.getQueryFromView(view);
        const pool: any = new MDB(this.config);
        pool.connect();

        pool.query(query).then((result: MonetResult) => {
            if (!result) {
                return null;
            }

            return result.data.map((item) => {
                const obj: any = {};
                for (const struct of result.structure) {
                    const raw = item[struct.index];
                    // Only wrap real values: new Date(null) would silently
                    // turn a NULL timestamp into the Unix epoch.
                    const isTimestamp = struct.type === "timestamp";
                    const hasValue = raw !== null && raw !== undefined;
                    obj[struct.column] = (isTimestamp && hasValue) ? new Date(raw) : raw;
                }
                return obj;
            });
        }).then(
            // Success and failure handlers are registered on the same step so
            // that an exception thrown by cb itself is never routed back into
            // cb as a second invocation.
            (rows: any[]) => {
                cb(null, rows);
            },
            (err: Error) => {
                cb(err, null);
            }
        );

        // NOTE(review): close() is requested right after the query is queued;
        // this relies on the driver finishing queued queries before closing.
        // Confirm against the monetdb driver documentation.
        pool.close();
    }

    /**
     * Materialization is not implemented for MonetDB.
     *
     * @param view Ignored.
     * @returns Always `false`.
     */
    public materializeView(view: View): boolean {
        return false;
    }

    /**
     * Wraps an already quoted value in the CAST matching its data type.
     *
     * @param quotedValue Value text, already quoted by the caller.
     * @param dt Data type name: "date", "integer" or "boolean" are cast;
     *           any other value is returned untouched.
     * @returns SQL expression for the value.
     */
    protected typeCast(quotedValue: string, dt: string): string {
        switch (dt) {
            case "date":
                return "CAST(" + quotedValue + " AS TIMESTAMP)";
            case "integer":
                return "CAST(" + quotedValue + " AS INTEGER)";
            case "boolean":
                return "CAST(" + quotedValue + " AS BOOLEAN)";
            default:
                return quotedValue;
        }
    }

    /**
     * Builds the SQL comparison between two operands.
     *
     * @param lSide Left operand (SQL text).
     * @param rSide Right operand (SQL text).
     * @param op Comparison to apply.
     * @returns The comparison expression, or an empty string when the
     *          operator is not one of the handled cases.
     */
    protected applyOperator(lSide: string, rSide: string, op: FilterOperator): string {
        switch (op) {
            case FilterOperator.EQUAL:
                return lSide + " = " + rSide;
            case FilterOperator.NOTEQUAL:
                return "NOT(" + lSide + " = " + rSide + ")";
            case FilterOperator.GREATER:
                return lSide + " > " + rSide;
            case FilterOperator.LOWER:
                return lSide + " < " + rSide;
            case FilterOperator.GREATEREQ:
                return lSide + " >= " + rSide;
            case FilterOperator.LOWEREQ:
                return lSide + " <= " + rSide;
            default:
                return "";
        }
    }
}
......@@ -21,8 +21,10 @@
import { expect } from "chai";
import { PostgresAdapter } from "./postgres";
import { MonetAdapter, MonetConfig } from "./monet";
import { Adapter } from "../core/adapter";
import { Fixture } from "../../test/postgres/fixture";
import { Fixture as FixPostgres } from "../../test/postgres/fixture";
import { Fixture as FixMonet } from "../../test/monet/fixture";
import { ConfigParser } from "../util/configParser";
import { adapterScenario } from "../../test/scenario";
......@@ -33,16 +35,39 @@ describe("postgres adapter", () => {
let config: any;
let adapter: Adapter;
let fixture;
before((done) => {
before(function (done) {
// A regular function (not an arrow function) is used to get access to `this` and skip the test
config = ConfigParser.parse("config/test.yaml");
fixture = new Fixture(config.connection);
fixture.load(config.loadViews, config.struct.create, (err) => {
if (err) {
throw err;
}
adapter = new PostgresAdapter(config.connection);
done();
});
if (config.adapter === "postgres") {
fixture = new FixPostgres(config.connection);
fixture.load(config.loadViews, config.struct.create, (err) => {
if (err) {
throw err;
}
adapter = new PostgresAdapter(config.connection);
done();
});
}
else if (config.adapter === "monet") {
fixture = new FixMonet(config.connection);
fixture.load(config.loadViews, config.struct.create, (err) => {
if (err) {
throw err;
}
let parsedConfig: MonetConfig = {
user: config.connection.user,
dbname: config.connection.database,
password: config.connection.password,
host: config.connection.host,
port: config.connection.port
};
adapter = new MonetAdapter(parsedConfig);
done();
});
}
else {
this.skip();
}
});
// Tests
it("should get data from single materialized view", (done) => {
......
/*
* Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
* Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
* Departamento de Informatica - Universidade Federal do Parana
*
* This file is part of blend.
......@@ -18,37 +18,12 @@
* along with blend. If not, see <http://www.gnu.org/licenses/>.
*/
import { Adapter } from "../core/adapter";
import { Metric } from "../core/metric";
import { Dimension } from "../core/dimension";
import { Clause } from "../core/clause";
import { Filter, FilterOperator } from "../core/filter";
import { AggregationType, RelationType } from "../common/types";
import { SQLAdapter } from "./sql";
import { View } from "../core/view";
import { FilterOperator } from "../core/filter";
import { Pool, PoolConfig } from "pg";
/**
 * Working representation of a view while a query is being assembled.
 */
interface ExpandedView {
    /** Identifier of the view this entry currently stands for. */
    id: string;
    /** SQL text used as the source: a view name or an aliased sub query. */
    from: string;
    /** Copied from the view's `origin` flag. */
    origin: boolean;

    /** Dimensions available in this source. */
    dimensions: Array<Dimension>;
    /** Key dimensions; sources are compared by keys when choosing joins. */
    keys: Array<Dimension>;

    /** Metrics provided by this source. */
    metrics: Array<Metric>;
    /** Metrics that can no longer be aggregated and act as dimensions. */
    dimMetrics: Array<Metric>;

    /** Clauses attached to this source. */
    clauses: Array<Clause>;
}
/**
 * Pairs a dimension with a list of expanded views.
 * NOTE(review): presumably the views that contain the dimension; the code
 * using this type is not visible here, so confirm.
 */
interface DimInfo {
    views: Array<ExpandedView>;
    dim: Dimension;
}
/**
 * Two textual forms of the same dimension.
 * NOTE(review): presumably the SQL text with and without an alias; the code
 * producing these values is not visible here, so confirm.
 */
interface DimTranslation {
    noalias: string;
    aliased: string;
}
export class PostgresAdapter extends Adapter {
export class PostgresAdapter extends SQLAdapter {
private pool: Pool;
constructor (config: PoolConfig) {
......@@ -75,809 +50,35 @@ export class PostgresAdapter extends Adapter {
return false;
}
public getQueryFromView(view: View): string {
/*
Find the base (materialized) views that have this data and expand
them (convert them to the format used in the adapter)
*/
const materialized = this.searchMaterializedViews(view).sort((a, b) => {
return (a.id < b.id) ? -1 : 1;
}).map((item) => {
return {
id: item.id,
from: "view_" + item.id,
dimMetrics: [],
metrics: item.metrics.filter((i) => {
return view.metrics.some((j) => i.name === j.name);
}),
dimensions: item.dimensions,
keys: item.keys,
clauses: item.clauses,
origin: item.origin
};
});
// Remove repeated views from the result
let partialJoin = [materialized[0]];
for (let i = 1; i < materialized.length; ++i) {
if (materialized[i - 1].id !== materialized[i].id) {
partialJoin.push(materialized[i]);
}
}
const blackList = view.dimensions.map((i) => i.name);
for (let i = 0; i < view.clauses.length; ++i) {
if (view.clauses[i].filters.length === 1) {
let filter = view.clauses[i].filters[0];
if (filter.operator === FilterOperator.EQUAL) {
blackList.push(filter.target.name);
}
}
}
/*
If there is more than one source of data (tables/views)
a join is needed.
Partial Join represents how many sources still exists,
every join reduces this number.
*/
let clausesToCover = view.clauses.map((i) => i);
while (partialJoin.length > 1) {
/*
The variable map finds which dimensions are still needed to
complete this query. They are required for 2 reasons:
1 - To make joins
2 - Because they are in the query
For each view that has this dimension we add one to the score of
this dimension, and one more if the dimension is in the query.
If the dimension is in the query there must be
at least one view with this attribute (or the query could not be
completed), so dimensions in the query always have a score of
at least 2.
To make a join the dimension must be in 2 different views,
which also gives a score of 2.
If the score is less than 2, this dimension is not required
anymore and can be removed.
*/
let map: { [key: string]: number } = {};
let partialsChange = false;
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions;
for (let k = 0; k < dims.length; ++k) {
if (!map[dims[k].name]) {
map[dims[k].name] = 1;
}
else {
++map[dims[k].name];
}
}
}
for (let i = 0; i < view.dimensions.length; ++i) {
let dim = view.dimensions[i];
while (dim !== null) {
if (map[dim.name]) {
++map[dim.name];
}
dim = dim.parent;
}
}
/*
Also mark scores for dimensions inside clauses
*/
for (let i = 0; i < clausesToCover.length; ++i) {
for (let j = 0; j < clausesToCover[i].targets.length; ++j) {
if (map[clausesToCover[i].targets[j].name]) {
++map[clausesToCover[i].targets[j].name];
}
}
}
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions.filter((item) => {
return map[item.name] > 1;
});
const keys = partialJoin[i].keys.filter((item) => {
return map[item.name] > 1;
});
/*
At this point the dimensions with a score lower than 2
are removed. If this happens the view is aggregated
again with fewer dimensions, removing these dimensions
from the view.
*/
let coveredClauses: Clause[] = [];
let notCoveredClauses: Clause[] = [];
/*
If all dimensions in a clause are a subset of the
dimensions of a view, this clause is applied now,
propagating the clause to this point.
Then this clause is removed from the set of clauses.
*/
for (let j = 0; j < clausesToCover.length; ++j) {
if (clausesToCover[j].isCovered(partialJoin[i].dimensions)) {
coveredClauses.push(clausesToCover[j]);
}
else {
notCoveredClauses.push(clausesToCover[j]);
}
}
clausesToCover = notCoveredClauses.filter((clause) => {
return !partialJoin[i].clauses.some((c) => c.id === clause.id);
});
if (dims.length < partialJoin[i].dimensions.length || coveredClauses.length > 0) {
const partial = new View({
metrics: partialJoin[i].metrics,
dimensions: dims,
keys: keys,
origin: false,
clauses: coveredClauses.concat(partialJoin[i].clauses),
materialized: false
});
const from = "(" +
this.buildQuery(partial, [partialJoin[i]]) +
") AS view_" + partial.id + "\n";
partialJoin[i].id = partial.id;
partialJoin[i].dimensions = partial.dimensions;
partialJoin[i].keys = partial.keys;
partialJoin[i].origin = partial.origin;
partialJoin[i].from = from;
partialsChange = true;
}
}
/*
If at least one of the views changed (had its number of
dimensions reduced), return to the beginning of the loop
again.
Otherwise we need to make a join.
*/
if (!partialsChange) {
/*
Sorting the views by keys.
If the keys are identical, then they
will be in sequence, and views with identical
keys can be joined.
Sort an array of keys is the same as sort a
array of strings.
*/
const sorted = partialJoin.sort((a, b) => {
return this.compareKeys(a.keys, b.keys, blackList);
});
/*
SUPER WARNING: WHEN THE BLACK LIST IS USED THE VIEW IS
UNMATERIALIZEBLE, BUT THE QUERY CAN AGGREGATE THE VALUES
The blackList is the array of dimensions of the query plus
the dimensions in filters using the equality operator.
In further coments is expained that the relation to make
a join must be one-to-one between the tables.
However and a dimension is choosed, a sub view is
created and if the relation is preserved in the sub view
the query can be agregated, but this view cannot be re-used
so it is unmaterializeble.
The equality operator is the same as select one subview.
*/
/*
First of all, the remaining views are splited in segments.
A segment contains views with the same keys that are great
to make joins. Joins like this do not create "dimensional
metrics".
In joins like this one row of each view will be connected
with at most one row of each other table.
*/
const segment = [[sorted[0]]];
let segmentId = 0;
for (let i = 1; i < sorted.length; ++i) {
if (this.compareKeys(sorted[i - 1].keys, sorted[i].keys, blackList) === 0) {
segment[segmentId].push(sorted[i]);
}
else {
++segmentId;
segment.push([sorted[i]]);
}
}
partialJoin = [];
let ableToJoin = false;
for (let i = 0; i < segment.length; ++i) {
/*
If a segment has more than one view, a join can be made
*/
if (segment[i].length > 1) {
let mets: Metric[] = [];
let clauses: Clause[] = [];
let dims: Dimension[] = [];
let dimMetrics: Metric[] = [];
for (let j = 0; j < segment[i].length; ++j) {
mets = mets.concat(segment[i][j].metrics);
clauses = clauses.concat(segment[i][j].clauses);
dims = dims.concat(segment[i][j].dimensions);
dimMetrics = dimMetrics.concat(segment[i][j].dimMetrics);
}
dims = this.removeDuplicatedDimensions(dims);
/*
Its atributes are just concatenated and the
duplicates removed.
*/
const partial = new View({
metrics: mets,
dimensions: dims,
keys: segment[i][0].keys,
origin: false,
clauses: clauses,
materialized: false
});
const viewFrom = "(" +
this.buildQuery(partial, segment[i]) +
") AS view_" + partial.id + "\n";
partialJoin.push({
id: partial.id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: partial.metrics,
dimensions: partial.dimensions,
keys: partial.keys,
clauses: partial.clauses,
origin: partial.origin
});
ableToJoin = true;
}
else {
/*
If the segment has just one view, anything can be
done at this point, so just reinsert this view in
set of views.
*/
partialJoin.push(segment[i][0]);
}
}
/*
If at least one join was made in the last part (a segment
with more than one view), then return to the beginning of the
loop.
This makes it possible, after a join, to remove the dimensions
that were only chosen for this join and are no longer required.
Ideally the joins should be restricted to the join method used
above, but in some cases this cannot be done.
So if all the segments have only one view inside, move
to the next method.
*/
if (!ableToJoin) {
/*
At this point 2 views will be joined. First the
similarity of each pair of views is calculated;
the pair with the biggest similarity will be joined.
Similarity is calculated as the number of common
dimensions in the keys.
*/
let similarity = 0;
let idx0 = 0;
let idx1 = 1;
for (let i = 0; i < partialJoin.length; ++i) {
for (let j = i + 1 ; j < partialJoin.length; ++j) {
const pi = partialJoin[i].keys;
const pj = partialJoin[j].keys;
let score = this.similarDimensions (pi, pj);
if (similarity < score) {
similarity = score;
idx0 = i;
idx1 = j;
}
}
}
const partial0 = partialJoin[idx0];
const partial1 = partialJoin[idx1];
partialJoin.splice(idx1, 1);
partialJoin.splice(idx0, 1);
/*
Once the views are select they are joined with the
same method, concatenedted its atributes and
removing duplicates, however the nasty effect of
this join is the creation of "dimensional metrics".
"Dimensional metrics" are metrics that can no longer
be aggregated, and at this point to the end
of a query they will act as dimensions.
This change happens to avoid inconsistency generated
by a join where one row of one table can be connected
to more than one of other table.
Take this example.
View0 : metrics [met0], dimensions [dim0]
values: [{met0: 10, dim0: 1}]
View1 : metrics [met1], dimensions [dim2]
values: [{met1: 10, dim2: 1}. {met1: 5, dim2: 2}]
View2 : metrics [], dimensions [dim0, dim1, dim2]
values: [
{dim0: 1, dim1: 1, dim2: 1},
{dim0: 1, dim1: 1, dim2: 2}
]
The query is metrics [met0, met1] and dimensions [dim1]
First a join of View0 and View1 is made, the result
is: [
{dim0: 1, dim1: 1, dim2: 1, met0: 10},
{dim0: 1, dim1: 1, dim2: 2, met0: 10}
]
Note that the value of met0 is duplicated.
Now dim0 is removed, than joined with view2 resulting
in: [