Commit 4339c078 authored by Lucas Fernandes de Oliveira's avatar Lucas Fernandes de Oliveira

Merge branch 'issue/66' into 'develop'

Issue #66: Refactor engine and adapter relationship

See merge request !64
parents ba1a7642 1e2b68b0
Pipeline #15786 failed with stages
in 1 minute and 6 seconds
......@@ -350,7 +350,7 @@ describe("postgres adapter", () => {
adapter.getDataFromView(view, (err, result) => {
expect(err).to.be.a("null");
expect(result).to.be.an("array");
expect(result).to.have.length(5);
expect(result).to.have.length(2);
expect(result[0]).to.be.an("object");
let keys: string[] = [];
keys = keys.concat(view.metrics.map((item) => item.name));
......@@ -369,7 +369,7 @@ describe("postgres adapter", () => {
adapter.getDataFromView(view, (err, result) => {
expect(err).to.be.a("null");
expect(result).to.be.an("array");
expect(result).to.have.length(5);
expect(result).to.have.length(1);
expect(result[0]).to.be.an("object");
let keys: string[] = [];
keys = keys.concat(view.metrics.map((item) => item.name));
......
......@@ -25,22 +25,12 @@ import { Dimension } from "../core/dimension";
import { Clause } from "../core/clause";
import { Filter, FilterOperator } from "../core/filter";
import { AggregationType, RelationType, DataType } from "../common/types";
import { Operation, Opcode } from "../common/expression";
import { View } from "../core/view";
interface ExpandedView {
dimensions: Dimension[];
metrics: Metric[];
dimMetrics: Metric[];
keys: Dimension[];
clauses: Clause[];
from: string;
id: string;
origin: boolean;
}
interface DimInfo {
dim: Dimension;
views: ExpandedView[];
views: View[];
}
interface DimTranslation {
......@@ -50,516 +40,152 @@ interface DimTranslation {
expanded: boolean;
}
interface QueryAndName {
query: string;
name: string;
}
interface DimAndNameMap {
dimensions: {[key: string]: DimInfo};
views: {[key: string]: View};
}
export abstract class SQLAdapter extends Adapter {
public getQueryFromView(view: View): string {
/*
Find the base (materialized) views that has this data and exapand
than (make a parse to the format used in the adapter)
*/
const materialized = this.searchMaterializedViews(view).sort((a, b) => {
return (a.id < b.id) ? -1 : 1;
}).map((item) => {
return {
id: item.id,
from: "view_" + item.id,
dimMetrics: [],
metrics: item.metrics.filter((i) => {
return view.metrics.some((j) => i.name === j.name);
}),
dimensions: item.dimensions,
keys: item.keys,
clauses: item.clauses,
origin: item.origin
};
const partials = this.buildPartials(view).filter((i) => {
return i.query !== "";
}).map((i) => {
return i.name + " AS (" + i.query + ")";
});
// Remove repeated views from the result
let partialJoin = [materialized[0]];
for (let i = 1; i < materialized.length; ++i) {
if (materialized[i - 1].id !== materialized[i].id) {
partialJoin.push(materialized[i]);
}
let withClause = "";
if (partials.length > 0) {
withClause = "WITH " + partials.join(", ") + " ";
}
const blackList = view.dimensions.map((i) => i.name);
for (let i = 0; i < view.clauses.length; ++i) {
if (view.clauses[i].filters.length === 1) {
let filter = view.clauses[i].filters[0];
if (filter.operator === FilterOperator.EQUAL) {
blackList.push(filter.target.name);
}
}
}
let sort = "";
if (view.sort.length > 0) {
// Sorting
const order = view.sort.map((item) => {
return "\"" + item.name + "\"";
}).join(",");
/*
If there is more than one source of data (tables/views)
a join is needed.
Partial Join represents how many sources still exists,
every join reduces this number.
*/
let clausesToCover = view.clauses.map((i) => i);
while (partialJoin.length > 1) {
/*
Variable map finds what dimenensions are still needed to
complete this query, they are required for 2 reasons.
1 - To make joins
2 - Because they are in the query
For each view that has this dimension we add one score to
this dimension, if they are in the query the same.
Automatically if the dimension is in the query there will be
at least one view with this atribute (or the query could not be
completed) so dimensions in the query always have score of
at least 2.
To make a join the dimension must be in 2 different views,
creating a score of 2 either.
If the score is less than 2 so this dimension is not required
anymore and can be removed.
*/
let map: { [key: string]: number } = {};
let partialsChange = false;
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions;
for (let k = 0; k < dims.length; ++k) {
if (!map[dims[k].name]) {
map[dims[k].name] = 1;
}
sort = " ORDER BY " + order;
}
else {
++map[dims[k].name];
}
}
}
return withClause + "SELECT * FROM " + this.viewName(view) + sort + ";";
}
for (let i = 0; i < view.dimensions.length; ++i) {
let dim = view.dimensions[i];
while (dim !== null) {
if (map[dim.name]) {
++map[dim.name];
}
dim = dim.parent;
private buildPartials(view: View): QueryAndName[] {
let op = view.operation;
let queue: View[] = op.values.map((i) => i);
const output: QueryAndName[] = [{
query: this.operation(op, view),
name: this.viewName(view)
}];
const map: {[key: string]: boolean } = {};
while (queue.length > 0) {
const partial = queue.shift();
if (!map[partial.id]) {
const query = this.operation(partial.operation, partial);
if (query !== "") {
map[partial.id] = true;
output.unshift({
query: query,
name: this.viewName(partial)
});
queue = queue.concat(partial.operation.values);
}
}
}
/*
Also mark scores for dimensions inside clauses
*/
for (let i = 0; i < clausesToCover.length; ++i) {
for (let j = 0; j < clausesToCover[i].targets.length; ++j) {
if (map[clausesToCover[i].targets[j].name]) {
++map[clausesToCover[i].targets[j].name];
}
}
}
return output;
}
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions.filter((item) => {
return map[item.name] > 1;
});
const keys = partialJoin[i].keys.filter((item) => {
return map[item.name] > 1;
});
/*
At this point the dimensions with less than score 2
are removed, if this happens the view is agreggated
again, with less dimensions, removing this dimension
from the view.
*/
let coveredClauses: Clause[] = [];
let notCoveredClauses: Clause[] = [];
/*
If all dimensions in a clause are a sub set of the
dimensions of a view, this clause is apllied now,
propagating the clause to this point.
Then this clause is removed from the set of clauses
*/
for (let j = 0; j < clausesToCover.length; ++j) {
if (clausesToCover[j].isCovered(partialJoin[i].dimensions)) {
coveredClauses.push(clausesToCover[j]);
}
else {
notCoveredClauses.push(clausesToCover[j]);
}
}
clausesToCover = notCoveredClauses.filter((clause) => {
return !partialJoin[i].clauses.some((c) => c.id === clause.id);
});
if (dims.length < partialJoin[i].dimensions.length || coveredClauses.length > 0) {
const partial = new View({
metrics: partialJoin[i].metrics,
dimensions: dims,
keys: keys,
origin: false,
clauses: coveredClauses.concat(partialJoin[i].clauses),
materialized: false
});
const from = "(" +
this.buildQuery(partial, [partialJoin[i]], false) +
") AS view_" + partial.id + "\n";
protected viewName(view: View): string {
return "view_" + view.id;
}
partialJoin[i].id = partial.id;
partialJoin[i].dimensions = partial.dimensions;
partialJoin[i].keys = partial.keys;
partialJoin[i].origin = partial.origin;
partialJoin[i].from = from;
private operation(op: Operation, view: View): string {
switch (op.opcode) {
case Opcode.REDUCE:
return this.buildOperation(view, op.values, false);
case Opcode.JOIN:
return this.buildOperation(view, op.values, true);
default:
// Is unnecessary make a push function. Push = materialized
// No need for materialized partials
return "";
}
}
partialsChange = true;
}
private buildOperation(view: View, partials: View[], isJoin: boolean): string {
// Mapping, which views the metrics and dimensions are
const mapping = this.buildMaps(partials);
// Projection
const metrics = view.metrics.map((i) => {
const sourceView = mapping.views[i.name];
return this.translateMetric(i, sourceView);
});
const dimensions = view.dimensions.map((dimension) => {
let dim = dimension;
while (!mapping.views[dim.name]) {
dim = dim.parent;
}
/*
If at least one of the views changed (have the number of
dimensions reduced) returns to the begining of the loop
again.
Othewise we need to make a join.
*/
if (!partialsChange) {
/*
Sorting the views by keys.
If the keys are identical, then they
will be in sequence, and views with identical
keys can be joined.
Sort an array of keys is the same as sort a
array of strings.
*/
const sorted = partialJoin.sort((a, b) => {
return this.compareKeys(a.keys, b.keys, blackList);
});
/*
SUPER WARNING: WHEN THE BLACK LIST IS USED THE VIEW IS
UNMATERIALIZEBLE, BUT THE QUERY CAN AGGREGATE THE VALUES
The blackList is the array of dimensions of the query plus
the dimensions in filters using the equality operator.
In further coments is expained that the relation to make
a join must be one-to-one between the tables.
However and a dimension is choosed, a sub view is
created and if the relation is preserved in the sub view
the query can be agregated, but this view cannot be re-used
so it is unmaterializeble.
The equality operator is the same as select one subview.
*/
/*
First of all, the remaining views are splited in segments.
A segment contains views with the same keys that are great
to make joins. Joins like this do not create "dimensional
metrics".
In joins like this one row of each view will be connected
with at most one row of each other table.
*/
const segment = [[sorted[0]]];
let segmentId = 0;
for (let i = 1; i < sorted.length; ++i) {
if (this.compareKeys(sorted[i - 1].keys, sorted[i].keys, blackList) === 0) {
segment[segmentId].push(sorted[i]);
}
else {
++segmentId;
segment.push([sorted[i]]);
}
}
partialJoin = [];
let ableToJoin = false;
for (let i = 0; i < segment.length; ++i) {
/*
If a segment has more than one view, a join can be made
*/
if (segment[i].length > 1) {
let mets: Metric[] = [];
let clauses: Clause[] = [];
let dims: Dimension[] = [];
let dimMetrics: Metric[] = [];
for (let j = 0; j < segment[i].length; ++j) {
mets = mets.concat(segment[i][j].metrics);
clauses = clauses.concat(segment[i][j].clauses);
dims = dims.concat(segment[i][j].dimensions);
dimMetrics = dimMetrics.concat(segment[i][j].dimMetrics);
}
dims = this.removeDuplicatedDimensions(dims);
/*
Its atributes are just concatenated and the
duplicates removed.
*/
const partial = new View({
metrics: mets,
dimensions: dims,
keys: segment[i][0].keys,
origin: false,
clauses: clauses,
materialized: false
});
const viewFrom = "(" +
this.buildQuery(partial, segment[i], false) +
") AS view_" + partial.id + "\n";
partialJoin.push({
id: partial.id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: partial.metrics,
dimensions: partial.dimensions,
keys: partial.keys,
clauses: partial.clauses,
origin: partial.origin
});
ableToJoin = true;
}
else {
/*
If the segment has just one view, anything can be
done at this point, so just reinsert this view in
set of views.
*/
partialJoin.push(segment[i][0]);
}
}
/*
If at least one join was made in the last part (a segment
with more than one view) than return to the begining of the
loop.
This permits after a join remove the dimensions that were
only choosen to this join, and are no longer required
Ideally the joins should be restrict the join method used
above, but in some cases this can not be done.
So if all the segments have only one view inside, move
to the next method.
*/
if (!ableToJoin) {
/*
At this point 2 views will be joined, first the
similarity with each pair of views is calculated,
the pair with the biggedt similarity will be joined.
Similarity is calculated with the number of common
dimensions in the keys.
*/
let similarity = 0;
let idx0 = 0;
let idx1 = 1;
for (let i = 0; i < partialJoin.length; ++i) {
for (let j = i + 1 ; j < partialJoin.length; ++j) {
const pi = partialJoin[i].keys;
const pj = partialJoin[j].keys;
let score = this.similarDimensions (pi, pj);
if (similarity < score) {
similarity = score;
idx0 = i;
idx1 = j;
}
}
}
const sourceView = mapping.views[dim.name];
return this.translateDimension(dimension, dim, sourceView);
});
const projElem = dimensions.map((i) => i.aliased).concat(metrics);
const partial0 = partialJoin[idx0];
const partial1 = partialJoin[idx1];
partialJoin.splice(idx1, 1);
partialJoin.splice(idx0, 1);
/*
Once the views are select they are joined with the
same method, concatenedted its atributes and
removing duplicates, however the nasty effect of
this join is the creation of "dimensional metrics".
"Dimensional metrics" are metrics that can no longer
be aggregated, and at this point to the end
of a query they will act as dimensions.
This change happens to avoid inconsistency generated
by a join where one row of one table can be connected
to more than one of other table.
Take this example.
View0 : metrics [met0], dimensions [dim0]
values: [{met0: 10, dim0: 1}]
View1 : metrics [met1], dimensions [dim2]
values: [{met1: 10, dim2: 1}. {met1: 5, dim2: 2}]
View2 : metrics [], dimensions [dim0, dim1, dim2]
values: [
{dim0: 1, dim1: 1, dim2: 1},
{dim0: 1, dim1: 1, dim2: 2}
]
The query is metrics [met0, met1] and dimensions [dim1]
First a join of View0 and View1 is made, the result
is: [
{dim0: 1, dim1: 1, dim2: 1, met0: 10},
{dim0: 1, dim1: 1, dim2: 2, met0: 10}
]
Note that the value of met0 is duplicated.
Now dim0 is removed, than joined with view2 resulting
in: [
{met1: 10, dim1: 1, dim2: 1, met0: 10},
{met1: 5 , dim1: 1, dim2: 2, met0: 10}
]
Lets assume that the agregation is SUM
If we remove dim2 and re-agregate the result is: [
{met1: 15, dim1: 1, met0: 20}
]
This result is wrong. The replication of the value
met0 affects the result.
See if met1 was not required, first the dimemnsion would
be reduced, left dim0 and dim1, than joined that reduced
again resulting in the value [
{dim1:1, met0: 10}
]
Is this case there is no duplication and the aggregation
does not include more rows than should.
To solve this problem the met0 must become a dimension,
in other words, not aggregated again. If the met0 was
not agregated in the query met0, met1, dim1 the result
is: [
{met1: 15, dim1: 1, met0: 10}
]
what is compatible.
After this extreme long explanation what must be
known is: Joining views with diferent keys
generate "dimensional metrics".
Views with "dimensional metrics" can not used for future
queries because can not be re-agregated, so this must be
avoided and is one-query only views.
*/
let dimMetrics: Metric[];
let mets: Metric[];
let dims = partial0.dimensions.concat(partial1.dimensions);
dims = this.removeDuplicatedDimensions(dims);
let keys = partial0.keys.concat(partial1.keys);
keys = this.removeDuplicatedDimensions(keys);
if (partial0.keys.length === similarity) {
/*
Here the metrics become dimensions, but the effect
can be reduced. If the keys of partial0
is a sub set of the keys ou partial1
than the number of rows of partial 1 is not
affected, in other words the metrics of partial1
can be aggregated and does not need to become
dimensions.
*/
partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics);
partial0.metrics = [];
mets = partial1.metrics;
}
// Grouping
const grouped = dimensions.map((item) => {
return (item.expanded) ? item.alias : item.noalias;
});
else if (partial1.keys.length === similarity) {
/*
The same occurs if the keys of partia1 is a subset
of partial0.
*/
partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics);
partial1.metrics = [];
mets = partial0.metrics;
}
// Selection
const conds = [];
const clauses = this.orphanClauses(view, partials);
for (let i = 0; i < clauses.length; ++i) {
const trClause = this.translateClause(clauses[i], mapping.views);
if (trClause) {
conds.push("(" + trClause + ")");
}
}
else {
/*
But if there is no sub set, than both sides have
the metrics turned in dimensions.
*/
partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics);
partial0.metrics = [];
partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics);
partial1.metrics = [];
mets = [];
// Joinning
if (isJoin) {
const dimMap = mapping.dimensions;
for (let i of Object.keys(dimMap)) {
if (dimMap[i].views.length > 1) {
const views = dimMap[i].views;
const dim = dimMap[i].dim;
const leftSide = this.buildColumn(dim, views.shift().id);
while (views.length > 0) {
const rightSide = this.buildColumn(dim, views.shift().id);
conds.push("(" + leftSide + "=" + rightSide + ")");
}
dimMetrics = partial0.dimMetrics.concat(partial1.dimMetrics);
const partial = new View({
metrics: mets,
dimensions: dims,
keys: keys,
origin: false,
clauses: partial0.clauses.concat(partial1.clauses),
materialized: false
});
const id = new View({
metrics: mets.concat(dimMetrics),
dimensions: dims,
keys: keys,
origin: false,
clauses: partial0.clauses.concat(partial1.clauses),
materialized: false
}).id;
const viewFrom = "(" +
this.buildQuery(partial, [partial0, partial1], false) +
") AS view_" + id + "\n";
partialJoin.push({
id: id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: mets,
dimensions: dims,
keys: keys,
clauses: partial.clauses,
origin: false
});
}
}
}
/*
When only one view remain, the query is made and a ;
is added at the end.
TODO: Probrably this last line adds one more
layer to the query, that is in fact unnecessary.
Think a way to remove-it.
*/
return this.buildQuery(view, partialJoin, true) + ";";
}
private searchMaterializedViews(view: View): View[] {
let r: View[] = [];
if (view.materialized) {
return [view];
}
// Assembly
const projection = "SELECT " + projElem.join(", ");
const sources = "FROM " + partials.map((i) => this.viewName(i)).join(", ");
const selection = (conds.length > 0) ? " WHERE " + conds.join(" AND ") : "";
else {
let children = view.childViews;
for (let i = 0; i < children.length; ++i) {
r = r.concat(this.searchMaterializedViews(children[i]));
}
let grouping = "";
if (grouped.length > 0) {
grouping = " GROUP BY " + grouped.join(",");
}
return r;
return projection + sources + selection + grouping;
}
private buildQuery(target: View, views: ExpandedView[], toSort: boolean): string {
const metrics = target.metrics;
const dimensions = target.dimensions;
const clauses = target.clauses;
const sort = target.sort;
private buildMaps(views: View[]): DimAndNameMap {
let dimMap: {[key: string]: DimInfo} = {};
let nameMap: {[key: string]: ExpandedView} = {};
let nameMap: {[key: string]: View} = {};
for (let i = 0; i < views.length; ++i) {
const mets = views[i].metrics;
......@@ -586,95 +212,20 @@ export abstract class SQLAdapter extends Adapter {
}
}
// Projection
const strMetrics = metrics.map((metric) => {
const view = nameMap[metric.name];
if (view) {
return this.translateMetric(metric, view);
}
return "";
}).filter((item) => item !== "");
const parsedDimensions = dimensions.map((dimension) => {
let dim = dimension;
while (!nameMap[dim.name]) {
dim = dim.parent;
}
const view = nameMap[dim.name];
return this.translateDimension(dimension, dim, view);
});
let parsedDimMetrics: DimTranslation[] = [];
for (let i = 0; i < views.length; ++i) {
const dimMets = views[i].dimMetrics.map((item) => {
return this.translateDimMetric(item, views[i]);
});
parsedDimMetrics = parsedDimMetrics.concat(dimMets);
}
const totalDimensions = parsedDimensions.concat(parsedDimMetrics);
const strDimensions = totalDimensions.map ((item) => item.aliased);