Issue #41: Fix join in adapter

Signed-off-by: Lucas Fernandes de Oliveira's avatarLucas Fernandes de Oliveira <lfo14@inf.ufpr.br>
parent f8295d6d
Pipeline #12075 passed with stage
in 42 seconds
......@@ -25,6 +25,7 @@ schema:
- "met:0"
- "met:1"
- "met:2"
- "met:10"
-
alias: "View 1"
data: "test/postgres/fixtures/view1.json"
......@@ -44,6 +45,7 @@ schema:
metrics:
- "met:5"
- "met:6"
- "met:11"
-
alias: "View 3"
data: "test/postgres/fixtures/view3.json"
......@@ -93,8 +95,6 @@ schema:
- "dim:6"
metrics:
- "met:9"
- "met:10"
- "met:11"
-
alias: "view 9"
data: "test/postgres/fixtures/view9.json"
......
......@@ -319,4 +319,42 @@ describe("postgres adapter", () => {
done();
});
});
it("should get data from a unmaterializeble view", (done) => {
let view = adapterScenario.unMaterializebleView;
adapter.getDataFromView(view, (err, result) => {
expect(err).to.be.a("null");
expect(result).to.be.an("array");
expect(result).to.have.length(5);
expect(result[0]).to.be.an("object");
let keys: string[] = [];
keys = keys.concat(view.metrics.map((item) => item.name));
keys = keys.concat(view.dimensions.map((item) => item.name));
result.forEach((row) => {
expect(row).to.be.an("object");
expect(row).to.have.all.keys(keys);
});
done();
});
});
it("should get data from partial joins", (done) => {
let view = adapterScenario.partialJoinView;
adapter.getDataFromView(view, (err, result) => {
expect(err).to.be.a("null");
expect(result).to.be.an("array");
expect(result).to.have.length(5);
expect(result[0]).to.be.an("object");
let keys: string[] = [];
keys = keys.concat(view.metrics.map((item) => item.name));
keys = keys.concat(view.dimensions.map((item) => item.name));
result.forEach((row) => {
expect(row).to.be.an("object");
expect(row).to.have.all.keys(keys);
});
done();
});
});
});
......@@ -27,9 +27,25 @@ import { AggregationType, RelationType } from "../common/types";
import { View } from "../core/view";
import { Pool, PoolConfig } from "pg";
interface ExpandedView {
dimensions: Dimension[];
metrics: Metric[];
dimMetrics: Metric[];
keys: Dimension[];
clauses: Clause[];
from: string;
id: string;
origin: boolean;
}
interface DimInfo {
dim: Dimension;
views: View[];
views: ExpandedView[];
}
interface DimTranslation {
aliased: string;
noalias: string;
}
export class PostgresAdapter extends Adapter {
......@@ -60,18 +76,422 @@ export class PostgresAdapter extends Adapter {
}
public getQueryFromView(view: View): string {
/*
Find the base (materialized) views that has this data and exapand
than (make a parse to the format used in the adapter)
*/
const materialized = this.searchMaterializedViews(view).sort((a, b) => {
return (a.id < b.id) ? -1 : 1;
}).map((item) => {
return {
id: item.id,
from: "view_" + item.id,
dimMetrics: [],
metrics: item.metrics.filter((i) => {
return view.metrics.some((j) => i.name === j.name);
}),
dimensions: item.dimensions,
keys: item.keys,
clauses: item.clauses,
origin: item.origin
};
});
const unique = [materialized[0]];
// Remove repeated views from the result
let partialJoin = [materialized[0]];
for (let i = 1; i < materialized.length; ++i) {
if (materialized[i - 1].id !== materialized[i].id) {
unique.push(materialized[i]);
partialJoin.push(materialized[i]);
}
}
return this.buildQuery(view, unique);
/*
If there is more than one source of data (tables/views)
a join is needed.
Partial Join represents how many sources still exists,
every join reduces this number.
*/
while (partialJoin.length > 1) {
/*
Variable map finds what dimenensions are still needed to
complete this query, they are required for 2 reasons.
1 - To make joins
2 - Because they are in the query
For each view that has this dimension we add one score to
this dimension, if they are in the query the same.
Automatically if the dimension is in the query there will be
at least one view with this atribute (or the query could not be
completed) so dimensions in the query always have score of
at least 2.
To make a join the dimension must be in 2 different views,
creating a score of 2 either.
If the score is less than 2 so this dimension is not required
anymore and can be removed.
*/
let map: { [key: string]: number } = {};
let partialsChange = false;
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions;
for (let k = 0; k < dims.length; ++k) {
if (!map[dims[k].name]) {
map[dims[k].name] = 1;
}
else {
++map[dims[k].name];
}
}
}
for (let i = 0; i < view.dimensions.length; ++i) {
let dim = view.dimensions[i];
while (dim !== null) {
if (map[dim.name]) {
++map[dim.name];
}
dim = dim.parent;
}
}
for (let i = 0; i < partialJoin.length; ++i) {
const dims = partialJoin[i].dimensions.filter((item) => {
return map[item.name] > 1;
});
const keys = partialJoin[i].keys.filter((item) => {
return map[item.name] > 1;
});
/*
At this point the dimensions with less than score 2
are removed, if this happens the view is agreggated
again, with less dimensions, removing this dimension
from the view.
*/
if (dims.length < partialJoin[i].dimensions.length) {
const partial = new View({
metrics: partialJoin[i].metrics,
dimensions: dims,
keys: keys,
origin: false,
clauses: partialJoin[i].clauses,
materialized: false
});
const from = "(" +
this.buildQuery(partial, [partialJoin[i]]) +
") AS view_" + partial.id + "\n";
partialJoin[i].id = partial.id;
partialJoin[i].dimensions = partial.dimensions;
partialJoin[i].keys = partial.keys;
partialJoin[i].origin = partial.origin;
partialJoin[i].from = from;
partialsChange = true;
}
}
/*
If at least one of the views changed (have the number of
dimensions reduced) returns to the begining of the loop
again.
Othewise we need to make a join.
*/
if (!partialsChange) {
/*
Sorting the views by keys.
If the keys are identical, then they
will be in sequence, and views with identical
keys can be joined.
Sort an array of keys is the same as sort a
array of strings.
*/
const sorted = partialJoin.sort((a, b) => {
return this.compareKeys(a.keys, b.keys);
});
/*
First of all, the remaining views are splited in segments.
A segment contains views with the same keys that are great
to make joins. Joins like this do not create "dimensional
metrics".
In joins like this one row of each view will be connected
with at most one row of each other table.
*/
const segment = [[sorted[0]]];
let segmentId = 0;
for (let i = 1; i < sorted.length; ++i) {
if (this.compareKeys(sorted[i - 1].keys, sorted[i].keys) === 0) {
segment[segmentId].push(sorted[i]);
}
else {
++segmentId;
segment.push([sorted[i]]);
}
}
partialJoin = [];
let ableToJoin = false;
for (let i = 0; i < segment.length; ++i) {
/*
If a segment has more than one view, a join can be made
*/
if (segment[i].length > 1) {
let mets: Metric[] = [];
let clauses: Clause[] = [];
let dims: Dimension[] = [];
let dimMetrics: Metric[] = [];
for (let j = 0; j < segment[i].length; ++j) {
mets = mets.concat(segment[i][j].metrics);
clauses = clauses.concat(segment[i][j].clauses);
dims = dims.concat(segment[i][j].dimensions);
dimMetrics = dimMetrics.concat(segment[i][j].dimMetrics);
}
dims = this.removeDuplicatedDimensions(dims);
/*
Its atributes are just concatenated and the
duplicates removed.
*/
const partial = new View({
metrics: mets,
dimensions: dims,
keys: segment[i][0].keys,
origin: false,
clauses: clauses,
materialized: false
});
const viewFrom = "(" +
this.buildQuery(partial, segment[i]) +
") AS view_" + partial.id + "\n";
partialJoin.push({
id: partial.id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: partial.metrics,
dimensions: partial.dimensions,
keys: partial.keys,
clauses: partial.clauses,
origin: partial.origin
});
ableToJoin = true;
}
else {
/*
If the segment has just one view, anything can be
done at this point, so just reinsert this view in
set of views.
*/
partialJoin.push(segment[i][0]);
}
}
/*
If at least one join was made in the last part (a segment
with more than one view) than return to the begining of the
loop.
This permits after a join remove the dimensions that were
only choosen to this join, and are no longer required
Ideally the joins should be restrict the join method used
above, but in some cases this can not be done.
So if all the segments have only one view inside, move
to the next method.
*/
if (!ableToJoin) {
/*
At this point 2 views will be joined, first the
similarity with each pair of views is calculated,
the pair with the biggedt similarity will be joined.
Similarity is calculated with the number of common
dimensions in the keys.
*/
let similarity = 0;
let idx0 = 0;
let idx1 = 1;
for (let i = 0; i < partialJoin.length; ++i) {
for (let j = i + 1 ; j < partialJoin.length; ++j) {
const pi = partialJoin[i].keys;
const pj = partialJoin[j].keys;
let score = this.similarDimensions (pi, pj);
if (similarity < score) {
similarity = score;
idx0 = i;
idx1 = j;
}
}
}
const partial0 = partialJoin[idx0];
const partial1 = partialJoin[idx1];
partialJoin.splice(idx1, 1);
partialJoin.splice(idx0, 1);
/*
Once the views are select they are joined with the
same method, concatenedted its atributes and
removing duplicates, however the nasty effect of
this join is the creation of "dimensional metrics".
"Dimensional metrics" are metrics that can no longer
be aggregated, and at this point to the end
of a query they will act as dimensions.
This change happens to avoid inconsistency generated
by a join where one row of one table can be connected
to more than one of other table.
Take this example.
View0 : metrics [met0], dimensions [dim0]
values: [{met0: 10, dim0: 1}]
View1 : metrics [met1], dimensions [dim2]
values: [{met1: 10, dim2: 1}. {met1: 5, dim2: 2}]
View2 : metrics [], dimensions [dim0, dim1, dim2]
values: [
{dim0: 1, dim1: 1, dim2: 1},
{dim0: 1, dim1: 1, dim2: 2}
]
The query is metrics [met0, met1] and dimensions [dim1]
First a join of View0 and View1 is made, the result
is: [
{dim0: 1, dim1: 1, dim2: 1, met0: 10},
{dim0: 1, dim1: 1, dim2: 2, met0: 10}
]
Note that the value of met0 is duplicated.
Now dim0 is removed, than joined with view2 resulting
in: [
{met1: 10, dim1: 1, dim2: 1, met0: 10},
{met1: 5 , dim1: 1, dim2: 2, met0: 10}
]
Lets assume that the agregation is SUM
If we remove dim2 and re-agregate the result is: [
{met1: 15, dim1: 1, met0: 20}
]
This result is wrong. The replication of the value
met0 affects the result.
See if met1 was not required, first the dimemnsion would
be reduced, left dim0 and dim1, than joined that reduced
again resulting in the value [
{dim1:1, met0: 10}
]
Is this case there is no duplication and the aggregation
does not include more rows than should.
To solve this problem the met0 must become a dimension,
in other words, not aggregated again. If the met0 was
not agregated in the query met0, met1, dim1 the result
is: [
{met1: 15, dim1: 1, met0: 10}
]
what is compatible.
After this extreme long explanation what must be
known is: Joining views with diferent keys
generate "dimensional metrics".
Views with "dimensional metrics" can not used for future
queries because can not be re-agregated, so this must be
avoided and is one-query only views.
*/
let dimMetrics: Metric[];
let mets: Metric[];
let dims = partial0.dimensions.concat(partial1.dimensions);
dims = this.removeDuplicatedDimensions(dims);
let keys = partial0.keys.concat(partial1.keys);
keys = this.removeDuplicatedDimensions(keys);
if (partial0.keys.length === similarity) {
/*
Here the metrics become dimensions, but the effect
can be reduced. If the keys of partial0
is a sub set of the keys ou partial1
than the number of rows of partial 1 is not
affected, in other words the metrics of partial1
can be aggregated and does not need to become
dimensions.
*/
partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics);
partial0.metrics = [];
mets = partial1.metrics;
}
else if (partial1.keys.length === similarity) {
/*
The same occurs if the keys of partia1 is a subset
of partial0.
*/
partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics);
partial1.metrics = [];
mets = partial0.metrics;
}
else {
/*
But if there is no sub set, than both sides have
the metrics turned in dimensions.
*/
partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics);
partial0.metrics = [];
partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics);
partial1.metrics = [];
mets = [];
}
dimMetrics = partial0.dimMetrics.concat(partial1.dimMetrics);
const partial = new View({
metrics: mets,
dimensions: dims,
keys: keys,
origin: false,
clauses: partial0.clauses.concat(partial1.clauses),
materialized: false
});
const viewFrom = "(" +
this.buildQuery(partial, [partial0, partial1]) +
") AS view_" + partial0.id + partial1.id + "\n";
partialJoin.push({
id: partial0.id + partial1.id,
from: viewFrom,
dimMetrics: dimMetrics,
metrics: mets,
dimensions: dims,
keys: keys,
clauses: partial.clauses,
origin: false
});
}
}
}
/*
When only one view remain, the query is made and a ;
is added at the end.
TODO: Probrably this last line adds one more
layer to the query, that is in fact unnecessary.
Think a way to remove-it.
*/
return this.buildQuery(view, partialJoin) + ";";
}
private searchMaterializedViews(view: View): View[] {
......@@ -90,27 +510,22 @@ export class PostgresAdapter extends Adapter {
return r;
}
private buildQuery(target: View, views: View[]) {
private buildQuery(target: View, views: ExpandedView[]) {
const metrics = target.metrics;
const dimensions = target.dimensions;
const clauses = target.clauses;
let dimMap: {[key: string]: DimInfo} = {};
let metMap: {[key: string]: View[]} = {};
let nameMap: {[key: string]: View} = {};
let nameMap: {[key: string]: ExpandedView} = {};
for (let i = 0; i < views.length; ++i) {
const mets = views[i].metrics;
const dims = views[i].dimensions;
for (let j = 0; j < mets.length; ++j) {
if (!metMap[mets[j].name]) {
metMap[mets[j].name] = [views[i]];
if (!nameMap[mets[j].name]) {
nameMap[mets[j].name] = views[i];
}
else {
metMap[mets[j].name].push(views[i]);
}
}
for (let j = 0; j < dims.length; ++j) {
......@@ -131,11 +546,12 @@ export class PostgresAdapter extends Adapter {
// Projection
const strMetrics = metrics.map((metric) => {
const view = nameMap[metric.name];
let func = this.getAggregateFunction(metric.aggregation, view.origin);
let quotedName = "\"" + metric.name + "\"";
let extMetric = func + "(" + this.buildColumn(metric, view.id) + ")";
return extMetric + " AS " + quotedName;
});
if (view) {
return this.translateMetric(metric, view);
}
return "";
}).filter((item) => item !== "");
const parsedDimensions = dimensions.map((dimension) => {
let dim = dimension;
......@@ -143,18 +559,23 @@ export class PostgresAdapter extends Adapter {
dim = dim.parent;
}
const view = nameMap[dim.name];
const quotedName = "\"" + dimension.name + "\"";
let extDimension = this.buildColumn(dim, view.id);
let aux = dimension;
while (aux.name !== dim.name) {
extDimension = this.translateRelation(aux.relation, extDimension);
aux = aux.parent;
}
return { aliased: extDimension + " AS " + quotedName, noalias: extDimension };
return this.translateDimension(dimension, dim, view);
});
const strDimensions = parsedDimensions.map ((item) => item.aliased);
const grouped = parsedDimensions.map((item) => item.noalias);
let parsedDimMetrics: DimTranslation[] = [];
for (let i = 0; i < views.length; ++i) {
const dimMets = views[i].dimMetrics.map((item) => {
return this.translateDimMetric(item, views[i]);
});
parsedDimMetrics = parsedDimMetrics.concat(dimMets);
}
const totalDimensions = parsedDimensions.concat(parsedDimMetrics);
const strDimensions = totalDimensions.map ((item) => item.aliased);
const grouped = totalDimensions.map((item) => item.noalias);
const elements = strMetrics.concat(strDimensions);
// Joins
......@@ -194,14 +615,14 @@ export class PostgresAdapter extends Adapter {
// Assembly
const projection = "SELECT " + elements.join(",");
const source = " FROM " + views.map((view) => "view_" + view.id).join(",");
const source = " FROM " + views.map((view) => view.from).join(",");
const selection = (conds.length > 0) ? " WHERE " + conds.join(" AND ") : "";
let grouping = "";
if (grouped.length > 0) {
grouping = " GROUP BY " + grouped.join(",");
}
return projection + source + selection + grouping + ";";
return projection + source + selection + grouping;
}
......@@ -262,7 +683,7 @@ export class PostgresAdapter extends Adapter {
return "view_" + id + "." + quotedName;
}
private translateClause(clause: Clause, map: {[key: string]: View}): string {
private translateClause(clause: Clause, map: {[key: string]: ExpandedView}): string {
const r = clause.filters.map((item) => {
return this.translateFilter(item, map);
}).filter((item) => {
......@@ -271,7 +692,7 @@ export class PostgresAdapter extends Adapter {
return r.join(" OR ");
}
private translateFilter(filter: Filter, map: {[key: string]: View}): string {
private translateFilter(filter: Filter, map: {[key: string]: ExpandedView}): string {
if (!map[filter.target.name]) {
return "";
}
......@@ -284,6 +705,32 @@ export class PostgresAdapter extends Adapter {
return leftSide + op + quotedValue + dataType;
}
private translateMetric(metric: Metric, view: ExpandedView): string {
const func = this.getAggregateFunction(metric.aggregation, view.origin);
const quotedName = "\"" + metric.name + "\"";
const extMetric = func + "(" + this.buildColumn(metric, view.id) + ")";
return extMetric + " AS " + quotedName;
}
private translateDimMetric(metric: Metric, view: ExpandedView): DimTranslation {
const quotedName = "\"" + metric.name + "\"";
const extMetric = this.buildColumn(metric, view.id);
return { aliased: extMetric + " AS " + quotedName, noalias: extMetric };
}
private translateDimension(dimension: Dimension,
ancestor: Dimension,
view: ExpandedView): DimTranslation {
const quotedName = "\"" + dimension.name + "\"";
let extDimension = this.buildColumn(ancestor, view.id);
let aux = dimension;
while (aux.name !== ancestor.name) {
extDimension = this.translateRelation(aux.relation, extDimension);
aux = aux.parent;
}
return { aliased: extDimension + " AS " + quotedName, noalias: extDimension };
}
private translateOperator(op: FilterOperator): string {
switch (op) {
case FilterOperator.EQUAL:
......@@ -316,4 +763,49 @@ export class PostgresAdapter extends Adapter {
return "";
}
}
private compareKeys(a: Dimension[], b: Dimension[]): number {
let length = 0;
let res = a.length - b.length;
if (a.length < b.length) {
length = a.length;
}
else {
length = b.length;
}
for (let i = 0; i < length; ++i) {
if (a[i].name < b[i].name) {
return -1;
}
else if (a[i].name > b[i].name) {
return 1;
}
}
return res;
}
private similarDimensions(a: Dimension[], b: Dimension[]): number {
let count = 0;
for (let i = 0; i < a.length; ++i) {
if (b.some((itemB) => a[i].name === itemB.name)) {
count++;
}
}
return count;
}
private removeDuplicatedDimensions(candidateDims: Dimension[]): Dimension[] {
let filterDims: { [key: string]: boolean } = {};
const dims = [];
for (let i = 0; i < candidateDims.length; ++i) {
if (!filterDims[candidateDims[i].name]) {
dims.push(candidateDims[i]);
filterDims[candidateDims[i].name] = true;
}
}
return dims;