/* * Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre * Departamento de Informatica - Universidade Federal do Parana * * This file is part of blend. * * blend is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * blend is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with blend. If not, see . */ import { Adapter } from "../core/adapter"; import { Metric } from "../core/metric"; import { Dimension } from "../core/dimension"; import { Clause } from "../core/clause"; import { Filter, FilterOperator } from "../core/filter"; import { AggregationType, RelationType } from "../common/types"; import { View } from "../core/view"; import { Pool, PoolConfig } from "pg"; interface ExpandedView { dimensions: Dimension[]; metrics: Metric[]; dimMetrics: Metric[]; keys: Dimension[]; clauses: Clause[]; from: string; id: string; origin: boolean; } interface DimInfo { dim: Dimension; views: ExpandedView[]; } interface DimTranslation { aliased: string; noalias: string; } export class PostgresAdapter extends Adapter { private pool: Pool; constructor (config: PoolConfig) { super(); this.pool = new Pool(config); } public getDataFromView(view: View, cb: (error: Error, result?: any[]) => void): void { const query = this.getQueryFromView(view); this.pool.connect((err, client, done) => { if (err) { cb (err); return; } client.query(query, [], (error, result) => { // call 'done()' to release client back to pool done(); cb(error, (result) ? result.rows : null); }); }); } public materializeView(view: View): boolean { return false; } public getQueryFromView(view: View): string { /* Find the base (materialized) views that has this data and exapand than (make a parse to the format used in the adapter) */ const materialized = this.searchMaterializedViews(view).sort((a, b) => { return (a.id < b.id) ? -1 : 1; }).map((item) => { return { id: item.id, from: "view_" + item.id, dimMetrics: [], metrics: item.metrics.filter((i) => { return view.metrics.some((j) => i.name === j.name); }), dimensions: item.dimensions, keys: item.keys, clauses: item.clauses, origin: item.origin }; }); // Remove repeated views from the result let partialJoin = [materialized[0]]; for (let i = 1; i < materialized.length; ++i) { if (materialized[i - 1].id !== materialized[i].id) { partialJoin.push(materialized[i]); } } const blackList = view.dimensions.map((i) => i.name); for (let i = 0; i < view.clauses.length; ++i) { if (view.clauses[i].filters.length === 1) { let filter = view.clauses[i].filters[0]; if (filter.operator === FilterOperator.EQUAL) { blackList.push(filter.target.name); } } } /* If there is more than one source of data (tables/views) a join is needed. Partial Join represents how many sources still exists, every join reduces this number. */ let clausesToCover = view.clauses.map((i) => i); while (partialJoin.length > 1) { /* Variable map finds what dimenensions are still needed to complete this query, they are required for 2 reasons. 1 - To make joins 2 - Because they are in the query For each view that has this dimension we add one score to this dimension, if they are in the query the same. Automatically if the dimension is in the query there will be at least one view with this atribute (or the query could not be completed) so dimensions in the query always have score of at least 2. To make a join the dimension must be in 2 different views, creating a score of 2 either. If the score is less than 2 so this dimension is not required anymore and can be removed. */ let map: { [key: string]: number } = {}; let partialsChange = false; for (let i = 0; i < partialJoin.length; ++i) { const dims = partialJoin[i].dimensions; for (let k = 0; k < dims.length; ++k) { if (!map[dims[k].name]) { map[dims[k].name] = 1; } else { ++map[dims[k].name]; } } } for (let i = 0; i < view.dimensions.length; ++i) { let dim = view.dimensions[i]; while (dim !== null) { if (map[dim.name]) { ++map[dim.name]; } dim = dim.parent; } } /* Also mark scores for dimensions inside clauses */ for (let i = 0; i < clausesToCover.length; ++i) { for (let j = 0; j < clausesToCover[i].targets.length; ++j) { if (map[clausesToCover[i].targets[j].name]) { ++map[clausesToCover[i].targets[j].name]; } } } for (let i = 0; i < partialJoin.length; ++i) { const dims = partialJoin[i].dimensions.filter((item) => { return map[item.name] > 1; }); const keys = partialJoin[i].keys.filter((item) => { return map[item.name] > 1; }); /* At this point the dimensions with less than score 2 are removed, if this happens the view is agreggated again, with less dimensions, removing this dimension from the view. */ let coveredClauses: Clause[] = []; let notCoveredClauses: Clause[] = []; /* If all dimensions in a clause are a sub set of the dimensions of a view, this clause is apllied now, propagating the clause to this point. Then this clause is removed from the set of clauses */ for (let j = 0; j < clausesToCover.length; ++j) { if (clausesToCover[j].isCovered(partialJoin[i].dimensions)) { coveredClauses.push(clausesToCover[j]); } else { notCoveredClauses.push(clausesToCover[j]); } } clausesToCover = notCoveredClauses.filter((clause) => { return !partialJoin[i].clauses.some((c) => c.id === clause.id); }); if (dims.length < partialJoin[i].dimensions.length || coveredClauses.length > 0) { const partial = new View({ metrics: partialJoin[i].metrics, dimensions: dims, keys: keys, origin: false, clauses: coveredClauses.concat(partialJoin[i].clauses), materialized: false }); const from = "(" + this.buildQuery(partial, [partialJoin[i]]) + ") AS view_" + partial.id + "\n"; partialJoin[i].id = partial.id; partialJoin[i].dimensions = partial.dimensions; partialJoin[i].keys = partial.keys; partialJoin[i].origin = partial.origin; partialJoin[i].from = from; partialsChange = true; } } /* If at least one of the views changed (have the number of dimensions reduced) returns to the begining of the loop again. Othewise we need to make a join. */ if (!partialsChange) { /* Sorting the views by keys. If the keys are identical, then they will be in sequence, and views with identical keys can be joined. Sort an array of keys is the same as sort a array of strings. */ const sorted = partialJoin.sort((a, b) => { return this.compareKeys(a.keys, b.keys, blackList); }); /* SUPER WARNING: WHEN THE BLACK LIST IS USED THE VIEW IS UNMATERIALIZEBLE, BUT THE QUERY CAN AGGREGATE THE VALUES The blackList is the array of dimensions of the query plus the dimensions in filters using the equality operator. In further coments is expained that the relation to make a join must be one-to-one between the tables. However and a dimension is choosed, a sub view is created and if the relation is preserved in the sub view the query can be agregated, but this view cannot be re-used so it is unmaterializeble. The equality operator is the same as select one subview. */ /* First of all, the remaining views are splited in segments. A segment contains views with the same keys that are great to make joins. Joins like this do not create "dimensional metrics". In joins like this one row of each view will be connected with at most one row of each other table. */ const segment = [[sorted[0]]]; let segmentId = 0; for (let i = 1; i < sorted.length; ++i) { if (this.compareKeys(sorted[i - 1].keys, sorted[i].keys, blackList) === 0) { segment[segmentId].push(sorted[i]); } else { ++segmentId; segment.push([sorted[i]]); } } partialJoin = []; let ableToJoin = false; for (let i = 0; i < segment.length; ++i) { /* If a segment has more than one view, a join can be made */ if (segment[i].length > 1) { let mets: Metric[] = []; let clauses: Clause[] = []; let dims: Dimension[] = []; let dimMetrics: Metric[] = []; for (let j = 0; j < segment[i].length; ++j) { mets = mets.concat(segment[i][j].metrics); clauses = clauses.concat(segment[i][j].clauses); dims = dims.concat(segment[i][j].dimensions); dimMetrics = dimMetrics.concat(segment[i][j].dimMetrics); } dims = this.removeDuplicatedDimensions(dims); /* Its atributes are just concatenated and the duplicates removed. */ const partial = new View({ metrics: mets, dimensions: dims, keys: segment[i][0].keys, origin: false, clauses: clauses, materialized: false }); const viewFrom = "(" + this.buildQuery(partial, segment[i]) + ") AS view_" + partial.id + "\n"; partialJoin.push({ id: partial.id, from: viewFrom, dimMetrics: dimMetrics, metrics: partial.metrics, dimensions: partial.dimensions, keys: partial.keys, clauses: partial.clauses, origin: partial.origin }); ableToJoin = true; } else { /* If the segment has just one view, anything can be done at this point, so just reinsert this view in set of views. */ partialJoin.push(segment[i][0]); } } /* If at least one join was made in the last part (a segment with more than one view) than return to the begining of the loop. This permits after a join remove the dimensions that were only choosen to this join, and are no longer required Ideally the joins should be restrict the join method used above, but in some cases this can not be done. So if all the segments have only one view inside, move to the next method. */ if (!ableToJoin) { /* At this point 2 views will be joined, first the similarity with each pair of views is calculated, the pair with the biggedt similarity will be joined. Similarity is calculated with the number of common dimensions in the keys. */ let similarity = 0; let idx0 = 0; let idx1 = 1; for (let i = 0; i < partialJoin.length; ++i) { for (let j = i + 1 ; j < partialJoin.length; ++j) { const pi = partialJoin[i].keys; const pj = partialJoin[j].keys; let score = this.similarDimensions (pi, pj); if (similarity < score) { similarity = score; idx0 = i; idx1 = j; } } } const partial0 = partialJoin[idx0]; const partial1 = partialJoin[idx1]; partialJoin.splice(idx1, 1); partialJoin.splice(idx0, 1); /* Once the views are select they are joined with the same method, concatenedted its atributes and removing duplicates, however the nasty effect of this join is the creation of "dimensional metrics". "Dimensional metrics" are metrics that can no longer be aggregated, and at this point to the end of a query they will act as dimensions. This change happens to avoid inconsistency generated by a join where one row of one table can be connected to more than one of other table. Take this example. View0 : metrics [met0], dimensions [dim0] values: [{met0: 10, dim0: 1}] View1 : metrics [met1], dimensions [dim2] values: [{met1: 10, dim2: 1}. {met1: 5, dim2: 2}] View2 : metrics [], dimensions [dim0, dim1, dim2] values: [ {dim0: 1, dim1: 1, dim2: 1}, {dim0: 1, dim1: 1, dim2: 2} ] The query is metrics [met0, met1] and dimensions [dim1] First a join of View0 and View1 is made, the result is: [ {dim0: 1, dim1: 1, dim2: 1, met0: 10}, {dim0: 1, dim1: 1, dim2: 2, met0: 10} ] Note that the value of met0 is duplicated. Now dim0 is removed, than joined with view2 resulting in: [ {met1: 10, dim1: 1, dim2: 1, met0: 10}, {met1: 5 , dim1: 1, dim2: 2, met0: 10} ] Lets assume that the agregation is SUM If we remove dim2 and re-agregate the result is: [ {met1: 15, dim1: 1, met0: 20} ] This result is wrong. The replication of the value met0 affects the result. See if met1 was not required, first the dimemnsion would be reduced, left dim0 and dim1, than joined that reduced again resulting in the value [ {dim1:1, met0: 10} ] Is this case there is no duplication and the aggregation does not include more rows than should. To solve this problem the met0 must become a dimension, in other words, not aggregated again. If the met0 was not agregated in the query met0, met1, dim1 the result is: [ {met1: 15, dim1: 1, met0: 10} ] what is compatible. After this extreme long explanation what must be known is: Joining views with diferent keys generate "dimensional metrics". Views with "dimensional metrics" can not used for future queries because can not be re-agregated, so this must be avoided and is one-query only views. */ let dimMetrics: Metric[]; let mets: Metric[]; let dims = partial0.dimensions.concat(partial1.dimensions); dims = this.removeDuplicatedDimensions(dims); let keys = partial0.keys.concat(partial1.keys); keys = this.removeDuplicatedDimensions(keys); if (partial0.keys.length === similarity) { /* Here the metrics become dimensions, but the effect can be reduced. If the keys of partial0 is a sub set of the keys ou partial1 than the number of rows of partial 1 is not affected, in other words the metrics of partial1 can be aggregated and does not need to become dimensions. */ partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics); partial0.metrics = []; mets = partial1.metrics; } else if (partial1.keys.length === similarity) { /* The same occurs if the keys of partia1 is a subset of partial0. */ partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics); partial1.metrics = []; mets = partial0.metrics; } else { /* But if there is no sub set, than both sides have the metrics turned in dimensions. */ partial0.dimMetrics = partial0.dimMetrics.concat(partial0.metrics); partial0.metrics = []; partial1.dimMetrics = partial1.dimMetrics.concat(partial1.metrics); partial1.metrics = []; mets = []; } dimMetrics = partial0.dimMetrics.concat(partial1.dimMetrics); const partial = new View({ metrics: mets, dimensions: dims, keys: keys, origin: false, clauses: partial0.clauses.concat(partial1.clauses), materialized: false }); const id = new View({ metrics: mets.concat(dimMetrics), dimensions: dims, keys: keys, origin: false, clauses: partial0.clauses.concat(partial1.clauses), materialized: false }).id; const viewFrom = "(" + this.buildQuery(partial, [partial0, partial1]) + ") AS view_" + id + "\n"; partialJoin.push({ id: id, from: viewFrom, dimMetrics: dimMetrics, metrics: mets, dimensions: dims, keys: keys, clauses: partial.clauses, origin: false }); } } } /* When only one view remain, the query is made and a ; is added at the end. TODO: Probrably this last line adds one more layer to the query, that is in fact unnecessary. Think a way to remove-it. */ return this.buildQuery(view, partialJoin) + ";"; } private searchMaterializedViews(view: View): View[] { let r: View[] = []; if (view.materialized) { return [view]; } else { let children = view.childViews; for (let i = 0; i < children.length; ++i) { r = r.concat(this.searchMaterializedViews(children[i])); } } return r; } private buildQuery(target: View, views: ExpandedView[]) { const metrics = target.metrics; const dimensions = target.dimensions; const clauses = target.clauses; let dimMap: {[key: string]: DimInfo} = {}; let nameMap: {[key: string]: ExpandedView} = {}; for (let i = 0; i < views.length; ++i) { const mets = views[i].metrics; const dims = views[i].dimensions; for (let j = 0; j < mets.length; ++j) { if (!nameMap[mets[j].name]) { nameMap[mets[j].name] = views[i]; } } for (let j = 0; j < dims.length; ++j) { if (!dimMap[dims[j].name]) { dimMap[dims[j].name] = { dim: dims[j], views: [views[i]] }; nameMap[dims[j].name] = views[i]; } else { dimMap[dims[j].name].views.push(views[i]); } } } // Projection const strMetrics = metrics.map((metric) => { const view = nameMap[metric.name]; if (view) { return this.translateMetric(metric, view); } return ""; }).filter((item) => item !== ""); const parsedDimensions = dimensions.map((dimension) => { let dim = dimension; while (!nameMap[dim.name]) { dim = dim.parent; } const view = nameMap[dim.name]; return this.translateDimension(dimension, dim, view); }); let parsedDimMetrics: DimTranslation[] = []; for (let i = 0; i < views.length; ++i) { const dimMets = views[i].dimMetrics.map((item) => { return this.translateDimMetric(item, views[i]); }); parsedDimMetrics = parsedDimMetrics.concat(dimMets); } const totalDimensions = parsedDimensions.concat(parsedDimMetrics); const strDimensions = totalDimensions.map ((item) => item.aliased); const grouped = totalDimensions.map((item) => item.noalias); const elements = strMetrics.concat(strDimensions); // Joins let conds: string[] = []; for (let i of Object.keys(dimMap)) { let remainViews = dimMap[i].views.slice(); let dim = dimMap[i].dim; let leftSide = this.buildColumn(dim, remainViews.shift().id); if (remainViews.length > 0) { while (remainViews.length > 0) { const id = remainViews.shift().id; const rightSide = this.buildColumn(dim, id); conds.push(leftSide + " = " + rightSide); } } } // Selection let covered: Clause[] = []; for (let i = 0; i < views.length; ++i) { // Get the clauses that children already cover covered = covered.concat(views[i].clauses); } const toCover = clauses.filter((item) => !covered.some ((clause) => { return clause.id === item.id; })); toCover.forEach((item) => { const clause = "(" + this.translateClause(item, nameMap) + ")"; if (clause !== "()") { conds.push(clause); } }); // Assembly const projection = "SELECT " + elements.join(","); const source = " FROM " + views.map((view) => view.from).join(","); const selection = (conds.length > 0) ? " WHERE " + conds.join(" AND ") : ""; let grouping = ""; if (grouped.length > 0) { grouping = " GROUP BY " + grouped.join(","); } return projection + source + selection + grouping; } private getAggregateFunction(aggrType: AggregationType, origin: boolean): string { switch (aggrType) { case AggregationType.SUM: return "SUM"; case AggregationType.AVG: return "AVG"; case AggregationType.COUNT: return (origin) ? "COUNT" : "SUM"; case AggregationType.MAX: return "MAX"; case AggregationType.MIN: return "MIN"; default: return ""; } } private translateRelation(relation: RelationType, arg: string): string { switch (relation) { case RelationType.DAY: return this.applyRelation("EXTRACT", ["DAY FROM "], [arg]); case RelationType.MONTH: return this.applyRelation("EXTRACT", ["MONTH FROM "], [arg]); case RelationType.YEAR: return this.applyRelation("EXTRACT", ["YEAR FROM "], [arg]); case RelationType.DAYOFWEEK: return this.applyRelation("EXTRACT", ["DOW FROM "], [arg]); default: return ""; } } private applyRelation(name: string, args: string[], values: string[]): string { /* This adapter uses the concept of functions in Postgres to implement BLENDB sub-dimention relations, this functions applys the transformation to build the call of a Postgres funtion. Note that this function can be native from postgres, like EXTRACT, or even implemented on the database. This function is short and only used in the translateRelation method however is a bit complex and is possible to be used several times, because of that is puted appart to make easyer update and avoid problems Example applyRelation ("EXTRACT", "["DAY FROM"]", ["view_0.date"]) output: EXTRACT(DAY FROM view_0.date) */ return name + "(" + args.map((item, idx) => item + values[idx]).join(",") + ")"; } private buildColumn (item: Metric|Dimension, id: string): string { const quotedName = "\"" + item.name + "\""; return "view_" + id + "." + quotedName; } private translateClause(clause: Clause, map: {[key: string]: ExpandedView}): string { const r = clause.filters.map((item) => { return this.translateFilter(item, map); }).filter((item) => { return item !== ""; }); return r.join(" OR "); } private translateFilter(filter: Filter, map: {[key: string]: ExpandedView}): string { if (!map[filter.target.name]) { return ""; } const viewId = map[filter.target.name].id; const leftSide = this.buildColumn(filter.target, viewId); const op = this.translateOperator(filter.operator); const dataType = this.translateDataType(filter.target.dataType); const quotedValue = "'" + filter.value + "'"; return leftSide + op + quotedValue + dataType; } private translateMetric(metric: Metric, view: ExpandedView): string { const func = this.getAggregateFunction(metric.aggregation, view.origin); const quotedName = "\"" + metric.name + "\""; const extMetric = func + "(" + this.buildColumn(metric, view.id) + ")"; return extMetric + " AS " + quotedName; } private translateDimMetric(metric: Metric, view: ExpandedView): DimTranslation { const quotedName = "\"" + metric.name + "\""; const extMetric = this.buildColumn(metric, view.id); return { aliased: extMetric + " AS " + quotedName, noalias: extMetric }; } private translateDimension(dimension: Dimension, ancestor: Dimension, view: ExpandedView): DimTranslation { const quotedName = "\"" + dimension.name + "\""; let extDimension = this.buildColumn(ancestor, view.id); let aux = dimension; while (aux.name !== ancestor.name) { extDimension = this.translateRelation(aux.relation, extDimension); aux = aux.parent; } return { aliased: extDimension + " AS " + quotedName, noalias: extDimension }; } private translateOperator(op: FilterOperator): string { switch (op) { case FilterOperator.EQUAL: return " = "; case FilterOperator.NOTEQUAL: return " != "; case FilterOperator.GREATER: return " > "; case FilterOperator.LOWER: return " < "; case FilterOperator.GREATEREQ: return " >= "; case FilterOperator.LOWEREQ: return " <= "; default: return ""; } } private translateDataType(dt: string ): string { switch (dt) { case "date": return "::DATE"; case "integer": return "::INTEGER"; case "boolean": return "::BOOLEAN"; default: return ""; } } private compareKeys(a: Dimension[], b: Dimension[], blackList: string[]): number { /* SUPER WARNING: WHEN THE BLACK LIST IS USED THE VIEW IS UNMATERIALIZEBLE, BUT THE QUERY CAN AGGREGATE THE VALUES */ let c = a.filter((i) => !blackList.some((bad) => bad === i.name)); let d = b.filter((i) => !blackList.some((bad) => bad === i.name)); let length = 0; let res = c.length - d.length; if (c.length < d.length) { length = c.length; } else { length = d.length; } for (let i = 0; i < length; ++i) { if (c[i].name < d[i].name) { return -1; } else if (c[i].name > d[i].name) { return 1; } } return res; } private similarDimensions(a: Dimension[], b: Dimension[]): number { let count = 0; for (let i = 0; i < a.length; ++i) { if (b.some((itemB) => a[i].name === itemB.name)) { count++; } } return count; } private removeDuplicatedDimensions(candidateDims: Dimension[]): Dimension[] { let filterDims: { [key: string]: boolean } = {}; const dims = []; for (let i = 0; i < candidateDims.length; ++i) { if (!filterDims[candidateDims[i].name]) { dims.push(candidateDims[i]); filterDims[candidateDims[i].name] = true; } } return dims; } }