#!/usr/bin/env python3
# Provenance: scripts/helpers/dc2_mass_insert_update.py, introduced by commit
# 2fd30a009aa816532f6a7fe075f67383e8d21fbf ("Issue SCRUM#165: Add dc2 mass
# insertion script", Lucas Fernandes de Oliveira, 2018-11-09).
# The real configuration file
# (scripts/helpers/dc2_mass_insert_update_config.json) is listed in .gitignore
# because it holds database credentials; only the example below is versioned.
"""
    This script performs the mass insertion of points of presence (POP) of
    the project "Cidades Digitais 2". It also checks whether each point is
    already registered and, if so, updates its information instead.

    Usage:
        dc2_mass_insert_update.py CONFIG_JSON INPUT_CSV

    Outputs:
        * a SQL file (config key "outputQuery") with one
          insert_update_entity2(...) call per CSV row, wrapped in a
          transaction;
        * a CSV report (config key "outputCsv") telling what will happen to
          each row (insert / update / error) and which parts look invalid.

    Example configuration (dc2_mass_insert_update_config.json.example).
    "mapping" maps each internal field name to the CSV header column that
    holds it; an empty string means "not present in the CSV":

        {
            "outputQuery": "outputFilePath.sql",
            "outputCsv": "outputFilePath.csv",
            "database": {
                "user": "databaseUser",
                "host": "databaseHost",
                "password": "databasePassword",
                "dbname": "databaseName"
            },
            "mapping": {
                "idCity": "id_city",
                "ipAddr": "ip_addr",
                "oidDownload": "oid_download",
                "oidUpload": "oid_upload",
                "port": "port",
                "establishment": "",
                "phone": "",
                "email": "",
                "street": "",
                "neighborhood": "",
                "complement": "",
                "reference": "",
                "zipcode": "",
                "latitude": "",
                "longitude": ""
            }
        }
"""

import csv
import json
import re
import sys

# Loose e-mail shape check: something@something, no spaces, a single "@".
_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+$")
# Separators tolerated inside a zip code before checking it is numeric.
_ZIP_SEPARATORS_RE = re.compile(r"[\s.\-]")


def _isBlank(value):
    """
    Tell whether a value carries no information (None or blank text).
    """
    return value is None or str(value).strip() == ""


class Helper:
    """
    Aggregate of helper functions.
    """
    @staticmethod
    def wrapSQL(value):
        """
        Render a value as a SQL literal.

        None and the empty string become the bare keyword NULL (not the
        string 'NULL', which cannot be cast to INTEGER or INET). Anything
        else is converted to text and placed in single quotes, with embedded
        single quotes doubled so the value cannot terminate the literal.
        """
        if value is None:
            return "NULL"

        parsed = str(value)
        if parsed == "":
            return "NULL"

        # Quote doubling is sufficient with standard_conforming_strings=on,
        # the PostgreSQL default, where backslashes are ordinary characters.
        return "'" + parsed.replace("'", "''") + "'"

    @staticmethod
    def wrap(value, toNull=False):
        """
        Render a value as a double-quoted CSV field.

        None is rendered as an empty field. If toNull is true, an empty
        value is rendered as NULL instead. Embedded double quotes are
        doubled, as the CSV format requires.
        """
        parsed = "" if value is None else str(value)
        if toNull and parsed == "":
            parsed = "NULL"
        return "\"" + parsed.replace("\"", "\"\"") + "\""

    @staticmethod
    def wrapType(value, dataType):
        """
        Render a value as a SQL literal followed by a cast to dataType,
        e.g. '161'::INTEGER or NULL::TEXT.
        """
        return Helper.wrapSQL(value) + "::" + dataType


class DBHandler:
    """
    Facade to the database. Every interaction with the database must be
    made through this class.

    The objective of this class is to deliver only the data and hide all
    database interactions.
    """
    # Keys of the "database" configuration section handed to the driver.
    CONNECTION_FIELDS = ("user", "host", "password", "dbname")

    def __init__(self, config):
        """
        Open a connection to a PostgreSQL database.

        Parameters:
            config: dictionary (the "database" section of the configuration
                    file) with the keys user, host, password and dbname.

        Raises KeyError if one of those keys is missing. The connection is
        stored in self.conn.
        """
        # Imported here so the CSV/SQL generation code stays importable on
        # machines that do not have the database driver installed.
        import psycopg2

        # Keyword arguments let the driver do the quoting; a hand-built
        # "key = value" string breaks on values with spaces or quotes.
        params = {field: config[field] for field in self.CONNECTION_FIELDS}
        self.conn = psycopg2.connect(**params)

    def existingPops(self):
        """
        Fetch the monitoring identity of every active point.

        Returns a dictionary whose keys are PopMonit ids, i.e. tuples
        (ip_addr, oid_download, oid_upload, port), and whose values are the
        corresponding id_point.
        """
        query = (
            "SELECT e.id_point, e.ip_addr, e.oid_download, e.oid_upload, e.port "
            "FROM point p INNER JOIN entity2 e ON p.id = e.id_point "
            "WHERE p.is_active = true"
        )
        with self.conn.cursor() as cursor:
            cursor.execute(query)
            rows = cursor.fetchall()

        existing = {}
        for idPoint, ipAddr, oidDownload, oidUpload, port in rows:
            # A NULL port must not abort the whole run.
            port = None if port is None else int(port)
            existing[PopMonit(ipAddr, oidDownload, oidUpload, port).id] = idPoint
        return existing

    def close(self):
        """
        Close the database connection.
        """
        self.conn.close()


class FileHandler:
    """
    Handles files. Any interaction with input files is made through this
    class.
    """
    @staticmethod
    def parseCsv(filePath, mapping):
        """
        Read a ';'-separated CSV file and turn each data row into a
        PopRecord.

        Parameters:
            filePath: path of the UTF-8 CSV file; its first row is the
                      header.
            mapping:  dictionary from internal field name to CSV header
                      name. Fields mapped to a header that is not in the
                      file are read as None.

        Returns a list of PopRecord; an empty file yields an empty list and
        completely blank lines are skipped.
        """
        # utf-8-sig also accepts plain UTF-8 and drops the byte-order mark
        # that spreadsheet exports prepend to the first header name.
        with open(filePath, newline='', encoding='utf-8-sig') as f:
            rows = list(csv.reader(f, delimiter=';'))

        if not rows:
            return []

        # FIXME: The order of elements in the CSV file is not decided yet,
        # neither are the header columns. When decided, remove the
        # configurable mapping.
        header = rows[0]
        positions = {}
        for column, field in enumerate(header):
            # First occurrence wins when a header name is repeated.
            positions.setdefault(field, column)

        indexing = {key: positions.get(name) for key, name in mapping.items()}
        return [FileHandler.createPop(row, indexing) for row in rows[1:] if row]

    @staticmethod
    def createPop(record, indexing):
        """
        Transform a CSV line into a PopRecord object.

        Parameters:
            record:   list with the cells of one CSV row.
            indexing: dictionary from internal field name to the column
                      position in record, or None when the field is absent.

        Text fields are kept as read (None when the column is absent or the
        row is too short). idCity and port are converted to int; a blank
        cell gives None and a non-numeric one raises ValueError.
        """
        def text(key):
            column = indexing.get(key)
            if column is None or column >= len(record):
                return None
            return record[column]

        def integer(key):
            raw = text(key)
            if _isBlank(raw):
                return None
            return int(raw)

        popMonit = PopMonit(
            text("ipAddr"), text("oidDownload"), text("oidUpload"),
            integer("port"))
        popAddr = PopAddr(
            text("establishment"), text("phone"), text("email"),
            text("street"), text("neighborhood"), text("complement"),
            text("reference"), text("zipcode"), text("hasAltAddress"))
        popGeo = PopGeo(text("latitude"), text("longitude"))

        # idPoint is unknown at this stage; PopHandler.update fills it in.
        return PopRecord(None, integer("idCity"), popMonit, popAddr, popGeo)


class PopHandler:
    """
    Stores the input records and handles every transformation applied to
    them, as well as the generation of the output files.
    """
    def __init__(self, records):
        self.records = records

    def update(self, existingPops):
        """
        Fill in idPoint for the records that already exist in the database.

        Parameters:
            existingPops: dictionary where the key is the id of a PopMonit
                          existing in the database and the value is its
                          idPoint.
        """
        for record in self.records:
            if record.popMonit.id in existingPops:
                record.idPoint = existingPops[record.popMonit.id]

    def queryGeneration(self, fileOut):
        """
        Generate a SQL file that inserts/updates the handled records inside
        a single transaction.

        Parameters:
            fileOut: output file path.
        """
        with open(fileOut, "w", encoding="utf-8") as f:
            f.write("BEGIN;\n")
            for record in self.records:
                f.write(record.querify())
            f.write("COMMIT;\n")

    def csvGeneration(self, fileOut):
        """
        Generate a CSV report that informs what happens to each record:
        whether it is inserted or updated, or whether it is in error, plus
        the validity of each of its parts.

        Parameters:
            fileOut: output file path.
        """
        with open(fileOut, "w", encoding="utf-8") as f:
            f.write(PopRecord.csvHeader(True) + "\n")
            for record in self.records:
                f.write(record.csvfy(True) + "\n")


class PopMonit:
    """
    Point of presence monitoring information. The tuple
    (ipAddr, oidDownload, oidUpload, port) identifies a point and is used
    to match CSV rows against the database.
    """
    def __init__(self, ipAddr, oidDownload, oidUpload, port):
        self._id = (ipAddr, oidDownload, oidUpload, port)

    @property
    def id(self):
        return self._id

    @property
    def ipAddr(self):
        return self._id[0]

    @property
    def oidDownload(self):
        return self._id[1]

    @property
    def oidUpload(self):
        return self._id[2]

    @property
    def port(self):
        return self._id[3]

    def valid(self):
        """
        Tell whether all four monitoring fields are present.

        NOTE(review): the original script never defined the validation
        rules; this is a minimal presence check, confirm the real ones.
        """
        return not any(_isBlank(field) for field in self._id)

    def querify(self):
        """
        Render the monitoring fields as SQL arguments, in the order
        ip (INET), download OID, upload OID (TEXT), port (INTEGER).
        """
        return ",".join([
            Helper.wrapType(self.ipAddr, "INET"),
            Helper.wrapType(self.oidDownload, "TEXT"),
            Helper.wrapType(self.oidUpload, "TEXT"),
            Helper.wrapType(self.port, "INTEGER"),
        ])

    def csvfy(self):
        """
        Render the monitoring fields as CSV cells (missing values empty).
        """
        return ";".join(Helper.wrap(field) for field in self._id)

    @staticmethod
    def csvHeader():
        return "\"ipAddr\";\"oidDownload\";\"oidUpload\";\"port\""


class PopAddr:
    """
    Point of presence address information.
    """
    def __init__(self, establishment, phone, email, street, neighborhood,
                 complement, reference, zipcode, hasAltAddress=None):
        self.establishment = establishment
        self.phone = phone
        self.email = email
        self.street = street
        self.neighborhood = neighborhood
        self.complement = complement
        self.reference = reference
        self.zipcode = zipcode
        # Only reported in the CSV output; it is not sent to the database.
        self.hasAltAddress = hasAltAddress

    def valid(self):
        """
        Tell whether the optional e-mail and zip code, when given, are
        well formed. Absent fields are accepted.

        NOTE(review): the original script never defined the validation
        rules; these are minimal shape checks, confirm the real ones.
        """
        if not _isBlank(self.email):
            if _EMAIL_RE.match(str(self.email).strip()) is None:
                return False
        if not _isBlank(self.zipcode):
            if not _ZIP_SEPARATORS_RE.sub("", str(self.zipcode)).isdigit():
                return False
        return True

    def querify(self):
        """
        Render the address fields as TEXT SQL arguments. The order is the
        one insert_update_entity2 is called with, which differs from the
        constructor order.
        """
        ordered = (self.establishment, self.reference, self.street,
                   self.neighborhood, self.email, self.complement,
                   self.zipcode, self.phone)
        return ",".join(Helper.wrapType(field, "TEXT") for field in ordered)

    def csvfy(self):
        """
        Render the address fields as CSV cells, in csvHeader order.
        """
        ordered = (self.establishment, self.phone, self.email, self.street,
                   self.neighborhood, self.complement, self.reference,
                   self.zipcode, self.hasAltAddress)
        return ";".join(Helper.wrap(field) for field in ordered)

    @staticmethod
    def csvHeader():
        names = ("establishment", "phone", "email", "street", "neighborhood",
                 "complement", "reference", "zipcode", "hasAltAddress")
        return ";".join(Helper.wrap(name) for name in names)


class PopGeo:
    """
    Point of presence geolocation information.
    """
    def __init__(self, latitude, longitude):
        self.latitude = latitude
        self.longitude = longitude

    def valid(self):
        """
        Tell whether the coordinates are either both absent or both
        numbers inside the latitude [-90, 90] / longitude [-180, 180]
        ranges. A decimal comma is accepted.

        NOTE(review): the original script never defined the validation
        rules; confirm that absent coordinates are acceptable.
        """
        missing = [_isBlank(self.latitude), _isBlank(self.longitude)]
        if all(missing):
            return True
        if any(missing):
            return False
        try:
            latitude = float(str(self.latitude).replace(",", "."))
            longitude = float(str(self.longitude).replace(",", "."))
        except ValueError:
            return False
        return -90.0 <= latitude <= 90.0 and -180.0 <= longitude <= 180.0

    def querify(self):
        """
        Render latitude and longitude as TEXT SQL arguments.
        """
        return Helper.wrapType(self.latitude, "TEXT") + ","\
            + Helper.wrapType(self.longitude, "TEXT")

    def csvfy(self):
        """
        Render latitude and longitude as CSV cells (missing values empty).
        """
        return Helper.wrap(self.latitude) + ";" + Helper.wrap(self.longitude)

    @staticmethod
    def csvHeader():
        return "\"latitude\";\"longitude\""


class PopRecord:
    """
    Class that represents a POP. Stores the input information of one POP
    and its meta-data (idPoint is None until the point is known to exist
    in the database).
    """
    def __init__(self, idPoint, idCity, popMonit, popAddr, popGeo):
        self.idPoint = idPoint
        self.idCity = idCity
        self.popMonit = popMonit
        self.popAddr = popAddr
        self.popGeo = popGeo

    def querify(self):
        """
        Render the record as one "SELECT insert_update_entity2(...);" line.
        """
        arguments = [
            Helper.wrapType(self.idPoint, "INTEGER"),
            self.popAddr.querify(),
            Helper.wrapType(self.idCity, "INTEGER"),
            # Two INTEGER arguments the script has no data for.
            Helper.wrapType("", "INTEGER"),
            Helper.wrapType("", "INTEGER"),
            self.popMonit.querify(),
            self.popGeo.querify(),
        ]
        return "SELECT insert_update_entity2(" + ",".join(arguments) + ");\n"

    def csvStatus(self):
        """
        Render the four status cells described by csvStatusHeader:
        error/insert/update, then ok/invalid for monit, addr and
        geolocation.
        """
        if self.idCity is None:
            action = "error"
        elif self.idPoint is None:
            action = "insert"
        else:
            action = "update"

        cells = [action]
        for part in (self.popMonit, self.popAddr, self.popGeo):
            cells.append("ok" if part.valid() else "invalid")
        return ";".join(Helper.wrap(cell) for cell in cells)

    def csvfy(self, withStatus):
        """
        Render the record as one CSV line (without line terminator),
        optionally followed by the status cells.
        """
        r = Helper.wrap(self.idPoint) + ";" + self.popMonit.csvfy() + ";"\
            + self.popAddr.csvfy() + ";" + self.popGeo.csvfy()

        if withStatus:
            r += ";" + self.csvStatus()

        return r

    @staticmethod
    def csvStatusHeader():
        return "\"status\";\"monit\";\"addr\";\"geolocation\""

    @staticmethod
    def csvHeader(withStatus):
        """
        Render the CSV header line matching csvfy(withStatus).
        """
        r = "\"idPoint\";" + PopMonit.csvHeader() + ";"\
            + PopAddr.csvHeader() + ";" + PopGeo.csvHeader()
        if withStatus:
            r += ";" + PopRecord.csvStatusHeader()
        return r


def main(argv=None):
    """
    Script entry point.

    Parameters:
        argv: argument list, program name first; defaults to sys.argv.

    Returns the process exit status: 0 on success, 1 on wrong usage.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print('Usage: %s CONFIG_JSON INPUT_CSV' % argv[0], file=sys.stderr)
        return 1

    fileIn = argv[2]
    with open(argv[1]) as f:
        configIn = json.load(f)

    # Parse the input first so a malformed CSV fails before touching the DB.
    popHandler = PopHandler(FileHandler.parseCsv(fileIn, configIn["mapping"]))

    dbHandler = DBHandler(configIn["database"])
    try:
        popHandler.update(dbHandler.existingPops())
    finally:
        dbHandler.close()

    popHandler.queryGeneration(configIn["outputQuery"])
    popHandler.csvGeneration(configIn["outputCsv"])
    return 0


if __name__ == "__main__":
    sys.exit(main())