Commit ade6c326 authored by jvfpw18's avatar jvfpw18

Merge HOTMapper with database development

parent d4fc52a1
#!/bin/bash
# Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
# Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
#
# This file is part of HOTMapper.
#
# HOTMapper is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# HOTMapper is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
# ---------------------------------------------------------------------------------------#
# Esse script tem como objetivo facilitar a criação do banco de dados do projeto SIMCAQ,
# conforme a necessidade dos desenvolvedores. O código é livre para modificações contanto
......@@ -12,16 +30,7 @@
# ---------------------------------------------------------------------------------------#
fBase ()
{
mclient -d $1 base/regiao.sql
mclient -d $1 base/estado.sql
mclient -d $1 base/municipio.sql
mclient -d $1 base/siope_uf.sql
mclient -d $1 base/siope_mun.sql
mclient -d $1 base/siope_mun_seed.sql
mclient -d $1 base/instituicao_superior.sql
mclient -d $1 base/formacao_superior.sql
mclient -d $1 base/formacao_superior_seed.sql
mclient -d $1 base/ibge_pib.sql
./manage.py execute_sql_group base
}
# ---------------------------------------------------------------------------------------#
......@@ -62,6 +71,15 @@ fInsert()
}
# ---------------------------------------------------------------------------------------#
# ---------------------------------------------------------------------------------------#
# Function that creates the aggregate tables from SQL scripts
# ---------------------------------------------------------------------------------------#
fAggregate()
{
# Delegates to the manage.py CLI, asking it to run the "simcaq_aggregate" script group.
./manage.py execute_sql_group simcaq_aggregate
}
# ---------------------------------------------------------------------------------------#
# ---------------------------------------------------------------------------------------#
# Prints a help message when no input parameters are given
# ---------------------------------------------------------------------------------------#
......@@ -73,9 +91,9 @@ if [ ! $1 ]; then
printf "# 3. create: execute the commands to create the tables.\n"
printf "# 4. insert: execute the commands to insert data to tables.\n\n"
printf "# Estructure of commands:\n"
printf "# 1. ./auto.sh all [database_name] [path_to_files] [initial_year]"
printf "# 1. ./auto.sh all [path_to_files] [initial_year]"
printf " [final_year]\n"
printf "# 2. ./auto.sh base [database_name]\n"
printf "# 2. ./auto.sh base\n"
printf "# 3. ./auto.sh create\n"
printf "# 4. ./auto.sh insert [path_to_files] [initial_year] [final_year]\n\n"
exit 0;
......@@ -89,33 +107,29 @@ source ./env/bin/activate
if [ $? = 0 ]; then
printf "\n# Environment activated!\n"
if [ "$1" = 'all' ]; then
if [ $2 ] && [ $3 ] && [ $4 ] && [ $5 ]; then
printf "\n# Initializing the creation of base tables (may need database"
printf " password)...\n"
if [ $2 ] && [ $3 ] && [ $4 ]; then
printf "\n# Initializing the creation of base tables...\n"
sleep 1
fBase "$2"
fBase
printf "\n# Initializing the creation of mapping tables...\n"
sleep 1
fCreate
printf "\n# Initializing the insertion of data, this may take a while...\n"
sleep 2
fInsert "$3" "$4" "$5"
sleep 1
else
printf "# ERROR: Missing parameters!\n"
exit -1;
fi
elif [ "$1" = 'base' ]; then
if [ $2 ]; then
printf "\n# Initializing the creation of base tables (may need database"
printf " password)...\n"
fInsert "$2" "$3" "$4"
sleep 1
fBase "$2"
printf "\n# Initializing the creation of aggregate tables...\n"
sleep 1
fAggregate
else
printf "# ERROR: Missing parameters!\n"
exit -1;
fi
elif [ "$1" = 'base' ]; then
printf "\n# Initializing the creation of base tables...\n"
sleep 1
fBase
sleep 1
elif [ "$1" = 'create' ]; then
printf "\n# Initializing the creation of tables...\n"
sleep 1
......
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,7 +15,6 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,23 +15,25 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''Database manipulation actions - these can be used as models for other modules.'''
import logging
from sqlalchemy import create_engine, MetaData
from sqlalchemy import create_engine, MetaData, text
from os import chdir
from datetime import datetime
from database.base import MissingTableError
from database.database_table import gen_data_table, copy_tabbed_to_csv
import database.groups
import settings
from database.groups import DATA_GROUP, DATABASE_TABLE_NAME
# Engine used by the actions in this module, configured from the project settings.
ENGINE = create_engine(settings.DATABASE_URI, echo=settings.ECHO)
# Metadata bound to ENGINE; handed to gen_data_table when a table object is built.
META = MetaData(bind=ENGINE)
# Root logging format comes from the settings module.
logging.basicConfig(format = settings.LOGGING_FORMAT)
logger = logging.getLogger(__name__)
# The database_table module logger gets its level from the settings as well.
database_table_logger = logging.getLogger('database.database_table')
database_table_logger.setLevel(settings.LOGGING_LEVEL)
......@@ -101,7 +103,7 @@ def csv_from_tabbed(table_name, input_file, output_file, year, sep=';'):
copy_tabbed_to_csv(input_file, column_mappings, settings.CHUNK_SIZE, output_file,
column_names=column_names, sep=sep)
def update_from_file(file_name, table, year, columns=None, target_list=None,
def update_from_file(file_name, table, year, columns=None,
offset=2, delimiters=[';', '\\n', '"'], null=''):
'''Updates table columns from an input csv file'''
table = gen_data_table(table, META)
......@@ -140,3 +142,37 @@ def generate_backup():
f = open(settings.BACKUP_FILE,"w")
f.write(str(datetime.now()))
f.close()
def execute_sql_script(sql_scripts, sql_path=settings.SCRIPTS_FOLDER):
    '''Execute one or more SQL script files inside a single transaction.

    sql_scripts -- a script file name, or an iterable of script file names
    sql_path -- folder the script names are resolved against

    Each script is read whole and sent to the database as one statement
    wrapped in sqlalchemy.text(). The transaction is committed only after
    every script ran; if any of them raises, the transaction is rolled back
    and the exception propagates to the caller.
    '''
    import os.path  # local import: this module only imports chdir from os

    # isinstance also accepts str subclasses, unlike an exact type() comparison.
    if isinstance(sql_scripts, str):
        sql_scripts = [sql_scripts]

    with ENGINE.connect() as connection:
        # The transaction context manager commits on success and rolls back
        # on error, so a half-applied group of scripts is never committed.
        with connection.begin():
            for script in sql_scripts:
                # join() copes with a sql_path that already ends in a separator.
                with open(os.path.join(sql_path, script)) as sql:
                    # NOTE(review): text() treats ":name" as a bind parameter;
                    # confirm the scripts contain no such sequences.
                    connection.execute(text(sql.read()))
def execute_sql_group(script_group, sql_path=settings.SCRIPTS_FOLDER, files=False):
    '''Run the SQL scripts selected by a comma separated string.

    script_group -- comma separated group names (looked up upper-cased in
                    DATA_GROUP) or, when files is true, comma separated
                    script file names
    sql_path -- folder holding the scripts
    files -- treat script_group as file names instead of group names

    Each group (or each file, in files mode) is passed to
    execute_sql_script in its own call. An unknown group name raises
    KeyError before anything is executed.
    '''
    names = script_group.split(",")
    if files:
        batches = names
    else:
        batches = [DATA_GROUP[name.upper()] for name in names]

    target_folder = sql_path + '/'
    for batch in batches:
        execute_sql_script(batch, target_folder)
def drop_group(script_group, files=False):
    '''Drop the tables that belong to the given groups or script files.

    script_group -- comma separated group names (looked up upper-cased in
                    DATA_GROUP) or, when files is true, comma separated
                    script/table names
    files -- treat script_group as individual names instead of group names

    Tables are dropped in the reverse of their listed order. The table name
    comes from DATABASE_TABLE_NAME when the script has an entry there,
    otherwise it is the script name with '.sql' removed.
    '''
    entries = script_group.split(",")
    if files:
        selected_tables = entries
    else:
        selected_tables = [script
                           for group in entries
                           for script in DATA_GROUP[group.upper()]]

    for table in selected_tables[::-1]:
        try:
            table_name = DATABASE_TABLE_NAME[table]
        except KeyError:
            table_name = table.replace('.sql', '')
        drop(table_name)
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,9 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''Module containing base declarations'''
......
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,9 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''This module contains the definition of the DatabaseTable class and a constructor'''
import os
......
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''Group Settings'''
# ---------------------------------------------------------------------------------------#
# SMPPIR
# ---------------------------------------------------------------------------------------#
# Each list below names the SQL script files that belong to one SMPPIR group.
INEP = [
'admission.sql',
'course.sql',
'evader.sql',
'extracurricular_activities.sql',
'graduate.sql',
'institution.sql',
'institutionPrivate.sql',
'social_support.sql',
'student_loans.sql'
]
PROUNI = [
'coursePROUNI.sql',
'institutionPROUNI.sql',
'prouni.sql'
]
PNAD = [
'pnad.sql'
]
CADUNICO = [
'eixo2.sql',
'eixo3.sql',
'eixo4.sql',
'african_sustentability.sql',
'african_rights.sql',
'african_culture.sql'
]
FIES = [
'courseFIES.sql',
'fies.sql',
'institutionFIES.sql'
]
# Concatenation of every SMPPIR list above, in declaration order.
ALL_GROUPS_SMPPIR = INEP + PROUNI + PNAD + CADUNICO + FIES
# ---------------------------------------------------------------------------------------#
# ---------------------------------------------------------------------------------------#
# SIMCAQ
# ---------------------------------------------------------------------------------------#
# SQL script files of the "BASE" group.
BASE = [
'regiao.sql',
'estado.sql',
'municipio.sql',
'siope_uf.sql',
'siope_mun.sql',
'siope_mun_seed.sql',
'instituicao_superior.sql',
'formacao_superior.sql',
'formacao_superior_seed.sql',
'ibge_pib.sql',
'cub.sql',
]
# SQL script files of the "SIMCAQ_AGGREGATE" group.
SIMCAQ_AGGREGATE = [
'docente_por_escola.sql',
'idm.sql',
'projecao_matricula.sql'
]
# ---------------------------------------------------------------------------------------#
# ---------------------------------------------------------------------------------------#
# Used to select the right groups: maps an upper-case group name to its list of SQL scripts
# ---------------------------------------------------------------------------------------#
DATA_GROUP = {
"INEP": INEP,
"PROUNI": PROUNI,
"PNAD": PNAD,
"CADUNICO": CADUNICO,
"FIES": FIES,
"ALL_GROUPS_SMPPIR": ALL_GROUPS_SMPPIR,
"BASE": BASE,
"SIMCAQ_AGGREGATE": SIMCAQ_AGGREGATE
}
# ---------------------------------------------------------------------------------------#
# Table name for each SQL script whose table is not named after the script file
# ---------------------------------------------------------------------------------------#
DATABASE_TABLE_NAME = {
'admission.sql': 'admission_ag',
'course.sql': 'course_ag',
'evader.sql': 'evader_ag',
'extracurricular_activities.sql': 'extracurricular_activities_ag',
'graduate.sql': 'graduate_ag',
'institution.sql': 'institution_ag',
'institutionPrivate.sql': 'institution_private_ag',
'social_support.sql': 'social_support_ag',
'student_loans.sql': 'student_loans_ag',
'coursePROUNI.sql': 'course_prouni_ag',
'institutionPROUNI.sql': 'institution_prouni_ag',
'prouni.sql': 'prouni_ag',
'eixo2.sql': 'quilombola_eixo_2_ag',
'eixo3.sql': 'quilombola_eixo_3_ag',
'eixo4.sql': 'quilombola_eixo_4_ag',
'african_sustentability.sql': 'african_sustentability_ag',
'african_rights.sql': 'african_rights_ag',
'african_culture.sql': 'african_culture_ag',
'pnad.sql': 'pnad_ag',
'courseFIES.sql': 'course_fies_ag',
'fies.sql': 'fies_ag',
'institutionFIES.sql': 'institution_fies_ag',
'idm.sql': 'indice_distribuicao_matriculas'
}
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,9 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
''' Routines related to column dictionary generation.
Names commonly used:
......
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,9 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
import re
from sqlalchemy_monetdb.types import MONETDB_TYPE_MAP, TINYINT, DOUBLE_PRECISION
......
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,9 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''Generates schema in mysql dialect. Useful for documentation'''
from sqlalchemy import create_engine, MetaData, inspect
......
#!/usr/bin/env python3
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -17,16 +17,14 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''CLI for database module'''
from manager import Manager
import subprocess
import database.actions
from settings import SCRIPTS_FOLDER
manager = Manager()
......@@ -59,7 +57,7 @@ def update_from_file(csv_file, table, year, columns=None, target_list=None, offs
if target_list:
target_list = target_list.split(',')
database.actions.update_from_file(csv_file, table, year, columns=columns,
target_list=target_list, offset=offset,
offset=offset,
delimiters=[sep, '\\n', '"'], null=null)
@manager.command
......@@ -79,5 +77,36 @@ def generate_backup():
'''Create/Recriate file monitored by backup script in production'''
database.actions.generate_backup()
@manager.command
def execute_sql_group(script_group, script_path=SCRIPTS_FOLDER, files=False):
    '''Execute one or more groups of sql files declared in groups.py.
    To run only specific files, pass --files and a "file1,file2,..." pattern'''
    database.actions.execute_sql_group(script_group, sql_path=script_path,
                                       files=files)
@manager.command
def drop_group(script_group, files=False):
    '''Drop the tables of one or more groups declared in groups.py.
    To drop only specific tables, pass --files and a "table1,table2,..." pattern'''
    database.actions.drop_group(script_group, files=files)
@manager.command
def rebuild_group(script_group, sql_path=SCRIPTS_FOLDER, files=False):
    '''Drop the tables of a group and then execute its sql files again.
    Takes the same script_group / --files arguments as drop_group and
    execute_sql_group'''
    actions = database.actions
    actions.drop_group(script_group, files=files)
    actions.execute_sql_group(script_group, sql_path=sql_path, files=files)
@manager.command
def run_script(script_name, args="", folder=SCRIPTS_FOLDER):
    '''Run a script from the scripts folder, the arguments of the script needs to be passed as a string

    script_name -- file name of the script; its extension selects the runner:
                   .py runs with python, .sh with sh, .sql through
                   database.actions.execute_sql_script
    args -- comma separated arguments handed to a .py/.sh script
    folder -- folder the script lives in (working directory of the subprocess)

    A script name with any other extension is ignored.
    '''
    # "".split(",") returns [''], which would hand the script a spurious
    # empty argument, so an empty args string means "no arguments".
    extra_args = args.split(",") if args else []

    if script_name.endswith('.py'):
        subprocess.run(['python', script_name] + extra_args, cwd=folder)
    elif script_name.endswith('.sh'):
        subprocess.run(['sh', script_name] + extra_args, cwd=folder)
    elif script_name.endswith('.sql'):
        # Honour the folder argument here too instead of always falling
        # back to the default scripts folder.
        database.actions.execute_sql_script(script_name, folder)
if __name__ == "__main__":
manager.main()
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,10 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
import sys
import pandas as pd
......
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,9 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
import pandas as pd
import os
......
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
......@@ -15,10 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
'''
'''Settings used by the database module'''
import logging
......@@ -35,7 +33,7 @@ DATABASE_USER_PASSWORD = 'monetdb'
DATABASE_HOST = 'localhost'
# Database to connect to
DATABASE = 'hotmapper_demo'
DATABASE = 'dev_simcaq'
# URI structure. Standards to login:password model, but can be changed as needed.
DATABASE_URI = '{}://{}:{}@{}/{}'.format(DATABASE_DIALECT, DATABASE_USER,
......@@ -50,6 +48,9 @@ MAPPING_PROTOCOLS_FOLDER = 'mapping_protocols'
# Folder for table definitions files
TABLE_DEFINITIONS_FOLDER = 'table_definitions'
# Folder for scripts and sql tables
SCRIPTS_FOLDER = 'sql'
# Source table definitions
SOURCE_TABLE_NAME = 'fonte'
SOURCE_TABLE_COLUMNS = {
......
/*
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with HOTMapper. If not, see <https://www.gnu.org/licenses/>.
*/
CREATE TABLE regiao
(
id serial,
nome text not null
);
COPY 5 RECORDS INTO regiao FROM stdin USING DELIMITERS ',','\n';
1,Norte
2,Nordeste
3,Sudeste
4,Sul
5,Centro-Oeste
"""
Copyright (C) 2018 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR
This file is part of HOTMapper.
HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with simcaq-cdn. If not, see <https://www.gnu.org/licenses/>.
"""
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment