Commit eaa19b39 authored by Bruno Meyer's avatar Bruno Meyer

adega#121 Cleaning up & docker container integrating postgres+django

parent e53c0f15
FROM python:3.5
Run apt-get update -qq
Run apt-get install -y \
RUN apt-get update -qq
RUN apt-get install -y \
python3-pip libpq-dev \
postgresql-client
Run mkdir /adega
RUN mkdir /adega
WORKDIR /adega
ADD . /adega
Run pip3 install -r requirements.txt
run ./install.sh --configure
# Not necessary (only do the build slow)
# ADD . /adega
ADD requirements.txt /adega/requirements.txt
RUN pip3 install -r requirements.txt
# Not necessary. The migrate can be done by makefile or in docker-compose.yml
# Besides that, there is no database to migrate yet (while docker build)
# RUN ./install.sh --configure
The MIT License (MIT)
Copyright (c) 2016 Simple is Better Than Complex
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
......@@ -8,11 +8,7 @@ services:
- POSTGRES_DB=adega
web:
build: .
command: python3 ./src/manage.py makemigrations degree
command: python3 ./src/manage.py makemigrations uploads
command: python3 ./src/manage.py makemigrations educator
command: python3 ./src/manage.py migrate
command: python3 ./src/manage.py runserver 0.0.0.0:8000
command: bash "./docker_scripts/on_docker_init.sh"
volumes:
- .:/adega
ports:
......@@ -21,3 +17,8 @@ services:
- db
depends_on:
- db
environment:
- POSTGRES_USER=adega
- POSTGRES_PASSWORD=adega
- POSTGRES_DB=adega
- POSTGRES_HOST=adega_db_1
bash ./docker_scripts/wait-for-postgres.sh
python ./src/manage.py migrate
python ./src/manage.py runserver 0.0.0.0:8000
#!/bin/bash
# wait-for-postgres.sh
set -e
host="$1"
cmd="$@"
until PGPASSWORD=$POSTGRES_PASSWORD psql -h "$POSTGRES_HOST" -U "$POSTGRES_USER" -c '\q'; do
>&2 echo "Postgres is unavailable - sleeping"
sleep 1
done
>&2 echo "Postgres is up"
#~ >&2 echo "Postgres is up - executing command"
#~ exec $cmd
......@@ -4,8 +4,8 @@ verbose=0
# ---------- functions ------------------
configure() {
postgres psql < postgres/create.sql
#~ postgres psql < postgres/create.sql
PGPASSWORD=adega psql -h adega_db_1 -U adega < postgres/create.sql
python3 src/manage.py makemigrations degree
python3 src/manage.py makemigrations uploads
python3 src/manage.py makemigrations educator
......
......@@ -36,3 +36,13 @@ install-user:
pipenv install
%:
@:
args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}`
manage:
@echo $(call args,"")
sudo docker exec -it adega_web_1 python3 ./src/manage.py $(call args,"")
CREATE DATABASE adega;
CREATE USER adega WITH PASSWORD 'adega';
-- ~ CREATE USER adega WITH PASSWORD 'adega';
ALTER ROLE adega SET client_encoding TO 'utf8';
ALTER ROLE adega SET default_transaction_isolation TO 'read committed';
ALTER ROLE adega SET timezone TO 'UTC-3';
GRANT ALL PRIVILEGES ON DATABASE adega TO adega;
-- ~ ALTER ROLE adega SET client_encoding TO 'utf8';
-- ~ ALTER ROLE adega SET default_transaction_isolation TO 'read committed';
-- ~ ALTER ROLE adega SET timezone TO 'UTC-3';
-- ~ GRANT ALL PRIVILEGES ON DATABASE adega TO adega;
import numpy as np
from utils.situations import *
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
def listagem_turma_ingresso(df):
#~ print(df.groupby(["ANO_INGRESSO", "SEMESTRE_INGRESSO"]).groups)
grupos = df.groupby(["ANO_INGRESSO", "SEMESTRE_INGRESSO"]).groups
for t in grupos:
print(t)
print("\n\n")
print(df["FORMA_INGRESSO"][grupos[t]].drop_duplicates())
This diff is collapsed.
import pandas as pd
import math
import ujson as json
from utils.situations import Situation, EvasionForm
def average_graduation(df):
total_student = df['MATR_ALUNO'].drop_duplicates().shape[0]
total_graduate = df[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA].shape[0]
return total_graduate / total_student
def general_failure(df):
affect_ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
failures = affect_ira[affect_ira.SITUACAO.isin(Situation.SITUATION_FAIL)]
average = failures.shape[0] / affect_ira.shape[0]
student_courses = affect_ira.groupby(['MATR_ALUNO'], as_index=False)\
.aggregate({'SITUACAO': 'count'})
student_failures = failures.groupby(['MATR_ALUNO'], as_index=False)\
.aggregate({'SITUACAO': 'count'})
merged = pd.merge(student_courses, student_failures, on=['MATR_ALUNO'])
merged.columns = ['MART_ALUNO', 'FEITAS', 'REPROVADO']
variance = merged['REPROVADO'].div(merged['FEITAS']).sub(average)\
.pow(2).sum() / merged.shape[0]
standard_deviation = math.sqrt(variance)
return (average, standard_deviation)
def current_students_failure(df):
fixed = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_ATIVO)]
affect_ira = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
failures = affect_ira[affect_ira.SITUACAO.isin(Situation.SITUATION_FAIL)]
average = failures.shape[0] / affect_ira.shape[0]
student_courses = affect_ira.groupby(['MATR_ALUNO'], as_index=False)\
.aggregate({'SITUACAO': 'count'})
student_failures = failures.groupby(['MATR_ALUNO'], as_index=False)\
.aggregate({'SITUACAO': 'count'})
merged = pd.merge(student_courses, student_failures, on=['MATR_ALUNO'])
merged.columns = ['MART_ALUNO', 'FEITAS', 'REPROVADO']
variance = merged['REPROVADO'].div(merged['FEITAS']).sub(average)\
.pow(2).sum() / merged.shape[0]
standard_deviation = math.sqrt(variance)
return (average, standard_deviation)
def general_ira(df):
fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def current_ira(df):
ano_grade = int(df.loc[df['NUM_VERSAO'].idxmax()]['NUM_VERSAO'])
fixed = df.loc[(df['NUM_VERSAO'] == ano_grade)]
fixed = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def current_students_ira(df):
fixed = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_ATIVO)]
fixed = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def general_evasion_rate(df):
students = df['MATR_ALUNO'].drop_duplicates()
total_student = students.shape[0]
total_evasion = students.loc[(df.FORMA_EVASAO != EvasionForm.EF_ATIVO) & (df.FORMA_EVASAO != EvasionForm.EF_FORMATURA) & (df.FORMA_EVASAO != EvasionForm.EF_REINTEGRACAO)].shape[0]
return total_evasion / total_student
def current_evasion_rate(df):
ano_grade = int(df.loc[df['NUM_VERSAO'].idxmax()]['NUM_VERSAO'])
students = df.loc[(df['NUM_VERSAO'] == ano_grade)]
students = students['MATR_ALUNO'].drop_duplicates()
total_student = students.shape[0]
total_evasion = students.loc[(df.FORMA_EVASAO != EvasionForm.EF_ATIVO) & (df.FORMA_EVASAO != EvasionForm.EF_FORMATURA) & (df.FORMA_EVASAO != EvasionForm.EF_REINTEGRACAO)].shape[0]
return total_evasion / total_student
def average_graduation_time(df):
graduates = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_FORMATURA)]
total_graduate = graduates.shape[0]
average_time = 0
year_end = int(df['ANO'].max())
semester_end = graduates['PERIODO'].max()
for index, row in graduates.iterrows():
if pd.notnull(row['ANO_EVASAO']):
year_end = int(row['ANO_EVASAO'])
try:
semester_end = int(row['SEMESTRE_EVASAO'])
except ValueError:
semester_end = graduates['PERIODO'].max()
year = int(row['ANO_INGRESSO'])
semester = int(row['SEMESTRE_INGRESSO'])
difference = 2 * (year_end - year) + (semester_end - semester) + 1
average_time += difference
average_time /= total_graduate
average_time /= 2
return average_time
def total_students(df):
return df.drop_duplicates('MATR_ALUNO').shape[0]
def current_total_students(df):
return df.loc[(df.FORMA_EVASAO == EvasionForm.EF_ATIVO)].drop_duplicates('MATR_ALUNO').shape[0]
def taxa_abandono(df):
students = df['MATR_ALUNO'].drop_duplicates()
total_student = students.shape[0]
total_abandono = students.loc[(df.FORMA_EVASAO == EvasionForm.EF_ABANDONO)].shape[0]
return total_abandono / total_student
def average_ira_graph(df):
alunos = df.drop_duplicates('MATR_ALUNO')
dic = build_dict_ira_medio(alunos)
return dic
def current_students_average_ira_graph(df):
alunos_se = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_ATIVO)]
alunos_se = alunos_se.drop_duplicates('MATR_ALUNO')
dic_se = build_dict_ira_medio(alunos_se)
return dic_se
def graduates_average_ira_graph(df):
alunos_for = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_FORMATURA)]
alunos_for = alunos_for.drop_duplicates('MATR_ALUNO')
dic_for = build_dict_ira_medio(alunos_for)
return dic_for
def period_evasion_graph(df):
di_qtd = {}
dic = {}
evasions_total = 0
year_start = int(df['ANO'].min())
year_end = int(df['ANO'].max()) + 1
students = df.drop_duplicates()
for year in range(year_start, year_end):
for semester in range(1, 3):
evasions = students.loc[(df['ANO_EVASAO'] == str(year)) & (df['SEMESTRE_EVASAO'] == str(semester))].shape[0]
date = str(year) + ' {}º Período'.format(semester)
di_qtd[date] = evasions
evasions_total += evasions
if evasions_total:
for di in di_qtd:
qtd = di_qtd[di]
dic[di] = {'qtd': qtd, 'taxa': (qtd/evasions_total)*100}
return dic
def build_dict_ira_medio(alunos):
dic = {"00-4.9":0, "05-9.9":0, "10-14.9":0, "15-19.9":0, "20-24.9":0, "25-29.9":0, "30-34.9":0,
"35-39.9":0, "40-44.9":0, "45-49.9":0, "50-54.9":0, "55-59.9":0, "60-64.9":0, "65-69.9":0,
"70-74.9":0, "75-79.9":0, "80-84.9":0, "85-89.9":0, "90-94.9": 0,"95-100":0}
iras = []
for index, row in alunos.iterrows():
if(row['MEDIA_FINAL'] is not None):
iras.append(row['MEDIA_FINAL'])
for d in dic:
aux = d.split('-')
v1 = float(aux[0])
if (v1 == 0.0):
v1 += 0.01
v2 = float(aux[1])
dic[d] = sum((float(num) >= v1) and (float(num) < v2) for num in iras)
return dic
def build_degree_json(df):
def merge_dicts(dict1, dict2, dict3):
dict_out = {}
for key, value in dict1.items():
v2 = dict2[key] if key in dict2 else None
v3 = dict3[key] if key in dict3 else None
dict_out[key] = {'ira_medio': value, 'sem_evasao': v2, 'formatura': v3}
return dict_out
dic = merge_dicts(average_ira_graph(df),current_students_average_ira_graph(df),graduates_average_ira_graph(df))
degree_json = {
"ira_medio_grafico": json.dumps(sorted(dic.items())),
"evasao_grafico": json.dumps(sorted(period_evasion_graph(df).items())),
"ira_atual": current_students_ira(df),
"ira_medio": general_ira(df),
"qtd_alunos": total_students(df),
"qtd_alunos_atuais": current_total_students(df),
"taxa_evasao": general_evasion_rate(df),
"taxa_formatura": average_graduation(df),
"taxa_reprovacao": general_failure(df),
"taxa_reprovacao_atual": current_students_failure(df),
"tempo_formatura": average_graduation_time(df),
}
with open("cache/curso/curso.json",'w') as f:
f.write(json.dumps(degree_json,indent=4))
\ No newline at end of file
import numpy as np
#~ TODO:
#~ FAZER CACHE DE TUDO
#~ AO CHAMAR A FUNCAO VERIFICAR SE TEM ALGO NA CACHE
from utils.situations import *
import pandas as pd
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
def listagem_alunos(df):
#~ ativos = df[["MATR_ALUNO", "NOME_PESSOA",]][df["FORMA_EVASAO"] == EvasionForm.EF_ATIVO].drop_duplicates()
situacoes = df.groupby(["MATR_ALUNO", "NOME_PESSOA", "FORMA_EVASAO"])
situacoes = list(pd.DataFrame({'count' : situacoes.size()}).reset_index().groupby(["FORMA_EVASAO"]))
#~ Cria lista de nome de listagens
retorno = {}
for s in situacoes:
#Busca a lista de alunos relacionados a um codigo
retorno[s[0]] = list(s[1]["MATR_ALUNO"])
return retorno
def ira_alunos(df):
iras = ira_por_quantidade_disciplinas(df)
for i in iras:
ira_total = 0
carga_total = 0
for semestre in iras[i]:
ira_total += iras[i][semestre][0]*iras[i][semestre][2]
carga_total+=iras[i][semestre][2]
if(carga_total != 0):
iras[i] = ira_total/carga_total
else:
iras[i] = 0
return iras
def taxa_aprovacao(df):
aprovacoes_semestres = indice_aprovacao_semestral(df)
for aluno in aprovacoes_semestres:
total = sum([aprovacoes_semestres[aluno][s][1] for s in aprovacoes_semestres[aluno]])
aprovacoes = sum([aprovacoes_semestres[aluno][s][0] for s in aprovacoes_semestres[aluno]])
total = float(total)
aprovacoes = float(aprovacoes)
if(total != 0):
aprovacoes_semestres[aluno] = aprovacoes/total
else:
aprovacoes_semestres[aluno] = None
#~ for semestre in aprovacoes_semestres[aluno]:
#~ aprovacoes+=aprovacoes_semestres[aluno][semestre][0]
#~ total+=aprovacoes_semestres[semestre][1]
return aprovacoes_semestres
def posicao_turmaIngresso_semestral(df):
iras = ira_semestral(df)
iraMax = {}
for matr in iras:
for semestreAno in iras[matr]:
if not (semestreAno in iraMax):
iraMax[semestreAno] = iras[matr][semestreAno]
else:
if (iras[matr][semestreAno] > iraMax[semestreAno]):
iraMax[semestreAno] = iras[matr][semestreAno]
for matr in iras:
for semestreAno in iras[matr]:
iras[matr][semestreAno] /= iraMax[semestreAno]
return iras
def periodo_real(df):
aux = df.groupby(["MATR_ALUNO"])
students = {}
for x in aux:
students[x[0]] = None
return students
def periodo_pretendido(df):
aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
students = {}
for x in aux:
students[x[0][0]] = (ANO_ATUAL - int(x[0][1])) * 2 + SEMESTRE_ATUAL - int(x[0][2]) + 1
return students
def ira_semestral(df):
aux = ira_por_quantidade_disciplinas(df)
for matr in aux:
for periodo in aux[matr]:
aux[matr][periodo] = aux[matr][periodo][0]
return aux
def ira_por_quantidade_disciplinas(df):
students = {}
df = df.dropna(subset=["MEDIA_FINAL"])
total_students = len(df["MATR_ALUNO"])
for i in range(total_students):
matr = df["MATR_ALUNO"][i]
if (not (matr in students)):
students[matr] = {}
ano = str(int(df["ANO"][i]))
semestre = str(df["PERIODO"][i])
situacao = int(df["SITUACAO"][i])
nota = float(df["MEDIA_FINAL"][i])
carga = float(df["CH_TOTAL"][i])
#media_credito = int(df["MEDIA_CREDITO"][i])
#if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0):
if (situacao in Situation.SITUATION_AFFECT_IRA):
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0, 0]
students[matr][ano + "/" + semestre][0] += nota*carga
students[matr][ano + "/" + semestre][1] += 1
students[matr][ano + "/" + semestre][2] += carga
for matr in students:
for periodo in students[matr]:
if (students[matr][periodo][2] != 0):
students[matr][periodo][0] /= students[matr][periodo][2] * 100
return (students)
def indice_aprovacao_semestral(df):
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
total_students = len(df["MATR_ALUNO"])
for i in range(total_students):
matr = (df["MATR_ALUNO"][i])
if (not (matr in students)):
students[matr] = {}
ano = str(int(df["ANO"][i]))
semestre = str(df["PERIODO"][i])
situacao = int(df["SITUACAO"][i])
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0]
if situacao in Situation.SITUATION_PASS:
students[matr][ano + "/" + semestre][0] += 1
students[matr][ano + "/" + semestre][1] += 1
if situacao in Situation.SITUATION_FAIL:
students[matr][ano + "/" + semestre][1] += 1
return (students)
def aluno_turmas(df):
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
situations = dict(Situation.SITUATIONS)
for matr, hist in df.groupby('MATR_ALUNO'):
students[matr] = []
for _, row in hist.iterrows():
data = {
'ano': str(int(row["ANO"])),
'codigo': row["COD_ATIV_CURRIC"],
'nome': row["NOME_ATIV_CURRIC"],
'nota': row["MEDIA_FINAL"],
'semestre': row["PERIODO"],
'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS)
}
students[matr].append(data)
return students
import os
import pandas as pd
import numpy as np
from utils.situations import *
class DataframeHolder:
def __init__(self, dataframe):
self.students = dataframe.groupby('MATR_ALUNO')
self.courses = dataframe.groupby('COD_ATIV_CURRIC')
self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
def load_dataframes(cwd='.'):
dataframes = []
for path, dirs, files in os.walk(cwd):
for f in files:
file_path = path + '/' + f
dh = {'name': f, 'dataframe': None}
if 'csv' in f:
dh['dataframe'] = read_csv(file_path)
if 'xls' in f:
dh['dataframe'] = read_excel(file_path)
if dh['dataframe'] is not None:
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste))
return dataframe
def read_excel(path, planilha='Planilha1'):
return pd.read_excel(path)
def read_csv(path):
return pd.read_csv(path)
def fix_dataframes(dataframes):
for df in dataframes:
if df['name'] == 'historico.xls' or df['name'] == 'historico.csv':
history = df['dataframe']
history.rename(columns={'DESCR_SITUACAO': 'SITUACAO'}, inplace=True)
if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv':
register = df['dataframe']
#~ clean_history(history)
clean_register(register)
#~ df.dropna(axis=0, how='all')
history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
history = history[np.isfinite(history['MEDIA_FINAL'])]
merged = pd.merge(history, register, how='outer', on=['MATR_ALUNO'])
merged = merged.rename(index=str, columns={"ANO_INGRESSO_x": "ANO_INGRESSO", "SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO", "FORMA_INGRESSO_x": "FORMA_INGRESSO"})
fix_situation(merged)
fix_admission(merged)
fix_evasion(merged)
fix_carga(merged)
return merged
def clean_history(df):
df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR', 'NUM_VERSAO'