Commit 9ff21bbd authored by João Denis Rodrigues
Browse files

Fix dataframe merge + media formatura

parent 4a615b20
import pandas as pd
import numpy as np
import math
from utils.situations import Situation
from utils.situations import Situation, EvasionForm
def average_graduation(df):
    """Return the share of distinct students whose evasion form is graduation.

    Both the numerator and the denominator count distinct ``MATR_ALUNO``
    values, so a frame holding several rows per student does not inflate
    the ratio.

    Args:
        df: DataFrame with ``MATR_ALUNO`` and ``FORMA_EVASAO`` columns, where
            ``FORMA_EVASAO`` is compared against ``EvasionForm.EF_FORMATURA``.

    Returns:
        Graduated students divided by total students, or ``0.0`` when the
        frame contains no students.
    """
    total_student = df['MATR_ALUNO'].drop_duplicates().shape[0]
    if total_student == 0:
        # Nothing to average over; avoid ZeroDivisionError.
        return 0.0

    graduated = df.loc[df['FORMA_EVASAO'] == EvasionForm.EF_FORMATURA,
                       'MATR_ALUNO']
    # Count each graduated student once, matching how total_student is built.
    total_graduate = graduated.drop_duplicates().shape[0]
    return total_graduate / total_student
def general_failure(df):
not_nan = df.dropna(axis=0)
affect_ira = not_nan[not_nan.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
......@@ -28,6 +29,7 @@ def general_failure(df):
standard_deviation = math.sqrt(variance)
return (average, standard_deviation)
def general_ira(df):
fixed = df.dropna(axis=0)[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
......
......@@ -22,44 +22,67 @@ def load_dataframes(cwd='.'):
if dh['dataframe'] is not None:
dataframes.append(dh)
return dataframes
dataframe = fix_dataframes(dataframes)
return dataframe
def read_excel(path, planilha='Planilha1'):
    """Load an Excel workbook from ``path`` into a DataFrame.

    Args:
        path: Location handed straight to ``pandas.read_excel``.
        planilha: Sheet name. It is accepted but not forwarded to pandas,
            so pandas' default sheet selection applies.

    Returns:
        The DataFrame produced by ``pandas.read_excel``.
    """
    workbook_frame = pd.read_excel(path)
    return workbook_frame
def read_csv(path):
    """Load a CSV source into a DataFrame.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    csv_frame = pd.read_csv(path)
    return csv_frame
def fix_dataframes(dataframes):
    """Merge the history and register frames into one normalised DataFrame.

    Args:
        dataframes: Iterable of dicts with a ``'name'`` key (file name) and a
            ``'dataframe'`` key. The entries named ``'historico.xls'`` and
            ``'matricula.xls'`` are used; if a name occurs more than once the
            last entry wins.

    Returns:
        The history frame right-joined to the register frame on
        ``MATR_ALUNO``, after ``fix_situation`` and ``fix_evasion`` have been
        applied to the merged result.

    Raises:
        ValueError: If either required frame is absent from ``dataframes``.

    Note:
        ``clean_history`` and ``clean_register`` are called on the input
        frames themselves, not on copies.
    """
    history = None
    register = None
    for entry in dataframes:
        if entry['name'] == 'historico.xls':
            history = entry['dataframe']
        elif entry['name'] == 'matricula.xls':
            register = entry['dataframe']

    if history is None or register is None:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(
            "fix_dataframes needs both 'historico.xls' and 'matricula.xls'"
        )

    clean_history(history)
    clean_register(register)

    # Right join: every register row is kept even without history rows.
    merged = pd.merge(history, register, how='right', on=['MATR_ALUNO'])

    fix_situation(merged)
    # NOTE: fix_admission is deliberately not applied to the merged frame.
    fix_evasion(merged)
    return merged
def clean_history(df):
    """Strip unused columns from the history frame and shorten ``PERIODO``.

    The frame is modified in place and nothing is returned.

    Args:
        df: History DataFrame containing every column listed below plus a
            string ``PERIODO`` column.
    """
    unused_columns = [
        'ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
        'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
        'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR',
    ]
    df.drop(columns=unused_columns, inplace=True)

    # Keep only the text that precedes the first 'o' in each PERIODO value.
    period_pieces = df['PERIODO'].str.split('o')
    df['PERIODO'] = period_pieces.str.get(0)
def clean_register(df):
    """Split the admission/evasion period columns and drop unused columns.

    For each of ``PERIODO_INGRESSO`` and ``PERIODO_EVASAO`` the value is split
    on ``'/'``: the first piece becomes ``ANO_*`` and the second piece, cut at
    its first ``'o'``, becomes ``SEMESTRE_*``. The frame is modified in place
    and nothing is returned.

    Args:
        df: Register DataFrame containing the two period columns and every
            column listed in the drop list below.
    """
    for suffix in ('INGRESSO', 'EVASAO'):
        pieces = df['PERIODO_' + suffix].str.split('/')
        df['ANO_' + suffix] = pieces.str[0]
        semester_text = pieces.str[1]
        df['SEMESTRE_' + suffix] = semester_text.str.split('o').str[0]

    unused_columns = [
        'ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE',
        'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO',
    ]
    df.drop(columns=unused_columns, inplace=True)
def fix_situation(df):
    """Replace textual ``SITUACAO`` labels with their numeric codes, in place.

    Each ``(code, label)`` pair in ``Situation.SITUATIONS`` is applied in
    order: rows whose ``SITUACAO`` equals the label receive the code. Only the
    ``SITUACAO`` column is written; values matching no label are left as they
    are.

    Args:
        df: DataFrame to normalise. Frames without a ``SITUACAO`` column are
            left untouched.
    """
    if 'SITUACAO' not in df.columns:
        return

    for code, label in Situation.SITUATIONS:
        # Always name the target column so other columns are never clobbered.
        df.loc[df['SITUACAO'] == label, 'SITUACAO'] = code
def fix_admission(df):
    """Replace textual ``FORMA_INGRESSO`` labels with their codes, in place.

    Args:
        df: DataFrame with a ``FORMA_INGRESSO`` column. Rows whose value
            equals the second item of an ``AdmissionType.ADMISSION_FORM``
            entry receive that entry's first item.
    """
    for admission in AdmissionType.ADMISSION_FORM:
        code, label = admission[0], admission[1]
        df.loc[df['FORMA_INGRESSO'] == label, 'FORMA_INGRESSO'] = code


def fix_evasion(df):
    """Replace textual ``FORMA_EVASAO`` labels with their codes, in place.

    Entries of ``EvasionForm.EVASION_FORM`` are applied in order. A row is
    rewritten when its current value is a string containing the entry's
    label, so earlier entries take precedence over later ones whose label is
    a substring of theirs.

    Args:
        df: DataFrame with a ``FORMA_EVASAO`` column.
    """
    for evasion in EvasionForm.EVASION_FORM:
        code, label = evasion[0], evasion[1]
        # Literal substring match; missing/non-string cells never match.
        matches = df['FORMA_EVASAO'].str.contains(label, regex=False, na=False)
        df.loc[matches, 'FORMA_EVASAO'] = code
......@@ -13,14 +13,14 @@ try:
except NameError:
to_unicode = str
def build_cache(registry, history):
def build_cache(dataframe):
# os.chdir("../src")
path = "cache"
build_path(path)
path += "/curso"
build_path(path)
generate_degree_data(path, registry, history)
generate_degree_data(path, dataframe)
generate_student_data(path)
generate_student_list(path)
generate_admission_data(path)
......@@ -28,10 +28,10 @@ def build_cache(registry, history):
generate_course_data(path)
generate_course_general_data(path)
def generate_degree_data(path, dataframe):
    """Run the degree-level analysis over the merged dataframe.

    Only ``average_graduation`` is invoked, and its result is currently
    discarded. The ``general_failure`` and ``general_ira`` analyses are
    deliberately not run here.

    Args:
        path: Cache directory for degree data; not used by this function yet.
        dataframe: DataFrame passed to ``average_graduation``.
    """
    average_graduation(dataframe)
def generate_student_data(path):
......
import os
import time
from base.dataframe_base import load_dataframes, fix_dataframes
from base.dataframe_base import load_dataframes
from build_cache import build_cache
from datetime import timedelta
......@@ -8,15 +8,9 @@ def main():
start_time = time.clock()
start_time_exec = time.time()
dataframes = load_dataframes(os.getcwd() + '/' + 'base')
fix_dataframes(dataframes)
for df in dataframes:
if 'historico' in df['name']:
history = df['dataframe']
if 'matricula.xls' in df['name']:
registry = df['dataframe']
dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(registry, history)
build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time))
run_time = timedelta(seconds=round(time.time() - start_time_exec))
......
......@@ -38,22 +38,30 @@ class EvasionForm:
EF_REOPCAO = 9
EF_DESISTENCIA = 10
EF_JUBILAMENTO = 11
EF_DESCUMPRIMENTO_EDITAL = 12
EF_FALECIMENTO = 13
EF_TERMINO_REG_TEMP = 14
EF_REINTEGRACAO = 15
EF_OUTROS = 100
EVASION_FORM = (
(EF_DESCONHECIDO, 'Desconhecido'),
(EF_ATIVO, 'Ativo'),
(EF_FORMATURA, 'Formado'),
(EF_ATIVO, 'Sem evasão'),
(EF_FORMATURA, 'Formatura'),
(EF_ABANDONO, 'Abandono'),
(EF_DESISTENCIA_VESTIBULAR, 'Desistencia vestibular'),
(EF_DESISTENCIA_VESTIBULAR, 'Desistência Vestibular'),
(EF_CANCELAMENTO, 'Cancelamento'),
(EF_NAO_CONFIRMACAO_VAGA, 'Não confirmação de vaga'),
(EF_NOVO_VESTIBULAR, 'Novo vestibular'),
(EF_TRANSFERENCIA_EXTERNA, 'Transferência externa'),
(EF_REOPCAO, 'Reopção de curso'),
(EF_NAO_CONFIRMACAO_VAGA, 'Não Confirmação de Vaga'),
(EF_NOVO_VESTIBULAR, 'Novo Vestibular'),
(EF_TRANSFERENCIA_EXTERNA, 'Transferência Externa'),
(EF_REOPCAO, 'Reopção'),
(EF_DESISTENCIA, 'Desistência'),
(EF_JUBILAMENTO, 'Jubilado'),
(EF_OUTROS, 'Outros'),
(EF_JUBILAMENTO, 'Jubilamento'),
(EF_DESCUMPRIMENTO_EDITAL, 'Descumprimento Edital'),
(EF_FALECIMENTO, 'Falecimento'),
(EF_TERMINO_REG_TEMP, 'Término de Registro Temporário'),
(EF_REINTEGRACAO, 'Reintegração'),
(EF_OUTROS, 'Outro'),
)
# == Situation Courses == #
......@@ -77,7 +85,10 @@ class Situation:
SIT_TRANCAMENTO_TOTAL = 11
SIT_TRANCAMENTO_ADMINISTRATIVO = 12
SIT_REPROVADO_SEM_NOTA = 13
SIT_HORAS = 13
SIT_HORAS = 14
SIT_APROV_ADIANTAMENTO = 15
SIT_INCOMPLETO = 16
SIT_OUTROS = 100
......@@ -101,6 +112,8 @@ class Situation:
(SIT_HORAS, 'Horas'),
(SIT_APROV_ADIANTAMENTO, 'Aprov Adiantamento'),
(SIT_INCOMPLETO, 'Incompleto'),
(SIT_OUTROS, 'Outro'),
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment