diff --git a/script/analysis/degree_analysis.py b/script/analysis/degree_analysis.py index 5d91ef7d3c3fa176571e272f28590f40f41703ef..f1ff3ea60a88b7b1b907465b7adbf404401616f8 100644 --- a/script/analysis/degree_analysis.py +++ b/script/analysis/degree_analysis.py @@ -1,15 +1,16 @@ import pandas as pd import numpy as np import math -from utils.situations import Situation +from utils.situations import Situation, EvasionForm + def average_graduation(df): - not_nan = df.dropna(axis=0) - total_student = not_nan.shape[0] - list_graduation = not_nan[not_nan.FORMA_EVASAO == 'Formatura'] - total_graduate = list_graduation.shape[0] + total_student = df['MATR_ALUNO'].drop_duplicates().shape[0] + total_graduate = df[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA].shape[0] + return total_graduate / total_student + def general_failure(df): not_nan = df.dropna(axis=0) affect_ira = not_nan[not_nan.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)] @@ -28,6 +29,7 @@ def general_failure(df): standard_deviation = math.sqrt(variance) return (average, standard_deviation) + def general_ira(df): fixed = df.dropna(axis=0)[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)] fixed = fixed[fixed.MEDIA_FINAL <= 100] diff --git a/script/base/dataframe_base.py b/script/base/dataframe_base.py index 6aa6ece7e9fc47a1a886b2ecef44dec8770091fd..56d1bf01b269d2ddf7f6741f5ad5ed21fd5160b2 100644 --- a/script/base/dataframe_base.py +++ b/script/base/dataframe_base.py @@ -22,44 +22,67 @@ def load_dataframes(cwd='.'): if dh['dataframe'] is not None: dataframes.append(dh) - return dataframes + dataframe = fix_dataframes(dataframes) + return dataframe + def read_excel(path, planilha='Planilha1'): return pd.read_excel(path) + def read_csv(path): return pd.read_csv(path) + def fix_dataframes(dataframes): for df in dataframes: - fix_situation(df['dataframe']) - fix_admission(df['dataframe']) - fix_evasion(df['dataframe']) if df['name'] == 'historico.xls': - hist = df['dataframe'] + history = df['dataframe'] if df['name'] == 'matricula.xls': - mat = df['dataframe'] - merged = pd.merge(hist, mat, on=['MATR_ALUNO']) - merged.drop(['ID_PESSOA', 'ID_CURRIC_ALUNO', 'CONCEITO', 'NOME_UNIDADE', - 'ID_NOTA', 'ID_VERSAO_CURSO', 'NOME_PESSOA', 'SIGLA', - 'NUM_VERSAO_y', 'COD_CURSO_y', 'DT_NASCIMENTO' - ], axis=1, inplace=True) - merged.rename(columns={'NUM_VERSAO_x':'NUM_VERSAO', - 'COD_CURSO_x':'COD_CURSO'}, inplace=True) - print(list(merged)) + register = df['dataframe'] + + clean_history(history) + clean_register(register) + + merged = pd.merge(history, register, how='right', on=['MATR_ALUNO']) + + fix_situation(merged) +# fix_admission(merged) + fix_evasion(merged) + + return merged + + +def clean_history(df): + df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO', + 'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO', + 'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR' + ], axis=1, inplace=True) + df['PERIODO'] = df['PERIODO'].str.split('o').str[0] + +def clean_register(df): + df_split = df['PERIODO_INGRESSO'].str.split('/') + df['ANO_INGRESSO'] = df_split.str[0] + df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0] + df_split = df['PERIODO_EVASAO'].str.split('/') + df['ANO_EVASAO'] = df_split.str[0] + df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0] + + df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE', + 'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO', + ],axis=1, inplace=True) + def fix_situation(df): - if hasattr(df, 'SITUACAO'): - for situation in Situation.SITUATIONS: - df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0] - if situation[1] == 'Outro': - temp = df[~df['SITUACAO'].astype(str).str.isdigit()] - df.loc[~df.SITUACAO.astype(str).str.isdigit()] = situation[0] + for situation in Situation.SITUATIONS: + df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0] -def fix_admission(df): - pass -def fix_evasion(df): - pass +def fix_admission(df): + for adm in AdmissionType.ADMISSION_FORM: + df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0] +def fix_evasion(df): + for evasion in EvasionForm.EVASION_FORM: + df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False), 'FORMA_EVASAO'] = evasion[0] diff --git a/script/build_cache.py b/script/build_cache.py index 6f6c2f49716d9e37c2d228caef239e0841f1351e..ca06a71aea381c5b0461ae3a9458653d29624973 100644 --- a/script/build_cache.py +++ b/script/build_cache.py @@ -13,14 +13,14 @@ try: except NameError: to_unicode = str -def build_cache(registry, history): +def build_cache(dataframe): # os.chdir("../src") path = "cache" build_path(path) path += "/curso" build_path(path) - generate_degree_data(path, registry, history) + generate_degree_data(path, dataframe) generate_student_data(path) generate_student_list(path) generate_admission_data(path) @@ -28,10 +28,10 @@ def build_cache(registry, history): generate_course_data(path) generate_course_general_data(path) -def generate_degree_data(path, registry, history): - average_graduation(registry) - general_failure(history) - general_ira(history) +def generate_degree_data(path, dataframe): + average_graduation(dataframe) +# general_failure(dataframe) + # general_ira(dataframe) pass def generate_student_data(path): diff --git a/script/main.py b/script/main.py index 7b3a6b127004240cabcbdc7e2a2931eb1e84f735..979bc78769847a0948ea1ca4accf6fbe6b43ea6a 100644 --- a/script/main.py +++ b/script/main.py @@ -1,6 +1,6 @@ import os import time -from base.dataframe_base import load_dataframes, fix_dataframes +from base.dataframe_base import load_dataframes from build_cache import build_cache from datetime import timedelta @@ -8,15 +8,9 @@ def main(): start_time = time.clock() start_time_exec = time.time() - dataframes = load_dataframes(os.getcwd() + '/' + 'base') - fix_dataframes(dataframes) - for df in dataframes: - if 'historico' in df['name']: - history = df['dataframe'] - if 'matricula.xls' in df['name']: - registry = df['dataframe'] + dataframe = load_dataframes(os.getcwd() + '/' + 'base') - build_cache(registry, history) + build_cache(dataframe) cpu_time = timedelta(seconds=round(time.clock() - start_time)) run_time = timedelta(seconds=round(time.time() - start_time_exec)) diff --git a/script/utils/situations.py b/script/utils/situations.py index 81cbc9190df8259da57ee4ef6d238b55d28311ee..80f2b175949b2a61c20ed04ceb44d4101a912c12 100644 --- a/script/utils/situations.py +++ b/script/utils/situations.py @@ -38,22 +38,30 @@ class EvasionForm: EF_REOPCAO = 9 EF_DESISTENCIA = 10 EF_JUBILAMENTO = 11 + EF_DESCUMPRIMENTO_EDITAL = 12 + EF_FALECIMENTO = 13 + EF_TERMINO_REG_TEMP = 14 + EF_REINTEGRACAO = 15 EF_OUTROS = 100 EVASION_FORM = ( (EF_DESCONHECIDO, 'Desconhecido'), - (EF_ATIVO, 'Ativo'), - (EF_FORMATURA, 'Formado'), + (EF_ATIVO, 'Sem evasão'), + (EF_FORMATURA, 'Formatura'), (EF_ABANDONO, 'Abandono'), - (EF_DESISTENCIA_VESTIBULAR, 'Desistencia vestibular'), + (EF_DESISTENCIA_VESTIBULAR, 'Desistência Vestibular'), (EF_CANCELAMENTO, 'Cancelamento'), - (EF_NAO_CONFIRMACAO_VAGA, 'Não confirmação de vaga'), - (EF_NOVO_VESTIBULAR, 'Novo vestibular'), - (EF_TRANSFERENCIA_EXTERNA, 'Transferência externa'), - (EF_REOPCAO, 'Reopção de curso'), + (EF_NAO_CONFIRMACAO_VAGA, 'Não Confirmação de Vaga'), + (EF_NOVO_VESTIBULAR, 'Novo Vestibular'), + (EF_TRANSFERENCIA_EXTERNA, 'Transferência Externa'), + (EF_REOPCAO, 'Reopção'), (EF_DESISTENCIA, 'Desistência'), - (EF_JUBILAMENTO, 'Jubilado'), - (EF_OUTROS, 'Outros'), + (EF_JUBILAMENTO, 'Jubilamento'), + (EF_DESCUMPRIMENTO_EDITAL, 'Descumprimento Edital'), + (EF_FALECIMENTO, 'Falecimento'), + (EF_TERMINO_REG_TEMP, 'Término de Registro Temporário'), + (EF_REINTEGRACAO, 'Reintegração'), + (EF_OUTROS, 'Outro'), ) # == Situation Courses == # @@ -77,7 +85,10 @@ class Situation: SIT_TRANCAMENTO_TOTAL = 11 SIT_TRANCAMENTO_ADMINISTRATIVO = 12 SIT_REPROVADO_SEM_NOTA = 13 - SIT_HORAS = 13 + SIT_HORAS = 14 + + SIT_APROV_ADIANTAMENTO = 15 + SIT_INCOMPLETO = 16 SIT_OUTROS = 100 @@ -101,6 +112,8 @@ class Situation: (SIT_HORAS, 'Horas'), + (SIT_APROV_ADIANTAMENTO, 'Aprov Adiantamento'), + (SIT_INCOMPLETO, 'Incompleto'), (SIT_OUTROS, 'Outro'), )