From 23f8f98087dc72892b498222272d6436958366a8 Mon Sep 17 00:00:00 2001 From: Bruno Meyer <buba.meyer_@hotmail.com> Date: Fri, 2 Feb 2018 11:53:33 -0200 Subject: [PATCH] Funcionando para relatorio .33 --- script/analysis/student_analysis.py | 5 +- script/base/dataframe_base.py | 153 +++++++++++++++------------- script/utils/situations.py | 5 + 3 files changed, 90 insertions(+), 73 deletions(-) diff --git a/script/analysis/student_analysis.py b/script/analysis/student_analysis.py index cf28afb..eb2fb11 100644 --- a/script/analysis/student_analysis.py +++ b/script/analysis/student_analysis.py @@ -68,10 +68,11 @@ def ira_por_quantidade_disciplinas(df): situacao = int(df["SITUACAO"][i]) nota = float(df["MEDIA_FINAL"][i]) carga = float(df["CH_TOTAL"][i]) - media_credito = int(df["MEDIA_CREDITO"][i]) + #media_credito = int(df["MEDIA_CREDITO"][i]) - if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0): + #if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0): + if (situacao in Situation.SITUATION_AFFECT_IRA): if not (ano + "/" + semestre in students[matr]): students[matr][ano + "/" + semestre] = [0, 0, 0] diff --git a/script/base/dataframe_base.py b/script/base/dataframe_base.py index 341d958..3c1e2c9 100644 --- a/script/base/dataframe_base.py +++ b/script/base/dataframe_base.py @@ -8,102 +8,113 @@ from json import load as json_load from utils.situations import * + class DataframeHolder: - def __init__(self, dataframe): - self.students = dataframe.groupby('MATR_ALUNO') - self.courses = dataframe.groupby('COD_ATIV_CURRIC') - self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO']) + def __init__(self, dataframe): + self.students = dataframe.groupby('MATR_ALUNO') + self.courses = dataframe.groupby('COD_ATIV_CURRIC') + self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO']) def load_dataframes(cwd='.'): - dataframes = [] - for path, dirs, files in os.walk(cwd): - for f in files: - file_path = path + '/' + f - dh = {'name': f, 'dataframe': None} - if 'csv' in f: - dh['dataframe'] = read_csv(file_path) - if 'xls' in f: - dh['dataframe'] = read_excel(file_path) - - if dh['dataframe'] is not None: - dataframes.append(dh) - - dataframe = fix_dataframes(dataframes) - - dh = DataframeHolder(dataframe) - #~ dh.students.aggregate(teste) -# print(dh.students['MEDIA_FINAL'].aggregate(teste)) - return dataframe + dataframes = [] + for path, dirs, files in os.walk(cwd): + for f in files: + file_path = path + '/' + f + dh = {'name': f, 'dataframe': None} + if 'csv' in f: + dh['dataframe'] = read_csv(file_path) + if 'xls' in f: + dh['dataframe'] = read_excel(file_path) + + if dh['dataframe'] is not None: + dataframes.append(dh) + + dataframe = fix_dataframes(dataframes) + print(list(dataframe)) + dh = DataframeHolder(dataframe) + #~ dh.students.aggregate(teste) +# print(dh.students['MEDIA_FINAL'].aggregate(teste)) + return dataframe def read_excel(path, planilha='Planilha1'): - return pd.read_excel(path) + return pd.read_excel(path) def read_csv(path): - return pd.read_csv(path) + return pd.read_csv(path) def fix_dataframes(dataframes): - for df in dataframes: - if df['name'] == 'historico.xls': - history = df['dataframe'] - if df['name'] == 'matricula.xls': - register = df['dataframe'] - - clean_history(history) - clean_register(register) - - merged = pd.merge(history, register, how='right', on=['MATR_ALUNO']) - #~ print(merged) - fix_situation(merged) -# fix_admission(merged) - fix_evasion(merged) - - return merged + for df in dataframes: + if df['name'] == 'historico.xls' or df['name'] == 'historico.csv': + history = df['dataframe'] + if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv': + register = df['dataframe'] + + #~ clean_history(history) + clean_register(register) + #~ df.dropna(axis=0, how='all') + history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce') + history = history[np.isfinite(history['MEDIA_FINAL'])] + + + merged = pd.merge(history, register, how='outer', on=['MATR_ALUNO']) + merged = merged.rename(index=str, columns={"ANO_INGRESSO_x": "ANO_INGRESSO", "SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO", "FORMA_INGRESSO_x": "FORMA_INGRESSO"}) + + fix_situation(merged) + fix_admission(merged) + fix_evasion(merged) + fix_carga(merged) + + + return merged def clean_history(df): - df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO', - 'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO', - 'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR' - ], axis=1, inplace=True) - df['PERIODO'] = df['PERIODO'].str.split('o').str[0] + df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO', + 'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO', + 'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR' + ], axis=1, inplace=True) + df['PERIODO'] = df['PERIODO'].str.split('o').str[0] def clean_register(df): - df_split = df['PERIODO_INGRESSO'].str.split('/') - df['ANO_INGRESSO'] = df_split.str[0] - df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0] - df_split = df['PERIODO_EVASAO'].str.split('/') - df['ANO_EVASAO'] = df_split.str[0] - df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0] + df_split = df['PERIODO_INGRESSO'].str.split('/') + df['ANO_INGRESSO'] = df_split.str[0] + df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0] + df_split = df['PERIODO_EVASAO'].str.split('/') + df['ANO_EVASAO'] = df_split.str[0] + df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0] - df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE', - 'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO', - ],axis=1, inplace=True) + df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE', + 'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO', + ],axis=1, inplace=True) def fix_situation(df): - for situation in Situation.SITUATIONS: - df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0] + for situation in Situation.SITUATIONS: + df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0] def fix_admission(df): - for adm in AdmissionType.ADMISSION_FORM: - df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0] + for adm in AdmissionType.ADMISSION_FORM: + df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0] +def fix_carga(df): + df["CH_TOTAL"] = df["CH_TEORICA"]+df["CH_PRATICA"] + def fix_evasion(df): - evasionForms = [x[1] for x in EvasionForm.EVASION_FORM] - df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100 - for evasion in EvasionForm.EVASION_FORM: - #~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0] - df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0] - - #~ if(evasion[0] == 100): - #~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False): - #~ if(x != 0.0): - #~ print(x) - #~ print(df.FORMA_EVASAO.str.contains(evasion[1]).fillna(5)) - #~ print(df[['MATR_ALUNO','FORMA_EVASAO']]) + evasionForms = [x[1] for x in EvasionForm.EVASION_FORM] + df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100 + for evasion in EvasionForm.EVASION_FORM: + #~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0] + df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0] + + #~ if(evasion[0] == 100): + #~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False): + #~ if(x != 0.0): + #~ print(x) + #~ print(df.FORMA_EVASAO.str.contains(evasion[1]).fillna(5)) + #~ print(df[['MATR_ALUNO','FORMA_EVASAO']]) diff --git a/script/utils/situations.py b/script/utils/situations.py index e62c509..96ab5a0 100644 --- a/script/utils/situations.py +++ b/script/utils/situations.py @@ -89,6 +89,8 @@ class Situation: SIT_APROV_ADIANTAMENTO = 15 SIT_INCOMPLETO = 16 + + SIT_REPROVADO_ADIAN = 17 SIT_OUTROS = 100 @@ -98,6 +100,7 @@ class Situation: (SIT_REPROVADO, 'Reprovado por nota'), (SIT_MATRICULA, 'MatrÃcula'), (SIT_REPROVADO_FREQ, 'Reprovado por Frequência'), + (SIT_REPROVADO_ADIAN, 'Reprov Adiantamento'), (SIT_EQUIVALENCIA, 'Equivalência de Disciplina'), (SIT_CANCELADO, 'Cancelado'), @@ -123,6 +126,7 @@ class Situation: SIT_REPROVADO_FREQ, SIT_DISPENSA_COM_NOTA, SIT_CONHECIMENTO_APROVADO, + SIT_REPROVADO_ADIAN, SIT_CONHECIMENTO_REPROVADO ) @@ -135,6 +139,7 @@ class Situation: SITUATION_FAIL = ( SIT_REPROVADO, SIT_REPROVADO_FREQ, + SIT_REPROVADO_ADIAN, SIT_CONHECIMENTO_REPROVADO ) -- GitLab