Skip to content
Snippets Groups Projects
dataframe_base.py 4.27 KiB
Newer Older
Jomaro Rodrigues's avatar
Jomaro Rodrigues committed

import os
import pandas as pd
import numpy as np
from script.utils.situations import *


class DataframeHolder:
bhm15's avatar
bhm15 committed
        def __init__(self, dataframe):
                self.students = dataframe.groupby('MATR_ALUNO')
                self.courses = dataframe.groupby('COD_ATIV_CURRIC')
                self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])


def load_dataframes(cwd='.'):
bhm15's avatar
bhm15 committed
        dataframes = []
        for path, dirs, files in os.walk(cwd):
                for f in files:
                        file_path = path + '/' + f
                        dh = {'name': f, 'dataframe': None}
                        if 'csv' in f:
                                dh['dataframe'] = read_csv(file_path)
                        if 'xls' in f:
                                dh['dataframe'] = read_excel(file_path)

                        if dh['dataframe'] is not None:
                                dataframes.append(dh)

        dataframe = fix_dataframes(dataframes)
        dh = DataframeHolder(dataframe)
        #~ dh.students.aggregate(teste)
#       print(dh.students['MEDIA_FINAL'].aggregate(teste))
        return dataframe


def read_excel(path, planilha='Planilha1'):
bhm15's avatar
bhm15 committed
        return pd.read_excel(path)
bhm15's avatar
bhm15 committed
        return pd.read_csv(path)


def fix_dataframes(dataframes):
bhm15's avatar
bhm15 committed
        for df in dataframes:
                if df['name'] == 'historico.xls' or df['name'] == 'historico.csv':
                        history = df['dataframe']
                        history.rename(columns={'DESCR_SITUACAO': 'SITUACAO'}, inplace=True)
                if df['name'] == 'matricula.xls'  or df['name'] == 'matricula.csv':
                        register = df['dataframe']

        #~ clean_history(history)
        clean_register(register)
        #~ df.dropna(axis=0, how='all')
        history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
        history = history[np.isfinite(history['MEDIA_FINAL'])]
bhm15's avatar
bhm15 committed
        merged = pd.merge(history, register, how='outer', on=['MATR_ALUNO'])
        merged = merged.rename(index=str, columns={"ANO_INGRESSO_x": "ANO_INGRESSO", "SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO", "FORMA_INGRESSO_x": "FORMA_INGRESSO"})
bhm15's avatar
bhm15 committed
        fix_situation(merged)
        fix_admission(merged)
        fix_evasion(merged)
        fix_carga(merged)

        return merged
    df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
             'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
bhm15's avatar
bhm15 committed
             'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR', 'NUM_VERSAO'
            ], axis=1, inplace=True)
    df['PERIODO'] = df['PERIODO'].str.split('o').str[0]

Jomaro Rodrigues's avatar
Jomaro Rodrigues committed

def clean_register(df):
bhm15's avatar
bhm15 committed
        df_split = df['PERIODO_INGRESSO'].str.split('/')
        df['ANO_INGRESSO'] = df_split.str[0]
        df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0]
        df_split = df['PERIODO_EVASAO'].str.split('/')
        df['ANO_EVASAO'] = df_split.str[0]
        df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0]
bhm15's avatar
bhm15 committed
        df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE','COD_CURSO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO'],axis=1, inplace=True)
bhm15's avatar
bhm15 committed
        for situation in Situation.SITUATIONS:
                df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0]
bhm15's avatar
bhm15 committed
        for adm in AdmissionType.ADMISSION_FORM:
                df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0]

bhm15's avatar
bhm15 committed
def fix_carga(df):
        df["CH_TOTAL"] = df["CH_TEORICA"]+df["CH_PRATICA"]
bhm15's avatar
bhm15 committed
        evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]
        df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100
        for evasion in EvasionForm.EVASION_FORM:
                #~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0]
                df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]

                #~ if(evasion[0] == 100):
                        #~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False):
                                #~ if(x != 0.0):
                                        #~ print(x)
        #~ print(df.FORMA_EVASAO.str.contains(evasion[1]).fillna(5))
        #~ print(df[['MATR_ALUNO','FORMA_EVASAO']])