Skip to content
Snippets Groups Projects
Commit 42ddefb1 authored by Odair Mario's avatar Odair Mario
Browse files

merge back-to- origins, students, course, degree

parent 7a2784b4
No related branches found
No related tags found
1 merge request: !1 "WIP: Development"
...@@ -10,125 +10,110 @@ from utils.situations import * ...@@ -10,125 +10,110 @@ from utils.situations import *
class DataframeHolder:
    """Hold the group-by views built from the merged dataframe.

    Each attribute is the result of ``dataframe.groupby(...)`` on the
    column(s) named below; the groupings are created once, at construction.
    """

    def __init__(self, dataframe):
        """Build the group-by views.

        Args:
            dataframe: pandas DataFrame containing at least the columns
                ``MATR_ALUNO``, ``COD_ATIV_CURRIC``, ``ANO_INGRESSO`` and
                ``SEMESTRE_INGRESSO``.

        Raises:
            KeyError: if one of the grouping columns is missing.
        """
        # Rows grouped by the MATR_ALUNO column.
        self.students = dataframe.groupby('MATR_ALUNO')
        # Rows grouped by the COD_ATIV_CURRIC column.
        self.courses = dataframe.groupby('COD_ATIV_CURRIC')
        # Rows grouped by the (ANO_INGRESSO, SEMESTRE_INGRESSO) pair.
        self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
def load_dataframes(cwd='.'):
    """Read every CSV/Excel file found under ``cwd`` and return the merged data.

    The directory tree rooted at ``cwd`` is walked recursively. Each file
    ending in ``.csv`` is read with ``read_csv`` and each file ending in
    ``.xls``/``.xlsx`` with ``read_excel``; other files are ignored. The
    loaded frames are then handed to ``fix_dataframes``.

    Args:
        cwd: root directory to scan. Defaults to the current directory.

    Returns:
        The merged dataframe produced by ``fix_dataframes``.
    """
    dataframes = []
    for path, _dirs, files in os.walk(cwd):
        for f in files:
            file_path = os.path.join(path, f)
            # Match on the file extension rather than on a substring, so that
            # names that merely contain "csv"/"xls" are not parsed by mistake.
            if f.endswith(('.xls', '.xlsx')):
                frame = read_excel(file_path)
            elif f.endswith('.csv'):
                frame = read_csv(file_path)
            else:
                continue
            dataframes.append({'name': f, 'dataframe': frame})

    return fix_dataframes(dataframes)
def read_excel(path, planilha='Planilha1'):
    """Read an Excel workbook into a pandas DataFrame.

    Args:
        path: location of the workbook, passed straight to ``pd.read_excel``.
        planilha: accepted for interface compatibility only. It is NOT
            forwarded to pandas, so the sheet pandas reads by default is the
            one returned regardless of this value.

    Returns:
        The DataFrame returned by ``pd.read_excel(path)``.
    """
    return pd.read_excel(path)
def read_csv(path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        path: file path (or any object ``pd.read_csv`` accepts).

    Returns:
        The DataFrame returned by ``pd.read_csv(path)`` with pandas defaults.
    """
    return pd.read_csv(path)
def fix_dataframes(dataframes):
    """Merge the history and registration spreadsheets into one dataframe.

    Args:
        dataframes: list of ``{'name': <file name>, 'dataframe': <DataFrame>}``
            dicts. The entries named ``historico.xls``/``historico.csv`` and
            ``matricula.xls``/``matricula.csv`` are used; others are ignored.

    Returns:
        The outer merge of history and registration on ``MATR_ALUNO``, after
        the situation, admission, evasion and workload fix-ups were applied.

    Raises:
        ValueError: if the history or the registration spreadsheet is absent.
    """
    history = None
    register = None
    for df in dataframes:
        name = df['name']
        if name in ('historico.xls', 'historico.csv'):
            history = df['dataframe']
            history.rename(columns={'DESCR_SITUACAO': 'SITUACAO'}, inplace=True)
        if name in ('matricula.xls', 'matricula.csv'):
            register = df['dataframe']

    # Fail with an explicit message instead of an UnboundLocalError further down.
    if history is None or register is None:
        raise ValueError(
            "fix_dataframes needs both a 'historico' and a 'matricula' "
            "spreadsheet (.xls or .csv)"
        )

    clean_register(register)

    # Non-numeric grades become NaN and the corresponding rows are discarded.
    history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
    history = history[np.isfinite(history['MEDIA_FINAL'])]

    merged = pd.merge(history, register, how='outer', on=['MATR_ALUNO'])
    # Columns present in both inputs get an "_x" suffix on the history side;
    # restore the plain names for those. The index labels are cast to str.
    merged = merged.rename(
        index=str,
        columns={
            "ANO_INGRESSO_x": "ANO_INGRESSO",
            "SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO",
            "FORMA_INGRESSO_x": "FORMA_INGRESSO",
        },
    )

    fix_situation(merged)
    fix_admission(merged)
    fix_evasion(merged)
    fix_carga(merged)
    return merged
def clean_history(df):
    """Drop unused identifier columns from the history frame, in place.

    Also shortens ``PERIODO`` to the text that precedes the first ``'o'``
    (e.g. ``'1o. Semestre'`` becomes ``'1'``).

    Args:
        df: history DataFrame; modified in place.

    Raises:
        KeyError: if any of the columns to drop (or ``PERIODO``) is missing.
    """
    df.drop(
        [
            'ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
            'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
            'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR', 'NUM_VERSAO',
        ],
        axis=1,
        inplace=True,
    )
    df['PERIODO'] = df['PERIODO'].str.split('o').str[0]
def clean_register(df):
    """Split the period columns and drop personal columns, in place.

    ``PERIODO_INGRESSO`` and ``PERIODO_EVASAO`` are split on ``'/'``: the
    first part becomes ``ANO_*`` and the second part, cut at its first
    ``'o'``, becomes ``SEMESTRE_*`` (e.g. ``'2010/1o. Semestre'`` yields
    ``'2010'`` and ``'1'``). The source period columns are then removed
    together with the personal/unit columns listed below.

    Args:
        df: registration DataFrame; modified in place.

    Raises:
        KeyError: if any referenced column is missing.
    """
    admission_parts = df['PERIODO_INGRESSO'].str.split('/')
    df['ANO_INGRESSO'] = admission_parts.str[0]
    df['SEMESTRE_INGRESSO'] = admission_parts.str[1].str.split('o').str[0]

    evasion_parts = df['PERIODO_EVASAO'].str.split('/')
    df['ANO_EVASAO'] = evasion_parts.str[0]
    df['SEMESTRE_EVASAO'] = evasion_parts.str[1].str.split('o').str[0]

    df.drop(
        [
            'ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE',
            'COD_CURSO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO',
        ],
        axis=1,
        inplace=True,
    )
def fix_situation(df):
    """Replace ``SITUACAO`` labels by their codes, in place.

    For every entry of ``Situation.SITUATIONS``, rows whose ``SITUACAO``
    equals the entry's second element receive the entry's first element.
    Rows matching no entry are left unchanged.

    Args:
        df: DataFrame with a ``SITUACAO`` column; modified in place.
    """
    for situation in Situation.SITUATIONS:
        df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0]
def fix_admission(df):
    """Replace ``FORMA_INGRESSO`` labels by their codes, in place.

    For every entry of ``AdmissionType.ADMISSION_FORM``, rows whose
    ``FORMA_INGRESSO`` equals the entry's second element receive the entry's
    first element. Rows matching no entry are left unchanged.

    Args:
        df: DataFrame with a ``FORMA_INGRESSO`` column; modified in place.
    """
    for adm in AdmissionType.ADMISSION_FORM:
        df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0]
def fix_carga(df):
    """Add a ``CH_TOTAL`` column equal to ``CH_TEORICA + CH_PRATICA``, in place.

    Args:
        df: DataFrame with ``CH_TEORICA`` and ``CH_PRATICA`` columns;
            modified in place.

    Raises:
        KeyError: if either source column is missing.
    """
    df["CH_TOTAL"] = df["CH_TEORICA"] + df["CH_PRATICA"]
def fix_evasion(df):
    """Replace ``FORMA_EVASAO`` labels by their codes, in place.

    Values that are not the second element of any ``EvasionForm.EVASION_FORM``
    entry are first set to 100; then each known label is replaced by the
    first element of its entry.

    Args:
        df: DataFrame with a ``FORMA_EVASAO`` column; modified in place.
    """
    evasion_labels = [x[1] for x in EvasionForm.EVASION_FORM]
    # NOTE(review): 100 is presumably the catch-all code for unknown/other
    # evasion forms; confirm against EvasionForm.
    df.loc[~df.FORMA_EVASAO.isin(evasion_labels), 'FORMA_EVASAO'] = 100
    for evasion in EvasionForm.EVASION_FORM:
        df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
...@@ -3,15 +3,9 @@ import time ...@@ -3,15 +3,9 @@ import time
from base.dataframe_base import load_dataframes from base.dataframe_base import load_dataframes
from build_cache import build_cache from build_cache import build_cache
from datetime import timedelta from datetime import timedelta
<<<<<<< HEAD
from analysis.degree_analysis import * from analysis.degree_analysis import *
=======
from utils.situations import * from utils.situations import *
from analysis.course_analysis import * from analysis.course_analysis import *
>>>>>>> origin/course
def main(): def main():
start_time = time.clock() start_time = time.clock()
start_time_exec = time.time() start_time_exec = time.time()
...@@ -19,7 +13,7 @@ def main(): ...@@ -19,7 +13,7 @@ def main():
dataframe = load_dataframes(os.getcwd() + '/' + 'base') dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(dataframe) build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time)) cpu_time = timedelta(seconds=round(time.clock() - start_time))
Main(dataframe) analises_disciplinas(dataframe)
run_time = timedelta(seconds=round(time.time() - start_time_exec)) run_time = timedelta(seconds=round(time.time() - start_time_exec))
print("--- Tempo de CPU: {} ---".format(cpu_time)) print("--- Tempo de CPU: {} ---".format(cpu_time))
print("--- Tempo total: {} ---".format(run_time)) print("--- Tempo total: {} ---".format(run_time))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment