diff --git a/script/analysis/course_analysis.py b/script/analysis/course_analysis.py index a0411b79415c5b031c285a9f43b99a42351f9019..c52dec75344364d75c50e753d1ab29eadbc2696e 100644 --- a/script/analysis/course_analysis.py +++ b/script/analysis/course_analysis.py @@ -3,6 +3,8 @@ import pandas as pd import json import numpy as np import utils.situations +from collections import OrderedDict, defaultdict + # df = pd.read_excel("../base/base-2016-1/historico.xls") # imprime completamente um dataframe @@ -17,7 +19,7 @@ def print_analise(d): def func(x, matr): c = matr[x['COD_ATIV_CURRIC']].values[0] - return (x['counts'] / c) + return (x['Quantidade'] / c) # quantidade de matriculas @@ -30,15 +32,18 @@ def analysis(df): qnt_matr = counts_matr(df) # quantidade de matriculas disciplina # conta quantas vezes os valores de 'SIGLA' se repete para cada disciplina disciplinas = df.groupby(['COD_ATIV_CURRIC', 'SIGLA'] - ).size().reset_index(name='counts') + ).size().reset_index(name='Quantidade') # adiciona mais uma coluna ao df disciplina com as taxas de cada valor de 'SIGLA' - disciplina = disciplinas.groupby(['COD_ATIV_CURRIC', 'SIGLA', 'counts']).apply( - lambda x: func(x, qnt_matr)).reset_index(name='taxas gerais') - # print(disciplina) - return disciplina + disciplina = disciplinas.groupby(['COD_ATIV_CURRIC', 'SIGLA', 'Quantidade']).apply( + lambda x: func(x, qnt_matr)).reset_index(name='Taxas gerais') + disciplina = disciplina.drop('level_3',1) + for dis in qnt_matr.keys(): + disc = disciplina.loc[disciplina['COD_ATIV_CURRIC']==dis].drop('COD_ATIV_CURRIC',1) + disc = disc.set_index('SIGLA').to_dict(into=OrderedDict) + with open(dis+'.json','w') as f: + json.dump(disc,f,indent=4) + return disciplina.set_index('COD_ATIV_CURRIC') # quantidade de vezes cursadas ate obter a aprovacao - - def qnt_aprov(df): qnt = df.groupby(['MATR_ALUNO', 'COD_ATIV_CURRIC'] ).size().reset_index(name='qnt_aprov') @@ -77,9 +82,10 @@ def analysis_semestre(df): def Main(df): Analysis = analysis(df) Analysis_semestre = analysis_semestre(df) + # print_analise(Analysis) matr = counts_matr(df) matr_semes = matr_semestre(df) - print_analise(merged) + # print_analise(merged) # main() # matr = counts_matr(df) diff --git a/script/base/dataframe_base.py b/script/base/dataframe_base.py index 56d1bf01b269d2ddf7f6741f5ad5ed21fd5160b2..a0a179e40f4a775f0926ee946c9edde56e7bdd34 100644 --- a/script/base/dataframe_base.py +++ b/script/base/dataframe_base.py @@ -10,7 +10,7 @@ from utils.situations import * def load_dataframes(cwd='.'): dataframes = [] - for path, dirs, files in os.walk(cwd): + for path, dirs, files in os.walk(cwd): for f in files: file_path = path + '/' + f dh = {'name': f, 'dataframe': None} @@ -84,5 +84,6 @@ def fix_admission(df): def fix_evasion(df): - for evasion in EvasionForm.EVASION_FORM: - df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False), 'FORMA_EVASAO'] = evasion[0] + # for evasion in EvasionForm.EVASION_FORM: + # df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False), 'FORMA_EVASAO'] = evasion[0] + pass diff --git a/script/main.py b/script/main.py index addbd1055924267ce8aae824d9f4d9583015acf2..ce4b7a63889e0ec6e7eba64f2f218f93535dbfbe 100644 --- a/script/main.py +++ b/script/main.py @@ -4,7 +4,7 @@ from base.dataframe_base import load_dataframes from build_cache import build_cache from datetime import timedelta from utils.situations import * - +from analysis.course_analysis import * def main(): start_time = time.clock() start_time_exec = time.time() @@ -12,7 +12,7 @@ def main(): dataframe = load_dataframes(os.getcwd() + '/' + 'base') build_cache(dataframe) - + Main(dataframe) cpu_time = timedelta(seconds=round(time.clock() - start_time)) run_time = timedelta(seconds=round(time.time() - start_time_exec)) print("--- Tempo de CPU: {} ---".format(cpu_time))