### WIP

parent 12b12b14
Pipeline #14877 failed with stage
in 18 seconds
 # IRA statistics grouped by admission class (entry year, entry semester).
import numpy as np
import math
from utils.situations import *

ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2

# Columns that together identify an admission class.
_CHAVES_TURMA_INGRESSO = ["ANO_INGRESSO", "SEMESTRE_INGRESSO"]


# ++++++ Helper functions +++++++
def weighted_avg_and_std(values, weights):
    """Return the weighted standard deviation of ``values``.

    Despite the name, only the standard deviation is returned; the
    weighted average is computed internally and discarded.

    Args:
        values: Array-like of numbers (a pandas Series works).
        weights: Array-like of weights, same length as ``values``.

    Returns:
        The weighted standard deviation as a float.
    """
    average = np.average(values, weights=weights)
    # Fast and numerically precise:
    variance = np.average((values - average) ** 2, weights=weights)
    return math.sqrt(variance)


def _grupos_por_turma_ingresso(df):
    """Map each (ANO_INGRESSO, SEMESTRE_INGRESSO) pair to its row labels."""
    return df.groupby(_CHAVES_TURMA_INGRESSO).groups


def _linhas_que_afetam_ira(df):
    """Return the rows whose SITUACAO is in Situation.SITUATION_AFFECT_IRA."""
    return df.loc[df["SITUACAO"].isin(Situation.SITUATION_AFFECT_IRA)]


def listagem_turma_ingresso(df):
    """Print every admission class followed by its distinct FORMA_INGRESSO values."""
    for turma, indices in _grupos_por_turma_ingresso(df).items():
        print(turma)
        print("\n\n")
        print(df.loc[indices, "FORMA_INGRESSO"].drop_duplicates())


# +++++++++++++++++++++++++++++++++++
def calcular_ira_medio(df):
    """Compute the average MEDIA_FINAL per admission class.

    Only rows whose SITUACAO is in ``Situation.SITUATION_AFFECT_IRA`` are
    considered; each grade is weighted by TOTAL_CARGA_HORARIA.

    Returns:
        dict mapping (ANO_INGRESSO, SEMESTRE_INGRESSO) to the weighted
        average. The dict is also printed before being returned.
    """
    grupos = _grupos_por_turma_ingresso(_linhas_que_afetam_ira(df))
    ira_medio_turma_ingresso = {
        turma: np.average(
            df.loc[indices, "MEDIA_FINAL"],
            weights=df.loc[indices, "TOTAL_CARGA_HORARIA"],
        )
        for turma, indices in grupos.items()
    }
    print(ira_medio_turma_ingresso)
    return ira_medio_turma_ingresso


def calcular_ira_medio_desvio_padrao(df):
    """Compute the weighted standard deviation of MEDIA_FINAL per admission class.

    Uses the same row filter and TOTAL_CARGA_HORARIA weights as
    ``calcular_ira_medio``.

    Returns:
        dict mapping (ANO_INGRESSO, SEMESTRE_INGRESSO) to the weighted
        standard deviation. The dict is also printed before being returned.
    """
    grupos = _grupos_por_turma_ingresso(_linhas_que_afetam_ira(df))
    dp_turma_ingresso = {
        turma: weighted_avg_and_std(
            df.loc[indices, "MEDIA_FINAL"],
            weights=df.loc[indices, "TOTAL_CARGA_HORARIA"],
        )
        for turma, indices in grupos.items()
    }
    print(dp_turma_ingresso)
    return dp_turma_ingresso


def calcular_ira_semestre(df, ano, periodo):
    """Print the admission-class groups for one (ano, periodo).

    Rows are kept when SITUACAO is in ``Situation.SITUATION_AFFECT_IRA``
    and the ANO / PERIODO columns equal the given values.

    Returns:
        None. The grouping is only printed for now.
    """
    selecionadas = df.loc[
        (df["SITUACAO"].isin(Situation.SITUATION_AFFECT_IRA))
        & (df["ANO"] == ano)
        & (df["PERIODO"] == periodo)
    ]
    grupos = _grupos_por_turma_ingresso(selecionadas)
    print(grupos)
    # TODO: compute and return the per-semester IRA (ira_semestre).


# NOTE (original author's remark, translated): "did not understand".
def calcular_ira_semestre_desvio_padrao(media, turma_ingresso, ano, semestre, qtd_semestres):
    """Placeholder: not implemented yet, always returns None."""
    pass
A.D.E.G.A-Relatorios @ 17de5e30
 Subproject commit 17de5e30f1671cfee6001740b0d1b8dcf5a9a804
 #!/usr/bin/env python3
# Replace student ids (GRR), names and birth dates in the exported
# spreadsheets with generated ones, and write the anonymized copies.
import pandas as pd
import numpy as np
from random import choice, randrange, seed
from glob import glob

matriculas_semestrais = glob('1102049907_*.xls')
display_grrs = ('GRR20132413', 'GRR20123145', 'GRR20135427',
                'GRR20132356', 'GRR20073965', 'GRR20142054')

# Fixed seed: the generated names and ids are reproducible across runs.
seed(2315484613)


def _le_nomes(caminho):
    """Return the whitespace-stripped lines of the text file at ``caminho``."""
    with open(caminho) as fi:
        return [linha.strip() for linha in fi]


f = _le_nomes('nomes/f.txt')
m = _le_nomes('nomes/m.txt')
s = _le_nomes('nomes/s.txt')


def gera_nome(sexo):
    """Build a random "first surname surname" name.

    The first name is drawn from ``m`` when ``sexo == 'M'`` and from ``f``
    otherwise; both surnames are drawn from ``s``.
    """
    # Draw order (first name, then two surnames) is kept so the seeded
    # random sequence yields the same names as before.
    primeiro = choice(m) if sexo == 'M' else choice(f)
    return "{} {} {}".format(primeiro, choice(s), choice(s))


def gera_grr(grrs):
    """Return one distinct generated id per entry of ``grrs``, in order.

    Each id keeps characters 3..6 of the source id (presumably the entry
    year, as in 'GRR2013....' -- confirm against the data) and gets a
    random zero-padded 4-digit suffix.
    """
    gerados = set()
    result = []
    for grr in grrs:
        prefixo = grr[3:7]
        e = 'GRR{}{:0>4}'.format(prefixo, randrange(10000))
        # NOTE(review): only 10000 suffixes exist per prefix; with more
        # ids than that sharing a prefix this loop would never finish.
        while e in gerados:
            e = 'GRR{}{:0>4}'.format(prefixo, randrange(10000))
        result.append(e)
        gerados.add(e)
    return result


hist = pd.read_excel('1102059918.xls', 'Sheet1')
matr = pd.read_excel('1102049901.xls', 'Sheet1')
print('historico lines:', hist.shape[0])
print('matricula lines:', matr.shape[0])


def change_dt(s):
    """Return '01/01/<x>' where <x> is the third '/'-separated field of ``s``.

    Presumably ``s`` is a dd/mm/yyyy date, so only the year survives.
    """
    return '01/01/{}'.format(s.split('/')[2])


vhist = hist.MATR_ALUNO.drop_duplicates()
vmatr = matr.MATR_ALUNO.drop_duplicates()
print('historico alunos:', vhist.shape[0])
print('matricula alunos:', vmatr.shape[0])

# Keep only students present in both spreadsheets.
validos = set(vhist) & set(vmatr)
print('validos:', len(validos))

# .copy() so the column assignments below act on independent frames
# instead of on slices of the originals (avoids SettingWithCopy issues).
hist = hist[hist.MATR_ALUNO.isin(validos)].copy()
matr = matr[matr.MATR_ALUNO.isin(validos)].copy()
print('historico lines:', hist.shape[0])

# mapper keeps the real id / name next to the generated ones.
mapper = matr[['MATR_ALUNO', 'NOME_PESSOA', 'DT_NASCIMENTO']].copy()
mapper['FAKE_GRR'] = gera_grr(mapper.MATR_ALUNO)
mapper['FAKE_NOME'] = matr.SEXO.apply(gera_nome)

matr['MATR_ALUNO'] = mapper['FAKE_GRR']
matr['NOME_PESSOA'] = mapper['FAKE_NOME']
matr['DT_NASCIMENTO'] = matr.DT_NASCIMENTO.apply(change_dt)

m_grr = dict(zip(mapper.MATR_ALUNO, mapper.FAKE_GRR))
m_nome = dict(zip(mapper.MATR_ALUNO, mapper.FAKE_NOME))

# NOME_PESSOA must be mapped first: it is looked up by the real id,
# which the next line overwrites.
hist['NOME_PESSOA'] = hist.MATR_ALUNO.map(m_nome)
hist['MATR_ALUNO'] = hist.MATR_ALUNO.map(m_grr)

c = ['MATR_ALUNO', 'NOME_PESSOA', 'FAKE_GRR']
for _, v in mapper.loc[mapper.MATR_ALUNO.isin(display_grrs), c].iterrows():
    print('{:>10}\t{:38}\t{:>10}'.format(*v))

# sheet_name is passed by keyword: newer pandas deprecates positional
# arguments to to_excel other than the target path.
hist.sort_values('MATR_ALUNO').to_excel('historico.xls', sheet_name='Sheet1', index=False)
matr.sort_values('MATR_ALUNO').to_excel('matricula.xls', sheet_name='Sheet1', index=False)
mapper.to_excel('mapper.xls', sheet_name='Sheet1')

# Apply the same id / name mapping to every per-semester enrolment report.
for rel in matriculas_semestrais:
    r = pd.read_excel(rel, 'Sheet1')
    r = r[r.MATR_ALUNO.isin(validos)].copy()
    r['NOME_ALUNO'] = r.MATR_ALUNO.map(m_nome)
    r['MATR_ALUNO'] = r.MATR_ALUNO.map(m_grr)
    r['DT_NASCIMENTO'] = r.DT_NASCIMENTO.apply(change_dt)
    r.sort_values('MATR_ALUNO').to_excel('_' + rel, sheet_name='Sheet1', index=False)
 ... ... @@ -6,15 +6,20 @@ from datetime import timedelta from analysis.degree_analysis import * from utils.situations import * from analysis.course_analysis import * from analysis.admission_analysis import * def main(): start_time = time.clock() start_time_exec = time.time() dataframe = load_dataframes(os.getcwd() + '/' + 'base') build_cache(dataframe) #build_cache(dataframe) cpu_time = timedelta(seconds=round(time.clock() - start_time)) analises_disciplinas(dataframe) run_time = timedelta(seconds=round(time.time() - start_time_exec)) #calcular_ira_medio(dataframe) #calcular_ira_medio_desvio_padrao(dataframe) calcular_ira_semestre(dataframe, 2017, 2) run_time = timedelta(seconds=round(time.time() - start_time_exec)) print("--- Tempo de CPU: {} ---".format(cpu_time)) print("--- Tempo total: {} ---".format(run_time)) ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!