Commit 1be02748 authored by Your Name

WIP

parent 12b12b14
Pipeline #14877 failed with stage
in 18 seconds
import numpy as np
import math
from utils.situations import *
# "Current" academic year (ANO_ATUAL) and semester (SEMESTRE_ATUAL).
# NOTE(review): neither constant is referenced by the functions visible in
# this section — confirm whether other modules rely on them.
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
# ++++++ Helper functions +++++++
def weighted_avg_and_std(values, weights):
    """Return the weighted standard deviation of *values*.

    Despite the name, only the standard deviation is returned; the weighted
    mean is an intermediate value and is not exposed to the caller.

    Args:
        values: array-like of observations.
        weights: array-like of weights, one per observation.

    Returns:
        The square root of the weighted variance, as a Python float.
    """
    mean = np.average(values, weights=weights)
    # Weighted mean of the squared deviations == weighted (population) variance.
    squared_deviations = (values - mean) ** 2
    return math.sqrt(np.average(squared_deviations, weights=weights))
def listagem_turma_ingresso(df):
    """Print each admission cohort followed by its distinct admission forms.

    Rows are grouped by (ANO_INGRESSO, SEMESTRE_INGRESSO). For every group the
    key is printed, then a blank separator, then the de-duplicated values of
    the FORMA_INGRESSO column for the rows of that group.

    Args:
        df: DataFrame with the columns ANO_INGRESSO, SEMESTRE_INGRESSO and
            FORMA_INGRESSO.

    Returns:
        None. The function only writes to standard output.
    """
    cohorts = df.groupby(["ANO_INGRESSO", "SEMESTRE_INGRESSO"]).groups
    for cohort, row_labels in cohorts.items():
        print(cohort)
        print("\n\n")
        print(df.loc[row_labels, "FORMA_INGRESSO"].drop_duplicates())
# +++++++++++++++++++++++++++++++++++
def calcular_ira_medio(df):
    """Compute the weighted mean of MEDIA_FINAL for each admission cohort.

    Only rows whose SITUACAO is listed in Situation.SITUATION_AFFECT_IRA are
    considered. Rows are grouped by (ANO_INGRESSO, SEMESTRE_INGRESSO) and, for
    each group, MEDIA_FINAL is averaged using TOTAL_CARGA_HORARIA as weights.

    Args:
        df: DataFrame with the columns SITUACAO, ANO_INGRESSO,
            SEMESTRE_INGRESSO, MEDIA_FINAL and TOTAL_CARGA_HORARIA.

    Returns:
        dict mapping each (ANO_INGRESSO, SEMESTRE_INGRESSO) key to its
        weighted mean. The dict is also printed before being returned.
    """
    affects_ira = df["SITUACAO"].isin(Situation.SITUATION_AFFECT_IRA)
    cohorts = df.loc[affects_ira].groupby(["ANO_INGRESSO", "SEMESTRE_INGRESSO"]).groups
    # The group values are row labels, so they can be looked up on the
    # original frame.
    mean_by_cohort = {
        cohort: np.average(
            df.loc[rows, "MEDIA_FINAL"],
            weights=df.loc[rows, "TOTAL_CARGA_HORARIA"],
        )
        for cohort, rows in cohorts.items()
    }
    print(mean_by_cohort)
    return mean_by_cohort
def calcular_ira_medio_desvio_padrao(df):
    """Compute the weighted standard deviation of MEDIA_FINAL per cohort.

    Only rows whose SITUACAO is listed in Situation.SITUATION_AFFECT_IRA are
    considered. Rows are grouped by (ANO_INGRESSO, SEMESTRE_INGRESSO) and each
    group is passed to weighted_avg_and_std with TOTAL_CARGA_HORARIA as the
    weights.

    Args:
        df: DataFrame with the columns SITUACAO, ANO_INGRESSO,
            SEMESTRE_INGRESSO, MEDIA_FINAL and TOTAL_CARGA_HORARIA.

    Returns:
        dict mapping each (ANO_INGRESSO, SEMESTRE_INGRESSO) key to its
        weighted standard deviation. The dict is also printed before being
        returned.
    """
    affects_ira = df["SITUACAO"].isin(Situation.SITUATION_AFFECT_IRA)
    cohorts = df.loc[affects_ira].groupby(["ANO_INGRESSO", "SEMESTRE_INGRESSO"]).groups
    std_by_cohort = {}
    for cohort, rows in cohorts.items():
        grades = df.loc[rows, "MEDIA_FINAL"]
        workloads = df.loc[rows, "TOTAL_CARGA_HORARIA"]
        std_by_cohort[cohort] = weighted_avg_and_std(grades, weights=workloads)
    print(std_by_cohort)
    return std_by_cohort
def calcular_ira_semestre(df, ano, periodo):
    """Print the admission-cohort groups for one year/period (work in progress).

    Selects the rows whose SITUACAO is listed in
    Situation.SITUATION_AFFECT_IRA and whose ANO and PERIODO equal the given
    values, groups them by (ANO_INGRESSO, SEMESTRE_INGRESSO) and prints the
    resulting group mapping.

    Args:
        df: DataFrame with the columns SITUACAO, ANO, PERIODO, ANO_INGRESSO
            and SEMESTRE_INGRESSO.
        ano: value compared against the ANO column.
        periodo: value compared against the PERIODO column.

    Returns:
        None. No per-semester value is computed or returned yet.
    """
    selected = (
        df["SITUACAO"].isin(Situation.SITUATION_AFFECT_IRA)
        & (df["ANO"] == ano)
        & (df["PERIODO"] == periodo)
    )
    cohorts = df.loc[selected].groupby(["ANO_INGRESSO", "SEMESTRE_INGRESSO"]).groups
    print(cohorts)
    # TODO: compute the per-semester result and return it.
# NOTE: marked by the original author as "not understood"; still a stub.
def calcular_ira_semestre_desvio_padrao(media, turma_ingresso, ano, semestre, qtd_semestres):
    """Placeholder with no implementation.

    All arguments are currently ignored.

    Returns:
        None.
    """
    return None
A.D.E.G.A-Relatorios @ 17de5e30
Subproject commit 17de5e30f1671cfee6001740b0d1b8dcf5a9a804
#!/usr/bin/env python3
import pandas as pd
import numpy as np
from random import choice, randrange, seed
from glob import glob
# Spreadsheets in the working directory matching the per-semester report
# pattern; they are processed at the end of the script.
matriculas_semestrais = glob('1102049907_*.xls')

# Registrations whose real -> fake mapping is echoed to stdout.
display_grrs = (
    'GRR20132413',
    'GRR20123145',
    'GRR20135427',
    'GRR20132356',
    'GRR20073965',
    'GRR20142054',
)

# Fixed seed so repeated runs produce the same fake identities.
seed(2315484613)

# Name pools read by gera_nome: one entry per line, surrounding whitespace
# removed.
with open('nomes/f.txt') as fi:
    f = [linha.strip() for linha in fi]
with open('nomes/m.txt') as fi:
    m = [linha.strip() for linha in fi]
with open('nomes/s.txt') as fi:
    s = [linha.strip() for linha in fi]
def gera_nome(sexo):
    """Build a random three-part fake name.

    The first part is drawn from the module-level list ``m`` when *sexo* is
    'M' and from ``f`` otherwise; the second and third parts are drawn
    independently from ``s``.

    Args:
        sexo: value compared against 'M' to choose the first-name pool.

    Returns:
        The three chosen parts joined by single spaces.
    """
    # Draw order (first, second, third) is kept so seeded runs stay identical.
    pool = m if sexo == 'M' else f
    first = choice(pool)
    second = choice(s)
    third = choice(s)
    return "{} {} {}".format(first, second, third)
def gera_grr(grrs):
    """Generate one unique fake registration string per input registration.

    Each fake keeps characters 3..6 of the original (the four characters
    after the first three) and replaces the rest with a random number
    below 10000, left-padded with zeros to four digits.

    Args:
        grrs: iterable of registration strings.

    Returns:
        list of fake registrations, in input order, with no repeated values.
    """
    taken = set()
    fakes = []
    for original in grrs:
        prefix = original[3:7]
        # Redraw until the candidate has not been handed out yet.
        while True:
            candidate = 'GRR{}{:0>4}'.format(prefix, randrange(10000))
            if candidate not in taken:
                break
        taken.add(candidate)
        fakes.append(candidate)
    return fakes
# Load the two source spreadsheets (sheet 'Sheet1' of each) and report how
# many rows each one has.
hist = pd.read_excel('1102059918.xls', 'Sheet1')
matr = pd.read_excel('1102049901.xls', 'Sheet1')
total_hist = len(hist)
total_matr = len(matr)
print('historico lines:', total_hist)
print('matricula lines:', total_matr)
def change_dt(s):
    """Replace the first two '/'-separated fields of *s* with '01/01'.

    Args:
        s: string with at least three '/'-separated fields.

    Returns:
        '01/01/' followed by the third field of *s*.

    Raises:
        IndexError: if *s* has fewer than three '/'-separated fields.
    """
    third_field = s.split('/')[2]
    return '01/01/' + third_field
# Keep only students present in BOTH the history and the enrollment report.
vhist = hist.MATR_ALUNO.drop_duplicates()
vmatr = matr.MATR_ALUNO.drop_duplicates()
print('historico alunos:', vhist.shape[0])
print('matricula alunos:', vmatr.shape[0])
validos = set(vhist) & set(vmatr)
print('validos:',len(validos))
# .copy() makes the filtered frames independent objects, so the column
# assignments below are not chained assignments on a slice of the originals.
hist = hist[hist.MATR_ALUNO.isin(validos)].copy()
matr = matr[matr.MATR_ALUNO.isin(validos)].copy()
print('historico lines:', hist.shape[0])

# Build the real -> fake mapping with exactly one row per student. Generating
# it per enrollment row would give a student with several rows several fake
# identities, and matricula.xls would then disagree with historico.xls.
alunos_unicos = matr.drop_duplicates('MATR_ALUNO')
mapper = alunos_unicos[['MATR_ALUNO', 'NOME_PESSOA', 'DT_NASCIMENTO']].copy()
mapper['FAKE_GRR'] = gera_grr(mapper.MATR_ALUNO)
mapper['FAKE_NOME'] = alunos_unicos.SEXO.apply(gera_nome)
m_grr = dict(zip(mapper.MATR_ALUNO, mapper.FAKE_GRR))
m_nome = dict(zip(mapper.MATR_ALUNO, mapper.FAKE_NOME))

# Apply the same mapping to both reports. The name is mapped first because
# the lookup key (the real MATR_ALUNO) is overwritten on the following line.
matr['NOME_PESSOA'] = matr.MATR_ALUNO.map(m_nome)
matr['MATR_ALUNO'] = matr.MATR_ALUNO.map(m_grr)
matr['DT_NASCIMENTO'] = matr.DT_NASCIMENTO.apply(change_dt)
hist['NOME_PESSOA'] = hist.MATR_ALUNO.map(m_nome)
hist['MATR_ALUNO'] = hist.MATR_ALUNO.map(m_grr)

# Echo the mapping of the registrations listed in display_grrs.
c = ['MATR_ALUNO', 'NOME_PESSOA', 'FAKE_GRR']
for _, v in mapper.loc[mapper.MATR_ALUNO.isin(display_grrs), c].iterrows():
    print('{:>10}\t{:38}\t{:>10}'.format(*list(v)))

# sheet_name is passed by keyword: passing it positionally is deprecated.
# NOTE(review): writing the legacy .xls format depends on an engine that
# recent pandas releases no longer ship — confirm the target pandas version.
hist.sort_values('MATR_ALUNO').to_excel('historico.xls', sheet_name='Sheet1', index=False)
matr.sort_values('MATR_ALUNO').to_excel('matricula.xls', sheet_name='Sheet1', index=False)
mapper.to_excel('mapper.xls', sheet_name='Sheet1')
# Anonymise every per-semester report with the same real -> fake mapping and
# write the result next to the source file, prefixed with '_'.
for rel in matriculas_semestrais:
    r = pd.read_excel(rel, 'Sheet1')
    # .copy() so the column assignments below act on an independent frame
    # instead of a slice of the freshly loaded one (chained assignment).
    r = r[r.MATR_ALUNO.isin(validos)].copy()
    # The name is mapped first: the lookup key (the real MATR_ALUNO) is
    # overwritten on the following line.
    r['NOME_ALUNO'] = r.MATR_ALUNO.map(m_nome)
    r['MATR_ALUNO'] = r.MATR_ALUNO.map(m_grr)
    r['DT_NASCIMENTO'] = r.DT_NASCIMENTO.apply(change_dt)
    # sheet_name is passed by keyword: passing it positionally is deprecated.
    r.sort_values('MATR_ALUNO').to_excel('_'+rel, sheet_name='Sheet1', index=False)
......@@ -6,15 +6,20 @@ from datetime import timedelta
from analysis.degree_analysis import *
from utils.situations import *
from analysis.course_analysis import *
from analysis.admission_analysis import *
def main():
start_time = time.clock()
start_time_exec = time.time()
dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(dataframe)
#build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time))
analises_disciplinas(dataframe)
run_time = timedelta(seconds=round(time.time() - start_time_exec))
#calcular_ira_medio(dataframe)
#calcular_ira_medio_desvio_padrao(dataframe)
calcular_ira_semestre(dataframe, 2017, 2)
run_time = timedelta(seconds=round(time.time() - start_time_exec))
print("--- Tempo de CPU: {} ---".format(cpu_time))
print("--- Tempo total: {} ---".format(run_time))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment