Commit ff8b2049 authored by Odair M.'s avatar Odair M. Committed by Bruno Meyer
Browse files

Closes #151 : analises de taxas da turma ingresso

parent 711ae614
......@@ -5,7 +5,7 @@
{% block content %}
<div class="row">
<div class="col-md-12">
<h1>Informações Gerais</h1>
<h1>Informações Gerais - Turma Ingresso</h1>
<table class="table">
<tr>
<td>Ano/Semestre</td>
......@@ -13,23 +13,23 @@
</tr>
<tr>
<td>Quantidade de alunos</td>
<td> NO </td>
<td> {{admission_info.quantidade_alunos}} </td>
</tr>
<tr>
<td>Ira Médio</td>
<td>IRA Médio</td>
<td> {{admission_info.ira|floatformat:2}} &plusmn; {{admission_info.std|floatformat:2}} </td>
</tr>
<tr>
<td>Tempo médio de formatura em anos</td>
<td> NO </td>
<td>Tempo médio de formatura em anos <b>atual</b></td>
<td> {{admission_info.formatura_media}} </td>
</tr>
<tr>
<td>Indice de Reprovação</td>
<td> NO </td>
<td>Índice de Reprovação</td>
<td> {{admission_info.taxa_reprovacao|floatformat:2}} </td>
</tr>
<tr>
<td>Índice de Evasão</td>
<td> NO </td>
<td> {{admission_info.taxa_evasao|floatformat:2}} </td>
</tr>
<tr>
<td>Quantidade de Evadidos</td>
......
......@@ -29,7 +29,7 @@
</a>
</td>
<td>{{ti.ira|floatformat:2}} &plusmn; {{ti.std|floatformat:2}}</td>
<td> NO </td>
<td> {{ti.quantidade_alunos}} </td>
<td> {{ti.alunos_evadidos}} </td>
<td> {{ti.formatura}} </td>
<td> {{ti.ativos}} </td>
......
......@@ -21,7 +21,7 @@ def detail(request, submission_id, ano, semestre):
if(admission["ano"] == ano and admission["semestre"] == semestre):
admission_info = admission
break
admission_detail = get_admission_detail(
request.session,
degree,
......@@ -32,6 +32,9 @@ def detail(request, submission_id, ano, semestre):
for x in admission_detail:
admission_info[x] = admission_detail[x]
if(admission_info["formatura_media"] == -1):
admission_info["formatura_media"] = "Não há alunos formados nesta turma"
return render(request, 'admission/detail.html',{
"degree": degree,
"admission_info": admission_info,
......
......@@ -3,6 +3,8 @@ import numpy as np
from script.utils.situations import Situation as sit
from script.utils.situations import EvasionForm as ef
from script.utils.situations import *
from script.utils.situations import Situation as sit
from script.utils.situations import EvasionForm as ef
from script.analysis.student_analysis import StudentAnalysis
from collections import defaultdict
......@@ -16,41 +18,121 @@ class Admission(object):
__dataframes = {}
__groupbys = {}
analysis = {}
__counts = False # se as quantidades já foram calculadas
def __init__(self, df):
self.__dataframes["df_original"] = df
self.__dataframes["df_filted"] = df.drop_duplicates(["MATR_ALUNO"])
self.__groupbys["groupby_original"] = df.groupby(['ANO_INGRESSO_y', 'SEMESTRE_INGRESSO'])
self.__groupbys["groupby_filted"] = self.__dataframes["df_filted"].groupby(['ANO_INGRESSO_y', 'SEMESTRE_INGRESSO'])
#print(self.__dataframes["df_filted"])
def count_evasion_form(self, g, evasion_form):
    """
    Count, per group, the rows whose ``FORMA_EVASAO`` equals ``evasion_form``.

    Parameters
    ----------
    g : pandas GroupBy
        Groups whose frames expose a ``FORMA_EVASAO`` column.
    evasion_form
        Value compared against ``FORMA_EVASAO``.

    Returns
    -------
    The result of ``g.apply``: one count per group.
    """
    def matching_rows(group):
        selected = group.loc[group.FORMA_EVASAO == evasion_form]
        return len(selected)

    return g.apply(matching_rows)
def counts(self):
    """
    Compute the following quantities for each admission class:

    * number of students in the class (``qtd_alunos_ingresso``)
    * number of active students (``qtd_ativos``)
    * number of students who abandoned the degree (``qtd_abandono``)
    * number of students who graduated (``qtd_formatura``)
    * number of students who are no longer active (``alunos_evadidos``)
    * number of students who left for any reason other than graduation
      or abandonment (``outras_formas_evasao``)

    Every result is stored in ``self.analysis`` and the instance is
    flagged so the rate methods do not recompute the counts.
    """
    grouped = self.__groupbys["groupby_filted"]

    qtd_alunos_ingresso = grouped.apply(lambda x: x.shape[0])
    self.analysis["qtd_alunos_ingresso"] = qtd_alunos_ingresso

    # Each evasion form appears exactly once: listing "qtd_ativos" twice
    # would only repeat the same scan over every group.
    evasions = [
        ("qtd_ativos", ef.EF_ATIVO),
        ("qtd_abandono", ef.EF_ABANDONO),
        ("qtd_formatura", ef.EF_FORMATURA),
    ]
    for key, evasion_form in evasions:
        self.analysis[key] = self.count_evasion_form(grouped, evasion_form)

    # "evadidos" here means every student that is not active any more
    self.analysis["alunos_evadidos"] = (
        qtd_alunos_ingresso - self.analysis["qtd_ativos"]
    )
    self.analysis["outras_formas_evasao"] = (
        self.analysis["alunos_evadidos"]
        - self.analysis["qtd_formatura"]
        - self.analysis["qtd_abandono"]
    )
    self.__counts = True
def taxa_evasao(self):
""" Calcula a taxa de evasão de cada turma ingresso """
# precisa das quantidades para calcular as taxas
if not self.__counts:
self.counts()
# calcula a taxa de evasão da turma ingresso
qtd_alunos = self.analysis["qtd_alunos_ingresso"]
qtd_evasao = self.analysis["alunos_evadidos"]
taxa_evasao = qtd_evasao / qtd_alunos
taxa_evasao[np.isnan(taxa_evasao)] = 0.0
taxa_evasao[np.isinf(taxa_evasao)] = 0.0
self.analysis["taxa_evasao"] = taxa_evasao
def formatura_medio(self):
    """
    Compute the average time, in years, each admission class took to graduate.

    Only students whose ``FORMA_EVASAO`` equals ``EF_FORMATURA`` are
    considered. Semester labels are converted to a fraction of a year
    ("1" and "Anual" -> 0.0, "2" -> 0.5) and added to the year, so the
    time to graduate is ``(ANO_EVASAO + offset) - (ANO_INGRESSO_y + offset)``.

    The result is stored in ``self.analysis["media_formatura"]``, indexed
    by the original ``(ANO_INGRESSO_y, SEMESTRE_INGRESSO)`` labels. Classes
    without graduates are simply absent from the index.
    """
    df = self.__dataframes["df_filted"]
    graduates = df.loc[df.FORMA_EVASAO == ef.EF_FORMATURA]

    semester_offset = {"1": 0.0, "2": 0.5, "Anual": 0.0}
    # An admission semester without a known offset cannot be placed in
    # time, so those rows are left out of the averages.
    graduates = graduates.loc[
        graduates["SEMESTRE_INGRESSO"].isin(list(semester_offset))
    ]

    # The offsets are computed as separate Series: the admission columns
    # keep their original labels, so "Anual" classes are not folded into
    # semester "1" and the index needs no renaming afterwards.
    start = (
        graduates["ANO_INGRESSO_y"].astype(float)
        + graduates["SEMESTRE_INGRESSO"].map(semester_offset).astype(float)
    )
    end = (
        graduates["ANO_EVASAO"].astype(float)
        + graduates["SEMESTRE_EVASAO"].map(semester_offset).astype(float)
    )

    media_formatura = (
        graduates.assign(anos_ate_formatura=end - start)
        .groupby(["ANO_INGRESSO_y", "SEMESTRE_INGRESSO"])["anos_ate_formatura"]
        .mean()
    )
    self.analysis["media_formatura"] = media_formatura
def taxa_reprovacao(self):
    """
    Compute the failure rate of each admission class.

    For every class the rate is the number of rows whose ``SITUACAO`` is
    in ``SITUATION_FAIL`` divided by the number of rows whose ``SITUACAO``
    is in ``SITUATION_COURSED``. A class with no coursed rows gets 0.0
    instead of raising ``ZeroDivisionError``. The result is stored in
    ``self.analysis["taxa_reprovacao"]``.
    """
    if not self.__counts:
        self.counts()

    admission_g = self.__groupbys["groupby_original"]

    def failure_rate(group):
        coursed = int(group.SITUACAO.isin(sit.SITUATION_COURSED).sum())
        if coursed == 0:
            # nothing was coursed, so there is nothing to fail
            return 0.0
        failed = int(group.SITUACAO.isin(sit.SITUATION_FAIL).sum())
        return failed / coursed

    self.analysis["taxa_reprovacao"] = admission_g.apply(failure_rate)
def ira_medio(self):
    """
    Compute the mean IRA of each admission class.

    Rows are first restricted to the situations listed in
    ``SITUATION_AFFECT_IRA``. Each class is then grouped by student; a
    student's IRA is ``sum(MEDIA_FINAL * TOTAL_CARGA_HORARIA) /
    (sum(TOTAL_CARGA_HORARIA) * 100)`` and the class value is the mean of
    its students' IRAs. The result is stored in
    ``self.analysis["ira_medio"]``.
    """
    original = self.__dataframes["df_original"]
    affects_ira = original['SITUACAO'].isin(sit.SITUATION_AFFECT_IRA)
    submission_groupby = original[affects_ira].groupby(
        ["ANO_INGRESSO_y", "SEMESTRE_INGRESSO"]
    )

    def student_ira(rows):
        # workload-weighted average of the final grades, scaled by 100
        weighted_sum = (rows.MEDIA_FINAL * rows.TOTAL_CARGA_HORARIA).sum()
        return weighted_sum / (rows.TOTAL_CARGA_HORARIA.sum() * 100)

    def class_ira(rows):
        # inner apply iterates over the students of one admission class
        return rows.groupby(["MATR_ALUNO"]).apply(student_ira).mean()

    # outer apply iterates over the admission classes
    self.analysis["ira_medio"] = submission_groupby.apply(class_ira)
def admission_list(self):
self.analysis["admission_list"] = list(self.__groupbys["groupby_filted"].groups.keys())
def build_analysis(self):
self.counts()
self.admission_list()
self.ira_medio()
self.taxa_evasao()
self.taxa_reprovacao()
self.formatura_medio()
def build_cache(self):
admissions = []
formatura_medio = self.analysis["media_formatura"]
for i in self.analysis["admission_list"]:
admission_dict = {}
# This will create an directory when build_cache create the json
......@@ -63,14 +145,20 @@ class Admission(object):
admission_dict["formatura"] = int(self.analysis["qtd_formatura"][i])
admission_dict["alunos_evadidos"] = int(self.analysis["alunos_evadidos"][i])
admission_dict["outras_formas_evasao"] = int(self.analysis["outras_formas_evasao"][i])
admission_dict["formatura_media"] = float(formatura_medio[i]) if i in formatura_medio.index else -1
admission_dict["quantidade_alunos"] = int(self.analysis["qtd_alunos_ingresso"][i])
admission_dict["ira_medio"] = float(self.analysis["ira_medio"][i])
admission_dict["taxa_evasao"] = float(self.analysis["taxa_evasao"][i])
admission_dict["taxa_reprovacao"] = float(self.analysis["taxa_reprovacao"][i])
admissions.append(admission_dict)
return admissions
def build_cache_evasion_count(self):
admission_dict = {}
admission_dict["ano"] = {}
admission_dict["semestre"] = {}
admission_dict["quantidade_alunos"] = {}
admission_dict["abandono"] = {}
admission_dict["ativos"] = {}
admission_dict["formatura"] = {}
......@@ -79,6 +167,7 @@ class Admission(object):
for i in self.analysis["admission_list"]:
admission_dict["ano"][i] = i[0]
admission_dict["semestre"][i] = i[1]
admission_dict["quantidade_alunos"][i] = int(self.analysis["qtd_alunos_ingresso"][i])
admission_dict["abandono"][i] = int(self.analysis["qtd_abandono"][i])
admission_dict["ativos"][i] = int(self.analysis["qtd_ativos"][i])
admission_dict["formatura"][i] = int(self.analysis["qtd_formatura"][i])
......@@ -91,15 +180,15 @@ def admission_class_ira_per_semester(df):
"""
Calculate the average IRA in every semester of the admission classes.
This function group the dataframe by admission classes.
Then group each class by semesters.
And finally group each semester by student.
This function group the dataframe by admission classes.
Then group each class by semesters.
And finally group each semester by student.
Calculate each student's IRA and then the average IRA for the class.
Parameters
----------
df : DataFrame
Returns
-------
dict of {list:dict}
......@@ -112,7 +201,7 @@ def admission_class_ira_per_semester(df):
Examples
--------
{('2005', '1'): {(2012, '1o. Semestre'): 0.485,
{('2005', '1'): {(2012, '1o. Semestre'): 0.485,
(2007, '1o. Semestre'): 0.6186531973412296, ...} ...}
"""
......@@ -120,16 +209,16 @@ def admission_class_ira_per_semester(df):
df = df[ df['TOTAL_CARGA_HORARIA'] != 0]
admission_grouped = df.groupby(['ANO_INGRESSO_y','SEMESTRE_INGRESSO'])
dict_admission = {}
for admission in admission_grouped:
#admission_grouped is a tuple of tuples, each tuple contains 0-tuple year/semester & 1-dataframe
#admission_grouped is a tuple of tuples, each tuple contains 0-tuple year/semester & 1-dataframe
dict_ira_semester = {}
semester_grouped = admission[1].groupby(['ANO','PERIODO'])
for semester in semester_grouped:
student_grouped = semester[1].groupby('ID_ALUNO')
ira_class = []
# Compute all individual IRA from an class
for student in student_grouped:
#TODO: Verify if this can be calculated without groupby
......@@ -137,7 +226,7 @@ def admission_class_ira_per_semester(df):
(student[1].MEDIA_FINAL*student[1].TOTAL_CARGA_HORARIA).sum() )/(100*student[1].TOTAL_CARGA_HORARIA.sum()
)
ira_class.append(ira_individual)
# Compute the mean and standard variation from an class
# semester[0] represents a semester/year key
dict_ira_semester.update({
......@@ -159,15 +248,15 @@ def iras_alunos_turmas_ingressos(df):
"SEMESTRE_INGRESSO",
"MATR_ALUNO"]
).groups
# Cria um dicionario cujas chaves são GRR
# e valor são tuplas (ano_ingresso,semestre_ingresso)
ano_semestre_do_grr = {}
for ti in turmas_ingresso_grr:
ano_semestre_do_grr[ ti[2] ] = (ti[0],ti[1])
resultados = defaultdict(list)
for grr in iras:
semestre_ano = ano_semestre_do_grr[grr]
......@@ -184,7 +273,7 @@ def media_ira_turma_ingresso(df):
for r in iras_alunos_por_turma:
aux = np.array(iras_alunos_por_turma[r])
resultados[r] = np.mean(aux)
return resultados
def desvio_padrao_turma_ingresso(df):
......
......@@ -22,11 +22,11 @@ except NameError:
to_unicode = str
student_analysis = None
def build_cache(dataframe,path):
# os.chdir("../src")
def build_cache(dataframe, path):
# os.chdir("../src")
ensure_path_exists(path)
student_analysis = StudentAnalysis(dataframe)
......@@ -34,25 +34,27 @@ def build_cache(dataframe,path):
for cod, df in dataframe.groupby('COD_CURSO'):
path = path + '/'
generate_degree_data(path, df)
generate_student_data(path+'students/',df,student_analysis)
generate_admission_data(path+'admissions/',df)
generate_course_data(path+'courses/' ,dataframe)
generate_cepe_data(path+'/others/',df)
generate_student_data(path + 'students/', df, student_analysis)
generate_admission_data(path + 'admissions/', df)
generate_course_data(path + 'courses/', dataframe)
generate_cepe_data(path + '/others/', df)
def generate_cepe_data(path, df):
    """
    Run the three failure analyses on ``df`` and save them together as
    ``cepe9615.json`` under ``path``.

    ``path`` is concatenated directly with the file name, so it is
    expected to already end with a separator.
    """
    cepe_dict = {
        "student_fails_course": student_fails_course(df),
        "fails_semester": fails_semester(df),
        "fails_by_freq": fails_by_freq(df),
    }
    save_json(path + "cepe9615.json", cepe_dict)
def generate_degree_data(path, dataframe):
    """
    Make sure the degree output directories exist and write the degree JSON.

    Parameters
    ----------
    path : str
        Output directory; concatenated directly with ``'students'``, so it
        is expected to end with a separator.
    dataframe : DataFrame
        Data forwarded unchanged to ``build_degree_json``.
    """
    ensure_path_exists(path)
    ensure_path_exists(path + 'students')
    build_degree_json(path, dataframe)
def historico(dataframe):
res = []
......@@ -65,7 +67,8 @@ def historico(dataframe):
def process_semestre(per, df):
ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean()
ira = df[df.SITUACAO.isin(
Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean()
completas = df[df.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0]
tentativas = df[df.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0]
......@@ -74,12 +77,11 @@ def process_semestre(per, df):
'ira': ira,
'completas': completas,
'tentativas': tentativas,
'aprovacao': completas/tentativas if tentativas else 0,
'ira_por_quantidade_disciplinas': ira/tentativas if tentativas else 0
'aprovacao': completas / tentativas if tentativas else 0,
'ira_por_quantidade_disciplinas': ira / tentativas if tentativas else 0
}
def generate_student_data(path, dataframe, student_analysis):
student_data = defaultdict(dict)
all_grrs = list(dataframe["MATR_ALUNO"].drop_duplicates())
......@@ -92,37 +94,37 @@ def generate_student_data(path, dataframe, student_analysis):
# that this analysis will have in json
(student_analysis.posicao_turmaIngresso_semestral(),
"posicao_turmaIngresso_semestral"),
"posicao_turmaIngresso_semestral"),
(student_analysis.periodo_real(),
"periodo_real"),
"periodo_real"),
(student_analysis.periodo_pretendido(),
"periodo_pretendido"),
"periodo_pretendido"),
(student_analysis.ira_semestral(),
"ira_semestral"),
"ira_semestral"),
(student_analysis.ira_por_quantidade_disciplinas(),
"ira_por_quantidade_disciplinas"),
"ira_por_quantidade_disciplinas"),
(student_analysis.indice_aprovacao_semestral(),
"indice_aprovacao_semestral"),
"indice_aprovacao_semestral"),
(student_analysis.aluno_turmas(),
"aluno_turmas"),
"aluno_turmas"),
(student_analysis.taxa_aprovacao(),
"taxa_aprovacao"),
"taxa_aprovacao"),
(student_analysis.student_info(),
"student"),
"student"),
]
for x in student_data:
for a in analysis: # Use this to verify
student_data[x][a[1]] = a[0][x] # null fields in analysis
save_json(path+x+".json", student_data[x])
save_json(path + x + ".json", student_data[x])
files_list = [
EvasionForm.EF_ABANDONO,
......@@ -139,16 +141,15 @@ def generate_student_data(path, dataframe, student_analysis):
if(fl in list_situations):
list_content = list_situations[fl]
save_json(path+"list/"+list_name+".json", list_content)
save_json(path + "list/" + list_name + ".json", list_content)
# Falta verificar se alguem nao recebeu algumas analises
#Falta verificar se alguem nao recebeu algumas analises
def generate_student_list(path):
    """Placeholder: not implemented yet, does nothing and returns None."""
    return None
def generate_admission_data(path, df):
listagem = []
......@@ -156,9 +157,8 @@ def generate_admission_data(path, df):
a.build_analysis()
admissions = a.build_cache()
for i in admissions:
save_json(path+i["ano"]+"/"+i["semestre"]+".json", i)
save_json(path + i["ano"] + "/" + i["semestre"] + ".json", i)
evasion_count = a.build_cache_evasion_count()
analises = [
......@@ -167,6 +167,7 @@ def generate_admission_data(path, df):
("ira_per_semester", admission_class_ira_per_semester(df)),
("evasion_per_semester", evasion_per_semester(df)),
("students_per_semester", students_per_semester(df)),
("quantidade_alunos", evasion_count["quantidade_alunos"]),
("abandono", evasion_count["abandono"]),
("ativos", evasion_count["ativos"]),
("formatura", evasion_count["formatura"]),
......@@ -178,8 +179,8 @@ def generate_admission_data(path, df):
for a in analises:
for x in a[1]:
valor = a[1][x]
x = ( str(x[0]), str(x[1]))
turmas[x][ a[0] ] = valor
x = (str(x[0]), str(x[1]))
turmas[x][a[0]] = valor
listagem = []
......@@ -194,19 +195,18 @@ def generate_admission_data(path, df):
listagem.append(resumo_turma)
save_json(path + "lista_turma_ingresso.json", listagem)
save_json(path+"lista_turma_ingresso.json", listagem)
def generate_admission_list(path, df):
    """Placeholder: not implemented yet, does nothing and returns None."""
    return None
def generate_course_data(path, df):
    """
    Build the course analyses for ``df`` and save them as JSON.

    Writes the general listing to ``disciplinas.json`` and then one file
    per course, named after its ``disciplina_codigo``. ``path`` is
    concatenated directly with the file names.
    """
    course = Course(df)
    course.build_analysis()

    save_json(path + "disciplinas.json", course.build_general_course())

    for entry in course.build_course():
        save_json(path + entry["disciplina_codigo"] + ".json", entry)
......@@ -26,11 +26,12 @@ def analyze(submission, debug=True):
print("--- Tempo de CPU: {} ---".format(cpu_time))
print("--- Tempo total: {} ---".format(run_time))
except:
except Exception as e:
if(debug):
print("Error on submission analysis:",ValueError)
print("Error on submission analysis:",e)
submission.set_fail(round(time.clock() - start_time))
submission.set_fail(round(time.clock() - start_time), error_message=str(e))
......
......@@ -37,6 +37,7 @@ class Submission(models.Model):
historico = models.FileField(upload_to=get_path)
matricula = models.FileField(upload_to=get_path)
degree = models.ForeignKey(Degree)
timestamp = models.DateTimeField(default=timezone.now)
......@@ -49,6 +50,8 @@ class Submission(models.Model):
analysis_status = models.IntegerField(default=0, choices=ANALYSIS_STATUS_CHOICES)
last_error = models.CharField(default="", max_length=4096)
relative_year = models.IntegerField(null=True)
relative_semester = models.IntegerField(null=True)
......@@ -91,11 +94,12 @@ class Submission(models.Model):
self.analysis_status = 1
self.save()
def set_fail(self, time, error_message=""):
    """
    Mark the submission analysis as failed and persist it.

    Parameters
    ----------
    time
        Processing time to record in ``process_time``.
    error_message : str, optional
        Description of the failure, stored in ``last_error``. Defaults to
        an empty string so callers that only pass ``time`` keep working.
    """
    self.processed = False
    self.process_time = time
    self.done_in = timezone.now()
    # NOTE(review): 2 is presumably the "failed" entry of
    # ANALYSIS_STATUS_CHOICES - confirm against the choices definition.
    self.analysis_status = 2
    # last_error is a bounded CharField: clip the message so an oversized
    # exception text cannot make save() fail on backends that enforce
    # the column length.
    max_length = self._meta.get_field("last_error").max_length
    self.last_error = str(error_message)[:max_length]
    self.save()
# @receiver(models.signals.post_save, sender=Submission)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment