Commit b3650070 authored by bhmeyer's avatar bhmeyer Committed by Odair M.

adega #175: começado o class pra fazer o dict de intervalos de iras; tentativa...

adega #175: começado o class pra fazer o dict de intervalos de iras; tentativa de arrumar o alunos/ira do degree
parent 18b40df9
......@@ -18,6 +18,7 @@ class AdegaChart{
this.xaxis_title = config.xaxis_title || "";
this.mode = config.mode;
this.text = config.text;
this.marker = config.marker;
......@@ -120,11 +121,12 @@ class AdegaChart{
}
}
if(this.mode && this.mode[i]){
data[i].mode = this.mode[i];
data[i].text = this.text[i];
}
if(this.marker != undefined){
data[i].marker = this.marker;
}
data[i].connectgaps = true;
}
var layout = {
......
......@@ -4,12 +4,18 @@ register = template.Library()
@register.filter
def to_percent(value):
    """Format a numeric ratio as a percentage string with two decimals.

    Parameters
    ----------
    value : int or float
        Ratio to format, e.g. ``0.5``.

    Returns
    -------
    str
        ``value * 100`` rendered with two decimal places followed by ``"%"``,
        or an empty string when ``value`` is not an int/float (for instance
        ``None`` or a string), so the template renders nothing instead of
        raising.
    """
    # bool is a subclass of int; keep rendering True/False as an empty
    # string, exactly as an exact-type check would.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return ""
    return "{:.2f}".format(float(value) * 100) + "%"
@register.filter
def fix_2digit(value):
    """Format a number with exactly two decimal places.

    Parameters
    ----------
    value : int or float
        Number to format.

    Returns
    -------
    str
        ``value`` rendered with two decimal places, or an empty string when
        ``value`` is not an int/float (for instance ``None`` or a string).
    """
    # bool is a subclass of int; keep rendering True/False as an empty
    # string, exactly as an exact-type check would.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return ""
    return "{:.2f}".format(float(value))
@register.filter
def remove_spaces(value):
    """Remove every space character from a string.

    Parameters
    ----------
    value : object
        Value coming from the template.

    Returns
    -------
    object
        ``value`` with all ``' '`` characters removed when it is a string
        (including ``str`` subclasses); any other value is returned
        unchanged.
    """
    if isinstance(value, str):
        return value.replace(' ', '')
    return value
\ No newline at end of file
......@@ -64,12 +64,13 @@
<div class="row">
<div class="col-12">
<div id="compara_aprov"></div>
<div id="approvation_rate"></div>
</div>
</div>
<div class="row">
<div class="col-12">
<div id="approvation_rate"></div>
<div id="compara_aprov"></div>
</div>
</div>
......@@ -154,6 +155,15 @@
var approvation_rate_y2 = approvation_rate_2[1].map(function(x){
return x[0];
});
var approvation_rate_text1 = approvation_rate_1[1].map(function(x){
// Quantity of student that register on course on each semester
return "<br>" + x[1] + " Matrículas";
});
var approvation_rate_text2 = approvation_rate_2[1].map(function(x){
return "<br>" + x[1] + " Matrículas";
});
var chart2 = new AdegaChart({
data_x: approvation_rate_x,
......@@ -162,6 +172,7 @@
title: "Taxa de aprovação por semestre",
fill: "none",
type: ["scatter", "scatter"],
text: [approvation_rate_text1, approvation_rate_text2],
mode: ["lines+markers", "lines+markers"],
legend: [codeCourse1,codeCourse2],
xaxis_title: "Período",
......@@ -169,7 +180,7 @@
marker: {
opacity:0.6,
size: 13
}
},
});
}
......
......@@ -74,7 +74,10 @@ def compare(request, submission_id):
chart_approvation_rate = {}
for course_name in courses_list:
course_detail = get_course_detail(request.session, degree, course_name, submission_id)
course_detail = get_course_detail(request.session,
degree, course_name,
submission_id)
chart_approvation_rate[course_name] = course_detail["aprovacao_semestral"]
charts["approvation_rate"] = chart_approvation_rate
......
......@@ -165,9 +165,9 @@
title: "Distribuição de alunos por faixas de IRA",
fill: "none",
legend: [
"Quantidade total de alunos",
"Quantidade total de alunos sem evasão",
"Quantidade total de alunos formados"
"Total de alunos",
"Alunos ativos",
"Alunos formados"
],
xaxis_title: "Faixa de IRA",
yaxis_title: "Quantidade de alunos",
......
......@@ -104,8 +104,9 @@
<span class="data">{{ analysis_result.indice_aprovacao|to_percent }}</span>
</td>
</tr>
<tr>
<!-- This analysis was not implemented yet -->
<!-- <tr>
<td>Período real</td>
<td>
{% if analysis_result.periodo_real %}
......@@ -116,7 +117,7 @@
<span class="data">Desconhecido</span>
{% endif %}
</td>
</tr>
</tr> -->
<tr>
<td>Período pretendido</td>
......
......@@ -8,8 +8,6 @@ from collections import defaultdict
import numpy as np
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
class Admission(object):
......@@ -110,10 +108,12 @@ class Admission(object):
Sao dois .apply, um dentro do outro, sendo que um intera sobre a turma
ingresso e ai faz o agrupamento de alunos e o outro intera sobre os
alunos da turma ingresso para calcular o ira. """
ira_medio = submission_groupby.apply(lambda x:\
x.groupby(["MATR_ALUNO"]).apply(lambda y:\
(y.MEDIA_FINAL * y.TOTAL_CARGA_HORARIA).sum() /\
(y.TOTAL_CARGA_HORARIA.sum()*100)).mean())
self.analysis["ira_medio"] = ira_medio
def admission_list(self):
......@@ -136,6 +136,13 @@ class Admission(object):
# This will create a directory when build_cache creates the json
# For instance: the files and directories admission/2010/1.json will
# be created
# The ira_medio can be undefined for some admissions
# Then, we need to verify if it was computed
ira_medio = 0
if i in self.analysis["ira_medio"].keys():
ira_medio = self.analysis["ira_medio"][i]
admission_dict["ano"] = i[0]
admission_dict["semestre"] = i[1]
admission_dict["abandono"] = int(self.analysis["qtd_abandono"][i])
......@@ -145,7 +152,7 @@ class Admission(object):
admission_dict["outras_formas_evasao"] = int(self.analysis["outras_formas_evasao"][i])
admission_dict["formatura_media"] = float(formatura_medio[i]) if i in formatura_medio.index else -1
admission_dict["quantidade_alunos"] = int(self.analysis["qtd_alunos_ingresso"][i])
admission_dict["ira_medio"] = float(self.analysis["ira_medio"][i])
admission_dict["ira_medio"] = float(ira_medio)
admission_dict["taxa_evasao"] = float(self.analysis["taxa_evasao"][i])
admission_dict["taxa_reprovacao"] = float(self.analysis["taxa_reprovacao"][i])
admissions.append(admission_dict)
......
......@@ -50,7 +50,7 @@ class Course(Analysis):
"SITUACAO",
[sit.SIT_CONHECIMENTO_APROVADO],
list(sit.SITUATION_KNOWLDGE),
1
2
),
rate(
"taxa_reprovacao_frequencia",
......@@ -69,7 +69,8 @@ class Course(Analysis):
__semestral_rate = [__rates[1]]
last_rate = [__rates[0], __rates[4]]
def __init__(self, df):
def __init__(self, current_year, df):
self.current_year = current_year
df_filted = df[df['SITUACAO'].isin(sit.SITUATION_COURSED)]
dict_df = {
"normal_dataframe": df,
......@@ -138,7 +139,7 @@ class Course(Analysis):
last_rates = self.last_rate
def f(x, rate):
x1 = x.loc[x.ANO == x.ANO.max()]
x1 = x.loc[x.ANO == x.ANO.max() - 1]
x_num = x1[x1[rate.collumn_name].isin(rate.fields_x)].shape[0]
x_deno = x1[x1[rate.collumn_name].isin(rate.fields_X)].shape[0]
return x_num / x_deno if x_deno > 0 else 0
......@@ -199,9 +200,9 @@ class Course(Analysis):
serie_mean = group.apply(lambda x: x["MEDIA_FINAL"].mean())
serie_std = group.apply(lambda x: x["MEDIA_FINAL"].std())
last_year_mean = group.apply(
lambda x: x.loc[x.ANO == x.ANO.max()].MEDIA_FINAL.mean())
lambda x: x.loc[x.ANO == x.ANO.max() - 1].MEDIA_FINAL.mean())
last_year_std = group.apply(
lambda x: x.loc[x.ANO == x.ANO.max()].MEDIA_FINAL.std())
lambda x: x.loc[x.ANO == x.ANO.max() - 1].MEDIA_FINAL.std())
# caso tenha algum nan, troque por 0.0
serie_mean[np.isnan(serie_mean)] = 0.0
serie_std[np.isnan(serie_std)] = 0.0
......
import math
import json
import pandas as pd
import numpy as np
from submission.analysis.utils.situations import Situation, EvasionForm
from submission.analysis.utils.utils import IntervalCount, save_json
from submission.analysis.analysis.student_analysis import *
def average_graduation(df):
    """
    Calculates the ratio of students who have already graduated
    to the number of distinct students on the original dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the MATR_ALUNO and FORMA_EVASAO columns.

    Returns
    -------
    float
        Fraction between 0 and 1; 0.0 when the dataframe has no students.
    """
    # One entry per student: the first row seen for each MATR_ALUNO.
    students = df['MATR_ALUNO'].drop_duplicates()
    total_student = students.shape[0]
    if total_student == 0:
        # Avoid ZeroDivisionError on an empty submission.
        return 0.0
    # The boolean mask is built on the full frame; .loc aligns it with the
    # de-duplicated series by index label.
    graduated = students.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    return graduated.shape[0] / total_student
def general_failure(df):
"""
Returns
-------
Examples
--------
"""
affect_ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
failures = affect_ira[affect_ira.SITUACAO.isin(Situation.SITUATION_FAIL)]
# average = reprovados/
average = failures.shape[0] / affect_ira.shape[0]
student_courses = affect_ira.groupby(['MATR_ALUNO'], as_index=False)\
......@@ -49,23 +74,23 @@ def current_students_failure(df):
standard_deviation = math.sqrt(variance)
return (average, standard_deviation)
def general_ira(student_analysis):
    """
    Mean and standard deviation of the IRA over every student.

    Parameters
    ----------
    student_analysis : object
        Must expose ``ira_alunos()`` returning a dict whose values are the
        students' IRAs.

    Returns
    -------
    tuple of float
        ``(mean, std)`` where std is numpy's default (population) standard
        deviation; ``(0.0, 0.0)`` when there is no IRA at all.
    """
    iras = np.array(list(student_analysis.ira_alunos().values()))
    if iras.size == 0:
        # mean()/std() of an empty array are NaN (with a RuntimeWarning);
        # report zeros instead so the value stays JSON-friendly.
        return (0.0, 0.0)
    return (float(iras.mean()), float(iras.std()))
# Mean and standard deviation of the IRA for the rows that belong to the
# highest NUM_VERSAO present in df and whose SITUACAO is listed in
# Situation.SITUATION_AFFECT_IRA.
# NOTE(review): this span interleaves two revisions of the function (two
# `def` lines and two `return` statements). The one-argument signature and
# the MEDIA_FINAL-based return look like the superseded ones; whether the
# `MEDIA_FINAL <= 100` filter survives in the new revision cannot be told
# from here — confirm against the repository.
def current_ira(df):
def current_ira(df, student_analysis):
# Highest NUM_VERSAO value found in df, taken from the row that idxmax selects.
ano_grade = int(df.loc[df['NUM_VERSAO'].idxmax()]['NUM_VERSAO'])
fixed = df.loc[(df['NUM_VERSAO'] == ano_grade)]
fixed = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
# presumably ira_alunos returns {student id: IRA}; verify in student_analysis
iras = np.array(list(student_analysis.ira_alunos(df = fixed).values()))
return (iras.mean(), iras.std())
# Mean and standard deviation of the IRA for rows whose FORMA_EVASAO is
# EvasionForm.EF_ATIVO and whose SITUACAO is listed in
# Situation.SITUATION_AFFECT_IRA.
# NOTE(review): this span interleaves two revisions of the function (two
# `def` lines and two `return` statements). The one-argument signature and
# the MEDIA_FINAL-based return look like the superseded ones; whether the
# `MEDIA_FINAL <= 100` filter survives in the new revision cannot be told
# from here — confirm against the repository.
def current_students_ira(df):
def current_students_ira(df, student_analysis):
fixed = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_ATIVO)]
fixed = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
# presumably ira_alunos returns {student id: IRA}; verify in student_analysis
iras = np.array(list(student_analysis.ira_alunos(df = fixed).values()))
return (iras.mean(), iras.std())
def general_evasion_rate(df):
students = df['MATR_ALUNO'].drop_duplicates()
......@@ -84,23 +109,35 @@ def current_evasion_rate(df):
return total_evasion / total_student
def average_graduation_time(df):
graduates = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_FORMATURA)]
"""
Returns
-------
float
Examples
--------
5.3741640468705345 (years?)
"""
students = df.drop_duplicates('MATR_ALUNO')
graduates = students.loc[(df.FORMA_EVASAO == EvasionForm.EF_FORMATURA)]
total_graduate = graduates.shape[0]
average_time = 0
year_end = int(df['ANO'].max())
for index, row in graduates.iterrows():
if pd.notnull(row['ANO_EVASAO']):
year_end = int(row['ANO_EVASAO'])
try:
semester_end = int(row['SEMESTRE_EVASAO'])
except ValueError:
try:
evasion_dt = int(row["DT_EVASAO"].split("/")[1])
if(evasion_dt > 7):
semester_end = 2
else:
semester_end = 1
except (ValueError, AttributeError):
try:
evasion_dt = int(row["DT_EVASAO"].split("/")[1])
if(evasion_dt > 7):
semester_end = 2
else:
semester_end = 1
except ValueError:
semester_end = int(row['SEMESTRE_EVASAO'])
except (ValueError, AttributeError):
# TODO: Some students will not be considered
# The interface must inform the user about this
# and how many students were not considered
......@@ -112,7 +149,6 @@ def average_graduation_time(df):
average_time += difference
average_time /= total_graduate
average_time /= 2
return average_time
def total_students(df):
......@@ -125,37 +161,35 @@ def taxa_abandono(df):
students = df['MATR_ALUNO'].drop_duplicates()
total_student = students.shape[0]
total_abandono = students.loc[(df.FORMA_EVASAO == EvasionForm.EF_ABANDONO)].shape[0]
return total_abandono / total_student
# The following three functions are helpers that build the three dicts
# that merge_dicts receives.
def average_ira_graph(student_analysis):
    """
    Distribution of every student's IRA over the IRA intervals.

    Parameters
    ----------
    student_analysis : object
        Must expose ``ira_alunos()``; its result is handed unchanged to
        build_dict_ira_medio.

    Returns
    -------
    dict
        Whatever build_dict_ira_medio returns for those IRAs.
    """
    return build_dict_ira_medio(student_analysis.ira_alunos())
def current_students_average_ira_graph(df, student_analysis):
    """
    Distribution of IRA over the IRA intervals for active students.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the FORMA_EVASAO column.
    student_analysis : object
        Must expose ``ira_alunos(df=...)``; it is called with only the rows
        whose FORMA_EVASAO equals EvasionForm.EF_ATIVO.

    Returns
    -------
    dict
        Whatever build_dict_ira_medio returns for those IRAs.
    """
    active_rows = df.loc[df.FORMA_EVASAO == EvasionForm.EF_ATIVO]
    return build_dict_ira_medio(student_analysis.ira_alunos(df=active_rows))
def graduates_average_ira_graph(df, student_analysis):
    """
    Distribution of IRA over the IRA intervals for graduated students.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the FORMA_EVASAO column.
    student_analysis : object
        Must expose ``ira_alunos(df=...)``; it is called with only the rows
        whose FORMA_EVASAO equals EvasionForm.EF_FORMATURA.

    Returns
    -------
    dict
        Whatever build_dict_ira_medio returns for those IRAs.
    """
    graduated_rows = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    return build_dict_ira_medio(student_analysis.ira_alunos(df=graduated_rows))
def period_evasion_graph(df):
di_qtd = {}
dic = {}
evasions_total = 0
# Discover the minimum and maximum values for year
year_start = int(df['ANO'].min())
year_end = int(df['ANO'].max()) + 1
......@@ -166,7 +200,7 @@ def period_evasion_graph(df):
# Iterate between all semester/year possible
for year in range(year_start, year_end):
for semester in range(1, 3):
# Filter the rows and keep only the records
# that match the year and semester of this iteration
evasions = students.loc[
......@@ -179,18 +213,18 @@ def period_evasion_graph(df):
# and keeping the first row found
# Then, get the number of rows computed
evasions = evasions.drop_duplicates(
subset="MATR_ALUNO",
subset= "MATR_ALUNO",
keep='first'
).shape[0]
# Name of string on dictionary generated
date = str(year) + ' {}º Período'.format(semester)
di_qtd[date] = evasions
# Count the total of evasions identified, that will be
# used to compute the rate
evasions_total += evasions
# If at least one evasion was computed
if evasions_total:
# Compute the ratio of evasion per
......@@ -198,55 +232,93 @@ def period_evasion_graph(df):
for di in di_qtd:
qtd = di_qtd[di]
dic[di] = {'qtd': qtd, 'taxa': (float(qtd)/evasions_total)*100}
return dic
def build_dict_ira_medio(iras):
    """
    Uses numpy.histogram to split [0, 1] into 20 equal-width IRA intervals
    (the dict's keys) and counts how many IRAs fall on each interval
    (the dict's values).

    Every interval is half-open ``[low, high)`` except the last one, which
    also includes 1.0. IRAs outside [0, 1] are not counted by
    numpy.histogram.

    Parameters
    ----------
    iras : dict
        ``{grr: ira, grr: ira, ...}``

    Returns
    -------
    dict
        One entry per interval, e.g.
        ``{'0.00-0.05': 38, '0.05-0.10': 12, ..., '0.95-1.00': 4}``
    """
    # counts[i] is the number of IRAs between edges[i] and edges[i + 1].
    counts, edges = np.histogram(list(iras.values()), bins=20, range=(0, 1))
    return {
        "{:.2f}-{:.2f}".format(low, high): int(count)
        for count, low, high in zip(counts, edges[:-1], edges[1:])
    }
def merge_dicts(dict1, dict2, dict3):
    """
    Makes a single dict for the STUDENTS per IRA graph.

    Takes 3 dictionaries whose keys are IRA intervals and merges them.
    Each IRA interval of dict1 gets as value another dictionary with 3 items:
    number of all students within that IRA range (from dict1);
    number of active students within that IRA range (from dict2);
    number of graduated students within that IRA range (from dict3).

    Only the keys of dict1 appear in the result; a key missing from dict2
    or dict3 yields None for the corresponding item.

    Parameters
    ----------
    dict1, dict2, dict3 : dict
        ``{'0.50-0.55': 91, '0.55-0.60': 98, ...}``

    Returns
    -------
    dict
        ``{'0.50-0.55': {'ira_medio': 91, 'sem_evasao': 40, 'formatura': 30},
           '0.55-0.60': {'ira_medio': 98, 'sem_evasao': 37, 'formatura': 45},
           ...}``
    """
    return {
        key: {
            'ira_medio': value,
            'sem_evasao': dict2.get(key),
            'formatura': dict3.get(key),
        }
        for key, value in dict1.items()
    }
def build_degree_json(path,df,student_analysis):
dic = merge_dicts(
average_ira_graph(df),
current_students_average_ira_graph(df),
graduates_average_ira_graph(df)
average_ira_graph(student_analysis),
current_students_average_ira_graph(df, student_analysis),
graduates_average_ira_graph(df, student_analysis)
)
degree_json = {
"ira_medio_grafico": json.dumps(sorted(dic.items())),
"ira_medio_grafico": sorted(dic.items()),
"evasao_grafico": json.dumps(sorted(period_evasion_graph(df).items())),
"ira_atual": current_students_ira(df),
"ira_medio": general_ira(df),
"ira_atual": current_students_ira(df, student_analysis),
"ira_medio": general_ira(student_analysis),
"qtd_alunos": total_students(df),
"qtd_alunos_atuais": current_total_students(df),
"taxa_evasao": general_evasion_rate(df),
......@@ -255,5 +327,8 @@ def build_degree_json(path,df):
"taxa_reprovacao_atual": current_students_failure(df),
"tempo_formatura": average_graduation_time(df),
}
with open(path+"/degree.json",'w') as f:
f.write(json.dumps(degree_json,indent=4))
save_json(path+"/degree.json", degree_json)
# with open(path+"/degree.json",'w') as f:
# f.write(json.dumps(degree_json,indent=4))