Commit a4dd3e2b authored by Odair M.'s avatar Odair M.

Merge branch '175-revisar-analises' into 'development'

Update "Revisar análises"

See merge request !85
parents 18b40df9 b3650070
Pipeline #20160 passed with stage
in 3 minutes
......@@ -18,6 +18,7 @@ class AdegaChart{
this.xaxis_title = config.xaxis_title || "";
this.mode = config.mode;
this.text = config.text;
this.marker = config.marker;
......@@ -120,11 +121,12 @@ class AdegaChart{
}
}
if(this.mode && this.mode[i]){
data[i].mode = this.mode[i];
data[i].text = this.text[i];
}
if(this.marker != undefined){
data[i].marker = this.marker;
}
data[i].connectgaps = true;
}
var layout = {
......
......@@ -4,12 +4,18 @@ register = template.Library()
@register.filter
def to_percent(value):
    """Format a numeric ratio as a percentage string with two decimals.

    Template filter: ``0.5`` becomes ``"50.00%"``.

    Anything that is not an ``int`` or ``float`` (``None``, strings, ...)
    yields an empty string instead of raising, so a missing analysis value
    does not break template rendering.
    """
    # bool is a subclass of int; keep rejecting it as the exact-type check did.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return ""
    return "{:.2f}".format(float(value) * 100) + "%"
@register.filter
def fix_2digit(value):
    """Format a number with exactly two decimal places.

    Template filter: ``3.14159`` becomes ``"3.14"``.

    Anything that is not an ``int`` or ``float`` (``None``, strings, ...)
    yields an empty string instead of raising.
    """
    # bool is a subclass of int; keep rejecting it as the exact-type check did.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return ""
    return "{:.2f}".format(float(value))
@register.filter
def remove_spaces(value):
    """Strip every space character from a string.

    Template filter. Non-string values are returned unchanged so the filter
    can be applied safely to values that may be ``None`` or numeric.
    """
    if isinstance(value, str):
        return value.replace(' ', '')
    return value
\ No newline at end of file
......@@ -64,12 +64,13 @@
<div class="row">
<div class="col-12">
<div id="compara_aprov"></div>
<div id="approvation_rate"></div>
</div>
</div>
<div class="row">
<div class="col-12">
<div id="approvation_rate"></div>
<div id="compara_aprov"></div>
</div>
</div>
......@@ -154,6 +155,15 @@
var approvation_rate_y2 = approvation_rate_2[1].map(function(x){
return x[0];
});
var approvation_rate_text1 = approvation_rate_1[1].map(function(x){
// Number of students enrolled in the course in each semester
return "<br>" + x[1] + " Matrículas";
});
var approvation_rate_text2 = approvation_rate_2[1].map(function(x){
return "<br>" + x[1] + " Matrículas";
});
var chart2 = new AdegaChart({
data_x: approvation_rate_x,
......@@ -162,6 +172,7 @@
title: "Taxa de aprovação por semestre",
fill: "none",
type: ["scatter", "scatter"],
text: [approvation_rate_text1, approvation_rate_text2],
mode: ["lines+markers", "lines+markers"],
legend: [codeCourse1,codeCourse2],
xaxis_title: "Período",
......@@ -169,7 +180,7 @@
marker: {
opacity:0.6,
size: 13
}
},
});
}
......
......@@ -74,7 +74,10 @@ def compare(request, submission_id):
chart_approvation_rate = {}
for course_name in courses_list:
course_detail = get_course_detail(request.session, degree, course_name, submission_id)
course_detail = get_course_detail(request.session,
degree, course_name,
submission_id)
chart_approvation_rate[course_name] = course_detail["aprovacao_semestral"]
charts["approvation_rate"] = chart_approvation_rate
......
......@@ -165,9 +165,9 @@
title: "Distribuição de alunos por faixas de IRA",
fill: "none",
legend: [
"Quantidade total de alunos",
"Quantidade total de alunos sem evasão",
"Quantidade total de alunos formados"
"Total de alunos",
"Alunos ativos",
"Alunos formados"
],
xaxis_title: "Faixa de IRA",
yaxis_title: "Quantidade de alunos",
......
......@@ -104,8 +104,9 @@
<span class="data">{{ analysis_result.indice_aprovacao|to_percent }}</span>
</td>
</tr>
<tr>
<!-- This analysis has not been implemented yet -->
<!-- <tr>
<td>Período real</td>
<td>
{% if analysis_result.periodo_real %}
......@@ -116,7 +117,7 @@
<span class="data">Desconhecido</span>
{% endif %}
</td>
</tr>
</tr> -->
<tr>
<td>Período pretendido</td>
......
......@@ -8,8 +8,6 @@ from collections import defaultdict
import numpy as np
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
class Admission(object):
......@@ -110,10 +108,12 @@ class Admission(object):
São dois .apply, um dentro do outro: um itera sobre a turma de
ingresso e faz o agrupamento de alunos, e o outro itera sobre os
alunos da turma de ingresso para calcular o IRA. """
ira_medio = submission_groupby.apply(lambda x:\
x.groupby(["MATR_ALUNO"]).apply(lambda y:\
(y.MEDIA_FINAL * y.TOTAL_CARGA_HORARIA).sum() /\
(y.TOTAL_CARGA_HORARIA.sum()*100)).mean())
self.analysis["ira_medio"] = ira_medio
def admission_list(self):
......@@ -136,6 +136,13 @@ class Admission(object):
# This will create a directory when build_cache creates the json.
# For instance: the files and directories admission/2010/1.json will
# be created
# The ira_medio can be undefined for some admissions
# Then, we need to verify if it was computed
ira_medio = 0
if i in self.analysis["ira_medio"].keys():
ira_medio = self.analysis["ira_medio"][i]
admission_dict["ano"] = i[0]
admission_dict["semestre"] = i[1]
admission_dict["abandono"] = int(self.analysis["qtd_abandono"][i])
......@@ -145,7 +152,7 @@ class Admission(object):
admission_dict["outras_formas_evasao"] = int(self.analysis["outras_formas_evasao"][i])
admission_dict["formatura_media"] = float(formatura_medio[i]) if i in formatura_medio.index else -1
admission_dict["quantidade_alunos"] = int(self.analysis["qtd_alunos_ingresso"][i])
admission_dict["ira_medio"] = float(self.analysis["ira_medio"][i])
admission_dict["ira_medio"] = float(ira_medio)
admission_dict["taxa_evasao"] = float(self.analysis["taxa_evasao"][i])
admission_dict["taxa_reprovacao"] = float(self.analysis["taxa_reprovacao"][i])
admissions.append(admission_dict)
......
......@@ -50,7 +50,7 @@ class Course(Analysis):
"SITUACAO",
[sit.SIT_CONHECIMENTO_APROVADO],
list(sit.SITUATION_KNOWLDGE),
1
2
),
rate(
"taxa_reprovacao_frequencia",
......@@ -69,7 +69,8 @@ class Course(Analysis):
__semestral_rate = [__rates[1]]
last_rate = [__rates[0], __rates[4]]
def __init__(self, df):
def __init__(self, current_year, df):
self.current_year = current_year
df_filted = df[df['SITUACAO'].isin(sit.SITUATION_COURSED)]
dict_df = {
"normal_dataframe": df,
......@@ -138,7 +139,7 @@ class Course(Analysis):
last_rates = self.last_rate
def f(x, rate):
x1 = x.loc[x.ANO == x.ANO.max()]
x1 = x.loc[x.ANO == x.ANO.max() - 1]
x_num = x1[x1[rate.collumn_name].isin(rate.fields_x)].shape[0]
x_deno = x1[x1[rate.collumn_name].isin(rate.fields_X)].shape[0]
return x_num / x_deno if x_deno > 0 else 0
......@@ -199,9 +200,9 @@ class Course(Analysis):
serie_mean = group.apply(lambda x: x["MEDIA_FINAL"].mean())
serie_std = group.apply(lambda x: x["MEDIA_FINAL"].std())
last_year_mean = group.apply(
lambda x: x.loc[x.ANO == x.ANO.max()].MEDIA_FINAL.mean())
lambda x: x.loc[x.ANO == x.ANO.max() - 1].MEDIA_FINAL.mean())
last_year_std = group.apply(
lambda x: x.loc[x.ANO == x.ANO.max()].MEDIA_FINAL.std())
lambda x: x.loc[x.ANO == x.ANO.max() - 1].MEDIA_FINAL.std())
# caso tenha algum nan, troque por 0.0
serie_mean[np.isnan(serie_mean)] = 0.0
serie_std[np.isnan(serie_std)] = 0.0
......
import numpy as np
#~ TODO:
#~ FAZER CACHE DE TUDO
#~ AO CHAMAR A FUNCAO VERIFICAR SE TEM ALGO NA CACHE
# TODO:
# FAZER CACHE DE TUDO
# AO CHAMAR A FUNCAO VERIFICAR SE TEM ALGO NA CACHE
from submission.analysis.utils.situations import *
from submission.analysis.utils.utils import memoize
......@@ -11,6 +11,7 @@ from collections import defaultdict
class StudentAnalysis:
data_frame = None
......@@ -33,13 +34,12 @@ class StudentAnalysis:
])
students = students.groups.keys()
iras = self.ira_alunos()
iras = self.ira_alunos(df=df)
info = {}
for stnd in students:
grr = stnd[0]
if(stnd[0][-1] == 1):
print(stnd[0])
info[grr] = {
"grr": grr,
"name": str(stnd[1]),
......@@ -52,85 +52,101 @@ class StudentAnalysis:
}
return info
@memoize
def list_students(self, df=None):
    """Group the students by evasion form.

    Parameters
    ----------
    df : optional
        Data to analyse; ``self.data_frame`` is used when ``None``.

    Returns
    -------
    defaultdict of list
        Maps each ``FORMA_EVASAO`` code to a list of dicts with the keys
        ``forma_evasao`` (readable name), ``grr``, ``ira`` and ``nome``.
    """
    df = df if df is not None else self.data_frame
    per_student = df.groupby(["MATR_ALUNO", "NOME_PESSOA", "FORMA_EVASAO"])
    # One row per (student, name, evasion form) combination.
    counts = pd.DataFrame({'count': per_student.size()}).reset_index()
    iras = self.ira_alunos(df=df)
    list_situations = defaultdict(list)
    # Group by a scalar key (not a one-element list) so the evasion code is a
    # scalar on every pandas version; a list-of-one key yields 1-tuples on
    # newer releases, which would not match any evasion-form code.
    for evasion_code, group in counts.groupby("FORMA_EVASAO"):
        evasion_form_name = EvasionForm.code_to_str(evasion_code)
        for grr, name in zip(group["MATR_ALUNO"], group["NOME_PESSOA"]):
            list_situations[evasion_code].append({
                "forma_evasao": evasion_form_name,
                "grr": grr,
                "ira": iras[grr],
                "nome": name
            })
    return list_situations
@memoize
def ira_alunos(self, df=None):
    """
    Calculates the average IRA per student

    IRA = Sum (grades X coursetime)/ (total course time X 100)

    The per-semester values returned by ``ira_por_quantidade_disciplinas``
    are combined into one workload-weighted average per student.

    Parameters
    ----------
    df : optional
        Data to analyse; ``self.data_frame`` is used when ``None``.

    Returns
    -------
    dict
        ``{GRR: number, ...}``; a student with zero total workload gets 0.
    """
    df = df if df is not None else self.data_frame
    iras = self.ira_por_quantidade_disciplinas(df=df)
    # The result goes into a fresh dict: `iras` comes from a @memoize-decorated
    # method, so overwriting its entries would presumably corrupt the cached
    # per-semester data for every other caller.
    ira_per_student = {}
    for grr, semesters in iras.items():
        ira_total = 0
        carga_total = 0
        for entry in semesters.values():
            # entry[0] is the semester IRA, entry[2] the semester workload.
            ira_total += entry[0] * entry[2]
            carga_total += entry[2]
        if carga_total != 0:
            ira_per_student[grr] = ira_total / carga_total
        else:
            ira_per_student[grr] = 0
    return ira_per_student
@memoize
def taxa_aprovacao(self, df=None):
    """Calculate the overall approval rate of each student.

    Sums the per-semester pairs returned by ``indice_aprovacao_semestral``
    (presumably ``[approvals, attempts]`` -- confirm against that method)
    and divides the totals.

    Parameters
    ----------
    df : optional
        Data to analyse; ``self.data_frame`` is used when ``None``.

    Returns
    -------
    dict
        ``{GRR: rate}``; ``None`` for a student whose total is zero.
    """
    df = df if df is not None else self.data_frame
    aprovacoes_semestres = self.indice_aprovacao_semestral(df=df)
    # Build a new dict instead of overwriting the entries of the dict returned
    # by the @memoize-decorated method, so the per-semester data stays intact
    # for other callers.
    taxas = {}
    for aluno, semestres in aprovacoes_semestres.items():
        total = float(sum(s[1] for s in semestres.values()))
        aprovacoes = float(sum(s[0] for s in semestres.values()))
        taxas[aluno] = aprovacoes / total if total != 0 else None
    return taxas
def turma_ingresso(self, df=None):
    """Map each student to their admission class, formatted "year/semester".

    Parameters
    ----------
    df : optional
        Data to analyse; ``self.data_frame`` is used when ``None``.

    Returns
    -------
    dict
        ``{GRR: "ANO_INGRESSO_y/SEMESTRE_INGRESSO"}``, taken from the first
        row found for each student.
    """
    df = df if df is not None else self.data_frame
    first_rows = df.drop_duplicates(subset="MATR_ALUNO", keep="first")
    # Walk the three columns in lockstep instead of building a Series per row
    # with iterrows().
    return {
        matr: ano + "/" + semestre
        for matr, ano, semestre in zip(
            first_rows["MATR_ALUNO"],
            first_rows["ANO_INGRESSO_y"],
            first_rows["SEMESTRE_INGRESSO"],
        )
    }
@memoize
def posicao_turmaIngresso_semestral(self, df=None):
df = df if df is not None else self.data_frame
grr_to_admissions = self.turma_ingresso()
grr_to_admissions = self.turma_ingresso(df=df)
admissions = defaultdict(list)
......@@ -139,33 +155,33 @@ class StudentAnalysis:
# For instance: {"2015/1":["GRR20151346","GRR20154562", ...], ...}
for grr in grr_to_admissions:
admissions[grr_to_admissions[grr]].append(grr)
iras_by_semester = self.ira_semestral()
iras_by_semester = self.ira_semestral(df=df)
positions = defaultdict(dict)
for grr in iras_by_semester:
for semester in iras_by_semester[grr]:
student_admission = admissions[grr_to_admissions[grr]]
competition = [matr for matr in student_admission if semester in iras_by_semester[matr]]
competition = [
matr for matr in student_admission if semester in iras_by_semester[matr]]
classifications = sorted(
competition,
key = lambda matr: iras_by_semester[matr][semester]
key=lambda matr: iras_by_semester[matr][semester]
)
positions[grr][semester] = (1+classifications.index(grr))/len(competition)
positions[grr][semester] = (
1+classifications.index(grr))/len(competition)
return positions
@memoize
def periodo_real(self, df=None):
    """Placeholder for the real period of each student.

    The analysis is not implemented yet: every student is mapped to ``None``.

    Parameters
    ----------
    df : optional
        Data to analyse; ``self.data_frame`` is used when ``None``.

    Returns
    -------
    dict
        ``{GRR: None}`` for every distinct ``MATR_ALUNO``.
    """
    df = df if df is not None else self.data_frame
    # Group by a scalar key so the dict keys are plain GRR values on every
    # pandas version (a list-of-one key yields 1-tuples on newer releases).
    grouped = df.groupby("MATR_ALUNO")
    # TODO: Calculate the real value
    return dict.fromkeys(grouped.groups)
......@@ -173,18 +189,19 @@ class StudentAnalysis:
@memoize
def periodo_pretendido(self, df=None):
    """Calculate the period each student would be in by admission date.

    Counts the semesters elapsed between the admission year/semester and
    ``self.current_year`` / ``self.current_semester``, starting at 1 for
    the admission semester itself.

    Parameters
    ----------
    df : optional
        Data to analyse; ``self.data_frame`` is used when ``None``.

    Returns
    -------
    dict
        ``{GRR: period number}``.
    """
    df = df if df is not None else self.data_frame
    grouped = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
    students = {}
    for (matr, ano, semestre), _ in grouped:
        students[matr] = ((self.current_year - int(ano)) * 2 +
                          self.current_semester - int(semestre) + 1)
    return students
@memoize
def ira_semestral(self, df=None):
df = df if df is not None else self.data_frame
aux = self.ira_por_quantidade_disciplinas()
aux = self.ira_por_quantidade_disciplinas(df=df)
for matr in aux:
for periodo in aux[matr]:
aux[matr][periodo] = aux[matr][periodo][0]
......@@ -192,8 +209,25 @@ class StudentAnalysis:
@memoize
def ira_por_quantidade_disciplinas(self, df=None):
"""
Calculates the ira per year/semester
Parameters
----------
df : DataFrame, optional (defaults to self.data_frame)
Returns
-------
dict of dict of array
iras = { GRR: {year/semester: []},
...}
Example
--------
"""
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=["MEDIA_FINAL"])
......@@ -208,12 +242,11 @@ class StudentAnalysis:
situacao = int(df["SITUACAO"][i])
nota = float(df["MEDIA_FINAL"][i])
carga = float(df["CH_TOTAL"][i])
if (situacao in Situation.SITUATION_AFFECT_IRA):
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0, 0]
students[matr][ano + "/" + semestre][0] += nota*carga
students[matr][ano + "/" + semestre][1] += 1
students[matr][ano + "/" + semestre][2] += carga
......@@ -222,14 +255,13 @@ class StudentAnalysis:
for periodo in students[matr]:
if (students[matr][periodo][2] != 0):
students[matr][periodo][0] /= students[matr][periodo][2] * 100
return students
@memoize
def indice_aprovacao_semestral(self, df=None):
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
total_students = len(df["MATR_ALUNO"])
......@@ -250,14 +282,22 @@ class StudentAnalysis:
students[matr][ano + "/" + semestre][1] += 1
if situacao in Situation.SITUATION_FAIL:
students[matr][ano + "/" + semestre][1] += 1
return (students)
return (students)
@memoize
def aluno_turmas(self, df=None):
"""
Returns
-------
dict of
Example
--------
"""
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
......
......@@ -28,6 +28,7 @@ def load_dataframes(cwd='.'):
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
return dataframe
......@@ -45,11 +46,17 @@ def fix_dataframes(dataframes):
if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv':
register = df['dataframe']
# Remove empty lines
history = history[history['MATR_ALUNO'].notnull()]
register = register[register['MATR_ALUNO'].notnull()]
#~ clean_history(history)
clean_register(register)
#~ df.dropna(axis=0, how='all')
history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
history = history[np.isfinite(history['MEDIA_FINAL'])]
history = history.fillna({"MEDIA_FINAL":0.0})
# history.loc[history['MEDIA_FINAL'].isnull(),"MEDIA_FINAL"] = 0.0
# history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
# history = history[np.isfinite(history['MEDIA_FINAL'])]
# inner = existe nos dois relatórios, é o que a gente quer
# o que fazer com quem não está em um dos dois é uma questão em aberto
......@@ -59,6 +66,7 @@ def fix_dataframes(dataframes):
"SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO",
"FORMA_INGRESSO_x": "FORMA_INGRESSO"
})
fix_situation(merged)
fix_admission(merged)
......@@ -75,10 +83,8 @@ def fix_datatype(df):
for i in collums:
df[i].fillna(0, inplace=True)
df[i] = df[i].astype(int)
print(df[i].drop_duplicates())
def clean_history(df):
print(df.columns)
drop_columns = ['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
......@@ -138,7 +144,9 @@ def fix_admission(df):
def fix_carga(df):
#df["CH_TOTAL"] = df["CH_TEORICA"]+df["CH_PRATICA"]
# Some rows in the dataframe do not respect the sum of theoretical and
# practical class hours, and only specify the total hours
# df["CH_TOTAL"] = df["CH_TEORICA"]+df["CH_PRATICA"]
df["CH_TOTAL"] = df["TOTAL_CARGA_HORARIA"]
......
......@@ -35,10 +35,10 @@ def build_cache(dataframe, path, current_year = CURRENT_YEAR, current_semester =
for cod, df in dataframe.groupby('COD_CURSO'):
path = path + '/'
generate_degree_data(path, df)
generate_degree_data(path, df, student_analysis)
generate_student_data(path + 'students/', df, student_analysis)
generate_admission_data(path + 'admissions/', df, student_analysis)
generate_course_data(path + 'courses/', dataframe)
generate_course_data(path + 'courses/',current_year, dataframe)
generate_cepe_data(path + '/others/', df)
......@@ -51,12 +51,10 @@ def generate_cepe_data(path, df):
save_json(path + "cepe9615.json", cepe_dict)
def generate_degree_data(path, dataframe, student_analysis):
    """Create the degree output directories and write the degree JSON.

    Parameters
    ----------
    path : str
        Base output directory; callers pass it with a trailing '/'.
    dataframe
        Data of a single degree, forwarded to ``build_degree_json``.
    student_analysis
        Analysis object forwarded to ``build_degree_json``.
    """
    ensure_path_exists(path)
    ensure_path_exists(path + 'students')
    build_degree_json(path, dataframe, student_analysis)
def historico(dataframe):
......@@ -206,8 +204,8 @@ def generate_admission_list(path, df):
pass
def generate_course_data(path, df):
course = Course(df)
def generate_course_data(path, current_year, df):
course = Course(current_year,df)
course.build_analysis()
courses = course.build_general_course()
save_json(path + "disciplinas.json", courses)
......
......@@ -3,6 +3,7 @@ from submission.analysis.base.dataframe_base import load_dataframes
from submission.analysis.build_cache import build_cache
from datetime import timedelta
import traceback
def analyze(submission, debug=True):
......@@ -24,10 +25,12 @@ def analyze(submission, debug=True):
except Exception as e:
if(debug):
print("Error on submission analysis:", e)
error = traceback.format_exc()
# if(debug):
# print("Error on submission analysis:", error)
print("Error on submission analysis:", error)
submission.set_fail(round(time.clock() - start_time), error_message=str(e))
submission.set_fail(round(time.clock() - start_time), error_message=str(error))
if __name__ == "__main__":
......
......@@ -71,7 +71,7 @@ class EvasionForm:
if(ef[0] == code):
return ef[1].replace("'","").replace("\"","")
return ""
@staticmethod
def str_to_code(name):
for ef in EvasionForm.EVASION_FORM:
......@@ -84,7 +84,7 @@ class EvasionForm:
# reprovacao,reprovacao_freq,dispensa_com_nota aparecem em algumas linha como
# 9999, o valor 9999 é o valor definido pelo sie para ser o 'null' na tabela
# .33, na tabela .18 o 'null' é o zero e não ocorre problema de calculo de
# nota/ira
# nota/ira
# orientaçao: verificar se media_final é maior que 100 se sim atribua 0 se nao
# atribua media_final
......@@ -95,7 +95,7 @@ class Situation:
SIT_APROVADO = 1
SIT_REPROVADO = 2
SIT_MATRICULA = 3
SIT_REPROVADO_FREQ = 4
SIT_EQUIVALENCIA = 5
SIT_CANCELADO = 6
......@@ -113,7 +113,7 @@ class Situation:
SIT_APROV_ADIANTAMENTO = 15
SIT_INCOMPLETO = 16
SIT_REPROVADO_ADIAN = 17
SIT_OUTROS = 100
......@@ -174,9 +174,9 @@ class Situation:
"""
isso deve ser pra filtrar fora coisas que não são disciplinas cumpridas
como "trancamento administrativo" e "horas"
importante pra saber quantas matérias um aluno REALMENTE fez em um semestre
"""
SITUATION_COURSED = (
......