Commit c2402558 authored by Odair M.
Browse files

Merge branch 'students' into 'back-to-origins'

Students

See merge request pet/adega-reborn!3
parents e1bbb12f 6dbe5bef
......@@ -17,3 +17,4 @@ src/cache
.idea
**/__pycache__
*.ipynb
import pandas as pd
import numpy as np
import math
from utils.situations import Situation, EvasionForm
......@@ -34,3 +33,32 @@ def general_ira(df):
fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def total_evasion_rate(df):
    """Return the fraction of distinct students whose exit form counts as evasion.

    A student is identified by ``MATR_ALUNO``; only the first row of each
    student is inspected. Every ``FORMA_EVASAO`` other than ``EF_ATIVO``,
    ``EF_FORMATURA`` and ``EF_REINTEGRACAO`` counts as evasion.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO`` and ``FORMA_EVASAO``.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when ``df`` contains no students.
    """
    # Boolean mask selecting the first row of every student. Using a mask on
    # ``df`` itself avoids aligning two Series with different indexes.
    first_rows = ~df['MATR_ALUNO'].duplicated()
    total_student = int(first_rows.sum())
    if total_student == 0:
        # Nothing to rate; avoid dividing by zero.
        return 0.0

    not_evasion = [
        EvasionForm.EF_ATIVO,
        EvasionForm.EF_FORMATURA,
        EvasionForm.EF_REINTEGRACAO,
    ]
    evasion_form = df.loc[first_rows, 'FORMA_EVASAO']
    total_evasion = int((~evasion_form.isin(not_evasion)).sum())
    return total_evasion / total_student
def average_graduation_time(df):
    """Return the average time to graduation, in years.

    Every row whose ``FORMA_EVASAO`` equals ``EF_FORMATURA`` contributes the
    number of semesters between admission (``ANO_INGRESSO`` /
    ``SEMESTRE_INGRESSO``) and exit (``ANO_EVASAO`` / ``SEMESTRE_EVASAO``),
    both ends inclusive. The mean is then halved to convert semesters to years.

    When a row has no ``ANO_EVASAO`` the latest ``ANO`` of ``df`` and the
    latest numeric ``PERIODO`` among graduates are used instead.

    NOTE(review): the mean is taken over rows, not over distinct students.
    If ``df`` holds several rows per student, each student is weighted by
    their row count -- confirm this is intended.

    Args:
        df: DataFrame with the columns named above plus ``ANO`` and ``PERIODO``.

    Returns:
        Average graduation time in years, or ``0.0`` when there are no
        graduates.
    """
    if df.empty:
        return 0.0
    graduates = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    total_graduate = graduates.shape[0]
    if total_graduate == 0:
        # No graduates: avoid dividing by zero below.
        return 0.0

    default_year_end = int(df['ANO'].max())
    # PERIODO may be stored as text; coerce it so the fallback can be used in
    # arithmetic. Falls back to semester 2 when no numeric period exists.
    numeric_periods = pd.to_numeric(graduates['PERIODO'], errors='coerce').dropna()
    default_semester_end = int(numeric_periods.max()) if not numeric_periods.empty else 2

    total_semesters = 0
    for _, row in graduates.iterrows():
        # Reset the fallbacks for every row so a value parsed for one row is
        # never silently reused for the next.
        year_end = default_year_end
        semester_end = default_semester_end
        if pd.notnull(row['ANO_EVASAO']):
            year_end = int(row['ANO_EVASAO'])
            try:
                semester_end = int(row['SEMESTRE_EVASAO'])
            except (TypeError, ValueError):
                semester_end = default_semester_end
        year = int(row['ANO_INGRESSO'])
        semester = int(row['SEMESTRE_INGRESSO'])
        # Two semesters per year; +1 because both end semesters are counted.
        total_semesters += 2 * (year_end - year) + (semester_end - semester) + 1

    return total_semesters / total_graduate / 2
\ No newline at end of file
import pandas as pd
import numpy as np
from utils.situations import *
def average_ira(d):
    """Return the workload-weighted average grade, scaled to the range 0-1.

    Rows with a missing ``MEDIA_FINAL`` or a ``MEDIA_FINAL`` above 100 are
    ignored. The result is ``sum(MEDIA_FINAL * CH_TOTAL) / (sum(CH_TOTAL) * 100)``.

    Args:
        d: DataFrame with the columns ``MEDIA_FINAL`` and ``CH_TOTAL``.

    Returns:
        The weighted average as a float, or ``None`` when no row qualifies or
        the total workload is zero.
    """
    graded = d.dropna(subset=['MEDIA_FINAL'])
    graded = graded[graded['MEDIA_FINAL'] <= 100]
    if graded.empty:
        return None

    total_hours = np.sum(graded['CH_TOTAL'])
    if total_hours == 0:
        # No workload to weight by; avoid dividing by zero.
        return None

    weighted_grades = np.sum(graded['MEDIA_FINAL'] * graded['CH_TOTAL'])
    # Grades are on a 0-100 scale; dividing by 100 normalizes the result.
    return float(weighted_grades / (total_hours * 100))
# Reference year and semester treated as "now" when periodo_pretendido
# computes how many semesters have elapsed since a student's admission.
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
def aluno_turmas(df):
def listagem_alunos_ativos(df):
    """Return the distinct ``MATR_ALUNO`` values whose ``FORMA_EVASAO`` is ``EF_ATIVO``."""
    is_active = df["FORMA_EVASAO"] == EvasionForm.EF_ATIVO
    active_ids = df["MATR_ALUNO"][is_active]
    return list(active_ids.drop_duplicates())
def posicao_turmaIngresso_semestral(df):
    """Return each student's semester IRA relative to the best IRA of that semester.

    The per-semester IRA comes from ``ira_semestral``. For every semester key
    the highest value among all students is found, and each student's value is
    divided by it, so the best student of a semester gets ``1.0``.

    Args:
        df: DataFrame accepted by ``ira_semestral``.

    Returns:
        ``{matr: {"year/period": relative_ira}}``. Values of a semester whose
        best IRA is 0 are left untouched instead of being divided by zero.
    """
    iras = ira_semestral(df)

    # Highest IRA observed in each semester.
    best = {}
    for semesters in iras.values():
        for period, ira in semesters.items():
            if period not in best or ira > best[period]:
                best[period] = ira

    for semesters in iras.values():
        for period in semesters:
            top = best[period]
            if top:
                semesters[period] /= top
    return iras
def periodo_real(df):
    """Return a placeholder mapping ``{MATR_ALUNO: None}`` for every student in ``df``.

    The analysis itself is not implemented yet: every student maps to ``None``.

    Args:
        df: DataFrame with a ``MATR_ALUNO`` column.

    Returns:
        Dict keyed by the plain ``MATR_ALUNO`` value.
    """
    # Group by the column name, not a one-element list: with a list pandas
    # (>= 2.0) yields 1-tuples as group keys, which would no longer match the
    # student identifiers used to look results up.
    return {matr: None for matr, _ in df.groupby("MATR_ALUNO")}
def periodo_pretendido(df, ano_atual=None, semestre_atual=None):
    """Return the semester each student would be in if they had never fallen behind.

    The value is the number of semesters from admission up to the reference
    year/semester, both ends inclusive:
    ``(ano_atual - ANO_INGRESSO) * 2 + semestre_atual - SEMESTRE_INGRESSO + 1``.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``ANO_INGRESSO`` and
            ``SEMESTRE_INGRESSO``.
        ano_atual: Reference year; defaults to the module constant ``ANO_ATUAL``.
        semestre_atual: Reference semester; defaults to the module constant
            ``SEMESTRE_ATUAL``.

    Returns:
        ``{matr: expected_semester}``.
    """
    if ano_atual is None:
        ano_atual = ANO_ATUAL
    if semestre_atual is None:
        semestre_atual = SEMESTRE_ATUAL

    students = {}
    grouped = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
    for (matr, ano_ingresso, semestre_ingresso), _ in grouped:
        elapsed_years = ano_atual - int(ano_ingresso)
        students[matr] = elapsed_years * 2 + semestre_atual - int(semestre_ingresso) + 1
    return students
def ira_semestral(df):
    """Return ``{matr: {"year/period": ira}}`` for every student in ``df``.

    Takes the ``[ira, course_count]`` pairs produced by
    ``ira_por_quantidade_disciplinas`` and keeps only the IRA component.
    """
    per_student = ira_por_quantidade_disciplinas(df)
    return {
        matr: {periodo: stats[0] for periodo, stats in semesters.items()}
        for matr, semesters in per_student.items()
    }
def ira_por_quantidade_disciplinas(df):
    """Return, per student and semester, the mean grade and the number of courses.

    Rows without ``MEDIA_FINAL`` are ignored. A row is counted only when its
    ``MEDIA_CREDITO`` is non-zero and its ``SITUACAO`` is listed in
    ``Situation.SITUATION_AFFECT_IRA``.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``ANO``, ``PERIODO``,
            ``SITUACAO``, ``MEDIA_FINAL`` and ``MEDIA_CREDITO``.

    Returns:
        ``{matr: {"year/period": [mean_grade / 100, course_count]}}``. A student
        whose rows are all filtered out still appears, with an empty dict.
    """
    students = {}
    df = df.dropna(subset=["MEDIA_FINAL"])
    columns = ("MATR_ALUNO", "ANO", "PERIODO", "SITUACAO", "MEDIA_FINAL", "MEDIA_CREDITO")
    # Iterate over the row values directly. After dropna() the index has gaps,
    # so label lookups such as df["ANO"][i] for i in range(len(df)) would hit
    # missing labels and skip the trailing rows.
    rows = zip(*(df[column] for column in columns))
    for matr, ano, periodo, situacao, nota, media_credito in rows:
        semesters = students.setdefault(matr, {})
        if int(media_credito) == 0 or int(situacao) not in Situation.SITUATION_AFFECT_IRA:
            continue
        key = str(int(ano)) + "/" + str(periodo)
        totals = semesters.setdefault(key, [0, 0])
        totals[0] += float(nota)
        totals[1] += 1

    # Turn the grade sums into means on a 0-1 scale. Every stored entry has a
    # course count of at least 1.
    for semesters in students.values():
        for totals in semesters.values():
            totals[0] /= totals[1] * 100
    return students
def indice_aprovacao_semestral(df):
    """Return, per student and semester, how many courses were passed and attempted.

    Rows without ``MEDIA_FINAL`` are ignored. A row whose ``SITUACAO`` is in
    ``Situation.SITUATION_PASS`` adds one pass and one attempt; a row whose
    ``SITUACAO`` is in ``Situation.SITUATION_FAIL`` adds one attempt.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``ANO``, ``PERIODO``,
            ``SITUACAO`` and ``MEDIA_FINAL``.

    Returns:
        ``{matr: {"year/period": [passed, attempted]}}``.
    """
    students = {}
    df = df.dropna(subset=['MEDIA_FINAL'])
    # Iterate over the row values directly: after dropna() the index has gaps,
    # so label lookups by range(len(df)) would not address the right rows.
    rows = zip(df["MATR_ALUNO"], df["ANO"], df["PERIODO"], df["SITUACAO"])
    for matr, ano, periodo, situacao in rows:
        semesters = students.setdefault(matr, {})
        key = str(int(ano)) + "/" + str(periodo)
        counts = semesters.setdefault(key, [0, 0])
        situacao = int(situacao)
        if situacao in Situation.SITUATION_PASS:
            counts[0] += 1
            counts[1] += 1
        if situacao in Situation.SITUATION_FAIL:
            counts[1] += 1
    return students
def aluno_turmas(df):
    """Return every graded course of each student.

    Rows without ``MEDIA_FINAL`` are skipped. The ``SITUACAO`` code of a row is
    translated through ``Situation.SITUATIONS``; codes missing from that table
    fall back to ``Situation.SIT_OUTROS``.

    Returns:
        ``{matr: [course_dict, ...]}`` where each course dict has the keys
        ``ano``, ``codigo``, ``nome``, ``nota``, ``semestre`` and ``situacao``.
    """
    graded = df.dropna(subset=['MEDIA_FINAL'])
    situation_names = dict(Situation.SITUATIONS)

    def describe(row):
        # One history row rendered as the dict stored for the student.
        return {
            'ano': str(int(row["ANO"])),
            'codigo': row["COD_ATIV_CURRIC"],
            'nome': row["NOME_ATIV_CURRIC"],
            'nota': row["MEDIA_FINAL"],
            'semestre': row["PERIODO"],
            'situacao': situation_names.get(row["SITUACAO"], Situation.SIT_OUTROS)
        }

    return {
        matr: [describe(row) for _, row in hist.iterrows()]
        for matr, hist in graded.groupby('MATR_ALUNO')
    }
......@@ -30,6 +30,7 @@ def load_dataframes(cwd='.'):
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste))
......@@ -55,7 +56,7 @@ def fix_dataframes(dataframes):
clean_register(register)
merged = pd.merge(history, register, how='right', on=['MATR_ALUNO'])
#~ print(merged)
fix_situation(merged)
# fix_admission(merged)
fix_evasion(merged)
......@@ -94,9 +95,12 @@ def fix_admission(df):
def fix_evasion(df):
evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]
df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100
for evasion in EvasionForm.EVASION_FORM:
#~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0]
df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
#~ if(evasion[0] == 100):
#~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False):
#~ if(x != 0.0):
......
import sys
import os
import time
import math
from datetime import timedelta
from pathlib import Path
from utils.utils import build_path
from utils.utils import *
from utils.situations import *
from analysis.degree_analysis import *
from analysis.student_analysis import *
# Text type alias: ``unicode`` where that builtin exists (Python 2), otherwise
# ``str``.
try:
    to_unicode = unicode
except NameError:
    to_unicode = str
def build_cache(dataframe):
    """Generate the cache files of every course found in ``dataframe``.

    The rows are split by ``COD_CURSO``. For each course the degree data is
    written under ``cache/curso/<COD_CURSO>/`` and the student data under
    ``cache/curso/<COD_CURSO>/students/``.

    Args:
        dataframe: DataFrame with a ``COD_CURSO`` column plus whatever columns
            ``generate_degree_data`` and ``generate_student_data`` read.
    """
    path = 'cache/curso'
    ensure_path_exists(path)

    for cod, df in dataframe.groupby('COD_CURSO'):
        # format() also accepts non-string course codes; the trailing slash is
        # kept because the generators receive directory prefixes.
        degree_path = '{}/{}/'.format(path, cod)
        generate_degree_data(degree_path, df)
        generate_student_data(degree_path + 'students/', df)
    # TODO: student list, admission data/list and course data generators are
    # not wired in yet.
def generate_degree_data(path, dataframe):
    """Compute the degree-level analyses and save them as ``degree.json``.

    Ensures that ``path`` and its ``students`` sub-directory exist, then
    writes one JSON object containing the results of ``average_graduation``,
    ``general_failure`` and ``general_ira`` together with the number of active
    and graduated students.

    Args:
        path: Directory of the degree, with or without a trailing slash.
        dataframe: DataFrame holding the rows of a single degree.
    """
    # os.path.join gives the same result whether or not ``path`` ends with a
    # separator, unlike plain string concatenation.
    ensure_path_exists(path)
    ensure_path_exists(os.path.join(path, 'students'))

    # One row per (student, exit form) pair so students are not counted once
    # per history row.
    students = dataframe[['MATR_ALUNO', 'FORMA_EVASAO']].drop_duplicates()
    exit_form = students.FORMA_EVASAO

    # Each analysis runs exactly once; the results go straight into the payload.
    data = {
        'average_graduation': average_graduation(dataframe),
        'general_failure': general_failure(dataframe),
        'general_ira': general_ira(dataframe),
        'active_students': students[exit_form == EvasionForm.EF_ATIVO].shape[0],
        'graduated_students': students[exit_form == EvasionForm.EF_FORMATURA].shape[0],
    }
    save_json(os.path.join(path, 'degree.json'), data)
def historico(dataframe):
    """Return the academic history in ``dataframe`` as a list of dicts, one per row.

    Each dict contains only the history columns listed below, keyed by column
    name.
    """
    columns = [
        'ANO', 'MEDIA_FINAL', 'PERIODO', 'SITUACAO', 'COD_ATIV_CURRIC',
        'NOME_ATIV_CURRIC', 'CREDITOS', 'CH_TOTAL', 'DESCR_ESTRUTURA',
        'FREQUENCIA',
    ]
    return [dict(row[columns]) for _, row in dataframe.iterrows()]
def process_semestre(per, df):
    """Summarize one semester of a student's history.

    Args:
        per: Semester identifier, stored unchanged under ``'semestre'``.
        df: DataFrame with the columns ``SITUACAO`` and ``MEDIA_FINAL``.

    Returns:
        Dict with the mean grade of the rows affecting the IRA (``ira``), the
        number of passed (``completas``) and coursed (``tentativas``) rows, the
        pass ratio (``aprovacao``) and ``ira`` divided by ``tentativas``
        (``ira_por_quantidade_disciplinas``). Both ratios are 0 when there are
        no coursed rows.
    """
    situacao = df.SITUACAO
    affects_ira = situacao.isin(Situation.SITUATION_AFFECT_IRA)
    passed = situacao.isin(Situation.SITUATION_PASS)
    coursed = situacao.isin(Situation.SITUATION_COURSED)

    ira = df[affects_ira].MEDIA_FINAL.mean()
    completas = df[passed].shape[0]
    tentativas = df[coursed].shape[0]

    if tentativas:
        aprovacao = completas/tentativas
        ira_por_disciplina = ira/tentativas
    else:
        aprovacao = 0
        ira_por_disciplina = 0

    return {
        'semestre': per,
        'ira': ira,
        'completas': completas,
        'tentativas': tentativas,
        'aprovacao': aprovacao,
        'ira_por_quantidade_disciplinas': ira_por_disciplina
    }
def generate_student_data(path, dataframe):
    """Run the per-student analyses and save one file per student under ``path``.

    Every analysis is executed once over the whole ``dataframe`` and returns a
    dict keyed by student identifier (GRR). The results are then regrouped per
    student. A student missing from an analysis (for instance because all of
    their rows were filtered out) gets ``None`` for it instead of raising
    ``KeyError``.

    NOTE(review): only the ``aluno_turmas`` result is written to disk, as in
    the previous code; the other analyses are computed and collected but never
    saved. Confirm whether the whole per-student dict should be persisted.

    Args:
        path: Prefix of the output files; the student identifier is appended.
        dataframe: DataFrame with a ``MATR_ALUNO`` column plus the columns the
            analyses read.
    """
    all_grrs = list(dataframe["MATR_ALUNO"].drop_duplicates())

    # Pairs of (analysis result keyed by GRR, name of the analysis in the
    # per-student dict).
    analises = [
        (posicao_turmaIngresso_semestral(dataframe),
         "posicao_turmaIngresso_semestral"),
        (periodo_real(dataframe),
         "periodo_real"),
        (periodo_pretendido(dataframe),
         "periodo_pretendido"),
        (ira_semestral(dataframe),
         "ira_semestral"),
        (ira_por_quantidade_disciplinas(dataframe),
         "ira_por_quantidade_disciplinas"),
        (indice_aprovacao_semestral(dataframe),
         "indice_aprovacao_semestral"),
        (aluno_turmas(dataframe),
         "aluno_turmas"),
    ]

    student_data = {}
    for grr in all_grrs:
        student_data[grr] = {
            nome: resultado.get(grr) for resultado, nome in analises
        }
        save_json(path + str(grr), student_data[grr]["aluno_turmas"])
def generate_student_list(path):
    """Placeholder for the student list generator: does nothing and returns None."""
def generate_admission_data(path):
    """Placeholder for the admission data generator: does nothing and returns None."""
def generate_admission_list(path):
    """Placeholder for the admission list generator: does nothing and returns None."""
def generate_course_data(path):
    """Placeholder for the course data generator: does nothing and returns None."""
def generate_course_general_data(path):
    """Placeholder for the general course data generator: does nothing and returns None."""
[
{
"nota": 45.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "CI055",
"ano": "2001",
"nome": "ALGORITMOS E ESTRUTURAS DE DADOS I"
},
{
"nota": 45.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "CM201",
"ano": "2001",
"nome": "CALCULO DIFERENCIAL E INTEGRAL I"
},
{
"nota": 95.0,
"semestre": "2",
"situacao": "Aprovado",
"codigo": "CM045",
"ano": "2001",
"nome": "Geometria Anal\u00edtica"
},
{
"nota": 50.0,
"semestre": "2",
"situacao": "Aprovado",
"codigo": "CM046",
"ano": "2001",
"nome": "INTRODUCAO A ALGEBRA"
},
{
"nota": 64.0,
"semestre": "2",
"situacao": "Aprovado",
"codigo": "CI063",
"ano": "2001",
"nome": "MAQUINAS PROGRAMAVEIS"
},
{
"nota": 66.0,
"semestre": "2",
"situacao": "Aprovado",
"codigo": "CI066",
"ano": "2001",
"nome": "OFICINA DE PROGRAMACAO"
},
{
"nota": 28.0,
"semestre": "1",
"situacao": "Reprovado por nota",
"codigo": "CM005",
"ano": "2002",
"nome": "\u00c1lgebra Linear"
},
{
"nota": 89.0,
"semestre": "1",
"situacao": "Aprovado",
"codigo": "CI055",
"ano": "2002",
"nome": "ALGORITMOS E ESTRUTURAS DE DADOS I"
},
{
"nota": 46.0,
"semestre": "1",
"situacao": "Reprovado por nota",
"codigo": "CM201",
"ano": "2002",
"nome": "CALCULO DIFERENCIAL E INTEGRAL I"
},
{
"nota": 50.0,
"semestre": "1",
"situacao": "Aprovado",
"codigo": "CI068",
"ano": "2002",
"nome": "CIRCUITOS LOGICOS"
},
{
"nota": 79.0,
"semestre": "1",
"situacao": "Aprovado",
"codigo": "CE003",
"ano": "2002",
"nome": "Estat\u00edstica II"
},
{
"nota": 11.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "CI056",
"ano": "2002",
"nome": "ALGORITMOS E ESTRUTURAS DE DADOS II"
},
{
"nota": 52.0,
"semestre": "2",
"situacao": "Aprovado",
"codigo": "CM201",
"ano": "2002",
"nome": "CALCULO DIFERENCIAL E INTEGRAL I"
},
{
"nota": 28.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "SA214",
"ano": "2002",
"nome": "INTRODUCAO A TEORIA GERAL DA ADMINISTRACAO"
},
{
"nota": 9.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "CI237",
"ano": "2002",
"nome": "MATEMATICA DISCRETA"
},
{
"nota": 0.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "CI210",
"ano": "2002",
"nome": "PROJETOS DIGITAIS E MICROPROCESSADORES"
},
{
"nota": 0.0,
"semestre": "2",
"situacao": "Reprovado por nota",
"codigo": "CI064",
"ano": "2002",
"nome": "SOFTWARE BASICO I"
},
{
"nota": 70.0,
"semestre": "1",
"situacao": "Aprovado",
"codigo": "CI069",
"ano": "2003",
"nome": "ADMINISTRACAO DE EMPRESAS DE INFORMATICA"
},
{
"nota": 20.0,
"semestre": "1",
"situacao": "Reprovado por nota",
"codigo": "CM005",
"ano": "2003",
"nome": "\u00c1lgebra Linear"
},
{
"nota": 29.0,
"semestre": "1",
"situacao": "Reprovado por nota",
"codigo": "CI056",
"ano": "2003",
"nome": "ALGORITMOS E ESTRUTURAS DE DADOS II"
},
{
"nota": 10.0,
"semestre": "1",
"situacao": "Reprovado por nota",
"codigo": "CM202",
"ano": "2003",
"nome": "CALCULO DIFERENCIAL E INTEGRAL II"
},
{
"nota": 0.0,
"semestre": "1",
"situacao": "Reprovado por nota",
"codigo": "CI237",
"ano": "2003",
"nome": "MATEMATICA DISCRETA"
},
{
"nota": 29.0,
"semestre": "1",
"situacao": "Reprovado por nota",