Commit c71c62d7 authored by Bruno Meyer's avatar Bruno Meyer 😢

Merge branch 'jomaro' into 'students'

reestruturação de como as análises são rodadas e algumas análises de uma linha

See merge request pet/adega-reborn!2
parents cf807f44 fa8a8f87
import pandas as pd
import numpy as np
import math
from utils.situations import Situation, EvasionForm
......
import pandas as pd
from utils.situations import *
from utils.situations import *
ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
def listagem_evasao(df):
    """Print and return the rows whose ``FORMA_EVASAO`` is not 1.

    Args:
        df: DataFrame with a ``FORMA_EVASAO`` column.

    Returns:
        The filtered DataFrame. The previous implementation only printed
        it and returned ``None``; returning it lets callers reuse the
        listing.
    """
    # NOTE(review): 1 is presumably the "still enrolled" evasion code
    # (EvasionForm.EF_ATIVO) -- confirm against utils.situations.
    evaded = df[df.FORMA_EVASAO != 1]
    print(evaded)
    return evaded
def average_ira(d):
    """Return the workload-weighted mean grade, scaled to the 0-1 range.

    Rows without ``MEDIA_FINAL`` and rows whose ``MEDIA_FINAL`` is above
    100 are ignored. Each remaining grade is weighted by ``CH_TOTAL``.

    Args:
        d: DataFrame with ``MEDIA_FINAL`` and ``CH_TOTAL`` columns.

    Returns:
        ``sum(MEDIA_FINAL * CH_TOTAL) / (sum(CH_TOTAL) * 100)``, or
        ``None`` when no row survives the filtering.
    """
    graded = d.dropna(subset=['MEDIA_FINAL'])
    graded = graded[graded['MEDIA_FINAL'] <= 100]
    if graded.empty:
        return None

    weighted_sum = np.sum(graded['MEDIA_FINAL'] * graded['CH_TOTAL'])
    ch_total = np.sum(graded['CH_TOTAL']) * 100
    return weighted_sum / ch_total
def posicao_turmaIngresso_semestral(df):
    """Normalise every student's semester IRA by that semester's best IRA.

    Args:
        df: DataFrame accepted by ``ira_semestra``.

    Returns:
        ``{matr: {"year/period": ira / best_ira_of_that_semester}}``.
        When the best IRA of a semester is 0 the values are left untouched
        (they are all 0) instead of raising ``ZeroDivisionError``.
    """
    iras = ira_semestra(df)

    # Highest IRA seen in each semester, across all students.
    best = {}
    for semesters in iras.values():
        for semester, ira in semesters.items():
            if semester not in best or ira > best[semester]:
                best[semester] = ira

    for semesters in iras.values():
        for semester in semesters:
            if best[semester]:
                semesters[semester] /= best[semester]
    return iras
def periodo_real(df):
    """Return a placeholder mapping with one entry per student.

    The actual period computation is not implemented: every student id
    found in ``MATR_ALUNO`` is mapped to ``None``.

    Args:
        df: DataFrame with a ``MATR_ALUNO`` column.

    Returns:
        ``{matr: None}`` for every distinct, non-null ``MATR_ALUNO``.
    """
    # Group by a scalar key (not a one-element list) so the group name is
    # the student id itself rather than a 1-tuple on recent pandas.
    return {matr: None for matr, _ in df.groupby("MATR_ALUNO")}
def periodo_pretendido(df, ano_atual=None, semestre_atual=None):
    """Compute the semester each student would be in with no delays.

    The value is the number of semesters elapsed from admission up to and
    including the reference semester:
    ``(ano_atual - ANO_INGRESSO) * 2 + semestre_atual - SEMESTRE_INGRESSO + 1``.

    Args:
        df: DataFrame with ``MATR_ALUNO``, ``ANO_INGRESSO`` and
            ``SEMESTRE_INGRESSO`` columns (values convertible with ``int``).
        ano_atual: Reference year; defaults to the module-level ``ANO_ATUAL``.
        semestre_atual: Reference semester; defaults to the module-level
            ``SEMESTRE_ATUAL``.

    Returns:
        ``{matr: expected_period}``.
    """
    if ano_atual is None:
        ano_atual = ANO_ATUAL
    if semestre_atual is None:
        semestre_atual = SEMESTRE_ATUAL

    students = {}
    grouped = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
    for (matr, ano, semestre), _ in grouped:
        # format() instead of "+" so numeric columns do not raise TypeError;
        # the output for string columns is unchanged.
        print("{} : {} {}".format(matr, ano, semestre))
        students[matr] = (ano_atual - int(ano)) * 2 + semestre_atual - int(semestre) + 1
    return students
def ira_semestra(df):
    """Return each student's IRA per semester.

    Thin projection of ``ira_por_quantidade_disciplinas``: keeps only the
    first element (the IRA) of each ``[ira, course_count]`` pair.

    Args:
        df: DataFrame accepted by ``ira_por_quantidade_disciplinas``.

    Returns:
        ``{matr: {"year/period": ira}}``.
    """
    per_student = ira_por_quantidade_disciplinas(df)
    return {
        matr: {periodo: stats[0] for periodo, stats in periodos.items()}
        for matr, periodos in per_student.items()
    }
def ira_por_quantidade_disciplinas(df):
    """Compute, per student and semester, the mean grade and course count.

    Only rows whose ``SITUACAO`` is in ``Situation.SITUATION_AFFECT_IRA``
    and whose ``MEDIA_CREDITO`` is non-zero are counted. Rows without
    ``MEDIA_FINAL`` are dropped first.

    Args:
        df: DataFrame with ``MATR_ALUNO``, ``ANO``, ``PERIODO``,
            ``SITUACAO``, ``MEDIA_FINAL`` and ``MEDIA_CREDITO`` columns.

    Returns:
        ``{matr: {"year/period": [ira, n_courses]}}`` where ``ira`` is the
        mean of ``MEDIA_FINAL`` divided by 100. A student with no counted
        course still gets an (empty) entry.
    """
    students = {}
    df = df.dropna(subset=["MEDIA_FINAL"])

    # Iterate positionally: after dropna the index has gaps, so the former
    # df[col][i] label lookups raised KeyError.
    rows = zip(df["MATR_ALUNO"], df["ANO"], df["PERIODO"],
               df["SITUACAO"], df["MEDIA_FINAL"], df["MEDIA_CREDITO"])
    for matr, ano, periodo, situacao, nota, media_credito in rows:
        semesters = students.setdefault(matr, {})
        if int(situacao) in Situation.SITUATION_AFFECT_IRA and int(media_credito) != 0:
            key = "{}/{}".format(int(ano), periodo)
            stats = semesters.setdefault(key, [0, 0])
            stats[0] += float(nota)
            stats[1] += 1

    # Turn the accumulated grade sum into a 0-1 mean.
    for semesters in students.values():
        for stats in semesters.values():
            if stats[1] != 0:
                stats[0] /= stats[1] * 100
    return students
def indice_aprovacao_semestral(df):
    """Count approved and attempted courses per student and semester.

    Rows without ``MEDIA_FINAL`` are dropped first. A situation in
    ``Situation.SITUATION_PASS`` increments both counters; a situation in
    ``Situation.SITUATION_FAIL`` increments only the attempts counter.

    Args:
        df: DataFrame with ``MATR_ALUNO``, ``ANO``, ``PERIODO``,
            ``SITUACAO`` and ``MEDIA_FINAL`` columns.

    Returns:
        ``{matr: {"year/period": [approved, attempted]}}``.
    """
    students = {}
    df = df.dropna(subset=['MEDIA_FINAL'])

    # Iterate positionally: after dropna the index has gaps, so the former
    # df[col][i] label lookups raised KeyError.
    rows = zip(df["MATR_ALUNO"], df["ANO"], df["PERIODO"], df["SITUACAO"])
    for matr, ano, periodo, situacao in rows:
        semesters = students.setdefault(matr, {})
        counts = semesters.setdefault("{}/{}".format(int(ano), periodo), [0, 0])
        situacao = int(situacao)
        if situacao in Situation.SITUATION_PASS:
            counts[0] += 1
            counts[1] += 1
        if situacao in Situation.SITUATION_FAIL:
            counts[1] += 1
    return students
def aluno_turmas(df):
    """List, for every student, the graded courses in their record.

    Rows without ``MEDIA_FINAL`` are dropped. The ``SITUACAO`` code is
    translated through ``Situation.SITUATIONS`` (an iterable of
    ``(code, label)`` pairs); unknown codes map to ``Situation.SIT_OUTROS``.

    Args:
        df: DataFrame with ``MATR_ALUNO``, ``ANO``, ``COD_ATIV_CURRIC``,
            ``NOME_ATIV_CURRIC``, ``MEDIA_FINAL``, ``PERIODO`` and
            ``SITUACAO`` columns.

    Returns:
        ``{matr: [{"ano", "codigo", "nome", "nota", "semestre",
        "situacao"}, ...]}``.
    """
    df = df.dropna(subset=['MEDIA_FINAL'])
    situations = dict(Situation.SITUATIONS)

    students = {}
    for matr, hist in df.groupby('MATR_ALUNO'):
        students[matr] = [
            {
                'ano': str(int(row["ANO"])),
                'codigo': row["COD_ATIV_CURRIC"],
                'nome': row["NOME_ATIV_CURRIC"],
                'nota': row["MEDIA_FINAL"],
                'semestre': row["PERIODO"],
                'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS),
            }
            for _, row in hist.iterrows()
        ]
    return students
import sys
import os
import time
import math
from datetime import timedelta
from pathlib import Path
from utils.utils import build_path
from utils.utils import *
from utils.situations import *
from analysis.degree_analysis import *
from analysis.student_analysis import *
try:
to_unicode = unicode
except NameError:
......@@ -17,37 +15,95 @@ except NameError:
def build_cache(dataframe):
    """Generate the JSON cache for every degree found in ``dataframe``.

    One directory ``cache/curso/<COD_CURSO>/`` is produced per distinct
    ``COD_CURSO`` value, filled by ``generate_degree_data``.

    Args:
        dataframe: DataFrame with a ``COD_CURSO`` column plus whatever
            ``generate_degree_data`` needs.
    """
    path = 'cache/curso'
    ensure_path_exists(path)

    for cod, df in dataframe.groupby('COD_CURSO'):
        # format() accepts non-string course codes and yields exactly one
        # separator; the trailing "/" is what generate_degree_data expects.
        generate_degree_data('{}/{}/'.format(path, cod), df)

    # TODO: the remaining generators are not wired in yet:
    # generate_student_list, generate_admission_data,
    # generate_admission_list, generate_course_data,
    # generate_course_general_data.
def generate_degree_data(path, dataframe):
    """Write ``degree.json`` and one JSON file per student under ``path``.

    Args:
        path: Directory of the degree, with or without a trailing slash.
        dataframe: Records of a single degree; must contain ``MATR_ALUNO``
            and ``FORMA_EVASAO`` plus the columns required by the analysis
            functions and by ``generate_student_data``.
    """
    # os.path.join works whether or not ``path`` ends with a separator; the
    # former ``path + 'students'`` silently required a trailing slash.
    students_dir = os.path.join(path, 'students')
    ensure_path_exists(path)
    ensure_path_exists(students_dir)

    # One row per student so each student is counted once.
    students = dataframe[['MATR_ALUNO', 'FORMA_EVASAO']].drop_duplicates()
    active = students[students.FORMA_EVASAO == EvasionForm.EF_ATIVO]
    graduated = students[students.FORMA_EVASAO == EvasionForm.EF_FORMATURA]

    data = {
        'average_graduation': average_graduation(dataframe),
        'general_failure': general_failure(dataframe),
        'general_ira': general_ira(dataframe),
        'active_students': active.shape[0],
        'graduated_students': graduated.shape[0],
    }
    save_json(os.path.join(path, 'degree.json'), data)

    for ind, hist in dataframe.groupby('MATR_ALUNO'):
        generate_student_data(os.path.join(students_dir, '{}.json'.format(ind)), hist)
def historico(dataframe):
    """Return the academic record as a list of per-row dictionaries.

    Args:
        dataframe: DataFrame containing at least the ten record columns
            listed in ``columns`` below.

    Returns:
        One ``dict`` per row, restricted to those columns, in row order.

    Raises:
        KeyError: If any of the expected columns is missing.
    """
    columns = ['ANO', 'MEDIA_FINAL', 'PERIODO', 'SITUACAO', 'COD_ATIV_CURRIC',
               'NOME_ATIV_CURRIC', 'CREDITOS', 'CH_TOTAL', 'DESCR_ESTRUTURA',
               'FREQUENCIA']
    return [dict(row[columns]) for _, row in dataframe.iterrows()]
def process_semestre(per, df):
    """Summarise one semester of a student's record.

    Args:
        per: Semester label, stored verbatim under ``'semestre'``.
        df: The student's rows for that semester; needs ``SITUACAO`` and
            ``MEDIA_FINAL`` columns.

    Returns:
        Dict with the mean grade of IRA-affecting rows (``ira``), the number
        of passed (``completas``) and coursed (``tentativas``) rows, the
        pass ratio (``aprovacao``) and ``ira / tentativas``; both ratios are
        0 when nothing was coursed.
    """
    situacao = df.SITUACAO
    # NOTE(review): mean() of an empty selection is NaN, which json.dump
    # writes as the non-standard token NaN -- confirm consumers accept it.
    ira = df[situacao.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean()
    completas = df[situacao.isin(Situation.SITUATION_PASS)].shape[0]
    tentativas = df[situacao.isin(Situation.SITUATION_COURSED)].shape[0]

    if tentativas:
        aprovacao = completas / tentativas
        ira_por_disciplina = ira / tentativas
    else:
        aprovacao = 0
        ira_por_disciplina = 0

    return {
        'semestre': per,
        'ira': ira,
        'completas': completas,
        'tentativas': tentativas,
        'aprovacao': aprovacao,
        'ira_por_quantidade_disciplinas': ira_por_disciplina,
    }
def generate_student_data(path, dataframe):
    """Write one student's summary and full record to ``path`` as JSON.

    Args:
        path: Destination JSON file.
        dataframe: All rows of a single student. Personal fields are read
            from the first row only.

    Raises:
        IndexError: If ``dataframe`` has no rows.
    """
    # A bare filename has an empty dirname; skip directory creation then
    # instead of asking ensure_path_exists to create ''.
    directory = os.path.dirname(path)
    if directory:
        ensure_path_exists(directory)

    personal_columns = ['MATR_ALUNO', 'NOME_ALUNO', 'SEXO', 'FORMA_INGRESSO',
                        'FORMA_EVASAO', 'ANO_INGRESSO', 'SEMESTRE_INGRESSO',
                        'ANO_EVASAO', 'SEMESTRE_EVASAO']
    data = dict(dataframe.iloc[0][personal_columns])

    situacao = dataframe.SITUACAO
    # NOTE(review): semesters are keyed by PERIODO alone, so the same period
    # of different years is merged -- confirm this is intended.
    semestres = [
        process_semestre(per, dataframe[dataframe.PERIODO == per])
        for per in sorted(dataframe.PERIODO.unique())
    ]
    data.update({
        'ira': dataframe[situacao.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean(),
        'completas': dataframe[situacao.isin(Situation.SITUATION_PASS)].shape[0],
        'tentativas': dataframe[situacao.isin(Situation.SITUATION_COURSED)].shape[0],
        'semestres': semestres,
        'historico': historico(dataframe),
    })
    save_json(path, data)
def generate_student_data_old(path, dataframe):
    """Legacy debugging driver: print the result of each student analysis.

    Kept for reference only; it writes nothing to ``path``.

    Args:
        path: Unused.
        dataframe: Full records DataFrame passed to every analysis.
    """
    print(aluno_turmas(dataframe))
    print(indice_aprovacao_semestral(dataframe))
    # The two ids below are sample students from the original dataset;
    # .get() keeps the driver from raising KeyError on other datasets.
    print("2007/1" in ira_por_quantidade_disciplinas(dataframe).get("GRR20066955", {}))
    print(ira_semestra(dataframe).get("GRR20079775"))
    # periodo_pretendido prints one line per student itself.
    periodo_pretendido(dataframe)
    print(periodo_real(dataframe))
    print(posicao_turmaIngresso_semestral(dataframe))
    print(listagem_evasao(dataframe))
......
......@@ -5,6 +5,8 @@ from build_cache import build_cache
from datetime import timedelta
from analysis.degree_analysis import *
def main():
start_time = time.clock()
start_time_exec = time.time()
......
......@@ -137,3 +137,22 @@ class Situation:
SIT_REPROVADO_FREQ,
SIT_CONHECIMENTO_REPROVADO
)
"""
isso deve ser pra filtrar fora coisas que não são disciplinas cumpridas
como "trancamento administrativo" e "horas"
importante pra saber quantas matérias um aluno REALMENTE fez em um semestre
"""
# English rendering of the note above: this tuple is meant to filter out
# entries that are not courses actually taken, such as "administrative
# withdrawal" and "hours"; it matters for knowing how many courses a student
# REALLY took in a semester.
SITUATION_COURSED = (
SIT_APROVADO,
SIT_REPROVADO,
SIT_REPROVADO_FREQ,
SIT_DISPENSA_COM_NOTA,
SIT_CONHECIMENTO_APROVADO,
SIT_CONHECIMENTO_REPROVADO,
SIT_REPROVADO_SEM_NOTA,
SIT_INCOMPLETO,
SIT_CANCELADO,
)
import os
import json
# Pick up Django's DEBUG flag when running inside a configured project;
# otherwise (Django missing or settings not configured) default to debug
# mode so JSON output is indented.
try:
    from django.conf import settings
    DEBUG = settings.DEBUG
except Exception:
    # "except Exception" rather than a bare "except" so KeyboardInterrupt
    # and SystemExit are not swallowed at import time.
    DEBUG = True
def build_path(path):
    """Create the single directory ``path`` unless something already exists there.

    Only the last path component is created; the parent must already exist.

    Args:
        path: Directory to create.

    Raises:
        FileNotFoundError: If the parent directory does not exist.
    """
    # EAFP instead of exists()-then-mkdir(), which could race with another
    # process creating the same directory.
    try:
        os.mkdir(path)
    except FileExistsError:
        pass
def ensure_path_exists(complete_path):
    """Create ``complete_path`` and any missing parent directories.

    Args:
        complete_path: Directory path, relative or absolute, with or
            without a trailing separator. An empty string is a no-op.
    """
    # The former manual split on "/" produced an empty first component for
    # absolute paths (and for ""), which ended in os.mkdir("") failing.
    if not complete_path:
        return
    os.makedirs(complete_path, exist_ok=True)
def save_json(path, data):
    """Serialise ``data`` as JSON into ``path``, creating its directory.

    The output is indented with 4 spaces when the module-level ``DEBUG``
    flag is truthy and compact otherwise.

    Args:
        path: Destination file.
        data: JSON-serialisable object.
    """
    # A bare filename has an empty dirname; there is nothing to create then.
    directory = os.path.dirname(path)
    if directory:
        ensure_path_exists(directory)

    params = {'indent': 4} if DEBUG else {}
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, **params)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment