#~ TODO:
#~ Cache everything.
#~ When a function is called, check whether its result is already in the cache.
from script.utils.situations import *
from script.utils.utils import memoize
from collections import defaultdict
# Reference year used by periodo_pretendido when counting the semesters
# elapsed since a student's admission.
CURRENT_YEAR = 2017
# Reference semester, used together with CURRENT_YEAR in the same formula.
CURRENT_SEMESTER = 1
class StudentAnalysis:
data_frame = None
def __init__(self, df):
    """Keep the data frame that the analysis methods fall back on.

    :param df: stored as ``self.data_frame``; the methods of this class
        use it whenever they are called without an explicit ``df``.
    """
    self.data_frame = df
@memoize
def list_students(self, df=None):
    """Group the students by evasion form.

    :param df: optional data frame used instead of ``self.data_frame``.
        NOTE: the IRA values always come from ``self.ira_alunos()``, which
        is called without ``df``.
    :return: ``defaultdict(list)`` mapping each ``FORMA_EVASAO`` code to a
        list of dicts with the keys ``forma_evasao``, ``grr``, ``ira`` and
        ``nome``.
    """
    df = df if df is not None else self.data_frame

    # One row per distinct (student id, name, evasion form) combination.
    students = (
        df.groupby(["MATR_ALUNO", "NOME_PESSOA", "FORMA_EVASAO"])
        .size()
        .reset_index(name="count")
    )
    iras = self.ira_alunos()

    list_situations = defaultdict(list)
    # Group by a scalar key (not a one-element list) so that the group key
    # is the bare evasion code on every pandas version.
    for evasion_code, group in students.groupby("FORMA_EVASAO"):
        evasion_form_name = EvasionForm.code_to_str(evasion_code)
        for grr, nome in zip(group["MATR_ALUNO"], group["NOME_PESSOA"]):
            list_situations[evasion_code].append({
                "forma_evasao": evasion_form_name,
                "grr": grr,
                # A student missing from the IRA mapping is reported with
                # 0 instead of aborting the whole listing with KeyError.
                "ira": iras.get(grr, 0),
                "nome": nome,
            })
    return list_situations
@memoize
def ira_alunos(self, df=None):
    """Compute each student's overall IRA, weighted by course load.

    :param df: accepted for interface compatibility only; the figures are
        taken from ``self.ira_por_quantidade_disciplinas()``, which is
        called without arguments.
    :return: dict mapping each student id to the load-weighted mean of the
        per-semester IRA values, or 0 when the accumulated load is 0.
    """
    por_semestre = self.ira_por_quantidade_disciplinas()

    # Build a fresh dict rather than overwriting the entries of the one
    # received: that object may be a cached value shared with other callers
    # that still expect the per-semester structure.
    iras = {}
    for matr, semestres in por_semestre.items():
        ira_total = 0
        carga_total = 0
        # Each entry is [ira, number of courses, load].
        for ira, _, carga in semestres.values():
            ira_total += ira * carga
            carga_total += carga
        iras[matr] = ira_total / carga_total if carga_total != 0 else 0
    return iras
@memoize
def taxa_aprovacao(self, df=None):
    """Compute the overall approval rate of each student.

    :param df: accepted for interface compatibility only; the counts are
        taken from ``self.indice_aprovacao_semestral()``, which is called
        without arguments.
    :return: dict mapping each student id to ``approvals / total`` summed
        over all semesters, or ``None`` when the total is 0.
    """
    por_semestre = self.indice_aprovacao_semestral()

    # Build a fresh dict so the per-semester counts (possibly a cached,
    # shared object) are left untouched.
    taxas = {}
    for aluno, semestres in por_semestre.items():
        # Each entry is [approvals, total].
        aprovacoes = float(sum(contagem[0] for contagem in semestres.values()))
        total = float(sum(contagem[1] for contagem in semestres.values()))
        # None only when there is nothing to divide by; previously the rate
        # was unconditionally overwritten with None.
        taxas[aluno] = aprovacoes / total if total != 0 else None
    return taxas
@memoize
def posicao_turmaIngresso_semestral(self, df=None):
    """Express each semester IRA as a fraction of that semester's maximum.

    :param df: accepted for interface compatibility only; the values are
        taken from ``self.ira_semestral()``, which is called without
        arguments.
    :return: dict mapping student id to ``{semester: ira / max_ira}``,
        where ``max_ira`` is the highest IRA any student has for that
        semester. When that maximum is 0 the fraction is reported as 0.0.
    """
    iras = self.ira_semestral()

    # Highest IRA found for each semester across all students.
    ira_max = {}
    for semestres in iras.values():
        for semestre_ano, ira in semestres.items():
            if semestre_ano not in ira_max or ira > ira_max[semestre_ano]:
                ira_max[semestre_ano] = ira

    # Build a fresh mapping instead of dividing in place, so the dict
    # returned by ira_semestral (possibly cached) keeps its raw values.
    posicoes = {}
    for matr, semestres in iras.items():
        posicoes[matr] = {
            semestre_ano: (
                ira / ira_max[semestre_ano]
                if ira_max[semestre_ano] != 0
                else 0.0  # avoid ZeroDivisionError when the maximum is 0
            )
            for semestre_ano, ira in semestres.items()
        }
    return posicoes
@memoize
def periodo_real(self, df=None):
    """Return a placeholder mapping of every student id to ``None``.

    :param df: optional data frame used instead of ``self.data_frame``.
    :return: dict with one key per distinct ``MATR_ALUNO`` value, every
        value being ``None`` (no period is computed here).
    """
    df = df if df is not None else self.data_frame
    # Group by a scalar key (not a one-element list) so that the keys are
    # the bare student ids on every pandas version.
    return {matr: None for matr, _ in df.groupby("MATR_ALUNO")}
@memoize
def periodo_pretendido(self, df=None):
    """Compute, per student, the semester count since admission.

    The value is
    ``(CURRENT_YEAR - admission_year) * 2 + CURRENT_SEMESTER
    - admission_semester + 1``.

    :param df: optional data frame used instead of ``self.data_frame``.
    :return: dict mapping each student id to that integer.
    """
    df = df if df is not None else self.data_frame
    grupos = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])

    students = {}
    # Only the group keys are needed; the grouped rows are ignored.
    for (matr, ano_ingresso, semestre_ingresso), _ in grupos:
        students[matr] = (
            (CURRENT_YEAR - int(ano_ingresso)) * 2
            + CURRENT_SEMESTER
            - int(semestre_ingresso)
            + 1
        )
    return students
@memoize
def ira_semestral(self, df=None):
    """Return the IRA of each student for each semester.

    :param df: accepted for interface compatibility only; the figures are
        taken from ``self.ira_por_quantidade_disciplinas()``, which is
        called without arguments.
    :return: dict mapping student id to ``{semester: ira}``.
    """
    detalhes = self.ira_por_quantidade_disciplinas()
    # Build new dicts instead of overwriting the [ira, count, load] lists
    # in place: the source mapping may be a cached object that other
    # callers still need in its original shape.
    return {
        matr: {periodo: valores[0] for periodo, valores in semestres.items()}
        for matr, semestres in detalhes.items()
    }
@memoize
def ira_por_quantidade_disciplinas(self, df=None):
    """Accumulate IRA, course count and course load per student/semester.

    Rows without ``MEDIA_FINAL`` are ignored. Only rows whose ``SITUACAO``
    is in ``Situation.SITUATION_AFFECT_IRA`` contribute to a semester.

    :param df: optional data frame used instead of ``self.data_frame``.
    :return: dict mapping student id to ``{"<year>/<period>": [ira, n,
        load]}``, where ``ira`` is ``sum(grade * load) / (load_sum * 100)``,
        ``n`` the number of contributing rows and ``load`` the summed
        ``CH_TOTAL``. A student with no contributing row maps to ``{}``.
    """
    df = df if df is not None else self.data_frame
    df = df.dropna(subset=["MEDIA_FINAL"])

    students = {}
    # Iterate over the values themselves: after dropna the index has gaps,
    # so looking rows up with df[col][i] for i in range(len(df)) would hit
    # missing labels and skip rows whose label is >= len(df).
    linhas = zip(
        df["MATR_ALUNO"], df["ANO"], df["PERIODO"],
        df["SITUACAO"], df["MEDIA_FINAL"], df["CH_TOTAL"],
    )
    for matr, ano, periodo, situacao, nota, carga in linhas:
        semestres = students.setdefault(matr, {})
        chave = str(int(ano)) + "/" + str(periodo)
        situacao = int(situacao)
        nota = float(nota)
        carga = float(carga)
        if situacao in Situation.SITUATION_AFFECT_IRA:
            acumulado = semestres.setdefault(chave, [0, 0, 0])
            acumulado[0] += nota * carga
            acumulado[1] += 1
            acumulado[2] += carga

    # Turn the weighted grade sum into the semester IRA.
    for semestres in students.values():
        for acumulado in semestres.values():
            if acumulado[2] != 0:
                acumulado[0] /= acumulado[2] * 100
    return students
@memoize
def indice_aprovacao_semestral(self, df=None):
    """Count approvals and attempts per student and semester.

    Rows without ``MEDIA_FINAL`` are ignored. A row whose ``SITUACAO`` is
    in ``Situation.SITUATION_PASS`` adds one approval and one attempt; a
    row in ``Situation.SITUATION_FAIL`` adds one attempt.

    :param df: optional data frame used instead of ``self.data_frame``.
    :return: dict mapping student id to ``{"<year>/<period>": [approvals,
        attempts]}``.
    """
    df = df if df is not None else self.data_frame
    df = df.dropna(subset=["MEDIA_FINAL"])

    students = {}
    # Iterate over the values themselves: after dropna the index has gaps,
    # so looking rows up with df[col][i] for i in range(len(df)) would hit
    # missing labels and skip rows whose label is >= len(df).
    linhas = zip(df["MATR_ALUNO"], df["ANO"], df["PERIODO"], df["SITUACAO"])
    for matr, ano, periodo, situacao in linhas:
        semestres = students.setdefault(matr, {})
        chave = str(int(ano)) + "/" + str(periodo)
        situacao = int(situacao)
        contagem = semestres.setdefault(chave, [0, 0])
        if situacao in Situation.SITUATION_PASS:
            contagem[0] += 1
            contagem[1] += 1
        if situacao in Situation.SITUATION_FAIL:
            contagem[1] += 1
    return students
@memoize
def aluno_turmas(self, df=None):
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
situations = dict(Situation.SITUATIONS)
for matr, hist in df.groupby('MATR_ALUNO'):
students[matr] = []
for _, row in hist.iterrows():
data = {
'ano': str(int(row["ANO"])),
'codigo': row["COD_ATIV_CURRIC"],
'nome': row["NOME_ATIV_CURRIC"],
'nota': row["MEDIA_FINAL"],
'semestre': row["PERIODO"],
'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS)
}
students[matr].append(data)