Newer
Older
#~ TODO:
#~ CACHE EVERYTHING
#~ WHEN A FUNCTION IS CALLED, CHECK WHETHER ITS RESULT IS ALREADY IN THE CACHE
from script.utils.situations import *
from script.utils.utils import memoize
from collections import defaultdict
# Reference academic period (year and semester number). periodo_pretendido()
# counts semesters from each student's admission up to this point.
CURRENT_YEAR = 2017
CURRENT_SEMESTER = 1
class StudentAnalysis:
data_frame = None
def __init__(self, df):
self.data_frame = df
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
@memoize
def student_info(self, df=None):
    """Build a per-student summary keyed by enrolment id (``MATR_ALUNO``).

    Students are the distinct combinations of id, name, admission
    year/semester, evasion year/semester and evasion form found in ``df``.

    :param df: history data frame; defaults to ``self.data_frame``.
    :returns: dict mapping each id to a dict with the keys ``grr``,
        ``name``, ``ano_ingresso``, ``semestre_ingresso``, ``ano_evasao``,
        ``semestre_evasao``, ``forma_evasao`` and ``ira``.
    :raises KeyError: if a student has no entry in ``self.ira_alunos()``.
    """
    df = df if df is not None else self.data_frame
    students = df.groupby([
        "MATR_ALUNO",
        "NOME_PESSOA",
        "ANO_INGRESSO",
        "SEMESTRE_INGRESSO",
        "ANO_EVASAO",
        "SEMESTRE_EVASAO",
        "FORMA_EVASAO",
    ])
    iras = self.ira_alunos()

    info = {}
    # Each group key is a 7-tuple in the same order as the grouping columns.
    # The former debug branch (`stnd[0][-1] == 1` -> print) was removed: it
    # compared a character with an int and therefore could never fire.
    for stnd in students.groups.keys():
        grr = stnd[0]
        info[grr] = {
            "grr": grr,
            "name": str(stnd[1]),
            "ano_ingresso": str(stnd[2]),
            "semestre_ingresso": str(stnd[3]),
            "ano_evasao": str(stnd[4]),
            "semestre_evasao": str(stnd[5]),
            "forma_evasao": EvasionForm.code_to_str(stnd[6]),
            "ira": iras[grr],
        }
    return info
@memoize
def list_students(self, df=None):
    """Group students by evasion form.

    :param df: history data frame; defaults to ``self.data_frame``.
    :returns: ``defaultdict(list)`` mapping each ``FORMA_EVASAO`` code to a
        list of dicts with the keys ``forma_evasao`` (readable name),
        ``grr``, ``ira`` and ``nome``.
    :raises KeyError: if a student has no entry in ``self.ira_alunos()``.
    """
    df = df if df is not None else self.data_frame
    per_student = df.groupby(["MATR_ALUNO", "NOME_PESSOA", "FORMA_EVASAO"])
    # One row per distinct (student, name, evasion form) combination.
    counted = pd.DataFrame({'count': per_student.size()}).reset_index()
    iras = self.ira_alunos()

    list_situations = defaultdict(list)
    # Group by a scalar key, not a one-element list: with a list, pandas >= 2.0
    # yields 1-tuples as group labels, which would break code_to_str() and
    # change the keys of the returned mapping.
    for evasion_code, group in counted.groupby("FORMA_EVASAO"):
        evasion_form_name = EvasionForm.code_to_str(evasion_code)
        for grr, nome in zip(group["MATR_ALUNO"], group["NOME_PESSOA"]):
            list_situations[evasion_code].append({
                "forma_evasao": evasion_form_name,
                "grr": grr,
                "ira": iras[grr],
                "nome": nome
            })
    return list_situations
@memoize
def ira_alunos(self, df=None):
    """Compute each student's overall IRA as a workload-weighted mean.

    The per-semester values come from
    ``self.ira_por_quantidade_disciplinas()``, where every semester entry is
    ``[ira, course_count, workload]``.

    :param df: accepted for interface compatibility; it is not forwarded to
        ``ira_por_quantidade_disciplinas`` (same as before).
    :returns: new dict mapping student id to the weighted IRA, or ``0`` when
        the student's total workload is zero.
    """
    per_semester = self.ira_por_quantidade_disciplinas()

    # Build a fresh dict instead of overwriting entries of `per_semester`:
    # that method is decorated with @memoize, so (assuming it hands back the
    # same cached object) in-place edits would corrupt it for other callers
    # such as ira_semestral().
    iras = {}
    for matr, semestres in per_semester.items():
        ira_total = 0
        carga_total = 0
        for valores in semestres.values():
            ira_total += valores[0] * valores[2]
            carga_total += valores[2]
        iras[matr] = ira_total / carga_total if carga_total != 0 else 0
    return iras
@memoize
def taxa_aprovacao(self, df=None):
    """Compute each student's overall approval rate.

    The per-semester counts come from ``self.indice_aprovacao_semestral()``,
    where every semester entry is ``[approvals, total]``.

    :param df: accepted for interface compatibility; it is not forwarded to
        ``indice_aprovacao_semestral`` (same as before).
    :returns: new dict mapping student id to ``approvals / total`` as a
        float, or ``None`` when the student has no counted courses.
    """
    por_semestre = self.indice_aprovacao_semestral()

    # A separate result dict keeps the (memoized) per-semester data intact.
    taxas = {}
    for aluno, semestres in por_semestre.items():
        aprovacoes = float(sum(s[0] for s in semestres.values()))
        total = float(sum(s[1] for s in semestres.values()))
        # The original had no `else`, so the rate was always overwritten
        # with None; None is now used only when there is nothing to divide.
        taxas[aluno] = aprovacoes / total if total != 0 else None
    return taxas
@memoize
def posicao_turmaIngresso_semestral(self, df=None):
    """Express each semester IRA as a fraction of that semester's best IRA.

    The maximum is taken over every student returned by
    ``self.ira_semestral()`` for the same ``"year/semester"`` key.

    :param df: accepted for interface compatibility; it is not forwarded to
        ``ira_semestral`` (same as before).
    :returns: new dict ``{student: {semester: ira / best_ira}}``. When the
        best IRA of a semester is 0 the ratio is reported as ``0`` instead
        of raising ``ZeroDivisionError``.
    """
    iras = self.ira_semestral()

    # First pass: best IRA observed in each semester.
    ira_max = {}
    for semestres in iras.values():
        for semestre_ano, ira in semestres.items():
            if semestre_ano not in ira_max or ira > ira_max[semestre_ano]:
                ira_max[semestre_ano] = ira

    # Second pass: normalise into a new structure so the (memoized) input
    # is not modified in place.
    posicoes = {}
    for matr, semestres in iras.items():
        posicoes[matr] = {
            semestre_ano: (ira / ira_max[semestre_ano]
                           if ira_max[semestre_ano] != 0 else 0)
            for semestre_ano, ira in semestres.items()
        }
    return posicoes
@memoize
def periodo_real(self, df=None):
    """Placeholder for each student's real (achieved) period.

    :param df: history data frame; defaults to ``self.data_frame``.
    :returns: dict mapping every ``MATR_ALUNO`` found in ``df`` to ``None``.
    """
    df = df if df is not None else self.data_frame
    # TODO: Calculate the real value
    # Group by a scalar key so the group label is the bare student id on
    # every pandas version (a one-element list yields 1-tuples on >= 2.0).
    # Two stray tokens that had leaked into the body were also dropped.
    return {matr: None for matr, _ in df.groupby("MATR_ALUNO")}
@memoize
def periodo_pretendido(self, df=None):
    """Compute the period each student would be in with regular progress.

    The value is the number of semesters from the student's admission up to
    the reference period, counting the admission semester itself:
    ``(CURRENT_YEAR - year) * 2 + CURRENT_SEMESTER - semester + 1``. A
    student admitted in the reference semester therefore gets ``1``.

    :param df: history data frame; defaults to ``self.data_frame``.
    :returns: dict mapping ``MATR_ALUNO`` to the expected period (int).
    """
    df = df if df is not None else self.data_frame
    por_ingresso = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])

    students = {}
    # Group labels are (id, admission year, admission semester) tuples.
    for (matr, ano, semestre), _ in por_ingresso:
        students[matr] = (
            (CURRENT_YEAR - int(ano)) * 2 + CURRENT_SEMESTER - int(semestre) + 1
        )
    return students
@memoize
def ira_semestral(self, df=None):
    """Return each student's IRA per semester.

    Takes the ``[ira, course_count, workload]`` entries produced by
    ``self.ira_por_quantidade_disciplinas()`` and keeps only the IRA.

    :param df: accepted for interface compatibility; it is not forwarded to
        ``ira_por_quantidade_disciplinas`` (same as before).
    :returns: new dict ``{student: {"year/semester": ira}}``.
    """
    detalhado = self.ira_por_quantidade_disciplinas()
    # Build new inner dicts rather than overwriting the lists in place: the
    # source method is decorated with @memoize, and (assuming it returns the
    # cached object) replacing its lists with floats would break later
    # callers such as ira_alunos().
    return {
        matr: {periodo: valores[0] for periodo, valores in semestres.items()}
        for matr, semestres in detalhado.items()
    }
@memoize
def ira_por_quantidade_disciplinas(self, df=None):
    """Aggregate IRA, course count and workload per student and semester.

    Rows without ``MEDIA_FINAL`` are ignored. Only rows whose ``SITUACAO``
    is in ``Situation.SITUATION_AFFECT_IRA`` contribute to the totals, but
    every student with at least one graded row gets an entry (possibly an
    empty dict).

    :param df: history data frame; defaults to ``self.data_frame``.
    :returns: dict ``{student: {"year/semester": [ira, count, workload]}}``
        where ``ira`` is ``sum(grade * workload) / (workload * 100)``.
    """
    df = df if df is not None else self.data_frame
    df = df.dropna(subset=["MEDIA_FINAL"])

    students = {}
    # Iterate over the column values positionally. After dropna() the index
    # keeps its original labels (with gaps), so `df[col][i]` for
    # i in range(len(df)) would raise KeyError or skip rows.
    linhas = zip(
        df["MATR_ALUNO"], df["ANO"], df["PERIODO"],
        df["SITUACAO"], df["MEDIA_FINAL"], df["CH_TOTAL"],
    )
    for matr, ano_bruto, periodo_bruto, situacao_bruta, nota_bruta, carga_bruta in linhas:
        semestres = students.setdefault(matr, {})

        ano = str(int(ano_bruto))
        semestre = str(periodo_bruto)
        situacao = int(situacao_bruta)
        nota = float(nota_bruta)
        carga = float(carga_bruta)

        if situacao in Situation.SITUATION_AFFECT_IRA:
            acumulado = semestres.setdefault(ano + "/" + semestre, [0, 0, 0])
            acumulado[0] += nota * carga
            acumulado[1] += 1
            acumulado[2] += carga

    # Turn the weighted grade sum into an IRA; dividing by workload * 100
    # leaves the total untouched when the workload is zero.
    for semestres in students.values():
        for acumulado in semestres.values():
            if acumulado[2] != 0:
                acumulado[0] /= acumulado[2] * 100
    return students
@memoize
def indice_aprovacao_semestral(self, df=None):
    """Count approvals and counted courses per student and semester.

    Rows without ``MEDIA_FINAL`` are ignored. A row whose ``SITUACAO`` is in
    ``Situation.SITUATION_PASS`` adds one approval and one counted course; a
    row in ``Situation.SITUATION_FAIL`` adds one counted course only.

    :param df: history data frame; defaults to ``self.data_frame``.
    :returns: dict ``{student: {"year/semester": [approvals, total]}}``.
    """
    df = df if df is not None else self.data_frame
    df = df.dropna(subset=['MEDIA_FINAL'])

    students = {}
    # Iterate over the column values positionally. After dropna() the index
    # keeps its original labels (with gaps), so `df[col][i]` for
    # i in range(len(df)) would raise KeyError or skip rows.
    linhas = zip(df["MATR_ALUNO"], df["ANO"], df["PERIODO"], df["SITUACAO"])
    for matr, ano_bruto, periodo_bruto, situacao_bruta in linhas:
        semestres = students.setdefault(matr, {})

        ano = str(int(ano_bruto))
        semestre = str(periodo_bruto)
        situacao = int(situacao_bruta)

        contagem = semestres.setdefault(ano + "/" + semestre, [0, 0])
        if situacao in Situation.SITUATION_PASS:
            contagem[0] += 1
            contagem[1] += 1
        if situacao in Situation.SITUATION_FAIL:
            contagem[1] += 1
    return students
@memoize
def aluno_turmas(self, df=None):
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
situations = dict(Situation.SITUATIONS)
for matr, hist in df.groupby('MATR_ALUNO'):
students[matr] = []
for _, row in hist.iterrows():
data = {
'ano': str(int(row["ANO"])),
'codigo': row["COD_ATIV_CURRIC"],
'nome': row["NOME_ATIV_CURRIC"],
'nota': row["MEDIA_FINAL"],
'semestre': row["PERIODO"],
'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS)
}
students[matr].append(data)