student_analysis.py 9.73 KB
Newer Older
1 2
import numpy as np

3 4 5
# TODO:
# Cache every computed result.
# When a function is called, check first whether its value is already cached.
bhm15's avatar
bhm15 committed
6

7 8
from submission.analysis.utils.situations import *
from submission.analysis.utils.utils import memoize
bhm15's avatar
bhm15 committed
9
import pandas as pd
10
from collections import defaultdict
11

12

13

14

15
class StudentAnalysis:
legton's avatar
legton committed
16 17
    data_frame = None

18
    def __init__(self, df, current_year, current_semester):
legton's avatar
legton committed
19
        self.data_frame = df
20 21 22
        self.current_year = current_year
        self.current_semester = current_semester

23
    
legton's avatar
legton committed
24 25 26 27 28 29 30 31 32 33 34 35 36
    def student_info(self, df=None):
        """
        Builds a summary record for every student found in the data frame.

        Parameters
        ----------
        df : optional
            Data frame to analyse; defaults to ``self.data_frame``.

        Returns
        -------
        dict
            ``{GRR: {"grr", "name", "ano_ingresso", "semestre_ingresso",
            "ano_evasao", "semestre_evasao", "forma_evasao", "ira"}}``.
            Admission/evasion values are converted with ``str``; the
            evasion form goes through ``EvasionForm.code_to_str``.
        """
        df = df if df is not None else self.data_frame

        # NOTE(review): pandas groupby excludes keys containing missing
        # values by default, so students with NaN in any of these columns
        # may be left out -- confirm against the real data.
        grouped = df.groupby([
            "MATR_ALUNO",
            "NOME_PESSOA",
            "ANO_INGRESSO",
            "SEMESTRE_INGRESSO",
            "ANO_EVASAO",
            "SEMESTRE_EVASAO",
            "FORMA_EVASAO",
        ])
        students = grouped.groups.keys()

        iras = self.ira_alunos(df=df)
        info = {}

        for grr, name, year_in, sem_in, year_out, sem_out, form in students:
            info[grr] = {
                "grr": grr,
                "name": str(name),
                "ano_ingresso": str(year_in),
                "semestre_ingresso": str(sem_in),
                "ano_evasao": str(year_out),
                "semestre_evasao": str(sem_out),
                "forma_evasao": EvasionForm.code_to_str(form),
                # A student whose rows all lack MEDIA_FINAL never enters the
                # IRA dict; report 0 instead of raising KeyError.
                "ira": iras.get(grr, 0),
            }
        return info

55
    
legton's avatar
legton committed
56 57 58
    def list_students(self, df=None):
        """
        Lists the students grouped by their evasion form.

        Parameters
        ----------
        df : optional
            Data frame to analyse; defaults to ``self.data_frame``.

        Returns
        -------
        defaultdict of list
            ``{evasion_code: [{"forma_evasao", "grr", "ira", "nome"}, ...]}``
        """
        df = df if df is not None else self.data_frame

        # One row per distinct (student, name, evasion form) combination.
        per_student = df.groupby(["MATR_ALUNO", "NOME_PESSOA", "FORMA_EVASAO"])
        per_student = pd.DataFrame(
            {'count': per_student.size()}).reset_index()

        iras = self.ira_alunos(df=df)
        list_situations = defaultdict(list)

        # Group by the column name itself (not a one-element list) so the
        # group key is the plain evasion code on every pandas version;
        # a length-1 list grouper yields 1-tuples on pandas >= 2.
        for code, group in per_student.groupby("FORMA_EVASAO"):
            evasion_form_name = EvasionForm.code_to_str(code)

            for grr, nome in zip(group["MATR_ALUNO"], group["NOME_PESSOA"]):
                list_situations[code].append({
                    "forma_evasao": evasion_form_name,
                    "grr": grr,
                    # Students without graded rows are absent from the IRA
                    # dict; fall back to 0 instead of raising KeyError.
                    "ira": iras.get(grr, 0),
                    "nome": nome
                })

        return list_situations

80
    
legton's avatar
legton committed
81
    def ira_alunos(self, df=None):
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
        """
        Calculates the average IRA per student
        IRA = Sum (grades X coursetime)/ (total course time X 100)

        Parameters
        ----------
        df

        Returns
        -------
        dict

        Example
        --------
        iras = { GRR: number, ...}
        """
legton's avatar
legton committed
98 99
        df = df if df is not None else self.data_frame

100 101
        iras = self.ira_por_quantidade_disciplinas(df=df)
        ira_per_student = {}
legton's avatar
legton committed
102 103 104 105
        for i in iras:
            ira_total = 0
            carga_total = 0
            for semestre in iras[i]:
106

legton's avatar
legton committed
107 108
                ira_total += iras[i][semestre][0]*iras[i][semestre][2]
                carga_total += iras[i][semestre][2]
109

legton's avatar
legton committed
110
            if(carga_total != 0):
111
                ira_per_student[i] = ira_total/carga_total
112
            else: # There is no register of courses for this student
113 114 115
                ira_per_student[i] = 0
        return ira_per_student

116
    
legton's avatar
legton committed
117 118
    def taxa_aprovacao(self, df=None):
        df = df if df is not None else self.data_frame
119 120 121

        aprovacoes_semestres = self.indice_aprovacao_semestral(df=df)

legton's avatar
legton committed
122
        for aluno in aprovacoes_semestres:
123 124 125 126
            total = sum([aprovacoes_semestres[aluno][s][1]
                         for s in aprovacoes_semestres[aluno]])
            aprovacoes = sum([aprovacoes_semestres[aluno][s][0]
                              for s in aprovacoes_semestres[aluno]])
legton's avatar
legton committed
127 128 129 130 131 132
            total = float(total)
            aprovacoes = float(aprovacoes)
            if(total != 0):
                aprovacoes_semestres[aluno] = aprovacoes/total
            else:
                aprovacoes_semestres[aluno] = None
133

legton's avatar
legton committed
134
        return aprovacoes_semestres
135 136 137 138 139

    def turma_ingresso(self, df=None):
        df = df if df is not None else self.data_frame
        df = df.drop_duplicates(subset="MATR_ALUNO", keep="first")
        admissions = {}
140 141 142
        for i, std in df.iterrows():
            admissions[std["MATR_ALUNO"]] = std["ANO_INGRESSO_y"] + \
                "/"+std["SEMESTRE_INGRESSO"]
143
        return admissions
144

145
    
legton's avatar
legton committed
146 147
    def posicao_turmaIngresso_semestral(self, df=None):
        df = df if df is not None else self.data_frame
148

149
        grr_to_admissions = self.turma_ingresso(df=df)
legton's avatar
legton committed
150

151 152 153 154 155 156 157
        admissions = defaultdict(list)

        # Create an dict of list where each key represent an admission class,
        # and its values represents the set of students
        # By instance: {"2015/1":["GRR20151346","GRR20154562", ...], ...}
        for grr in grr_to_admissions:
            admissions[grr_to_admissions[grr]].append(grr)
158 159 160


        iras_by_semester = self.ira_semestral(df=df)
161 162 163 164
        positions = defaultdict(dict)
        for grr in iras_by_semester:
            for semester in iras_by_semester[grr]:
                student_admission = admissions[grr_to_admissions[grr]]
165 166 167

                competition = [
                    matr for matr in student_admission if semester in iras_by_semester[matr]]
168 169 170

                classifications = sorted(
                    competition,
171
                    key=lambda matr: iras_by_semester[matr][semester]
172
                )
173 174
                positions[grr][semester] = (
                    1+classifications.index(grr))/len(competition)
175 176

        return positions
legton's avatar
legton committed
177

178
    
legton's avatar
legton committed
179 180
    def periodo_real(self, df=None):
        df = df if df is not None else self.data_frame
181

legton's avatar
legton committed
182 183
        aux = df.groupby(["MATR_ALUNO"])
        students = {}
184
        # TODO: Calculate the real value
legton's avatar
legton committed
185 186 187 188
        for x in aux:
            students[x[0]] = None
        return students

189
    
legton's avatar
legton committed
190 191
    def periodo_pretendido(self, df=None):
        df = df if df is not None else self.data_frame
192

legton's avatar
legton committed
193 194 195
        aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
        students = {}
        for x in aux:
196 197
            students[x[0][0]] = ((self.current_year - int(x[0][1])) * 2 +
                                  self.current_semester - int(x[0][2]) + 1)
legton's avatar
legton committed
198 199
        return students

200
    
legton's avatar
legton committed
201 202
    def ira_semestral(self, df=None):
        df = df if df is not None else self.data_frame
203 204

        aux = self.ira_por_quantidade_disciplinas(df=df)
legton's avatar
legton committed
205 206 207 208 209
        for matr in aux:
            for periodo in aux[matr]:
                aux[matr][periodo] = aux[matr][periodo][0]
        return aux

210
    
legton's avatar
legton committed
211
    def ira_por_quantidade_disciplinas(self, df=None):
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
        """
        Calculates the ira per year/semester

        Parameters
        ----------
        df : seila

        Returns
        -------
        dict of dict of array
        iras = { GRR: {year/semester: []},
        ...}

        Example
        --------

        """
legton's avatar
legton committed
229
        df = df if df is not None else self.data_frame
230

legton's avatar
legton committed
231 232 233 234 235 236 237 238 239 240 241 242 243 244
        students = {}
        df = df.dropna(subset=["MEDIA_FINAL"])

        total_students = len(df["MATR_ALUNO"])
        for i in range(total_students):
            matr = df["MATR_ALUNO"][i]
            if (not (matr in students)):
                students[matr] = {}

            ano = str(int(df["ANO"][i]))
            semestre = str(df["PERIODO"][i])
            situacao = int(df["SITUACAO"][i])
            nota = float(df["MEDIA_FINAL"][i])
            carga = float(df["CH_TOTAL"][i])
245

legton's avatar
legton committed
246 247 248
            if (situacao in Situation.SITUATION_AFFECT_IRA):
                if not (ano + "/" + semestre in students[matr]):
                    students[matr][ano + "/" + semestre] = [0, 0, 0]
249

legton's avatar
legton committed
250 251 252 253 254 255 256 257
                students[matr][ano + "/" + semestre][0] += nota*carga
                students[matr][ano + "/" + semestre][1] += 1
                students[matr][ano + "/" + semestre][2] += carga

        for matr in students:
            for periodo in students[matr]:
                if (students[matr][periodo][2] != 0):
                    students[matr][periodo][0] /= students[matr][periodo][2] * 100
258

legton's avatar
legton committed
259 260
        return students

261
    
legton's avatar
legton committed
262 263
    def indice_aprovacao_semestral(self, df=None):
        df = df if df is not None else self.data_frame
264

legton's avatar
legton committed
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
        students = {}
        df = df.dropna(subset=['MEDIA_FINAL'])
        total_students = len(df["MATR_ALUNO"])
        for i in range(total_students):
            matr = (df["MATR_ALUNO"][i])
            if (not (matr in students)):
                students[matr] = {}

            ano = str(int(df["ANO"][i]))
            semestre = str(df["PERIODO"][i])
            situacao = int(df["SITUACAO"][i])

            if not (ano + "/" + semestre in students[matr]):
                students[matr][ano + "/" + semestre] = [0, 0]

            if situacao in Situation.SITUATION_PASS:
                students[matr][ano + "/" + semestre][0] += 1
                students[matr][ano + "/" + semestre][1] += 1
            if situacao in Situation.SITUATION_FAIL:
                students[matr][ano + "/" + semestre][1] += 1

286
        return (students)
287

288
    
legton's avatar
legton committed
289
    def aluno_turmas(self, df=None):
        """
        Lists, for every student, the graded courses found in the data frame.

        Rows without ``MEDIA_FINAL`` are ignored.

        Parameters
        ----------
        df : optional
            Data frame to analyse; defaults to ``self.data_frame``.

        Returns
        -------
        dict of list of dict
            ``{GRR: [{"ano", "codigo", "nome", "nota", "semestre",
            "situacao"}, ...]}``; ``situacao`` is looked up in
            ``Situation.SITUATIONS`` and falls back to
            ``Situation.SIT_OUTROS`` for unknown codes.
        """
        if df is None:
            df = self.data_frame

        graded = df.dropna(subset=['MEDIA_FINAL'])
        situation_names = dict(Situation.SITUATIONS)

        students = {}
        for matr, hist in graded.groupby('MATR_ALUNO'):
            students[matr] = [
                {
                    'ano': str(int(row["ANO"])),
                    'codigo': row["COD_ATIV_CURRIC"],
                    'nome': row["NOME_ATIV_CURRIC"],
                    'nota': row["MEDIA_FINAL"],
                    'semestre': row["PERIODO"],
                    'situacao': situation_names.get(
                        row["SITUACAO"], Situation.SIT_OUTROS)
                }
                for _, row in hist.iterrows()
            ]

        return students