degree_analysis.py 4.59 KB
Newer Older
João Denis Rodrigues's avatar
João Denis Rodrigues committed
1
2
3
import pandas as pd
import numpy as np
import math
4
5
from utils.situations import Situation, EvasionForm

João Denis Rodrigues's avatar
João Denis Rodrigues committed
6
7

def average_graduation(df):
    """Return the fraction of distinct students that graduated.

    Args:
        df: DataFrame with one row per student/course record. Must contain
            the columns ``MATR_ALUNO`` (used here as the student identifier)
            and ``FORMA_EVASAO``.

    Returns:
        float: number of distinct students whose ``FORMA_EVASAO`` equals
        ``EvasionForm.EF_FORMATURA`` divided by the number of distinct
        students in ``df``.

    Raises:
        ZeroDivisionError: if ``df`` contains no students.
    """
    total_student = df['MATR_ALUNO'].drop_duplicates().shape[0]

    # A student appears once per course record, so the graduates must be
    # de-duplicated as well; counting raw rows would compare rows against
    # students and inflate the ratio.
    graduated = df[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    total_graduate = graduated['MATR_ALUNO'].drop_duplicates().shape[0]

    return total_graduate / total_student

13

João Denis Rodrigues's avatar
João Denis Rodrigues committed
14
def general_failure(df):
    """Return the overall failure rate and its deviation across students.

    Only rows whose ``SITUACAO`` is in ``Situation.SITUATION_AFFECT_IRA`` are
    considered; among those, rows whose ``SITUACAO`` is in
    ``Situation.SITUATION_FAIL`` count as failures.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO`` and ``SITUACAO``.

    Returns:
        tuple: ``(average, standard_deviation)`` where ``average`` is the
        pooled rate (failed rows / considered rows) and
        ``standard_deviation`` is the root of the mean squared distance
        between each student's own failure rate and ``average``.

    Raises:
        ZeroDivisionError: if no row of ``df`` has a situation in
            ``Situation.SITUATION_AFFECT_IRA``.
    """
    affect_ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
    failures = affect_ira[affect_ira.SITUACAO.isin(Situation.SITUATION_FAIL)]

    average = failures.shape[0] / affect_ira.shape[0]

    courses_taken = affect_ira.groupby('MATR_ALUNO')['SITUACAO'].count()
    # Students that never failed have no group in `failures`; give them an
    # explicit 0 instead of dropping them (an inner merge would discard them
    # and compute the deviation only over students with at least one failure).
    courses_failed = failures.groupby('MATR_ALUNO')['SITUACAO'].count()
    courses_failed = courses_failed.reindex(courses_taken.index, fill_value=0)

    failure_rates = courses_failed.div(courses_taken)
    variance = failure_rates.sub(average).pow(2).sum() / failure_rates.shape[0]
    standard_deviation = math.sqrt(variance)
    return (average, standard_deviation)

32

Legton's avatar
Legton committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def current_students_failure(df):
    """Return the failure rate and its deviation for active students only.

    Rows are first restricted to those whose ``FORMA_EVASAO`` equals
    ``EvasionForm.EF_ATIVO``. Of those, only rows whose ``SITUACAO`` is in
    ``Situation.SITUATION_AFFECT_IRA`` are considered, and rows whose
    ``SITUACAO`` is in ``Situation.SITUATION_FAIL`` count as failures.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``SITUACAO`` and
            ``FORMA_EVASAO``.

    Returns:
        tuple: ``(average, standard_deviation)`` where ``average`` is the
        pooled rate (failed rows / considered rows) and
        ``standard_deviation`` is the root of the mean squared distance
        between each student's own failure rate and ``average``.

    Raises:
        ZeroDivisionError: if no active-student row has a situation in
            ``Situation.SITUATION_AFFECT_IRA``.
    """
    fixed = df.loc[df.FORMA_EVASAO == EvasionForm.EF_ATIVO]
    affect_ira = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
    failures = affect_ira[affect_ira.SITUACAO.isin(Situation.SITUATION_FAIL)]

    average = failures.shape[0] / affect_ira.shape[0]

    courses_taken = affect_ira.groupby('MATR_ALUNO')['SITUACAO'].count()
    # Students that never failed have no group in `failures`; give them an
    # explicit 0 instead of dropping them (an inner merge would discard them
    # and compute the deviation only over students with at least one failure).
    courses_failed = failures.groupby('MATR_ALUNO')['SITUACAO'].count()
    courses_failed = courses_failed.reindex(courses_taken.index, fill_value=0)

    failure_rates = courses_failed.div(courses_taken)
    variance = failure_rates.sub(average).pow(2).sum() / failure_rates.shape[0]
    standard_deviation = math.sqrt(variance)
    return (average, standard_deviation)

João Denis Rodrigues's avatar
João Denis Rodrigues committed
52
def general_ira(df):
    """Return the mean and standard deviation of the final grades.

    Only rows whose ``SITUACAO`` is in ``Situation.SITUATION_AFFECT_IRA`` and
    whose ``MEDIA_FINAL`` is at most 100 take part in the statistics.

    Args:
        df: DataFrame with the columns ``SITUACAO`` and ``MEDIA_FINAL``.

    Returns:
        tuple: ``(mean, std)`` of ``MEDIA_FINAL`` over the selected rows.
    """
    counts_for_ira = df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)
    grades = df[counts_for_ira].MEDIA_FINAL
    # Grades above 100 are discarded before computing the statistics.
    grades = grades[grades <= 100]
    return (grades.mean(), grades.std())
Legton's avatar
Legton committed
56
57
    
def current_ira(df):
    """Return the mean and standard deviation of final grades for the
    highest ``NUM_VERSAO`` present in ``df``.

    Rows are restricted to the maximum ``NUM_VERSAO`` value, then to
    situations in ``Situation.SITUATION_AFFECT_IRA`` and to ``MEDIA_FINAL``
    values of at most 100.

    Args:
        df: DataFrame with the columns ``NUM_VERSAO``, ``SITUACAO`` and
            ``MEDIA_FINAL``.

    Returns:
        tuple: ``(mean, std)`` of ``MEDIA_FINAL`` over the selected rows.

    Raises:
        ValueError: if ``NUM_VERSAO`` has no valid value (the maximum is NaN
            and cannot be converted to ``int``).
    """
    # Take the maximum directly: looking the row up through idxmax()/loc
    # returns several rows when the index holds duplicate labels, which
    # makes the int() conversion fail.
    ano_grade = int(df['NUM_VERSAO'].max())
    fixed = df.loc[df['NUM_VERSAO'] == ano_grade]
    fixed = fixed[fixed.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
    fixed = fixed[fixed.MEDIA_FINAL <= 100]
    return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
    
def current_students_ira(df):
    """Return the mean and standard deviation of final grades of active
    students.

    A row is used when its ``FORMA_EVASAO`` equals ``EvasionForm.EF_ATIVO``,
    its ``SITUACAO`` is in ``Situation.SITUATION_AFFECT_IRA`` and its
    ``MEDIA_FINAL`` is at most 100.

    Args:
        df: DataFrame with the columns ``FORMA_EVASAO``, ``SITUACAO`` and
            ``MEDIA_FINAL``.

    Returns:
        tuple: ``(mean, std)`` of ``MEDIA_FINAL`` over the selected rows.
    """
    active_rows = df[df.FORMA_EVASAO == EvasionForm.EF_ATIVO]
    counts_for_ira = active_rows.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)
    grades = active_rows[counts_for_ira].MEDIA_FINAL
    # Grades above 100 are discarded before computing the statistics.
    grades = grades[grades <= 100]
    return (grades.mean(), grades.std())
Legton's avatar
Legton committed
69

Legton's avatar
Legton committed
70
def general_evasion_rate(df):
    """Return the fraction of distinct students counted as evaded.

    Each student is represented by the first row in which its ``MATR_ALUNO``
    appears. The student counts as evaded when that row's ``FORMA_EVASAO``
    is none of ``EvasionForm.EF_ATIVO``, ``EvasionForm.EF_FORMATURA`` or
    ``EvasionForm.EF_REINTEGRACAO``.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO`` and ``FORMA_EVASAO``.

    Returns:
        float: evaded students divided by distinct students.

    Raises:
        ZeroDivisionError: if ``df`` contains no students.
    """
    # De-duplicate the frame itself so the mask below is built from the same
    # rows it filters; masking a de-duplicated Series with a mask computed on
    # the full frame relies on implicit index alignment, which fails when the
    # index holds duplicate labels.
    students = df.drop_duplicates('MATR_ALUNO')
    total_student = students.shape[0]

    status = students.FORMA_EVASAO
    evaded = (
        (status != EvasionForm.EF_ATIVO)
        & (status != EvasionForm.EF_FORMATURA)
        & (status != EvasionForm.EF_REINTEGRACAO)
    )
    total_evasion = students[evaded].shape[0]

    return total_evasion / total_student

def current_evasion_rate(df):
    """Return the evaded fraction of students on the highest ``NUM_VERSAO``.

    Rows are restricted to the maximum ``NUM_VERSAO`` value in ``df``. Each
    student is then represented by the first remaining row in which its
    ``MATR_ALUNO`` appears. The student counts as evaded when that row's
    ``FORMA_EVASAO`` is none of ``EvasionForm.EF_ATIVO``,
    ``EvasionForm.EF_FORMATURA`` or ``EvasionForm.EF_REINTEGRACAO``.

    Args:
        df: DataFrame with the columns ``NUM_VERSAO``, ``MATR_ALUNO`` and
            ``FORMA_EVASAO``.

    Returns:
        float: evaded students divided by distinct students on that version.

    Raises:
        ValueError: if ``NUM_VERSAO`` has no valid value (the maximum is NaN
            and cannot be converted to ``int``).
        ZeroDivisionError: if no student is left after the version filter.
    """
    # Take the maximum directly: looking the row up through idxmax()/loc
    # returns several rows when the index holds duplicate labels.
    ano_grade = int(df['NUM_VERSAO'].max())

    # De-duplicate the filtered frame itself so the mask below is built from
    # the same rows it filters, instead of aligning a full-frame mask against
    # a de-duplicated Series.
    students = df.loc[df['NUM_VERSAO'] == ano_grade]
    students = students.drop_duplicates('MATR_ALUNO')
    total_student = students.shape[0]

    status = students.FORMA_EVASAO
    evaded = (
        (status != EvasionForm.EF_ATIVO)
        & (status != EvasionForm.EF_FORMATURA)
        & (status != EvasionForm.EF_REINTEGRACAO)
    )
    total_evasion = students[evaded].shape[0]

    return total_evasion / total_student
Legton's avatar
Legton committed
85
86

def average_graduation_time(df):
    """Return the average time graduates took to finish, in years.

    Graduates are the distinct ``MATR_ALUNO`` values whose ``FORMA_EVASAO``
    equals ``EvasionForm.EF_FORMATURA``. For each one the number of semesters
    between admission and exit is counted inclusively; the mean of those
    counts is halved to express it in years.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``FORMA_EVASAO``,
            ``ANO``, ``PERIODO``, ``ANO_INGRESSO``, ``SEMESTRE_INGRESSO``,
            ``ANO_EVASAO`` and ``SEMESTRE_EVASAO``.

    Returns:
        float: mean graduation time in years.

    Raises:
        ZeroDivisionError: if ``df`` contains no graduate.
    """
    graduate_rows = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]

    # Fallbacks used when a graduate has no usable exit year/semester.
    default_year_end = int(df['ANO'].max())
    default_semester_end = graduate_rows['PERIODO'].max()

    # One row per graduate: the frame holds several rows per student, and
    # iterating over all of them would weight each graduate by how many
    # rows they have.
    graduates = graduate_rows.drop_duplicates('MATR_ALUNO')
    total_graduate = graduates.shape[0]

    total_semesters = 0
    for _, row in graduates.iterrows():
        # Start from the defaults on every row so that a graduate without
        # ANO_EVASAO does not inherit the previous graduate's exit date.
        year_end = default_year_end
        semester_end = default_semester_end
        if pd.notnull(row['ANO_EVASAO']):
            year_end = int(row['ANO_EVASAO'])
            try:
                semester_end = int(row['SEMESTRE_EVASAO'])
            except ValueError:
                # Exit semester missing or not numeric: keep the default.
                semester_end = default_semester_end

        year = int(row['ANO_INGRESSO'])
        semester = int(row['SEMESTRE_INGRESSO'])
        # Two semesters per year; "+ 1" counts the exit semester itself.
        total_semesters += 2 * (year_end - year) + (semester_end - semester) + 1

    average_time = total_semesters / total_graduate
    average_time /= 2  # semesters -> years

    return average_time

def total_students(df):
    """Return the number of distinct active students.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO`` and ``FORMA_EVASAO``.

    Returns:
        int: count of distinct ``MATR_ALUNO`` values among the rows whose
        ``FORMA_EVASAO`` equals ``EvasionForm.EF_ATIVO``.
    """
    is_active = df.FORMA_EVASAO == EvasionForm.EF_ATIVO
    active_students = df.loc[is_active].drop_duplicates('MATR_ALUNO')
    return active_students.shape[0]