diff --git a/.gitignore b/.gitignore index 931591ae8131e4ee6331a35554df17dc8a262ac9..4d0b7424777370943475fb64712468bc27e1e157 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ src/cache .idea **/__pycache__ +*.ipynb diff --git a/script/analysis/degree_analysis.py b/script/analysis/degree_analysis.py index 9e74941a43e93ded4705a09e7ebee5594b1d17b7..5f01427780f96a35cf253bb80ec908f402058f6e 100644 --- a/script/analysis/degree_analysis.py +++ b/script/analysis/degree_analysis.py @@ -34,3 +34,32 @@ def general_ira(df): fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)] fixed = fixed[fixed.MEDIA_FINAL <= 100] return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std()) + +def total_evasion_rate(df): + students = df['MATR_ALUNO'].drop_duplicates() + total_student = students.shape[0] + total_evasion = students.loc[(df.FORMA_EVASAO != EvasionForm.EF_ATIVO) & (df.FORMA_EVASAO != EvasionForm.EF_FORMATURA) & (df.FORMA_EVASAO != EvasionForm.EF_REINTEGRACAO)].shape[0] + + return total_evasion / total_student + +def average_graduation_time(df): + graduates = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_FORMATURA)] + total_graduate = graduates.shape[0] + average_time = 0 + year_end = int(df['ANO'].max()) + semester_end = graduates['PERIODO'].max() + for index, row in graduates.iterrows(): + if pd.notnull(row['ANO_EVASAO']): + year_end = int(row['ANO_EVASAO']) + try: + semester_end = int(row['SEMESTRE_EVASAO']) + except ValueError: + semester_end = graduates['PERIODO'].max() + year = int(row['ANO_INGRESSO']) + semester = int(row['SEMESTRE_INGRESSO']) + difference = 2 * (year_end - year) + (semester_end - semester) + 1 + average_time += difference + average_time /= total_graduate + average_time /= 2 + + return average_time \ No newline at end of file diff --git a/script/base/dataframe_base.py b/script/base/dataframe_base.py index 8f364258ac9b54a64ebe542a82a49ca9bebcc284..341d958e3723c9c33245811e3e657249abc593c5 100644 --- a/script/base/dataframe_base.py +++ b/script/base/dataframe_base.py @@ -30,6 +30,7 @@ def load_dataframes(cwd='.'): dataframes.append(dh) dataframe = fix_dataframes(dataframes) + dh = DataframeHolder(dataframe) #~ dh.students.aggregate(teste) # print(dh.students['MEDIA_FINAL'].aggregate(teste)) @@ -94,9 +95,12 @@ def fix_admission(df): def fix_evasion(df): + evasionForms = [x[1] for x in EvasionForm.EVASION_FORM] + df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100 for evasion in EvasionForm.EVASION_FORM: #~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0] df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0] + #~ if(evasion[0] == 100): #~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False): #~ if(x != 0.0): diff --git a/script/build_cache.py b/script/build_cache.py index 4b9dae3b9e449408f12d32789332fe3708354f24..ef77277e5bb54c9d3398fa00046f4bbe104bc674 100644 --- a/script/build_cache.py +++ b/script/build_cache.py @@ -22,7 +22,8 @@ def build_cache(dataframe): path += "/curso" build_path(path) -# generate_degree_data(path, dataframe) + generate_student_data(path, dataframe) + generate_degree_data(path, dataframe) generate_student_data(path,dataframe) # generate_student_list(path) # generate_admission_data(path) @@ -34,6 +35,8 @@ def generate_degree_data(path, dataframe): average_graduation(dataframe) general_failure(dataframe) general_ira(dataframe) + total_evasion_rate(dataframe) + average_graduation_time(dataframe) pass def generate_student_data(path,dataframe): diff --git a/script/main.py b/script/main.py index 979bc78769847a0948ea1ca4accf6fbe6b43ea6a..8e3f306ac70d8eacb870f61e43898d09700f435e 100644 --- a/script/main.py +++ b/script/main.py @@ -3,13 +3,13 @@ import time from base.dataframe_base import load_dataframes from build_cache import build_cache from datetime import timedelta +from analysis.degree_analysis import * def main(): start_time = time.clock() start_time_exec = time.time() dataframe = load_dataframes(os.getcwd() + '/' + 'base') - build_cache(dataframe) cpu_time = timedelta(seconds=round(time.clock() - start_time))