Commit 35dcee73 authored by Bruno Meyer
Browse files

Merge branch 'degree' into students

parents 2f3562a9 489adfee
......@@ -17,3 +17,4 @@ src/cache
.idea
**/__pycache__
*.ipynb
......@@ -34,3 +34,32 @@ def general_ira(df):
fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def total_evasion_rate(df):
    """Return the fraction of distinct students counted as evaded.

    A student is identified by ``MATR_ALUNO``; only the first row found for
    each student is considered. The student counts as evaded when the
    ``FORMA_EVASAO`` value on that row is none of ``EvasionForm.EF_ATIVO``,
    ``EvasionForm.EF_FORMATURA`` or ``EvasionForm.EF_REINTEGRACAO``.

    Args:
        df: DataFrame with at least the ``MATR_ALUNO`` and ``FORMA_EVASAO``
            columns.

    Returns:
        Evaded students divided by total distinct students, as a float.
        Returns 0.0 when ``df`` contains no students, instead of raising
        ``ZeroDivisionError``.
    """
    # De-duplicate whole rows (not just the MATR_ALUNO column) so the filter
    # below runs on the same frame it was built from; this avoids depending
    # on index alignment between a full-length mask and a shorter series.
    students = df.drop_duplicates(subset='MATR_ALUNO')
    total_student = students.shape[0]
    if total_student == 0:
        return 0.0

    non_evasion_forms = [
        EvasionForm.EF_ATIVO,
        EvasionForm.EF_FORMATURA,
        EvasionForm.EF_REINTEGRACAO,
    ]
    # Missing FORMA_EVASAO values are not in the list, so they count as
    # evasion, exactly as the chained `!=` comparisons did.
    total_evasion = int((~students.FORMA_EVASAO.isin(non_evasion_forms)).sum())
    return total_evasion / total_student
def average_graduation_time(df):
    """Return the average time to graduation, in years.

    Selects the rows whose ``FORMA_EVASAO`` equals
    ``EvasionForm.EF_FORMATURA``. For each such row the duration in semesters
    is ``2 * (year_end - ANO_INGRESSO) + (semester_end - SEMESTRE_INGRESSO)
    + 1`` (the ``+ 1`` makes the count inclusive of the entry semester). The
    mean of those durations is then halved to convert semesters to years.

    ``year_end`` / ``semester_end`` come from ``ANO_EVASAO`` /
    ``SEMESTRE_EVASAO`` when ``ANO_EVASAO`` is present; otherwise they fall
    back to the maximum ``ANO`` in ``df`` and the maximum ``PERIODO`` among
    the graduate rows.

    Args:
        df: DataFrame with the columns ``FORMA_EVASAO``, ``ANO``, ``PERIODO``,
            ``ANO_EVASAO``, ``SEMESTRE_EVASAO``, ``ANO_INGRESSO`` and
            ``SEMESTRE_INGRESSO``.

    Returns:
        Average graduation time in years as a float, or 0.0 when no row is
        marked as graduated (instead of raising ``ZeroDivisionError``).
    """
    graduates = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    total_graduate = graduates.shape[0]
    if total_graduate == 0:
        return 0.0

    # Fallback end date, computed once: it does not change between rows.
    # NOTE(review): assumes PERIODO is numeric so it can be subtracted from
    # SEMESTRE_INGRESSO below — confirm against the loader.
    default_year_end = int(df['ANO'].max())
    default_semester_end = graduates['PERIODO'].max()

    total_semesters = 0
    for _, row in graduates.iterrows():
        # Reset per row so a graduate without ANO_EVASAO does not inherit
        # the end date of the previously processed graduate.
        year_end = default_year_end
        semester_end = default_semester_end
        if pd.notnull(row['ANO_EVASAO']):
            year_end = int(row['ANO_EVASAO'])
            try:
                semester_end = int(row['SEMESTRE_EVASAO'])
            except (TypeError, ValueError):
                # Missing or non-numeric semester: keep the fallback.
                semester_end = default_semester_end

        year = int(row['ANO_INGRESSO'])
        semester = int(row['SEMESTRE_INGRESSO'])
        total_semesters += 2 * (year_end - year) + (semester_end - semester) + 1

    average_semesters = total_semesters / total_graduate
    return average_semesters / 2
\ No newline at end of file
......@@ -30,6 +30,7 @@ def load_dataframes(cwd='.'):
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste))
......@@ -94,9 +95,12 @@ def fix_admission(df):
def fix_evasion(df):
evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]
df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100
for evasion in EvasionForm.EVASION_FORM:
#~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0]
df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
#~ if(evasion[0] == 100):
#~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False):
#~ if(x != 0.0):
......
......@@ -22,7 +22,8 @@ def build_cache(dataframe):
path += "/curso"
build_path(path)
# generate_degree_data(path, dataframe)
generate_student_data(path, dataframe)
generate_degree_data(path, dataframe)
generate_student_data(path,dataframe)
# generate_student_list(path)
# generate_admission_data(path)
......@@ -34,6 +35,8 @@ def generate_degree_data(path, dataframe):
average_graduation(dataframe)
general_failure(dataframe)
general_ira(dataframe)
total_evasion_rate(dataframe)
average_graduation_time(dataframe)
pass
def generate_student_data(path,dataframe):
......
......@@ -3,13 +3,13 @@ import time
from base.dataframe_base import load_dataframes
from build_cache import build_cache
from datetime import timedelta
from analysis.degree_analysis import *
def main():
start_time = time.clock()
start_time_exec = time.time()
dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment