Skip to content
Snippets Groups Projects
Commit 35dcee73 authored by Bruno Meyer's avatar Bruno Meyer
Browse files

Merge branch 'degree' into students

parents 2f3562a9 489adfee
No related branches found
No related tags found
No related merge requests found
......@@ -17,3 +17,4 @@ src/cache
.idea
**/__pycache__
*.ipynb
......@@ -34,3 +34,32 @@ def general_ira(df):
fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def total_evasion_rate(df):
students = df['MATR_ALUNO'].drop_duplicates()
total_student = students.shape[0]
total_evasion = students.loc[(df.FORMA_EVASAO != EvasionForm.EF_ATIVO) & (df.FORMA_EVASAO != EvasionForm.EF_FORMATURA) & (df.FORMA_EVASAO != EvasionForm.EF_REINTEGRACAO)].shape[0]
return total_evasion / total_student
def average_graduation_time(df):
graduates = df.loc[(df.FORMA_EVASAO == EvasionForm.EF_FORMATURA)]
total_graduate = graduates.shape[0]
average_time = 0
year_end = int(df['ANO'].max())
semester_end = graduates['PERIODO'].max()
for index, row in graduates.iterrows():
if pd.notnull(row['ANO_EVASAO']):
year_end = int(row['ANO_EVASAO'])
try:
semester_end = int(row['SEMESTRE_EVASAO'])
except ValueError:
semester_end = graduates['PERIODO'].max()
year = int(row['ANO_INGRESSO'])
semester = int(row['SEMESTRE_INGRESSO'])
difference = 2 * (year_end - year) + (semester_end - semester) + 1
average_time += difference
average_time /= total_graduate
average_time /= 2
return average_time
\ No newline at end of file
......@@ -30,6 +30,7 @@ def load_dataframes(cwd='.'):
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste))
......@@ -94,9 +95,12 @@ def fix_admission(df):
def fix_evasion(df):
evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]
df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100
for evasion in EvasionForm.EVASION_FORM:
#~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0]
df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
#~ if(evasion[0] == 100):
#~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False):
#~ if(x != 0.0):
......
......@@ -22,7 +22,8 @@ def build_cache(dataframe):
path += "/curso"
build_path(path)
# generate_degree_data(path, dataframe)
generate_student_data(path, dataframe)
generate_degree_data(path, dataframe)
generate_student_data(path,dataframe)
# generate_student_list(path)
# generate_admission_data(path)
......@@ -34,6 +35,8 @@ def generate_degree_data(path, dataframe):
average_graduation(dataframe)
general_failure(dataframe)
general_ira(dataframe)
total_evasion_rate(dataframe)
average_graduation_time(dataframe)
pass
def generate_student_data(path,dataframe):
......
......@@ -3,13 +3,13 @@ import time
from base.dataframe_base import load_dataframes
from build_cache import build_cache
from datetime import timedelta
from analysis.degree_analysis import *
def main():
start_time = time.clock()
start_time_exec = time.time()
dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment