Skip to content
Snippets Groups Projects
Commit 35dcee73 authored by Bruno Meyer
Browse files

Merge branch 'degree' into students

parents 2f3562a9 489adfee
No related branches found
No related tags found
No related merge requests found
...@@ -17,3 +17,4 @@ src/cache ...@@ -17,3 +17,4 @@ src/cache
.idea .idea
**/__pycache__ **/__pycache__
*.ipynb
...@@ -34,3 +34,32 @@ def general_ira(df): ...@@ -34,3 +34,32 @@ def general_ira(df):
fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)] fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100] fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std()) return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def total_evasion_rate(df):
    """Return the fraction of distinct students that evaded the degree.

    A student is identified by ``MATR_ALUNO``; only the first row found for
    each student is considered.  The student counts as evaded when that row's
    ``FORMA_EVASAO`` is none of ``EvasionForm.EF_ATIVO``,
    ``EvasionForm.EF_FORMATURA`` or ``EvasionForm.EF_REINTEGRACAO``.

    Args:
        df: DataFrame with at least the ``MATR_ALUNO`` and ``FORMA_EVASAO``
            columns.  NOTE(review): presumably one row per student per
            course record -- confirm against the loader.

    Returns:
        ``evaded / total`` as a float in ``[0, 1]``, or ``nan`` when ``df``
        contains no students (the rate is undefined in that case).
    """
    # Filter on the de-duplicated rows themselves instead of applying a mask
    # built on the full frame to a shorter series: same rows are selected,
    # but no implicit index alignment is involved.
    students = df.drop_duplicates(subset='MATR_ALUNO')
    total_student = students.shape[0]
    if total_student == 0:
        return float('nan')

    still_enrolled_or_done = (
        (students.FORMA_EVASAO == EvasionForm.EF_ATIVO)
        | (students.FORMA_EVASAO == EvasionForm.EF_FORMATURA)
        | (students.FORMA_EVASAO == EvasionForm.EF_REINTEGRACAO)
    )
    total_evasion = int((~still_enrolled_or_done).sum())
    return total_evasion / total_student
def average_graduation_time(df):
    """Return the average time graduates took to finish, in years.

    For every graduate (``FORMA_EVASAO == EvasionForm.EF_FORMATURA``) the
    number of semesters between admission and graduation is computed as
    ``2 * (year_end - year_in) + (semester_end - semester_in) + 1``; the mean
    of those values is then halved to express it in years.

    The end date comes from ``ANO_EVASAO`` / ``SEMESTRE_EVASAO``.  When
    ``ANO_EVASAO`` is null the latest ``ANO`` in ``df`` and the largest
    ``PERIODO`` among graduates are used instead; when only
    ``SEMESTRE_EVASAO`` cannot be converted to ``int`` the largest
    ``PERIODO`` is used for the semester.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``FORMA_EVASAO``,
            ``ANO``, ``PERIODO``, ``ANO_EVASAO``, ``SEMESTRE_EVASAO``,
            ``ANO_INGRESSO`` and ``SEMESTRE_INGRESSO``.
            NOTE(review): assumes ``PERIODO`` is numeric so it can be
            subtracted from -- confirm against the loader.

    Returns:
        Average graduation time in years as a float, or ``nan`` when there
        are no graduates.
    """
    graduates = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    # Count each graduate once; otherwise a student is weighted by how many
    # rows they have in the frame.
    graduates = graduates.drop_duplicates(subset='MATR_ALUNO')
    total_graduate = graduates.shape[0]
    if total_graduate == 0:
        return float('nan')

    # Fallbacks for rows without usable graduation dates.
    default_year_end = int(df['ANO'].max())
    default_semester_end = graduates['PERIODO'].max()

    total_semesters = 0
    for _, row in graduates.iterrows():
        # Start from the defaults on every row so that values read for the
        # previous graduate never leak into this one.
        year_end = default_year_end
        semester_end = default_semester_end
        if pd.notnull(row['ANO_EVASAO']):
            year_end = int(row['ANO_EVASAO'])
            try:
                semester_end = int(row['SEMESTRE_EVASAO'])
            except (TypeError, ValueError):
                # Missing (NaN/None) or non-numeric semester.
                semester_end = default_semester_end

        year = int(row['ANO_INGRESSO'])
        semester = int(row['SEMESTRE_INGRESSO'])
        # +1 because both the first and the last semester are attended.
        total_semesters += 2 * (year_end - year) + (semester_end - semester) + 1

    # Mean in semesters, then converted to years.
    return total_semesters / total_graduate / 2
\ No newline at end of file
...@@ -30,6 +30,7 @@ def load_dataframes(cwd='.'): ...@@ -30,6 +30,7 @@ def load_dataframes(cwd='.'):
dataframes.append(dh) dataframes.append(dh)
dataframe = fix_dataframes(dataframes) dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe) dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste) #~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste)) # print(dh.students['MEDIA_FINAL'].aggregate(teste))
...@@ -94,9 +95,12 @@ def fix_admission(df): ...@@ -94,9 +95,12 @@ def fix_admission(df):
def fix_evasion(df): def fix_evasion(df):
evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]
df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100
for evasion in EvasionForm.EVASION_FORM: for evasion in EvasionForm.EVASION_FORM:
#~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0] #~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0]
df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0] df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
#~ if(evasion[0] == 100): #~ if(evasion[0] == 100):
#~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False): #~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False):
#~ if(x != 0.0): #~ if(x != 0.0):
......
...@@ -22,7 +22,8 @@ def build_cache(dataframe): ...@@ -22,7 +22,8 @@ def build_cache(dataframe):
path += "/curso" path += "/curso"
build_path(path) build_path(path)
# generate_degree_data(path, dataframe) generate_student_data(path, dataframe)
generate_degree_data(path, dataframe)
generate_student_data(path,dataframe) generate_student_data(path,dataframe)
# generate_student_list(path) # generate_student_list(path)
# generate_admission_data(path) # generate_admission_data(path)
...@@ -34,6 +35,8 @@ def generate_degree_data(path, dataframe): ...@@ -34,6 +35,8 @@ def generate_degree_data(path, dataframe):
average_graduation(dataframe) average_graduation(dataframe)
general_failure(dataframe) general_failure(dataframe)
general_ira(dataframe) general_ira(dataframe)
total_evasion_rate(dataframe)
average_graduation_time(dataframe)
pass pass
def generate_student_data(path,dataframe): def generate_student_data(path,dataframe):
......
...@@ -3,13 +3,13 @@ import time ...@@ -3,13 +3,13 @@ import time
from base.dataframe_base import load_dataframes from base.dataframe_base import load_dataframes
from build_cache import build_cache from build_cache import build_cache
from datetime import timedelta from datetime import timedelta
from analysis.degree_analysis import *
def main(): def main():
start_time = time.clock() start_time = time.clock()
start_time_exec = time.time() start_time_exec = time.time()
dataframe = load_dataframes(os.getcwd() + '/' + 'base') dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(dataframe) build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time)) cpu_time = timedelta(seconds=round(time.clock() - start_time))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment