Commit b590f78d authored by Bruno Meyer 😢

Merge branch 'degree' into 'alunos'

Degree

See merge request pet/adega-reborn!1
parents d60a505f 489adfee
......@@ -34,3 +34,32 @@ def general_ira(df):
fixed = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
fixed = fixed[fixed.MEDIA_FINAL <= 100]
return (fixed.MEDIA_FINAL.mean(), fixed.MEDIA_FINAL.std())
def total_evasion_rate(df):
    """Return the fraction of distinct students that count as evaded.

    Every ``MATR_ALUNO`` value is counted once, using its first row. A student
    counts as evaded when ``FORMA_EVASAO`` on that row is none of
    ``EvasionForm.EF_ATIVO``, ``EvasionForm.EF_FORMATURA`` or
    ``EvasionForm.EF_REINTEGRACAO``.

    Args:
        df: DataFrame with ``MATR_ALUNO`` and ``FORMA_EVASAO`` columns.

    Returns:
        Evaded students divided by total students, or ``0.0`` when ``df``
        contains no students.
    """
    # Keep whole rows (not just the id column) so the evasion test below is
    # applied to the selected rows directly instead of relying on index
    # alignment between a mask built on `df` and a de-duplicated Series.
    first_rows = df.drop_duplicates(subset='MATR_ALUNO')
    total_student = first_rows.shape[0]
    if total_student == 0:
        # Nothing to rate; avoid dividing by zero.
        return 0.0

    evasion_form = first_rows.FORMA_EVASAO
    evaded = (
        (evasion_form != EvasionForm.EF_ATIVO)
        & (evasion_form != EvasionForm.EF_FORMATURA)
        & (evasion_form != EvasionForm.EF_REINTEGRACAO)
    )
    total_evasion = int(evaded.sum())
    return total_evasion / total_student
def average_graduation_time(df):
    """Return the mean time to graduation, in years, over graduate rows.

    Every row whose ``FORMA_EVASAO`` equals ``EvasionForm.EF_FORMATURA``
    contributes one duration; rows are not de-duplicated per student. A
    duration is ``2 * (year_end - ANO_INGRESSO) + (semester_end -
    SEMESTRE_INGRESSO) + 1`` semesters, and the mean is halved to give years.

    ``year_end`` is the row's ``ANO_EVASAO`` when present, otherwise the
    largest ``ANO`` in ``df``. ``semester_end`` is the row's
    ``SEMESTRE_EVASAO`` when it converts to ``int``, otherwise the largest
    ``PERIODO`` among the graduate rows.

    Args:
        df: DataFrame with the columns named above.

    Returns:
        The mean duration in years, or ``0.0`` when there are no graduate
        rows.
    """
    graduates = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    total_graduate = graduates.shape[0]
    if total_graduate == 0:
        # No graduates: nothing to average, and dividing would fail.
        return 0.0

    fallback_year_end = int(df['ANO'].max())
    fallback_semester_end = graduates['PERIODO'].max()

    total_semesters = 0
    for _, row in graduates.iterrows():
        # Start from the fallbacks on every row so that a row without
        # evasion data never inherits the previous row's end date.
        year_end = fallback_year_end
        if pd.notnull(row['ANO_EVASAO']):
            year_end = int(row['ANO_EVASAO'])
        try:
            semester_end = int(row['SEMESTRE_EVASAO'])
        except (ValueError, TypeError):
            # NaN, None or non-numeric text: use the fallback semester.
            semester_end = fallback_semester_end

        year = int(row['ANO_INGRESSO'])
        semester = int(row['SEMESTRE_INGRESSO'])
        total_semesters += 2 * (year_end - year) + (semester_end - semester) + 1

    # Two semesters per year.
    return total_semesters / total_graduate / 2
\ No newline at end of file
import pandas as pd
import numpy as np
import math
from utils.situations import Situation, EvasionForm
from utils.situations import *
def average_ira(d):
    """Print the ``CH_TOTAL``-weighted mean of ``MEDIA_FINAL`` for ``d``.

    Rows with a missing ``MEDIA_FINAL`` or a ``MEDIA_FINAL`` above 100 are
    ignored. Nothing is printed when no rows remain. The function only
    prints; it always returns ``None``.
    """
    graded = d.dropna(subset=['MEDIA_FINAL'])
    graded = graded[graded['MEDIA_FINAL'] <= 100]
    if graded.empty:
        return

    weighted_grades = np.sum(graded['MEDIA_FINAL'] * graded['CH_TOTAL'])
    # The divisor carries an extra factor of 100, so a grade of 100 in every
    # remaining row prints 1.0.
    workload = np.sum(graded['CH_TOTAL']) * 100
    print(weighted_grades / workload)
def ira_student(df):
    """Print each student's workload-weighted mean grade and pass/fail counts.

    Rows are read in order from the ``MATR_ALUNO``, ``MEDIA_FINAL``,
    ``CH_TOTAL``, ``MEDIA_CREDITO`` and ``SITUACAO`` columns. A row only
    counts when its ``MEDIA_CREDITO`` is positive:

    * situations in ``Situation.SITUATION_AFFECT_IRA`` add
      ``CH_TOTAL * MEDIA_FINAL`` to the student's grade sum and ``CH_TOTAL``
      to the student's workload;
    * situations in ``Situation.SITUATION_PASS`` / ``SITUATION_FAIL``
      increment the ``APROVADAS`` / ``REPROVADAS`` counters.

    For every student the grade sum is divided by the workload (when the
    workload is positive) and the result is printed, followed by the two
    counters and a blank line. The number of students is printed last.

    Args:
        df: DataFrame with the columns named above. Its index may be
            anything; rows are visited by position.

    Returns:
        Always ``1``.
    """
    students = {}
    # Iterate the columns in lockstep. Indexing `df[col][i]` with a running
    # counter is a *label* lookup and fails (or picks the wrong row) whenever
    # the index is not exactly 0..n-1.
    rows = zip(
        df['MATR_ALUNO'],
        df['MEDIA_FINAL'],
        df['CH_TOTAL'],
        df['MEDIA_CREDITO'],
        df['SITUACAO'],
    )
    for grr, nota, carga_total, credito, situacao in rows:
        if grr not in students:
            students[grr] = {
                "MEDIA_TOTAL": {"nota": 0.0, "total": 0, "carga_total": 0},
            }
        student = students[grr]
        media = student["MEDIA_TOTAL"]

        if situacao in Situation.SITUATION_AFFECT_IRA and credito > 0:
            media["nota"] += carga_total * nota
            media["total"] += 1
            media["carga_total"] += carga_total
        if situacao in Situation.SITUATION_PASS and credito > 0:
            student["APROVADAS"] = student.get("APROVADAS", 0) + 1
        if situacao in Situation.SITUATION_FAIL and credito > 0:
            student["REPROVADAS"] = student.get("REPROVADAS", 0) + 1

    for grr, student in students.items():
        media = student["MEDIA_TOTAL"]
        if media["carga_total"] > 0:
            # Turn the accumulated weighted sum into a weighted mean.
            media["nota"] /= media["carga_total"]
        print(str(grr) + " : " + str(media["nota"]))
        print(str(student.get("APROVADAS")) + " " + str(student.get("REPROVADAS")))
        print("")

    print(len(students))
    return 1
def aluno_turmas(df):
    """Print a dict mapping each student id to its graded course records.

    Rows with a missing ``MEDIA_FINAL`` are dropped first. Every remaining
    row becomes a dict with the keys ``ano``, ``codigo``, ``nome``, ``nota``,
    ``semestre`` and ``situacao``, appended to the list stored under the
    row's ``MATR_ALUNO``.

    ``situacao`` is the second element of the first entry in
    ``Situation.SITUATIONS`` whose first element equals the row's
    ``SITUACAO``, or ``None`` when no entry matches.

    Args:
        df: DataFrame with ``MATR_ALUNO``, ``SITUACAO``, ``ANO``,
            ``COD_ATIV_CURRIC``, ``NOME_ATIV_CURRIC``, ``MEDIA_FINAL`` and
            ``PERIODO`` columns.

    Returns:
        ``None``; the collected dict is only printed.
    """
    df = df.dropna(subset=['MEDIA_FINAL'])

    students = {}
    # Walk the columns in lockstep. After `dropna` the index has gaps, so
    # looking rows up with `df[col][i]` for i in range(len(df)) would raise
    # KeyError on the first dropped label.
    rows = zip(
        df["MATR_ALUNO"],
        df["SITUACAO"],
        df["ANO"],
        df["COD_ATIV_CURRIC"],
        df["NOME_ATIV_CURRIC"],
        df["MEDIA_FINAL"],
        df["PERIODO"],
    )
    for matr, situacao_cod, ano, codigo, nome, nota, semestre in rows:
        # Default to None so an unknown code never reuses the previous row's
        # label (or hits an unbound name on the first row).
        situacao = next(
            (s[1] for s in Situation.SITUATIONS if s[0] == situacao_cod),
            None,
        )
        students.setdefault(matr, []).append({
            "ano": ano,
            "codigo": codigo,
            "nome": nome,
            "nota": nota,
            "semestre": semestre,
            "situacao": situacao,
        })

    print(students)
......@@ -2,12 +2,19 @@ import re
import os
import sys
import pandas as pd
import numpy as np
from glob import glob
from json import load as json_load
from utils.situations import *
class DataframeHolder:
    """Hold three groupings of one DataFrame.

    The attributes are assigned in ``__init__``:

    * ``students``: rows grouped by ``MATR_ALUNO``;
    * ``courses``: rows grouped by ``COD_ATIV_CURRIC``;
    * ``admission``: rows grouped by the pair
      ``ANO_INGRESSO`` / ``SEMESTRE_INGRESSO``.
    """

    def __init__(self, dataframe):
        group_by = dataframe.groupby
        admission_keys = ['ANO_INGRESSO', 'SEMESTRE_INGRESSO']

        self.students = group_by('MATR_ALUNO')
        self.courses = group_by('COD_ATIV_CURRIC')
        self.admission = group_by(admission_keys)
def load_dataframes(cwd='.'):
dataframes = []
for path, dirs, files in os.walk(cwd):
......@@ -23,6 +30,10 @@ def load_dataframes(cwd='.'):
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste))
return dataframe
......@@ -84,5 +95,15 @@ def fix_admission(df):
def fix_evasion(df):
    """Replace textual ``FORMA_EVASAO`` values with their codes, in place.

    ``EvasionForm.EVASION_FORM`` is read as a sequence of entries whose first
    element is the code and whose second element is the text. Any value that
    is not exactly one of the known texts becomes ``100``; every known text
    is then replaced by its code.

    Args:
        df: DataFrame with a ``FORMA_EVASAO`` column. It is modified in
            place.

    Returns:
        ``None``.
    """
    known_names = [form[1] for form in EvasionForm.EVASION_FORM]
    # Anything that is not exactly a known text collapses to code 100.
    df.loc[~df.FORMA_EVASAO.isin(known_names), 'FORMA_EVASAO'] = 100

    for form in EvasionForm.EVASION_FORM:
        code, name = form[0], form[1]
        # Exact comparison only. After the step above every remaining text
        # is already one of the known names, so substring/regex matching
        # (`str.contains`) could only mis-assign a name that happens to
        # contain another one, and `.str` fails on a non-string column.
        df.loc[df.FORMA_EVASAO == name, 'FORMA_EVASAO'] = code
......@@ -14,6 +14,7 @@ try:
except NameError:
to_unicode = str
def build_cache(dataframe):
# os.chdir("../src")
path = "cache"
......@@ -21,22 +22,25 @@ def build_cache(dataframe):
path += "/curso"
build_path(path)
generate_student_data(path, dataframe)
generate_degree_data(path, dataframe)
generate_student_data(path,dataframe)
generate_student_list(path)
generate_admission_data(path)
generate_admission_list(path)
generate_course_data(path)
generate_course_general_data(path)
# generate_student_list(path)
# generate_admission_data(path)
# generate_admission_list(path)
# generate_course_data(path)
# generate_course_general_data(path)
def generate_degree_data(path, dataframe):
    """Run each degree-level analysis on ``dataframe``, in a fixed order.

    The analyses' return values are discarded and ``path`` is not used.
    """
    analyses = (
        average_graduation,
        general_failure,
        general_ira,
        total_evasion_rate,
        average_graduation_time,
    )
    for analysis in analyses:
        analysis(dataframe)
def generate_student_data(path, dataframe):
    """Run the per-student reports on ``dataframe``.

    Calls ``ira_student`` and then ``aluno_turmas``; their return values are
    discarded and ``path`` is not used.
    """
    for report in (ira_student, aluno_turmas):
        report(dataframe)
def generate_student_list(path):
......
......@@ -3,13 +3,13 @@ import time
from base.dataframe_base import load_dataframes
from build_cache import build_cache
from datetime import timedelta
from analysis.degree_analysis import *
def main():
start_time = time.clock()
start_time_exec = time.time()
dataframe = load_dataframes(os.getcwd() + '/' + 'base')
build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment