diff --git a/script/base/dataframe_base.py b/script/base/dataframe_base.py
index 56d1bf01b269d2ddf7f6741f5ad5ed21fd5160b2..b1901c0c03d9f9e8aed1cd56e7419e99d47c3a0f 100644
--- a/script/base/dataframe_base.py
+++ b/script/base/dataframe_base.py
@@ -2,12 +2,28 @@ import re
 import os
 import sys
 import pandas as pd
-
+import numpy as np
 from glob import glob
 from json import load as json_load
 from utils.situations import *
 
 
+def teste(d):
+    temp = d.dropna(subset=['MEDIA_FINAL'])
+    temp = temp[temp['MEDIA_FINAL'] <= 100]
+    if not temp.empty:
+        #print(temp[['MEDIA_FINAL', 'CH_TOTAL']])
+        aux = np.sum(temp['MEDIA_FINAL']*temp['CH_TOTAL'])
+        ch_total = np.sum(temp['CH_TOTAL']) * 100
+        print(aux/ch_total)
+
+class DataframeHolder:
+    def __init__(self, dataframe):
+        self.students = dataframe.groupby('MATR_ALUNO')
+        self.courses = dataframe.groupby('COD_ATIV_CURRIC')
+        self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
+
+
 def load_dataframes(cwd='.'):
     dataframes = []
     for path, dirs, files in os.walk(cwd):
@@ -23,6 +39,9 @@ def load_dataframes(cwd='.'):
                 dataframes.append(dh)
 
     dataframe = fix_dataframes(dataframes)
+    dh = DataframeHolder(dataframe)
+    dh.students.aggregate(teste)
+# print(dh.students['MEDIA_FINAL'].aggregate(teste))
     return dataframe
 
 
diff --git a/script/build_cache.py b/script/build_cache.py
index 4ceae5a030c3d99e8092bc9e6e1d45adc6930613..50ef82d80ce0ca2c61afbdf75333eb334f9ff88e 100644
--- a/script/build_cache.py
+++ b/script/build_cache.py
@@ -13,6 +13,7 @@ try:
 except NameError:
     to_unicode = str
 
+
 def build_cache(dataframe):
 # os.chdir("../src")
     path = "cache"
@@ -20,13 +21,13 @@ def build_cache(dataframe):
     path += "/curso"
     build_path(path)
 
-    generate_degree_data(path, dataframe)
-    generate_student_data(path)
-    generate_student_list(path)
-    generate_admission_data(path)
-    generate_admission_list(path)
-    generate_course_data(path)
-    generate_course_general_data(path)
+# generate_degree_data(path, dataframe)
+# generate_student_data(path)
+# generate_student_list(path)
+# generate_admission_data(path)
+# generate_admission_list(path)
+# generate_course_data(path)
+# generate_course_general_data(path)
 
 def generate_degree_data(path, dataframe):
     average_graduation(dataframe)