# --- script/analysis/student_analysis.py ---


def average_ira(df):
    """Return the workload-weighted average of final grades, scaled by 1/100.

    Rows whose ``MEDIA_FINAL`` is missing or greater than 100 are ignored.
    The result is ``sum(MEDIA_FINAL * CH_TOTAL) / (sum(CH_TOTAL) * 100)``.

    Args:
        df: DataFrame with ``MEDIA_FINAL`` and ``CH_TOTAL`` columns.

    Returns:
        The weighted average, or ``nan`` when no row qualifies or the
        summed workload is zero.
    """
    graded = df.dropna(subset=['MEDIA_FINAL'])
    graded = graded[graded['MEDIA_FINAL'] <= 100]
    # Guard first: without it the names used in the return statement were
    # never bound for an empty selection and the function raised.
    if graded.empty:
        return float('nan')

    total_ch = graded['CH_TOTAL'].sum() * 100
    # A zero workload would otherwise produce a 0/0 division warning.
    if total_ch == 0:
        return float('nan')

    weighted = (graded['MEDIA_FINAL'] * graded['CH_TOTAL']).sum()
    return weighted / total_ch


def pass_rate(dt):
    """Return the fraction of rows in *dt* with a passing ``SITUACAO``.

    Args:
        dt: DataFrame with a ``SITUACAO`` column.

    Returns:
        ``passing_rows / total_rows`` as a float; ``nan`` when *dt* has no
        rows (the mean of an empty selection).
    """
    # Function-scope import keeps this definition self-contained.  The
    # attribute name is kept exactly as the original code references it.
    from utils.situations import Situation

    passed = dt['SITUACAO'].isin(Situation.SITATUION_PASS)
    # The mean of a boolean Series is the share of True values.
    return float(passed.mean())


# --- script/base/dataframe_base.py ---


class DataframeHolder:
    """Hold the groupings of one dataframe that the analyses work on.

    Each attribute is the GroupBy object returned by ``dataframe.groupby``
    for the column(s) noted below; all three share the same source frame.
    """

    def __init__(self, dataframe):
        # Grouped by the 'MATR_ALUNO' column.
        self.students = dataframe.groupby('MATR_ALUNO')
        # Grouped by the 'COD_ATIV_CURRIC' column.
        self.courses = dataframe.groupby('COD_ATIV_CURRIC')
        # Grouped by the ('ANO_INGRESSO', 'SEMESTRE_INGRESSO') pair.
        self.admission = dataframe.groupby(
            ['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
import Path from utils.utils import build_path -from analysis.degree_analysis import * - +import analysis.degree_analysis as de_an +import analysis.student_analysis as st_an try: to_unicode = unicode except NameError: to_unicode = str + def build_cache(dataframe): # os.chdir("../src") path = "cache" @@ -20,21 +21,34 @@ def build_cache(dataframe): path += "/curso" build_path(path) - generate_degree_data(path, dataframe) - generate_student_data(path) - generate_student_list(path) - generate_admission_data(path) - generate_admission_list(path) - generate_course_data(path) - generate_course_general_data(path) + generate_student_data(path, dataframe) +# generate_degree_data(path, dataframe) +# generate_student_data(path) +# generate_student_list(path) +# generate_admission_data(path) +# generate_admission_list(path) +# generate_course_data(path) +# generate_course_general_data(path) def generate_degree_data(path, dataframe): - average_graduation(dataframe) - general_failure(dataframe) - general_ira(dataframe) + de_an.average_graduation(dataframe) + de_an.general_failure(dataframe) + de_an.general_ira(dataframe) pass -def generate_student_data(path): + +def teste(d): + temp = d.dropna(subset=['MEDIA_FINAL']) + temp = temp[temp['MEDIA_FINAL'] <= 100] + if not temp.empty: + #print(temp[['MEDIA_FINAL', 'CH_TOTAL']]) + aux = np.sum(temp['MEDIA_FINAL']*temp['CH_TOTAL']) + ch_total = np.sum(temp['CH_TOTAL']) * 100 + print(aux/ch_total) + +def generate_student_data(path, dataframe): +# student_df = dataframe.groupby('MATR_ALUNO') + dataframe.students.aggregate(teste) pass def generate_student_list(path):