From fa8a8f871776c2a417cbe9b942a754ef23ded460 Mon Sep 17 00:00:00 2001 From: Jomaro Rodrigues <jomaro.rodrigues@gmail.com> Date: Thu, 2 Nov 2017 23:49:35 +0100 Subject: [PATCH] =?UTF-8?q?reestrutura=C3=A7=C3=A3o=20de=20como=20as=20ana?= =?UTF-8?q?lises=20s=C3=A3o=20rodadas=20e=20algumas=20analises=20de=20uma?= =?UTF-8?q?=20linha?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 3 +- script/analysis/degree_analysis.py | 1 - script/analysis/student_analysis.py | 264 ++++++++++++++-------------- script/build_cache.py | 128 ++++++++++---- script/main.py | 2 + script/utils/situations.py | 19 ++ script/utils/utils.py | 30 ++++ 7 files changed, 274 insertions(+), 173 deletions(-) diff --git a/requirements.txt b/requirements.txt index 498d21b..f08277c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ -django==1.8 +django==1.11 django-widget-tweaks pandas==0.18.1 django-extension psycopg2 +xlrd diff --git a/script/analysis/degree_analysis.py b/script/analysis/degree_analysis.py index 5f01427..4855ba4 100644 --- a/script/analysis/degree_analysis.py +++ b/script/analysis/degree_analysis.py @@ -1,5 +1,4 @@ import pandas as pd -import numpy as np import math from utils.situations import Situation, EvasionForm diff --git a/script/analysis/student_analysis.py b/script/analysis/student_analysis.py index ac23042..ba017dc 100644 --- a/script/analysis/student_analysis.py +++ b/script/analysis/student_analysis.py @@ -1,161 +1,155 @@ -import pandas as pd -from utils.situations import * +from utils.situations import * ANO_ATUAL = 2017 SEMESTRE_ATUAL = 2 + def listagem_evasao(df): - #~ print(df["FORMA_EVASAO"].drop_duplicates()) - #~ print(df) - #~ print(Situation.SITUATION_AFFECT_IRA) - #~ print(df) - aux = df[df.FORMA_EVASAO != 1] - print(aux) - #~ print(aux.where(aux.SITUACAO != 1)["SITUACAO"]) - #~ print(df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]) - #~ print(df.where(df["SITUACAO"] in Situation.SITUATION_AFFECT_IRA)) - #~ aux = df.drop_duplicates(['MATR_ALUNO'], keep='last') - #~ print(aux["FORMA_EVASAO"].drop_duplicates()) + # ~ print(df["FORMA_EVASAO"].drop_duplicates()) + # ~ print(df) + # ~ print(Situation.SITUATION_AFFECT_IRA) + # ~ print(df) + aux = df[df.FORMA_EVASAO != 1] + print(aux) + + +# ~ print(aux.where(aux.SITUACAO != 1)["SITUACAO"]) +# ~ print(df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]) +# ~ print(df.where(df["SITUACAO"] in Situation.SITUATION_AFFECT_IRA)) +# ~ aux = df.drop_duplicates(['MATR_ALUNO'], keep='last') +# ~ print(aux["FORMA_EVASAO"].drop_duplicates()) def average_ira(d): temp = d.dropna(subset=['MEDIA_FINAL']) temp = temp[temp['MEDIA_FINAL'] <= 100] if not temp.empty: - #print(temp[['MEDIA_FINAL', 'CH_TOTAL']]) - aux = np.sum(temp['MEDIA_FINAL']*temp['CH_TOTAL']) + # print(temp[['MEDIA_FINAL', 'CH_TOTAL']]) + aux = np.sum(temp['MEDIA_FINAL'] * temp['CH_TOTAL']) ch_total = np.sum(temp['CH_TOTAL']) * 100 - return(aux/ch_total) + return (aux / ch_total) + def posicao_turmaIngresso_semestral(df): - iras = ira_semestra(df) - iraMax = {} - for matr in iras: - for semestreAno in iras[matr]: - if not(semestreAno in iraMax): - iraMax[semestreAno] = iras[matr][semestreAno] - else: - if(iras[matr][semestreAno] > iraMax[semestreAno]): - iraMax[semestreAno] = iras[matr][semestreAno] - for matr in iras: - for semestreAno in iras[matr]: - iras[matr][semestreAno]/=iraMax[semestreAno] - - return iras + iras = ira_semestra(df) + iraMax = {} + for matr in iras: + for semestreAno in iras[matr]: + if not (semestreAno in iraMax): + iraMax[semestreAno] = iras[matr][semestreAno] + else: + if (iras[matr][semestreAno] > iraMax[semestreAno]): + iraMax[semestreAno] = iras[matr][semestreAno] + for matr in iras: + for semestreAno in iras[matr]: + iras[matr][semestreAno] /= iraMax[semestreAno] + + return iras + def periodo_real(df): - aux = df.groupby(["MATR_ALUNO"]) - students = {} - for x in aux: - students[x[0]] = None - return students + aux = df.groupby(["MATR_ALUNO"]) + students = {} + for x in aux: + students[x[0]] = None + return students + def periodo_pretendido(df): - aux = df.groupby(["MATR_ALUNO","ANO_INGRESSO","SEMESTRE_INGRESSO"]) - students = {} - for x in aux: - print(x[0][0] + " : "+x[0][1]+" "+x[0][2]) - students[x[0][0]] = (ANO_ATUAL - int(x[0][1]))*2 + SEMESTRE_ATUAL - int(x[0][2]) + 1 - return students + aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"]) + students = {} + for x in aux: + print(x[0][0] + " : " + x[0][1] + " " + x[0][2]) + students[x[0][0]] = (ANO_ATUAL - int(x[0][1])) * 2 + SEMESTRE_ATUAL - int(x[0][2]) + 1 + return students + def ira_semestra(df): - aux = ira_por_quantidade_disciplinas(df) - for matr in aux: - for periodo in aux[matr]: - aux[matr][periodo] = aux[matr][periodo][0] - return aux + aux = ira_por_quantidade_disciplinas(df) + for matr in aux: + for periodo in aux[matr]: + aux[matr][periodo] = aux[matr][periodo][0] + return aux + def ira_por_quantidade_disciplinas(df): - students = {} - df = df.dropna(subset=["MEDIA_FINAL"]) - #~ print(df["MATR_ALUNO"][178]) - #~ print(df["NOME_ATIV_CURRIC"][178]) - #~ print(df["PERIODO"][178]) - #~ print(df["ANO"][178]) - #~ print(df["SITUACAO"][178]) - - total_students = len(df["MATR_ALUNO"]) - for i in range(total_students): - matr = (df["MATR_ALUNO"][i]) - if(not (matr in students)): - students[matr] = {} - - - - ano = str(int(df["ANO"][i])) - semestre = str(df["PERIODO"][i]) - situacao = int(df["SITUACAO"][i]) - nota = float(df["MEDIA_FINAL"][i]) - media_credito = int(df["MEDIA_CREDITO"][i]) - - - if(situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0): - - - - if not(ano+"/"+semestre in students[matr]): - students[matr][ano+"/"+semestre] = [0,0] - students[matr][ano+"/"+semestre][0]+=nota - students[matr][ano+"/"+semestre][1]+=1 - - - for matr in students: - for periodo in students[matr]: - if(students[matr][periodo][1] != 0): - students[matr][periodo][0]/=students[matr][periodo][1]*100 - return(students) + students = {} + df = df.dropna(subset=["MEDIA_FINAL"]) + # ~ print(df["MATR_ALUNO"][178]) + # ~ print(df["NOME_ATIV_CURRIC"][178]) + # ~ print(df["PERIODO"][178]) + # ~ print(df["ANO"][178]) + # ~ print(df["SITUACAO"][178]) + + total_students = len(df["MATR_ALUNO"]) + for i in range(total_students): + matr = (df["MATR_ALUNO"][i]) + if (not (matr in students)): + students[matr] = {} + + ano = str(int(df["ANO"][i])) + semestre = str(df["PERIODO"][i]) + situacao = int(df["SITUACAO"][i]) + nota = float(df["MEDIA_FINAL"][i]) + media_credito = int(df["MEDIA_CREDITO"][i]) + + if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0): + + if not (ano + "/" + semestre in students[matr]): + students[matr][ano + "/" + semestre] = [0, 0] + students[matr][ano + "/" + semestre][0] += nota + students[matr][ano + "/" + semestre][1] += 1 + + for matr in students: + for periodo in students[matr]: + if (students[matr][periodo][1] != 0): + students[matr][periodo][0] /= students[matr][periodo][1] * 100 + return (students) + def indice_aprovacao_semestral(df): - students = {} - df = df.dropna(subset=['MEDIA_FINAL']) - total_students = len(df["MATR_ALUNO"]) - for i in range(total_students): - matr = (df["MATR_ALUNO"][i]) - if(not (matr in students)): - students[matr] = {} - - - ano = str(int(df["ANO"][i])) - semestre = str(df["PERIODO"][i]) - situacao = int(df["SITUACAO"][i]) - - - if not(ano+"/"+semestre in students[matr]): - students[matr][ano+"/"+semestre] = [0,0] - - if(situacao in Situation.SITUATION_PASS): - students[matr][ano+"/"+semestre][0]+=1 - students[matr][ano+"/"+semestre][1]+=1 - if(situacao in Situation.SITUATION_FAIL): - students[matr][ano+"/"+semestre][1]+=1 - return(students) - + students = {} + df = df.dropna(subset=['MEDIA_FINAL']) + total_students = len(df["MATR_ALUNO"]) + for i in range(total_students): + matr = (df["MATR_ALUNO"][i]) + if (not (matr in students)): + students[matr] = {} + + ano = str(int(df["ANO"][i])) + semestre = str(df["PERIODO"][i]) + situacao = int(df["SITUACAO"][i]) + + if not (ano + "/" + semestre in students[matr]): + students[matr][ano + "/" + semestre] = [0, 0] + + if situacao in Situation.SITUATION_PASS: + students[matr][ano + "/" + semestre][0] += 1 + students[matr][ano + "/" + semestre][1] += 1 + if situacao in Situation.SITUATION_FAIL: + students[matr][ano + "/" + semestre][1] += 1 + return (students) + def aluno_turmas(df): - students = {} - df = df.dropna(subset=['MEDIA_FINAL']) - total_students = len(df["MATR_ALUNO"]) - for i in range(total_students): - matr = (df["MATR_ALUNO"][i]) - if(not (matr in students)): - students[matr] = [] - - for s in Situation.SITUATIONS: - if(s[0] == df["SITUACAO"][i]): - situacao = s[1] - break - ano = str(int(df["ANO"][i])) - codigo = (df["COD_ATIV_CURRIC"][i]) - nome = (df["NOME_ATIV_CURRIC"][i]) - nota = (df["MEDIA_FINAL"][i]) - semestre = (df["PERIODO"][i]) - - students[matr].append({ - "ano": ano, - "codigo": codigo, - "nome": nome, - "nota": nota, - "semestre": semestre, - "situacao": situacao - }) - return(students) + students = {} + df = df.dropna(subset=['MEDIA_FINAL']) + + situations = dict(Situation.SITUATIONS) + + for matr, hist in df.groupby('MATR_ALUNO'): + students[matr] = [] + + for _, row in hist.iterrows(): + data = { + 'ano': str(int(row["ANO"])), + 'codigo': row["COD_ATIV_CURRIC"], + 'nome': row["NOME_ATIV_CURRIC"], + 'nota': row["MEDIA_FINAL"], + 'semestre': row["PERIODO"], + 'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS) + } + + students[matr].append(data) + + return students diff --git a/script/build_cache.py b/script/build_cache.py index 553b94c..8091390 100644 --- a/script/build_cache.py +++ b/script/build_cache.py @@ -1,14 +1,12 @@ -import sys -import os -import time -import math - -from datetime import timedelta -from pathlib import Path -from utils.utils import build_path + + +from utils.utils import * +from utils.situations import * from analysis.degree_analysis import * from analysis.student_analysis import * + + try: to_unicode = unicode except NameError: @@ -17,37 +15,95 @@ except NameError: def build_cache(dataframe): # os.chdir("../src") - path = "cache" - build_path(path) - path += "/curso" - build_path(path) - -# generate_degree_data(path, dataframe) - generate_student_data(path,dataframe) -# generate_student_list(path) -# generate_admission_data(path) -# generate_admission_list(path) -# generate_course_data(path) -# generate_course_general_data(path) + path = 'cache/curso/' + + ensure_path_exists(path) + + for cod, df in dataframe.groupby('COD_CURSO'): + generate_degree_data(path+'/'+cod+'/', df) + + #generate_degree_data(path, dataframe) + #generate_student_data(path, dataframe) + #generate_student_list(path) + #generate_admission_data(path) + #generate_admission_list(path) + #generate_course_data(path) + #generate_course_general_data(path) def generate_degree_data(path, dataframe): - average_graduation(dataframe) - general_failure(dataframe) - general_ira(dataframe) - pass + ensure_path_exists(path) + ensure_path_exists(path+'students') + + students = dataframe[['MATR_ALUNO', 'FORMA_EVASAO']].drop_duplicates() + + data = { + 'average_graduation': average_graduation(dataframe), + 'general_failure': general_failure(dataframe), + 'general_ira': general_ira(dataframe), + 'active_students': students[students.FORMA_EVASAO == EvasionForm.EF_ATIVO].shape[0], + 'graduated_students': students[students.FORMA_EVASAO == EvasionForm.EF_FORMATURA].shape[0], + } + + save_json(path+'/degree.json', data) + + for ind, hist in dataframe.groupby('MATR_ALUNO'): + generate_student_data(path+'students/{}.json'.format(ind), hist) + + + +def historico(dataframe): + res = [] + + for _, row in dataframe.iterrows(): + res.append(dict(row[['ANO', 'MEDIA_FINAL', 'PERIODO', 'SITUACAO', 'COD_ATIV_CURRIC', 'NOME_ATIV_CURRIC', + 'CREDITOS', 'CH_TOTAL', 'DESCR_ESTRUTURA', 'FREQUENCIA']])) + + return res + + +def process_semestre(per, df): + ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean() + completas = df[df.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0] + tentativas = df[df.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0] + + return { + 'semestre': per, + 'ira': ira, + 'completas': completas, + 'tentativas': tentativas, + 'aprovacao': completas/tentativas if tentativas else 0, + 'ira_por_quantidade_disciplinas': ira/tentativas if tentativas else 0 + } + +def generate_student_data(path, dataframe): + ensure_path_exists(os.path.dirname(path)) + + data = dict(dataframe.iloc[0][['MATR_ALUNO', 'NOME_ALUNO', 'SEXO', 'FORMA_INGRESSO', 'FORMA_EVASAO', 'ANO_INGRESSO', + 'SEMESTRE_INGRESSO', 'ANO_EVASAO', 'SEMESTRE_EVASAO']]) + + data.update({ + 'ira': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean(), + 'completas': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0], + 'tentativas': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0], + 'semestres': [process_semestre(per, dataframe[dataframe.PERIODO == per]) for per in sorted(dataframe.PERIODO.unique())], + 'historico': historico(dataframe) + }) + + save_json(path, data) + -def generate_student_data(path,dataframe): - #~ print(aluno_turmas(dataframe)) - #~ print(indice_aprovacao_semestral(dataframe)) - #~ print("2007/1" in ira_por_quantidade_disciplinas(dataframe)["GRR20066955"]) - #~ print(ira_semestra(dataframe)["GRR20079775"]) - #~ aluno_turmas(dataframe) - #~ indice_aprovacao_semestral(dataframe) - #~ ira_por_quantidade_disciplinas(dataframe) - #~ ira_semestra(dataframe) - #~ periodo_pretendido(dataframe) - #~ print(periodo_real(dataframe)) - #~ print(posicao_turmaIngresso_semestral(dataframe)) +def generate_student_data_old(path, dataframe): + print(aluno_turmas(dataframe)) + print(indice_aprovacao_semestral(dataframe)) + print("2007/1" in ira_por_quantidade_disciplinas(dataframe)["GRR20066955"]) + print(ira_semestra(dataframe)["GRR20079775"]) + aluno_turmas(dataframe) + indice_aprovacao_semestral(dataframe) + ira_por_quantidade_disciplinas(dataframe) + ira_semestra(dataframe) + periodo_pretendido(dataframe) + print(periodo_real(dataframe)) + print(posicao_turmaIngresso_semestral(dataframe)) print(listagem_evasao(dataframe)) pass diff --git a/script/main.py b/script/main.py index 8e3f306..9cec7b0 100644 --- a/script/main.py +++ b/script/main.py @@ -5,6 +5,8 @@ from build_cache import build_cache from datetime import timedelta from analysis.degree_analysis import * + + def main(): start_time = time.clock() start_time_exec = time.time() diff --git a/script/utils/situations.py b/script/utils/situations.py index 80f2b17..e62c509 100644 --- a/script/utils/situations.py +++ b/script/utils/situations.py @@ -137,3 +137,22 @@ class Situation: SIT_REPROVADO_FREQ, SIT_CONHECIMENTO_REPROVADO ) + + """ + isso deve ser pra filtrar fora coisas que não são disciplinas cumpridas + + como "trancamento administrativo" e "horas" + + importante pra saber quantas matérias um aluno REALMENTE fez em um semestre + """ + SITUATION_COURSED = ( + SIT_APROVADO, + SIT_REPROVADO, + SIT_REPROVADO_FREQ, + SIT_DISPENSA_COM_NOTA, + SIT_CONHECIMENTO_APROVADO, + SIT_CONHECIMENTO_REPROVADO, + SIT_REPROVADO_SEM_NOTA, + SIT_INCOMPLETO, + SIT_CANCELADO, + ) diff --git a/script/utils/utils.py b/script/utils/utils.py index 871e935..f69d762 100644 --- a/script/utils/utils.py +++ b/script/utils/utils.py @@ -1,5 +1,35 @@ import os +import json + +try: + from django.conf import settings + + DEBUG = settings.DEBUG +except: + DEBUG = True + + def build_path(path): if not os.path.exists(path): os.mkdir(path) + + +def ensure_path_exists(complete_path): + parts = complete_path.split('/') + + for i in range(len(parts)): + if not os.path.exists('/'.join(parts[:i+1])): + os.mkdir('/'.join(parts[:i+1])) + + +def save_json(path, data): + + ensure_path_exists(os.path.dirname(path)) + + params = {} if not DEBUG else {'indent': 4} + + with open(path, 'w') as f: + json.dump(data, f, **params) + + -- GitLab