def start():
    """Walk the ``relatorios`` directory and load every file found in it.

    Each file name and its full path are printed, then the file is handed to
    :func:`file_open`.  The loaded frames are currently discarded; the
    function returns ``None``.
    """
    # Walk with a text path.  The original encoded the name with
    # os.fsencode() and later wrapped the resulting bytes in str(), which
    # produced paths such as "b'relatorios'/b'x.csv'" that never exist.
    directory = 'relatorios'
    for path, subdirs, files in os.walk(directory):
        for f in files:
            print(f)
            file_path = pathlib.PurePath(path, f)
            print(file_path)
            file_open(file_path)


def file_open(path):
    """Load a report file into a pandas DataFrame.

    Args:
        path: location of the file (``str`` or path object).

    Returns:
        The DataFrame read with ``pd.read_csv`` when the file extension is
        ``.csv`` (case-insensitive), otherwise with ``pd.read_excel``.
    """
    # Decide on the extension.  ``str.find`` returns -1 (truthy) when the
    # text is absent and 0 (falsy) when it is at the start, so using its
    # result as a boolean picked the wrong reader.
    suffix = pathlib.PurePath(str(path)).suffix.lower()
    if suffix == '.csv':
        print('csv')
        return pd.read_csv(str(path))

    print('excel')
    return pd.read_excel(str(path))
def periodo_real(df):
    """Return a ``{student id: None}`` placeholder for every student in ``df``.

    The value is always ``None``: this function only reserves one entry per
    distinct ``MATR_ALUNO`` value.

    Args:
        df: DataFrame with a ``MATR_ALUNO`` column.

    Returns:
        dict keyed by the scalar ``MATR_ALUNO`` values.
    """
    # Group by the column *name*, not by a one-element list: with a list
    # grouper pandas >= 2.0 yields 1-tuples such as ('GRR1',) as keys, which
    # would not match the plain ids used by the other per-student analyses.
    return {matr: None for matr, _ in df.groupby("MATR_ALUNO")}
"SEMESTRE_INGRESSO"]) + students = {} + for x in aux: + students[x[0][0]] = (ANO_ATUAL - int(x[0][1])) * 2 + SEMESTRE_ATUAL - int(x[0][2]) + 1 + return students + + +def ira_semestral(df): + aux = ira_por_quantidade_disciplinas(df) + for matr in aux: + for periodo in aux[matr]: + aux[matr][periodo] = aux[matr][periodo][0] + return aux + + +def ira_por_quantidade_disciplinas(df): + students = {} + df = df.dropna(subset=["MEDIA_FINAL"]) + + total_students = len(df["MATR_ALUNO"]) + for i in range(total_students): + matr = (df["MATR_ALUNO"][i]) + if (not (matr in students)): + students[matr] = {} + + ano = str(int(df["ANO"][i])) + semestre = str(df["PERIODO"][i]) + situacao = int(df["SITUACAO"][i]) + nota = float(df["MEDIA_FINAL"][i]) + media_credito = int(df["MEDIA_CREDITO"][i]) + + if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0): + + if not (ano + "/" + semestre in students[matr]): + students[matr][ano + "/" + semestre] = [0, 0] + students[matr][ano + "/" + semestre][0] += nota + students[matr][ano + "/" + semestre][1] += 1 + + for matr in students: + for periodo in students[matr]: + if (students[matr][periodo][1] != 0): + students[matr][periodo][0] /= students[matr][periodo][1] * 100 + return (students) + + +def indice_aprovacao_semestral(df): + students = {} + df = df.dropna(subset=['MEDIA_FINAL']) + total_students = len(df["MATR_ALUNO"]) + for i in range(total_students): + matr = (df["MATR_ALUNO"][i]) + if (not (matr in students)): + students[matr] = {} + + ano = str(int(df["ANO"][i])) + semestre = str(df["PERIODO"][i]) + situacao = int(df["SITUACAO"][i]) + + if not (ano + "/" + semestre in students[matr]): + students[matr][ano + "/" + semestre] = [0, 0] + + if situacao in Situation.SITUATION_PASS: + students[matr][ano + "/" + semestre][0] += 1 + students[matr][ano + "/" + semestre][1] += 1 + if situacao in Situation.SITUATION_FAIL: + students[matr][ano + "/" + semestre][1] += 1 + return (students) + + +def aluno_turmas(df): + students = {} 
def average_graduation(df):
    """Return the fraction of students in ``df`` whose evasion form is graduation.

    Args:
        df: DataFrame with ``MATR_ALUNO`` and ``FORMA_EVASAO`` columns; a
            student may appear in many rows.

    Returns:
        ``graduated students / distinct students``, or ``0`` when ``df`` has
        no students.
    """
    total_student = df['MATR_ALUNO'].drop_duplicates().shape[0]
    if total_student == 0:
        # Avoid ZeroDivisionError on an empty frame.
        return 0

    # Count distinct students, not rows: the denominator is de-duplicated, so
    # counting every matching row made the ratio grow with the number of
    # rows each graduate has.
    graduated = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA, 'MATR_ALUNO']
    total_graduate = graduated.drop_duplicates().shape[0]

    return total_graduate / total_student
def average_graduation_time(df):
    """Return the mean time to graduation, in years, of the graduates in ``df``.

    For each graduate the number of semesters between admission
    (``ANO_INGRESSO``/``SEMESTRE_INGRESSO``) and evasion
    (``ANO_EVASAO``/``SEMESTRE_EVASAO``) is computed, inclusive of both ends;
    the mean is then divided by two.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``FORMA_EVASAO``,
            ``ANO``, ``PERIODO``, ``ANO_EVASAO``, ``SEMESTRE_EVASAO``,
            ``ANO_INGRESSO`` and ``SEMESTRE_INGRESSO``.

    Returns:
        Mean graduation time in years, or ``0`` when nobody graduated.
    """
    graduate_rows = df.loc[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    # One row per student, so a graduate with many rows is not counted
    # once per row.
    graduates = graduate_rows.drop_duplicates(subset=['MATR_ALUNO'])
    total_graduate = graduates.shape[0]
    if total_graduate == 0:
        # Avoid ZeroDivisionError when the frame has no graduates.
        return 0

    # Fallbacks used when a graduate has no usable evasion period.
    fallback_year = int(df['ANO'].max())
    fallback_semester = graduate_rows['PERIODO'].max()
    try:
        # NOTE(review): PERIODO looks like it may hold text such as '2';
        # coerce when possible so the subtraction below works.
        fallback_semester = int(fallback_semester)
    except (TypeError, ValueError):
        pass

    total_semesters = 0
    for _, row in graduates.iterrows():
        # Start from the fallbacks on every row; the original reused the
        # values computed for the previous student.
        year_end = fallback_year
        semester_end = fallback_semester
        if pd.notnull(row['ANO_EVASAO']):
            year_end = int(row['ANO_EVASAO'])
            try:
                semester_end = int(row['SEMESTRE_EVASAO'])
            except (TypeError, ValueError):
                semester_end = fallback_semester

        year = int(row['ANO_INGRESSO'])
        semester = int(row['SEMESTRE_INGRESSO'])
        total_semesters += 2 * (year_end - year) + (semester_end - semester) + 1

    # Semesters -> years.
    return total_semesters / total_graduate / 2
def ira_por_quantidade_disciplinas(df):
    """Compute, per student and semester, the mean grade and the course count.

    Rows without ``MEDIA_FINAL`` are ignored.  A row contributes only when
    its ``SITUACAO`` is in ``Situation.SITUATION_AFFECT_IRA`` and its
    ``MEDIA_CREDITO`` is non-zero.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``ANO``, ``PERIODO``,
            ``SITUACAO``, ``MEDIA_FINAL`` and ``MEDIA_CREDITO``.

    Returns:
        ``{student: {"<year>/<period>": [mean_grade / 100, n_courses]}}``.
        Every student with at least one graded row gets an entry, possibly
        an empty dict.
    """
    students = {}
    df = df.dropna(subset=["MEDIA_FINAL"])

    columns = ("MATR_ALUNO", "ANO", "PERIODO", "SITUACAO", "MEDIA_FINAL",
               "MEDIA_CREDITO")
    # Iterate the rows positionally.  After dropna() the index has gaps, so
    # the original ``df[col][i]`` with ``i in range(len(df))`` (a *label*
    # lookup) raised KeyError or read the wrong row.
    for matr, ano, periodo, situacao, nota, credito in zip(
            *(df[name] for name in columns)):
        semesters = students.setdefault(matr, {})

        ano = str(int(ano))
        semestre = str(periodo)
        situacao = int(situacao)
        nota = float(nota)
        media_credito = int(credito)

        if situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0:
            totals = semesters.setdefault(ano + "/" + semestre, [0, 0])
            totals[0] += nota
            totals[1] += 1

    for semesters in students.values():
        for totals in semesters.values():
            if totals[1] != 0:
                # sum / (count * 100): the mean grade scaled by 1/100.
                totals[0] /= totals[1] * 100
    return students
def aluno_turmas(df):
    """List, for every student, the graded course records found in ``df``.

    Rows without ``MEDIA_FINAL`` are ignored.

    Args:
        df: DataFrame with the columns ``MATR_ALUNO``, ``ANO``,
            ``COD_ATIV_CURRIC``, ``NOME_ATIV_CURRIC``, ``MEDIA_FINAL``,
            ``PERIODO`` and ``SITUACAO``.

    Returns:
        ``{student: [record, ...]}`` where each record is a dict with the
        keys ``ano``, ``codigo``, ``nome``, ``nota``, ``semestre`` and
        ``situacao`` (the situation label).
    """
    students = {}
    df = df.dropna(subset=['MEDIA_FINAL'])

    situations = dict(Situation.SITUATIONS)
    # Fall back to the *label* of SIT_OUTROS so 'situacao' is always a label;
    # the original fell back to the numeric code, mixing ints with strings.
    unknown_label = situations[Situation.SIT_OUTROS]

    for matr, hist in df.groupby('MATR_ALUNO'):
        students[matr] = [
            {
                'ano': str(int(row["ANO"])),
                'codigo': row["COD_ATIV_CURRIC"],
                'nome': row["NOME_ATIV_CURRIC"],
                'nota': row["MEDIA_FINAL"],
                'semestre': row["PERIODO"],
                'situacao': situations.get(row["SITUACAO"], unknown_label),
            }
            for _, row in hist.iterrows()
        ]

    return students
def clean_register(df):
    """Normalise the enrolment sheet in place.

    Splits ``PERIODO_INGRESSO`` and ``PERIODO_EVASAO`` (text of the form
    ``"<year>/<semester>o..."``) into ``ANO_*`` / ``SEMESTRE_*`` columns and
    drops the columns that are not used afterwards.

    Args:
        df: enrolment DataFrame; it is modified in place.

    Returns:
        None.
    """
    def split_period(column):
        # Cast to object first: a column that is entirely empty is read as
        # float64, on which the ``.str`` accessor raises AttributeError.
        parts = df[column].astype(object).str.split('/')
        year = parts.str[0]
        semester = parts.str[1].astype(object).str.split('o').str[0]
        return year, semester

    df['ANO_INGRESSO'], df['SEMESTRE_INGRESSO'] = split_period('PERIODO_INGRESSO')
    df['ANO_EVASAO'], df['SEMESTRE_EVASAO'] = split_period('PERIODO_EVASAO')

    df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE',
             'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO',
             ], axis=1, inplace=True)
def build_cache(dataframe):
    """Write the JSON cache for every course found in ``dataframe``.

    For each distinct ``COD_CURSO`` the degree summary is written under
    ``cache/curso/<code>/`` and the per-student files under
    ``cache/curso/<code>/students/``.

    Args:
        dataframe: DataFrame with a ``COD_CURSO`` column.
    """
    path = 'cache/curso'

    ensure_path_exists(path)

    for cod, df in dataframe.groupby('COD_CURSO'):
        # format() accepts any code type; ``path + '/' + cod`` raised
        # TypeError when the course code was not a string.
        degree_path = '{}/{}/'.format(path, cod)
        generate_degree_data(degree_path, df)
        generate_student_data(degree_path + 'students/', df)
def generate_student_data(path, dataframe):
    """Run every per-student analysis and write one JSON file per student.

    Each analysis returns a dict keyed by student id; the results are
    regrouped into ``{analysis name: value}`` per student and saved as
    ``<path><student id>.json``.

    Args:
        path: directory prefix for the output files (used as a plain string
            prefix, so it should end with a separator).
        dataframe: DataFrame with a ``MATR_ALUNO`` column plus whatever the
            individual analyses read.
    """
    all_grrs = list(dataframe["MATR_ALUNO"].drop_duplicates())

    # (JSON key, {student id: value}) pairs; each analysis runs exactly once.
    analises = [
        ("posicao_turmaIngresso_semestral",
         posicao_turmaIngresso_semestral(dataframe)),
        ("periodo_real", periodo_real(dataframe)),
        ("periodo_pretendido", periodo_pretendido(dataframe)),
        ("ira_semestral", ira_semestral(dataframe)),
        ("ira_por_quantidade_disciplinas",
         ira_por_quantidade_disciplinas(dataframe)),
        ("indice_aprovacao_semestral", indice_aprovacao_semestral(dataframe)),
        ("aluno_turmas", aluno_turmas(dataframe)),
    ]

    for grr in all_grrs:
        # A student may be missing from an analysis (for instance when all
        # of their rows were filtered out); store None instead of raising
        # KeyError.
        student = {name: result.get(grr) for name, result in analises}

        # str() so a non-string id does not break the file name.
        save_json(path + str(grr) + ".json", student)
def main():
    """Load the report sheets from ``./base``, build the cache and print timings."""
    # time.clock() was removed in Python 3.8; process_time() is the
    # supported way to measure CPU time.
    start_time = time.process_time()
    start_time_exec = time.time()

    dataframe = load_dataframes(os.getcwd() + '/' + 'base')
    build_cache(dataframe)

    cpu_time = timedelta(seconds=round(time.process_time() - start_time))
    run_time = timedelta(seconds=round(time.time() - start_time_exec))
    print("--- Tempo de CPU: {} ---".format(cpu_time))
    print("--- Tempo total: {} ---".format(run_time))
# == Evasion Form == #
class EvasionForm:
    # Numeric codes for how a student left (or has not left) the course.
    EF_DESCONHECIDO = 0
    EF_ATIVO = 1
    EF_FORMATURA = 2
    EF_ABANDONO = 3
    EF_DESISTENCIA_VESTIBULAR = 4
    EF_CANCELAMENTO = 5
    EF_NAO_CONFIRMACAO_VAGA = 6
    EF_NOVO_VESTIBULAR = 7
    EF_TRANSFERENCIA_EXTERNA = 8
    EF_REOPCAO = 9
    EF_DESISTENCIA = 10
    EF_JUBILAMENTO = 11
    EF_DESCUMPRIMENTO_EDITAL = 12
    EF_FALECIMENTO = 13
    EF_TERMINO_REG_TEMP = 14
    EF_REINTEGRACAO = 15
    EF_OUTROS = 100

    # (code, label) pairs.  The labels are compared against the text found
    # in the report sheets, so they must be correctly encoded Portuguese;
    # the previous mojibake ("Sem evasÃ£o", ...) could never match.
    EVASION_FORM = (
        (EF_DESCONHECIDO, 'Desconhecido'),
        (EF_ATIVO, 'Sem evasão'),
        (EF_FORMATURA, 'Formatura'),
        (EF_ABANDONO, 'Abandono'),
        (EF_DESISTENCIA_VESTIBULAR, 'Desistência Vestibular'),
        (EF_CANCELAMENTO, 'Cancelamento'),
        (EF_NAO_CONFIRMACAO_VAGA, 'Não Confirmação de Vaga'),
        (EF_NOVO_VESTIBULAR, 'Novo Vestibular'),
        (EF_TRANSFERENCIA_EXTERNA, 'Transferência Externa'),
        (EF_REOPCAO, 'Reopção'),
        (EF_DESISTENCIA, 'Desistência'),
        (EF_JUBILAMENTO, 'Jubilamento'),
        (EF_DESCUMPRIMENTO_EDITAL, 'Descumprimento Edital'),
        (EF_FALECIMENTO, 'Falecimento'),
        (EF_TERMINO_REG_TEMP, 'Término de Registro Temporário'),
        (EF_REINTEGRACAO, 'Reintegração'),
        (EF_OUTROS, 'Outro'),
    )
def ensure_path_exists(complete_path):
    """Create ``complete_path`` and any missing parent directories.

    Args:
        complete_path: directory path, relative or absolute.  An empty
            string (for example ``os.path.dirname`` of a bare file name) is
            accepted and ignored.

    Returns:
        None.
    """
    # Nothing to do for '' or for a path that already exists.  The original
    # split on '/' and called os.mkdir('') for absolute or empty paths,
    # which raises FileNotFoundError.
    if not complete_path or os.path.exists(complete_path):
        return

    os.makedirs(complete_path, exist_ok=True)
class HistoryData(models.Model):
    """One academic-history record tying a student to a degree.

    Besides the two relations, the row stores the year/semester, the
    situation text, numeric grade, letter grade, course code, credits,
    course type and attendance frequency exactly as declared below.
    """
    degree = models.ForeignKey(Degree, on_delete=models.CASCADE)
    # Lazy "app_label.Model" reference: ``Student`` is not imported in this
    # module, so naming the class directly raised NameError at import time.
    # NOTE(review): assumes the app label of src/student is "student", the
    # same convention as the 'klass.Klass' reference used by Student.
    student = models.ForeignKey('student.Student', on_delete=models.CASCADE)
    year = models.PositiveIntegerField(null=True, blank=True)
    semester = models.PositiveIntegerField(null=True, blank=True)
    situation = models.CharField(max_length=255)
    grade = models.FloatField()
    letter = models.CharField(max_length=2)
    # The course is identified by its code only; no relation to Course is
    # declared on this model.
    course_code = models.CharField(max_length=5)
    credits = models.PositiveIntegerField()
    course_type = models.CharField(max_length=255)
    frequency = models.FloatField()


class CourseCurriculumMongo(Document):
    """MongoEngine document holding a single integer ``start_year``."""
    start_year = fields.IntField()


class CurriculumMongo(Document):
    """MongoEngine document listing ``CourseCurriculumMongo`` documents."""
    # CourseCurriculumMongo is a top-level Document (it is saved and queried
    # on its own), so it must be linked with ReferenceField;
    # EmbeddedDocumentField only accepts EmbeddedDocument subclasses.
    courses = fields.ListField(fields.ReferenceField('CourseCurriculumMongo'))


class DegreeMongo(Document):
    """MongoEngine document for a degree: a name plus its curricula."""
    name = fields.StringField(max_length=50)
    # Same reasoning as CurriculumMongo.courses: CurriculumMongo is a
    # Document, hence a reference rather than an embedded field.
    curriculum = fields.ListField(fields.ReferenceField('CurriculumMongo'))
import sys
import os
import django
import time
import json
import io
import math

from datetime import timedelta
from pathlib import Path

# Django must be configured before any model module is imported.
sys.path.append(os.getcwd())
os.environ["DJANGO_SETTINGS_MODULE"] = "adega.settings"
django.setup()

from django.db import models
from student.models import *
from course.models import *
from degree.models import *
from admission.models import *

from student.analysis import *
from course.analysis import *
from degree.analysis import *
from admission.analysis import *
from utils.data import *

# ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere.
try:
    to_unicode = unicode
except NameError:
    to_unicode = str


def main():
    """Print the stored ``DegreeMongo`` documents and report timings.

    This is a scratch driver for the MongoEngine models; the JSON cache
    generation (``generate_data``) is not invoked from here.
    """
    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # replacement.
    cpu_start = time.process_time()
    wall_start = time.time()

    new_degree = DegreeMongo.objects()
    for nd in new_degree:
        print(nd.name)
        for curriculum in nd.curriculum:
            for _ in curriculum.courses:
                # The document declares ``start_year``; filtering on the
                # undeclared name ``start`` cannot be resolved by MongoEngine.
                print(CourseCurriculumMongo.objects(start_year=1933))
    print(new_degree)
    print("--- Tempo de cpu: {} ---".format(
        timedelta(seconds=round(time.process_time() - cpu_start))))
    print("--- Duração real: {} ---".format(
        timedelta(seconds=round(time.time() - wall_start))))


def generate_data():
    """Run every generator for every ``Degree``.

    Output for each degree goes under ``cache/curso/<degree.code>``; the
    ``cache/curso`` directory is created even when there are no degrees.
    """
    os.makedirs('cache/curso', exist_ok=True)

    degrees = Degree.objects.all()

    for degree in degrees:
        path = 'cache/curso/' + degree.code
        os.makedirs(path, exist_ok=True)

        generate_degree_data(degree, path)
        generate_student_data(degree, path)
        generate_student_list_data(degree, path)
        generate_admission_data(degree, path)
        generate_admission_list_data(degree, path)
        generate_course_data(degree, path)
        generate_course_general_data(degree, path)
        generate_cepe9615_data(degree, path)


def generate_degree_data(degree, path):
    """Compute the degree-level indicators and write ``<path>/degree.json``.

    The indicator values come from the analysis helpers imported at the top
    of the file; this function only assembles them into one dictionary and
    serializes it as UTF-8 JSON (4-space indent, sorted keys).
    """
    print("Fazendo analises do Curso - {}".format(degree.name))
    # Indexed below: item 0 is stored as the graduation rate, item 1 as the
    # student amount.
    average_grad = average_graduation(degree)

    dic = merge_dicts(
        ['average_ira', 'semester_evasion', 'graduation'],
        graph_average_ira(degree),
        graph_average_ira_evasion_semester(degree),
        graph_average_ira_graduation(degree)
    )

    degree_data = {
        'time_graduation': average_time_graduation_degree(degree),
        'graduation_rate': average_grad[0],
        'student_amount': average_grad[1],
        'failure_rate': average_general_failure_standard_deviation(degree),
        'failure_actives': average_actives_failure_standard_deviation(degree),
        'ira_average': calculate_average_general_ira_standard_deviation(degree),
        'ira_actives': calculate_average_actives_ira_standard_deviation(degree),
        'evasion_rate': calculate_general_evasion(degree),
        # The two graph series are stored as JSON strings inside the JSON.
        'average_ira_graph': json.dumps(sorted(dic.items())),
        'evasion_graph': json.dumps(sorted(graph_evasion(degree).items())),
        'retirement': student_retirement(degree),
        'amount_student_actives': amount_student_actives(degree),
        'amount_locking': student_lock(degree),
        'gradueted': student_gradueted(degree)
    }

    with io.open(path + '/degree.json', 'w', encoding='utf8') as output:
        str_ = json.dumps(degree_data, indent=4, sort_keys=True,
                          separators=(',', ': '), ensure_ascii=False)
        output.write(to_unicode(str_))


def generate_student_data(degree, path):
    """Write one ``<path>/student/<grr>.json`` file per student of ``degree``.

    Students are selected through ``admission__degree``.  For each one the
    per-semester series (IRA, pass rate, position) are keyed by the first two
    '/'-separated parts of the semester key, and the student's klasses are
    listed with grade, course name/code, situation, year and semester.
    Progress is printed every 100 students.
    """
    print("\t- Fazendo analises dos alunos")
    students = Student.objects.filter(admission__degree=degree)
    students_amount = students.count()
    student_path = path + '/student'
    os.makedirs(student_path, exist_ok=True)

    for counter, student in enumerate(students, start=1):
        # Queried once and reused for both the pass rate and the klass list.
        student_klasses = StudentKlass.objects.filter(student=student)
        amount_courses_semester = get_amount_courses_completed(student)
        failures_semester = semester_pass_rate(student)
        failures_amount_courses_semester = merge_dicts(
            ['reprovacoes', 'cursadas'],
            failures_semester,
            amount_courses_semester,
        )

        ira_courses = sorted(ira_amount_courses(student).items())
        pass_rate = calculate_pass_rate(student_klasses)
        pass_rate_semester = sorted(failures_amount_courses_semester.items())
        position = sorted(get_student_position(student).items())
        real_period = get_real_period(student)
        intended_period = get_intended_period(student)

        dict_ira_semester = {}
        dict_ira_amount_courses = {}
        dict_position = {}
        dict_pass = {}

        # The three sorted series are walked in lockstep; zip stops at the
        # shortest one.
        for ira_item, pass_item, pos_item in zip(
                ira_courses, pass_rate_semester, position):
            d_pass, d_done = pass_item[1]
            date = pass_item[0].split('/')
            data = '{}/{}'.format(date[0], date[1])
            semester_ira = ira_item[1][0]

            dict_ira_semester[data] = semester_ira
            dict_ira_amount_courses[data] = [semester_ira, d_done]
            dict_position[data] = pos_item[1]
            dict_pass[data] = [d_pass, d_done]

        student_klass = [
            {
                'grade': sk.grade,
                'name': sk.klass.course.name,
                'code': sk.klass.course.code,
                'situation': sk.situation,
                'year': sk.klass.year,
                'semester': sk.klass.semester
            }
            for sk in student_klasses
        ]

        student_data = {
            'ira_semester': dict_ira_semester,
            'semester_pass_rate': dict_pass,
            'position': dict_position,
            'ira_amount_courses': dict_ira_amount_courses,
            'pass_rate': pass_rate,
            'intended_period': intended_period,
            'real_period': real_period,
            'student_klass': student_klass
        }

        with io.open(student_path + '/' + student.grr + '.json', 'w',
                     encoding='utf8') as output:
            str_ = json.dumps(student_data, indent=3, sort_keys=True,
                              separators=(',', ': '), ensure_ascii=False)
            output.write(to_unicode(str_))

        if counter % 100 == 0:
            print("\t\t- %d alunos processados de %d" % (counter, students_amount))


def generate_student_list_data(degree, path):
    """Placeholder: only prints a progress line; nothing is generated yet."""
    print("\t- Criando lista de alunos")


def generate_admission_data(degree, path):
    """Placeholder: not implemented, does nothing."""
    return


def generate_admission_list_data(degree, path):
    """Placeholder: not implemented, does nothing."""
    return


def generate_course_data(degree, path):
    """Placeholder: only prints a progress line; nothing is generated yet."""
    print("\t- Fazendo analises das disciplinas")


def generate_course_general_data(degree, path):
    """Placeholder: only prints a progress line; nothing is generated yet."""
    print("\t- Fazendo analise geral das disciplinas")


def generate_cepe9615_data(degree, path):
    """Placeholder: not implemented, does nothing."""
    return


if __name__ == '__main__':
    main()