import os
import pathlib
import re
import sys
from glob import glob
from json import load as json_load
from pathlib import Path

import pandas as pd


def start(directory='relatorios'):
    """Load every report file found under ``directory``.

    Walks the tree rooted at ``directory`` and hands each file to
    :func:`file_open`, printing the path of every file it visits.

    Args:
        directory: Root folder to scan. Defaults to ``'relatorios'``.

    Returns:
        A list with one DataFrame per file, in the order ``os.walk``
        yields them.
    """
    frames = []
    # Walk with a text path: a bytes root makes os.walk yield bytes, and
    # str() of a bytes object is its repr ("b'...'"), not a file name.
    for root, _subdirs, files in os.walk(directory):
        for name in files:
            file_path = pathlib.PurePath(root, name)
            print(file_path)
            frames.append(file_open(file_path))
    return frames


def file_open(path):
    """Read a single report file into a DataFrame.

    Args:
        path: Location of the file (``str`` or path object).

    Returns:
        The DataFrame produced by ``pandas.read_csv`` when the file has
        a ``.csv`` extension, otherwise by ``pandas.read_excel``.
    """
    # Decide on the extension. str.find() returns -1 (truthy) when the
    # text is absent and 0 (falsy) when it is at the start, so it cannot
    # be used as a boolean test.
    if pathlib.PurePath(path).suffix.lower() == '.csv':
        print('csv')
        return pd.read_csv(str(path))
    print('excel')
    return pd.read_excel(str(path))


if __name__ == '__main__':
    start()
import math
import os
import re
import sys
import time
from datetime import timedelta
from glob import glob
from json import load as json_load
from pathlib import Path

import numpy as np
import pandas as pd

# Text-type alias: ``unicode`` where that builtin exists, ``str`` otherwise.
try:
    to_unicode = unicode
except NameError:
    to_unicode = str


def _situation_catalogue():
    """Return the project's ``Situation`` class.

    Imported on first use so that this code can be imported without the
    project's ``utils`` package being on ``sys.path``.
    """
    from utils.situations import Situation
    return Situation


# == Degree analysis == #

def average_graduation(df):
    """Return the fraction of students whose FORMA_EVASAO is 'Formatura'.

    Rows containing any missing value are discarded before counting.

    Args:
        df: DataFrame with a ``FORMA_EVASAO`` column.

    Returns:
        Graduates divided by total rows, or ``0.0`` when no complete
        row is left (instead of raising ``ZeroDivisionError``).
    """
    complete = df.dropna(axis=0)
    total_students = complete.shape[0]
    if total_students == 0:
        return 0.0
    graduates = complete[complete.FORMA_EVASAO == 'Formatura']
    return graduates.shape[0] / total_students


def general_failure(df):
    """Return the overall failure rate and its spread across students.

    Only rows whose SITUACAO is listed in
    ``Situation.SITUATION_AFFECT_IRA`` are considered; a row is a
    failure when its SITUACAO is in ``Situation.SITUATION_FAIL``.

    Args:
        df: DataFrame with ``SITUACAO`` and ``MATR_ALUNO`` columns.

    Returns:
        ``(average, standard_deviation)``. ``average`` is failures over
        considered rows; ``standard_deviation`` is the root mean square
        distance between each student's own failure rate and
        ``average``. ``(0.0, 0.0)`` when no row is considered.
    """
    situation = _situation_catalogue()
    considered = df.dropna(axis=0)
    considered = considered[
        considered.SITUACAO.isin(situation.SITUATION_AFFECT_IRA)]
    if considered.empty:
        return (0.0, 0.0)

    failed = considered.SITUACAO.isin(situation.SITUATION_FAIL)
    average = float(failed.mean())

    # Group the boolean flag directly so that students with no failure
    # still contribute a rate of 0; an inner merge of the "taken" and
    # "failed" counts would silently drop them.
    rate_per_student = failed.groupby(considered.MATR_ALUNO).mean()
    variance = float(rate_per_student.sub(average).pow(2).mean())
    return (average, math.sqrt(variance))


def general_ira(df):
    """Return mean and standard deviation of MEDIA_FINAL.

    Considers rows without missing values whose SITUACAO is in
    ``Situation.SITUATION_AFFECT_IRA`` and whose MEDIA_FINAL is at most
    100.

    Args:
        df: DataFrame with ``SITUACAO`` and ``MEDIA_FINAL`` columns.

    Returns:
        ``(mean, std)`` as computed by pandas.
    """
    situation = _situation_catalogue()
    complete = df.dropna(axis=0)
    # Build the mask from the filtered frame itself so mask and frame
    # share the same index.
    graded = complete[complete.SITUACAO.isin(situation.SITUATION_AFFECT_IRA)]
    graded = graded[graded.MEDIA_FINAL <= 100]
    return (graded.MEDIA_FINAL.mean(), graded.MEDIA_FINAL.std())


# == DataFrame loading == #

def load_dataframes(cwd='.'):
    """Load every CSV/Excel file found under ``cwd``.

    Args:
        cwd: Root folder to scan recursively.

    Returns:
        A list of ``{'name': file_name, 'dataframe': DataFrame}`` dicts,
        one per file whose extension is ``.csv`` or starts with
        ``.xls``. Other files are ignored.
    """
    dataframes = []
    for path, _dirs, files in os.walk(cwd):
        for f in files:
            # Dispatch on the real extension; a substring test would
            # also match names such as 'csv_notes.txt'.
            extension = os.path.splitext(f)[1].lower()
            if extension == '.csv':
                reader = read_csv
            elif extension.startswith('.xls'):
                reader = read_excel
            else:
                continue
            file_path = os.path.join(path, f)
            dataframes.append({'name': f, 'dataframe': reader(file_path)})

    return dataframes


def read_excel(path, planilha='Planilha1'):
    """Read an Excel workbook with ``pandas.read_excel``.

    Args:
        path: Workbook location.
        planilha: Accepted for compatibility; it is not forwarded to
            pandas, so the reader's default sheet is loaded.
    """
    return pd.read_excel(path)


def read_csv(path):
    """Read a CSV file with ``pandas.read_csv``."""
    return pd.read_csv(path)


def fix_dataframes(dataframes):
    """Apply :func:`fix_situation` to each entry's ``'dataframe'``."""
    for df in dataframes:
        fix_situation(df['dataframe'])


def fix_situation(df, situations=None):
    """Replace SITUACAO labels with their numeric codes, in place.

    Args:
        df: DataFrame to modify. Left untouched when it has no
            ``SITUACAO`` column.
        situations: Iterable of ``(code, label)`` pairs. Defaults to
            ``Situation.SITUATIONS``. When a pair labelled ``'Outro'``
            is present, every value that is still not a digit string
            after translation receives that pair's code.
    """
    if 'SITUACAO' not in df.columns:
        return
    if situations is None:
        situations = _situation_catalogue().SITUATIONS

    # Labels and integer codes share the column while translating, so
    # make sure it can hold both.
    if not pd.api.types.is_numeric_dtype(df['SITUACAO']):
        df['SITUACAO'] = df['SITUACAO'].astype(object)

    fallback_code = None
    for code, label in situations:
        df.loc[df['SITUACAO'] == label, 'SITUACAO'] = code
        if label == 'Outro':
            fallback_code = code

    if fallback_code is not None:
        unmatched = ~df['SITUACAO'].astype(str).str.isdigit()
        # Restrict the write to the SITUACAO column; omitting the column
        # label would overwrite every column of the unmatched rows.
        df.loc[unmatched, 'SITUACAO'] = fallback_code


# == Cache building == #

def build_cache(registry, history):
    """Create the ``cache/curso`` folder and run every cache generator.

    Args:
        registry: DataFrame passed to :func:`average_graduation`.
        history: DataFrame passed to :func:`general_failure` and
            :func:`general_ira`.
    """
    path = "cache/curso"
    # Creates "cache" and "cache/curso" as needed; a no-op when present.
    os.makedirs(path, exist_ok=True)

    generate_degree_data(path, registry, history)
    generate_student_data(path)
    generate_student_list(path)
    generate_admission_data(path)
    generate_admission_list(path)
    generate_course_data(path)
    generate_course_general_data(path)


def generate_degree_data(path, registry, history):
    """Compute the degree-level metrics.

    The results are not yet stored or returned; ``path`` is unused.
    """
    average_graduation(registry)
    general_failure(history)
    general_ira(history)


def generate_student_data(path):
    """Placeholder: not implemented yet."""
    pass


def generate_student_list(path):
    """Placeholder: not implemented yet."""
    pass


def generate_admission_data(path):
    """Placeholder: not implemented yet."""
    pass


def generate_admission_list(path):
    """Placeholder: not implemented yet."""
    pass


def generate_course_data(path):
    """Placeholder: not implemented yet."""
    pass


def generate_course_general_data(path):
    """Placeholder: not implemented yet."""
    pass


# == Script entry point == #

def main():
    """Load the sheets under ``./base``, build the cache, print timings.

    Raises:
        FileNotFoundError: when no loaded file name contains
            ``'historico'`` or none contains ``'matricula.xls'``.
    """
    # time.clock() was removed in Python 3.8; process_time() measures
    # the CPU time of this process.
    cpu_start = time.process_time()
    wall_start = time.time()

    dataframes = load_dataframes(os.getcwd() + '/' + 'base')
    fix_dataframes(dataframes)

    history = None
    registry = None
    for entry in dataframes:
        if 'historico' in entry['name']:
            history = entry['dataframe']
        if 'matricula.xls' in entry['name']:
            registry = entry['dataframe']

    if history is None or registry is None:
        raise FileNotFoundError(
            "expected a 'historico' file and a 'matricula.xls' file "
            "under the 'base' directory")

    build_cache(registry, history)

    cpu_time = timedelta(seconds=round(time.process_time() - cpu_start))
    run_time = timedelta(seconds=round(time.time() - wall_start))
    print("--- Tempo de CPU: {} ---".format(cpu_time))
    print("--- Tempo total: {} ---".format(run_time))


# == Admission Form == #
class AdmissionType:
    """Numeric codes for admission types and their display labels."""

    AT_DESCONHECIDO = 0
    AT_VESTIBULAR = 1
    AT_ENEM = 2
    AT_PROVAR = 3
    AT_REOPCAO = 4
    AT_TRANSFERENCIA_EX_OFICIO = 5
    AT_APROVEITAMENTO_CURSO = 6
    AT_MOBILIDADE = 7
    AT_REINTEGRACAO = 8
    AT_OUTROS = 100

    # (code, label) pairs.
    ADMISSION_FORM = (
        (AT_DESCONHECIDO, 'Desconhecido'),
        (AT_VESTIBULAR, 'Vestibular'),
        (AT_ENEM, 'ENEM'),
        (AT_PROVAR, 'PROVAR'),
        (AT_REOPCAO, 'Reopção de curso'),
        (AT_TRANSFERENCIA_EX_OFICIO, 'Transferência por ex-ofício'),
        (AT_APROVEITAMENTO_CURSO, 'Aproveitamento de curso'),
        (AT_MOBILIDADE, 'Mobilidade Acadêmica'),
        (AT_REINTEGRACAO, 'Reintegração'),
        (AT_OUTROS, 'Outro'),
    )


if __name__ == "__main__":
    main()
import os


# == Evasion Form == #
class EvasionForm:
    """Numeric codes for evasion forms and their display labels."""

    EF_DESCONHECIDO = 0
    EF_ATIVO = 1
    EF_FORMATURA = 2
    EF_ABANDONO = 3
    EF_DESISTENCIA_VESTIBULAR = 4
    EF_CANCELAMENTO = 5
    EF_NAO_CONFIRMACAO_VAGA = 6
    EF_NOVO_VESTIBULAR = 7
    EF_TRANSFERENCIA_EXTERNA = 8
    EF_REOPCAO = 9
    EF_DESISTENCIA = 10
    EF_JUBILAMENTO = 11
    EF_OUTROS = 100

    # (code, label) pairs.
    EVASION_FORM = (
        (EF_DESCONHECIDO, 'Desconhecido'),
        (EF_ATIVO, 'Ativo'),
        (EF_FORMATURA, 'Formado'),
        (EF_ABANDONO, 'Abandono'),
        (EF_DESISTENCIA_VESTIBULAR, 'Desistencia vestibular'),
        (EF_CANCELAMENTO, 'Cancelamento'),
        (EF_NAO_CONFIRMACAO_VAGA, 'Não confirmação de vaga'),
        (EF_NOVO_VESTIBULAR, 'Novo vestibular'),
        (EF_TRANSFERENCIA_EXTERNA, 'Transferência externa'),
        (EF_REOPCAO, 'Reopção de curso'),
        (EF_DESISTENCIA, 'Desistência'),
        (EF_JUBILAMENTO, 'Jubilado'),
        (EF_OUTROS, 'Outros'),
    )


# == Situation Courses == #
class Situation:
    """Numeric codes for course situations, labels and code groups."""

    SIT_DESCONHECIDA = 0

    SIT_APROVADO = 1
    SIT_REPROVADO = 2
    SIT_MATRICULA = 3

    SIT_REPROVADO_FREQ = 4
    SIT_EQUIVALENCIA = 5
    SIT_CANCELADO = 6

    SIT_DISPENSA_COM_NOTA = 7
    SIT_DISPENSA_SEM_NOTA = 8

    SIT_CONHECIMENTO_APROVADO = 9
    SIT_CONHECIMENTO_REPROVADO = 10

    SIT_TRANCAMENTO_TOTAL = 11
    SIT_TRANCAMENTO_ADMINISTRATIVO = 12
    SIT_REPROVADO_SEM_NOTA = 13
    # Own code: sharing 13 with SIT_REPROVADO_SEM_NOTA would make the two
    # labels indistinguishable once translated to numbers.
    SIT_HORAS = 14

    SIT_OUTROS = 100

    # (code, label) pairs.
    SITUATIONS = (
        (SIT_DESCONHECIDA, 'Desconhecido'),
        (SIT_APROVADO, 'Aprovado'),
        (SIT_REPROVADO, 'Reprovado por nota'),
        (SIT_MATRICULA, 'Matrícula'),
        (SIT_REPROVADO_FREQ, 'Reprovado por Frequência'),
        (SIT_EQUIVALENCIA, 'Equivalência de Disciplina'),
        (SIT_CANCELADO, 'Cancelado'),

        (SIT_DISPENSA_COM_NOTA, 'Dispensa de Disciplinas (com nota)'),
        (SIT_DISPENSA_SEM_NOTA, 'Dispensa de Disciplinas (sem nota)'),
        (SIT_CONHECIMENTO_APROVADO, 'Aprov Conhecimento'),

        (SIT_CONHECIMENTO_REPROVADO, 'Reprov Conhecimento'),
        (SIT_TRANCAMENTO_TOTAL, 'Trancamento Total'),
        (SIT_TRANCAMENTO_ADMINISTRATIVO, 'Trancamento Administrativo'),
        (SIT_REPROVADO_SEM_NOTA, 'Reprovado sem nota'),

        (SIT_HORAS, 'Horas'),

        (SIT_OUTROS, 'Outro'),
    )

    # Codes grouped under "affect IRA".
    SITUATION_AFFECT_IRA = (
        SIT_APROVADO,
        SIT_REPROVADO,
        SIT_REPROVADO_FREQ,
        SIT_DISPENSA_COM_NOTA,
        SIT_CONHECIMENTO_APROVADO,
        SIT_CONHECIMENTO_REPROVADO,
    )

    # Codes grouped as a pass.
    SITUATION_PASS = (
        SIT_APROVADO,
        SIT_CONHECIMENTO_APROVADO,
        SIT_DISPENSA_COM_NOTA,
    )

    # Codes grouped as a failure.
    SITUATION_FAIL = (
        SIT_REPROVADO,
        SIT_REPROVADO_FREQ,
        SIT_CONHECIMENTO_REPROVADO,
    )


def build_path(path):
    """Create directory ``path`` if it does not exist yet.

    Missing parent directories are created too, and an already existing
    directory is left alone.

    Args:
        path: Directory to create.
    """
    # exist_ok avoids the check-then-create race of testing
    # os.path.exists() before calling os.mkdir().
    os.makedirs(path, exist_ok=True)