diff --git a/.gitignore b/.gitignore index 309a1d5dcbdc37a40a15c515bdb0d1c5820ad7ab..9bff41c6e983e1ef1c225b840f00a293e9af6545 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ src/cache *.json +src/.coverage + # lixo diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..56f6ce0d7a7708af0698eeff0df1671450d854f7 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,12 @@ +before_script: + - make install + - pipenv shell + - python --version + +django-tests: + script: + - cd src + - python manage.py test + + + diff --git a/README.md b/README.md index f9940a5766818d94b88804a857925c3ad26181db..828411075f77d7719dc3986fadf8bc3dac413e59 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,9 @@ $ git clone git@gitlab.c3sl.ufpr.br:pet/A.D.E.G.A.git ```bash -sudo make install-dev +sudo make install +make install-user +pipenv install --dev ``` se você possui o arquivo do banco de dados compartilhado internamente pelos diff --git a/makefile b/makefile index d16a18f1e0b60178a26e6318e8a7e584ceb6cc66..f1aafd6e4879e83bab36663c81c9ecfedc260459 100644 --- a/makefile +++ b/makefile @@ -11,9 +11,8 @@ clean-deploy: clean @rm -rf static db.sqlite3 coverage: - coverage run --source='.' manage.py test - coverage html - xdg-open htmlcov/index.html + (cd src; coverage run --source='.' manage.py test; coverage html) + mv src/htmlcov . docs: @@ -34,9 +33,10 @@ install: apt-get install -y python3-pip apt-get install -y libpq-dev apt-get install -y postgresql postgresql-contrib - pip3 install --user -U pip setuptools pipenv + bash <<< '(which pip || ln -s /usr/bin/pip3 /usr/bin/pip)' + +install-user: + pip3 install --user -U pip setuptools pipenv==9.0.3 pipenv install -install-dev: install - pipenv install --dev diff --git a/src/adega/settings.py b/src/adega/settings.py index d9a977220efac5dcf5fb14f6e6c0da9231a5306a..af7831d07570646e51bb44e86140d2f4216dbfce 100644 --- a/src/adega/settings.py +++ b/src/adega/settings.py @@ -37,7 +37,6 @@ INSTALLED_APPS = [ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', - 'django_extensions', 'adega', diff --git a/src/script/base/dataframe_base.py b/src/script/base/dataframe_base.py index 261d9b84d07c7eb8e26ffccac2ccdb92203a5475..51a2eb1b21498b933c2dcc1941e494123572d9ff 100644 --- a/src/script/base/dataframe_base.py +++ b/src/script/base/dataframe_base.py @@ -1,114 +1,106 @@ -import re import os -import sys import pandas as pd import numpy as np -from glob import glob -from json import load as json_load from script.utils.situations import * - class DataframeHolder: - def __init__(self, dataframe): - self.students = dataframe.groupby('MATR_ALUNO') - self.courses = dataframe.groupby('COD_ATIV_CURRIC') - self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO']) + def __init__(self, dataframe): + self.students = dataframe.groupby('MATR_ALUNO') + self.courses = dataframe.groupby('COD_ATIV_CURRIC') + self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO']) def load_dataframes(cwd='.'): - dataframes = [] - for path, dirs, files in os.walk(cwd): - for f in files: - file_path = path + '/' + f - dh = {'name': f, 'dataframe': None} - if 'csv' in f: - dh['dataframe'] = read_csv(file_path) - if 'xls' in f: - dh['dataframe'] = read_excel(file_path) + dataframes = [] + for path, dirs, files in os.walk(cwd): + for f in files: + file_path = path + '/' + f + dh = {'name': f, 'dataframe': None} + if 'csv' in f: + dh['dataframe'] = read_csv(file_path) + if 'xls' in f: + dh['dataframe'] = read_excel(file_path) - if dh['dataframe'] is not None: - dataframes.append(dh) + if dh['dataframe'] is not None: + dataframes.append(dh) - dataframe = fix_dataframes(dataframes) + dataframe = fix_dataframes(dataframes) - dh = DataframeHolder(dataframe) - #~ dh.students.aggregate(teste) -# print(dh.students['MEDIA_FINAL'].aggregate(teste)) - return dataframe + return dataframe def read_excel(path, planilha='Planilha1'): - return pd.read_excel(path) + return pd.read_excel(path) def read_csv(path): - return pd.read_csv(path) + return pd.read_csv(path) def fix_dataframes(dataframes): - for df in dataframes: - if df['name'] == 'historico.xls' or df['name'] == 'historico.csv': - history = df['dataframe'] - if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv': - register = df['dataframe'] - - clean_history(history) - clean_register(register) - #~ df.dropna(axis=0, how='all') - history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce') - history = history[np.isfinite(history['MEDIA_FINAL'])] - - - merged = pd.merge(history, register, how='right', on=['MATR_ALUNO']) - #~ print(merged) - fix_situation(merged) -# fix_admission(merged) - fix_evasion(merged) - - return merged + for df in dataframes: + if df['name'] == 'historico.xls' or df['name'] == 'historico.csv': + history = df['dataframe'] + if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv': + register = df['dataframe'] + + clean_history(history) + clean_register(register) + # ~ df.dropna(axis=0, how='all') + history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce') + history = history[np.isfinite(history['MEDIA_FINAL'])] + + merged = pd.merge(history, register, how='right', on=['MATR_ALUNO']) + # ~ print(merged) + fix_situation(merged) + # fix_admission(merged) + fix_evasion(merged) + + return merged def clean_history(df): - df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO', - 'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO', - 'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR' - ], axis=1, inplace=True) - df['PERIODO'] = df['PERIODO'].str.split('o').str[0] + df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO', + 'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO', + 'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR' + ], axis=1, inplace=True) + df['PERIODO'] = df['PERIODO'].str.split('o').str[0] + def clean_register(df): - df_split = df['PERIODO_INGRESSO'].str.split('/') - df['ANO_INGRESSO'] = df_split.str[0] - df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0] - df_split = df['PERIODO_EVASAO'].str.split('/') - df['ANO_EVASAO'] = df_split.str[0] - df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0] + df_split = df['PERIODO_INGRESSO'].str.split('/') + df['ANO_INGRESSO'] = df_split.str[0] + df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0] + df_split = df['PERIODO_EVASAO'].str.split('/') + df['ANO_EVASAO'] = df_split.str[0] + df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0] - df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE', - 'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO', - ],axis=1, inplace=True) + df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE', + 'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO', + ], axis=1, inplace=True) def fix_situation(df): - for situation in Situation.SITUATIONS: - df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0] + for situation in Situation.SITUATIONS: + df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0] def fix_admission(df): - for adm in AdmissionType.ADMISSION_FORM: - df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0] + for adm in AdmissionType.ADMISSION_FORM: + df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0] def fix_evasion(df): - evasionForms = [x[1] for x in EvasionForm.EVASION_FORM] - df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100 - for evasion in EvasionForm.EVASION_FORM: - #~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0] - df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0] - - #~ if(evasion[0] == 100): - #~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False): - #~ if(x != 0.0): - #~ print(x) - #~ print(df.FORMA_EVASAO.str.contains(evasion[1]).fillna(5)) - #~ print(df[['MATR_ALUNO','FORMA_EVASAO']]) + evasionForms = [x[1] for x in EvasionForm.EVASION_FORM] + df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100 + for evasion in EvasionForm.EVASION_FORM: + # ~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0] + df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0] + + # ~ if(evasion[0] == 100): + # ~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False): + # ~ if(x != 0.0): + # ~ print(x) +# ~ print(df.FORMA_EVASAO.str.contains(evasion[1]).fillna(5)) +# ~ print(df[['MATR_ALUNO','FORMA_EVASAO']]) diff --git a/src/script/main.py b/src/script/main.py index dbfb53942ae354b5b9ac625b95c335db2937612f..a87bb15069645f737c2f853dd22ea4b01ce555eb 100644 --- a/src/script/main.py +++ b/src/script/main.py @@ -3,25 +3,38 @@ import time from datetime import timedelta from script.base.dataframe_base import load_dataframes from script.build_cache import build_cache -from script.analysis.degree_analysis import * +def analyze(submission): + start_time = time.clock() + start_time_exec = time.time() + + dataframe = load_dataframes(submission.path()) + + build_cache(dataframe) + + cpu_time = timedelta(seconds=round(time.clock() - start_time)) + run_time = timedelta(seconds=round(time.time() - start_time_exec)) + print("--- Tempo de CPU: {} ---".format(cpu_time)) + print("--- Tempo total: {} ---".format(run_time)) + def main(): - start_time = time.clock() - start_time_exec = time.time() + start_time = time.clock() + start_time_exec = time.time() + + dataframe = load_dataframes(os.getcwd() + '/script/' + 'base') + # ~ for i, line in enumerate(dataframe): + # ~ print(type(dataframe["MEDIA_FINAL"][i])) + # ~ print(dataframe["MEDIA_FINAL"][i]) + # ~ print(dataframe) + build_cache(dataframe) - dataframe = load_dataframes(os.getcwd() + '/script/' + 'base') - #~ for i, line in enumerate(dataframe): - #~ print(type(dataframe["MEDIA_FINAL"][i])) - #~ print(dataframe["MEDIA_FINAL"][i]) - #~ print(dataframe) - build_cache(dataframe) + cpu_time = timedelta(seconds=round(time.clock() - start_time)) + run_time = timedelta(seconds=round(time.time() - start_time_exec)) + print("--- Tempo de CPU: {} ---".format(cpu_time)) + print("--- Tempo total: {} ---".format(run_time)) - cpu_time = timedelta(seconds=round(time.clock() - start_time)) - run_time = timedelta(seconds=round(time.time() - start_time_exec)) - print("--- Tempo de CPU: {} ---".format(cpu_time)) - print("--- Tempo total: {} ---".format(run_time)) if __name__ == "__main__": - main() + main() diff --git a/src/uploads/admin.py b/src/uploads/admin.py index 8c38f3f3dad51e4585f3984282c2a4bec5349c1e..f6770b002b386aa4a54922d3d8c86872aada55f9 100644 --- a/src/uploads/admin.py +++ b/src/uploads/admin.py @@ -1,3 +1,36 @@ + from django.contrib import admin +from .models import Submission + +from script.main import analyze + +from traceback import print_exc + + +def make_analysis(modeladmin, request, queryset): + for submission in queryset: + try: + print('analisando: '+str(submission)) + analyze(submission) + + submission.processed = True + + print('salvando') + submission.save() + + print('OK') + except: + print('Análise falhou') + + print_exc() + + +class SubmissionAdmin(admin.ModelAdmin): + date_hierarchy = 'timestamp' + + list_display = ('author', 'course', 'processed', 'last', 'timestamp') + + actions = [make_analysis] + -# Register your models here. +admin.site.register(Submission, SubmissionAdmin) \ No newline at end of file diff --git a/src/uploads/forms.py b/src/uploads/forms.py deleted file mode 100644 index b2ad085c4f040fbb9a1012f98422addf547329e4..0000000000000000000000000000000000000000 --- a/src/uploads/forms.py +++ /dev/null @@ -1,9 +0,0 @@ -from django import forms - -from uploads.models import Document - - -class DocumentForm(forms.ModelForm): - class Meta: - model = Document - fields = ('description', 'document', ) diff --git a/src/uploads/models.py b/src/uploads/models.py index 19b74accf00d8420d22578bcaf7fd5ea340f0bb8..b2786fcaf539dc9e12c9a0a62bbc45df8a778bf8 100644 --- a/src/uploads/models.py +++ b/src/uploads/models.py @@ -1,9 +1,33 @@ -from __future__ import unicode_literals - from django.db import models +from django.contrib.auth.models import User +from django.utils import timezone + +from os import path +from django.conf import settings + + +def get_path(instance, filename): + return '{}/{}/{}'.format(instance.course, instance.id, filename) + + +class Submission(models.Model): + author = models.ForeignKey(User) + + historico = models.FileField(upload_to=get_path) + matricula = models.FileField(upload_to=get_path) + + course = models.CharField(max_length=10, default='21A') + + timestamp = models.DateTimeField(default=timezone.now) + + last = models.BooleanField(default=True) + + processed = models.BooleanField(default=False) + def path(self): + return path.join(settings.MEDIA_ROOT, self.course, str(self.id)) -class Document(models.Model): - description = models.CharField(max_length=255, blank=True) - document = models.FileField(upload_to='documents/') - uploaded_at = models.DateTimeField(auto_now_add=True) + def __str__(self): + return 'Submission (from: {}, to: {}, on: {})'.format(self.author.first_name, + self.course, + self.timestamp) \ No newline at end of file diff --git a/src/uploads/views.py b/src/uploads/views.py index 1c01f80517775b32e74027106704c6763ca73c7c..1c0ba656eb4390fd2aab4744c034c3a2ee4d77f4 100644 --- a/src/uploads/views.py +++ b/src/uploads/views.py @@ -1,45 +1,44 @@ from django.shortcuts import render, redirect from django.core.files.storage import FileSystemStorage -from uploads.models import Document -from uploads.forms import DocumentForm -from script.main import main as analysis -import os +from uploads.models import Submission + +from uploads.core.models import Document +from uploads.core.forms import DocumentForm + def home(request): - documents = Document.objects.all() - return render(request, 'uploads/home.html', { 'documents': documents }) + documents = Document.objects.all() + return render(request, 'core/home.html', {'documents': documents}) def simple_upload(request): - - if request.method == 'POST' and request.FILES['historico'] and request.FILES['matricula']: - myfile = request.FILES['historico'] - fs = FileSystemStorage() - filename = fs.save(myfile.name, myfile) - uploaded_file_url = fs.url(filename) - - myfile = request.FILES['matricula'] - fs = FileSystemStorage() - filename = fs.save(myfile.name, myfile) - uploaded_file_url = fs.url(filename) - - analysis() - os.system("rm script/base/*.csv; rm script/base/*.xls;") - return render(request, 'uploads/simple_upload.html', { - 'uploaded_file_url': uploaded_file_url - }) - return render(request, 'uploads/simple_upload.html') + if request.method == 'POST' and request.FILES['historico'] and request.FILES['matricula']: + + submission = Submission.objects.create(author=request.user) + submission.course = '21A' + + fs = FileSystemStorage(location=submission.path()) + + fs.save('historico.xls', request.FILES['historico']) + fs.save('matricula.xls', request.FILES['matricula']) + + submission.historico.name = submission.path() + '/historico.xls' + submission.matricula.name = submission.path() + '/matricula.xls' + + submission.save() + + return render(request, 'core/simple_upload.html') def model_form_upload(request): - if request.method == 'POST': - form = DocumentForm(request.POST, request.FILES) - if form.is_valid(): - form.save() - return redirect('uploads:home') - else: - form = DocumentForm() - return render(request, 'uploads/model_form_upload.html', { - 'form': form - }) + if request.method == 'POST': + form = DocumentForm(request.POST, request.FILES) + if form.is_valid(): + form.save() + return redirect('home') + else: + form = DocumentForm() + return render(request, 'core/model_form_upload.html', { + 'form': form + })