Commit 2d0f7ecf authored by Jomaro Rodrigues
Browse files

analises feitas de maneira modular

parent f6389c86
import re
import os import os
import sys
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from glob import glob
from json import load as json_load
from script.utils.situations import * from script.utils.situations import *
class DataframeHolder:
    """Hold pre-computed group-by views of a dataframe.

    The constructor groups the given dataframe three ways and stores the
    resulting pandas group-by objects:

    * ``students``  -- grouped by the 'MATR_ALUNO' column
    * ``courses``   -- grouped by the 'COD_ATIV_CURRIC' column
    * ``admission`` -- grouped by the ('ANO_INGRESSO', 'SEMESTRE_INGRESSO')
      column pair
    """

    def __init__(self, dataframe):
        """Build the three group-by views from ``dataframe``."""
        self.students = dataframe.groupby('MATR_ALUNO')
        self.courses = dataframe.groupby('COD_ATIV_CURRIC')
        self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
def load_dataframes(cwd='.'):
    """Load every spreadsheet found under ``cwd`` and return the merged dataframe.

    The directory tree rooted at ``cwd`` is walked recursively. Files ending
    in ``.csv`` are read with ``read_csv`` and files ending in ``.xls`` or
    ``.xlsx`` with ``read_excel``; everything else is ignored. The loaded
    frames are handed to ``fix_dataframes`` as a list of
    ``{'name': <file name>, 'dataframe': <DataFrame>}`` dicts.

    :param cwd: root directory to search (defaults to the current directory).
    :return: whatever ``fix_dataframes`` returns for the loaded frames.
    """
    dataframes = []
    for path, _dirs, files in os.walk(cwd):
        for f in files:
            file_path = os.path.join(path, f)
            # Decide by the real extension: a plain substring test would also
            # match unrelated names such as "csv_notes.txt".
            extension = os.path.splitext(f)[1]
            if extension == '.csv':
                frame = read_csv(file_path)
            elif extension in ('.xls', '.xlsx'):
                frame = read_excel(file_path)
            else:
                continue
            dataframes.append({'name': f, 'dataframe': frame})

    return fix_dataframes(dataframes)
def read_excel(path, planilha='Planilha1'):
    """Read the Excel file at ``path`` into a dataframe.

    :param path: location of the spreadsheet.
    :param planilha: sheet name; kept for interface compatibility but NOT
        forwarded to pandas, so pandas' own default sheet selection applies.
    :return: the dataframe produced by ``pandas.read_excel``.
    """
    return pd.read_excel(path)
def read_csv(path):
    """Read the CSV file at ``path`` into a dataframe using pandas defaults."""
    return pd.read_csv(path)
def fix_dataframes(dataframes):
    """Clean the history and register frames and merge them into one dataframe.

    :param dataframes: list of ``{'name': ..., 'dataframe': ...}`` dicts. The
        history frame is the entry named 'historico.xls' or 'historico.csv';
        the register frame is the one named 'matricula.xls' or
        'matricula.csv'. Other entries are ignored.
    :return: the register right-joined with the history on 'MATR_ALUNO', after
        ``fix_situation`` and ``fix_evasion`` have been applied to it.
    :raises ValueError: if either the history or the register frame is absent.
    """
    history = None
    register = None
    for df in dataframes:
        if df['name'] in ('historico.xls', 'historico.csv'):
            history = df['dataframe']
        elif df['name'] in ('matricula.xls', 'matricula.csv'):
            register = df['dataframe']

    # Fail with a clear message instead of an UnboundLocalError further down.
    if history is None or register is None:
        raise ValueError(
            "both a 'historico' and a 'matricula' file (.xls or .csv) are required"
        )

    clean_history(history)
    clean_register(register)

    # Non-numeric grades become NaN and are then filtered out together with
    # any other non-finite value.
    history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
    history = history[np.isfinite(history['MEDIA_FINAL'])]

    # Right join: every row of the register is kept.
    merged = pd.merge(history, register, how='right', on=['MATR_ALUNO'])

    fix_situation(merged)
    # fix_admission(merged) is deliberately left disabled, as in the original.
    fix_evasion(merged)

    return merged
def clean_history(df):
    """Strip unused columns from the history frame and shorten 'PERIODO'.

    The frame is modified in place and nothing is returned.

    :param df: history dataframe; must contain every column listed below and
        a string-valued 'PERIODO' column.
    """
    unused_columns = [
        'ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
        'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
        'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR',
    ]
    df.drop(unused_columns, axis=1, inplace=True)

    # Keep only the text before the first 'o' (e.g. '1o. Semestre' -> '1').
    df['PERIODO'] = df['PERIODO'].str.split('o').str[0]
def clean_register(df):
    """Split the admission/evasion periods and drop unused register columns.

    For each of 'PERIODO_INGRESSO' and 'PERIODO_EVASAO' the value is split on
    '/': the first part is stored in 'ANO_<suffix>' and the second part, cut
    at its first 'o', in 'SEMESTRE_<suffix>'. The frame is modified in place
    and nothing is returned.

    :param df: register dataframe containing the two period columns and every
        column listed in the drop list below.
    """
    for suffix in ('INGRESSO', 'EVASAO'):
        parts = df['PERIODO_' + suffix].str.split('/')
        df['ANO_' + suffix] = parts.str[0]
        df['SEMESTRE_' + suffix] = parts.str[1].str.split('o').str[0]

    df.drop(
        ['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE',
         'COD_CURSO', 'NUM_VERSAO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO'],
        axis=1,
        inplace=True,
    )
def fix_situation(df):
    """Replace the values of the 'SITUACAO' column in place.

    Each entry of ``Situation.SITUATIONS`` is read as a pair: rows whose
    'SITUACAO' equals the entry's element 1 are overwritten with its
    element 0.

    :param df: dataframe with a 'SITUACAO' column; modified in place.
    """
    for situation in Situation.SITUATIONS:
        df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0]
def fix_admission(df):
    """Replace the values of the 'FORMA_INGRESSO' column in place.

    Each entry of ``AdmissionType.ADMISSION_FORM`` is read as a pair: rows
    whose 'FORMA_INGRESSO' equals the entry's element 1 are overwritten with
    its element 0.

    :param df: dataframe with a 'FORMA_INGRESSO' column; modified in place.
    """
    for adm in AdmissionType.ADMISSION_FORM:
        df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0]
def fix_evasion(df):
    """Replace the values of the 'FORMA_EVASAO' column in place.

    Rows whose value is not element 1 of any ``EvasionForm.EVASION_FORM``
    entry are first set to 100. Then, for every entry, rows equal to its
    element 1 are overwritten with its element 0.

    :param df: dataframe with a 'FORMA_EVASAO' column; modified in place.
    """
    evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]

    # Anything not listed in EVASION_FORM falls into the catch-all value 100.
    df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100

    for evasion in EvasionForm.EVASION_FORM:
        df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
...@@ -3,25 +3,38 @@ import time ...@@ -3,25 +3,38 @@ import time
from datetime import timedelta from datetime import timedelta
from script.base.dataframe_base import load_dataframes from script.base.dataframe_base import load_dataframes
from script.build_cache import build_cache from script.build_cache import build_cache
from script.analysis.degree_analysis import *
def analyze(submission):
    """Run the analysis for one submission and print how long it took.

    Loads the dataframes found under ``submission.path()``, passes the result
    to ``build_cache`` and prints the CPU time and the wall-clock time spent,
    both rounded to whole seconds.

    :param submission: object exposing a ``path()`` method that returns the
        directory holding the uploaded files.
    """
    # time.clock() was removed in Python 3.8; process_time() is the supported
    # way to measure CPU time.
    start_time = time.process_time()
    start_time_exec = time.time()

    dataframe = load_dataframes(submission.path())
    build_cache(dataframe)

    cpu_time = timedelta(seconds=round(time.process_time() - start_time))
    run_time = timedelta(seconds=round(time.time() - start_time_exec))
    print("--- Tempo de CPU: {} ---".format(cpu_time))
    print("--- Tempo total: {} ---".format(run_time))
def main():
    """Run the analysis over ``<cwd>/script/base`` and print how long it took.

    Loads the dataframes found in that directory, passes the result to
    ``build_cache`` and prints the CPU time and the wall-clock time spent,
    both rounded to whole seconds.
    """
    # time.clock() was removed in Python 3.8; process_time() is the supported
    # way to measure CPU time.
    start_time = time.process_time()
    start_time_exec = time.time()

    dataframe = load_dataframes(os.getcwd() + '/script/' + 'base')
    build_cache(dataframe)

    cpu_time = timedelta(seconds=round(time.process_time() - start_time))
    run_time = timedelta(seconds=round(time.time() - start_time_exec))
    print("--- Tempo de CPU: {} ---".format(cpu_time))
    print("--- Tempo total: {} ---".format(run_time))
# Only run the analysis when this module is executed as a script.
if __name__ == "__main__":
    main()
from django.contrib import admin
from .models import Submission
from script.main import analyze
from traceback import print_exc
def make_analysis(modeladmin, request, queryset):
    """Admin action: analyze every selected submission and mark it processed.

    Each submission is handled independently. When the analysis or the save
    fails, the traceback is printed and the loop moves on to the next one.

    :param modeladmin: the ModelAdmin that triggered the action (unused).
    :param request: the current request (unused).
    :param queryset: iterable of the selected submissions.
    """
    for submission in queryset:
        try:
            print('analisando: '+str(submission))
            analyze(submission)

            submission.processed = True

            print('salvando')
            submission.save()

            print('OK')
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        except Exception:
            print('Análise falhou')
            print_exc()
class SubmissionAdmin(admin.ModelAdmin):
    """Admin configuration for Submission, exposing the analysis action."""

    date_hierarchy = 'timestamp'
    list_display = ('author', 'course', 'processed', 'last', 'timestamp')
    actions = [make_analysis]


admin.site.register(Submission, SubmissionAdmin)
\ No newline at end of file
from django.shortcuts import render, redirect from django.shortcuts import render, redirect
from django.conf import settings
from django.core.files.storage import FileSystemStorage from django.core.files.storage import FileSystemStorage
from uploads.models import Submission
from uploads.core.models import Document from uploads.core.models import Document
from uploads.core.forms import DocumentForm from uploads.core.forms import DocumentForm
from script.main import main as analysis
import os
def home(request):
    """Render the home page with every stored Document."""
    documents = Document.objects.all()
    return render(request, 'core/home.html', {'documents': documents})
def simple_upload(request):
    """Store an uploaded history/register pair as a new Submission.

    On a POST carrying both the 'historico' and 'matricula' files, a
    Submission is created for the requesting user, both files are written to
    the submission's own directory under fixed names, and the file fields are
    pointed at them. The upload page is rendered in every case.

    :param request: the incoming HTTP request.
    :return: the rendered 'core/simple_upload.html' response.
    """
    # .get() instead of [] so a POST missing one of the files falls through
    # to the form instead of raising a key error.
    historico = request.FILES.get('historico')
    matricula = request.FILES.get('matricula')

    if request.method == 'POST' and historico and matricula:
        submission = Submission.objects.create(author=request.user)
        submission.course = '21A'

        fs = FileSystemStorage(location=submission.path())
        fs.save('historico.xls', historico)
        fs.save('matricula.xls', matricula)

        submission.historico.name = submission.path() + '/historico.xls'
        submission.matricula.name = submission.path() + '/matricula.xls'

        submission.save()

    # NOTE(review): the diff this was recovered from lost its indentation;
    # the render is placed outside the `if` so non-POST requests also get a
    # response. Confirm against the repository.
    return render(request, 'core/simple_upload.html')
def model_form_upload(request):
    """Handle the DocumentForm upload page.

    A valid POST saves the form and redirects to 'home'. A GET, or a POST
    with an invalid form, renders the upload page with the form (bound and
    carrying its errors in the invalid case).

    :param request: the incoming HTTP request.
    :return: a redirect to 'home' or the rendered upload page.
    """
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            form.save()
            return redirect('home')
    else:
        form = DocumentForm()

    return render(request, 'core/model_form_upload.html', {
        'form': form
    })
from django import forms
from .models import Submission
class SubmissionForm(forms.ModelForm):
    """Model form for Submission."""

    class Meta:
        model = Submission
        # Django 1.8+ refuses a ModelForm that declares neither `fields` nor
        # `exclude`; '__all__' keeps the previous "every field" behaviour.
        # NOTE(review): consider listing only the user-editable fields.
        fields = '__all__'
\ No newline at end of file
from django.db import models
from django.contrib.auth.models import User
from django.utils import timezone
from os import path
from django.conf import settings
def get_path(instance, filename):
    """Build the relative upload path '<course>/<id>/<filename>'.

    :param instance: object providing ``course`` and ``id`` attributes.
    :param filename: name of the file being stored.
    :return: the three parts joined with forward slashes.
    """
    # NOTE(review): `instance.id` is formatted as-is; presumably it is None
    # for an instance that has not been saved yet -- verify against callers.
    return '{}/{}/{}'.format(instance.course, instance.id, filename)
class Submission(models.Model):
    """A pair of uploaded files (history and register) sent by a user."""

    # on_delete is mandatory from Django 2.0 on; CASCADE was the implicit
    # default before, so behaviour is unchanged.
    author = models.ForeignKey(User, on_delete=models.CASCADE)

    historico = models.FileField(upload_to=get_path)
    matricula = models.FileField(upload_to=get_path)

    course = models.CharField(max_length=10, default='21A')

    timestamp = models.DateTimeField(default=timezone.now)
    last = models.BooleanField(default=True)

    # Set to True once the submission has been analyzed.
    processed = models.BooleanField(default=False)

    def path(self):
        """Return the submission's directory: MEDIA_ROOT/<course>/<id>."""
        # Inside the method body `path` resolves to the module-level
        # `os.path` import, not to this method.
        return path.join(settings.MEDIA_ROOT, self.course, str(self.id))

    def __str__(self):
        return 'Submission (from: {}, to: {}, on: {})'.format(self.author.first_name,
                                                              self.course,
                                                              self.timestamp)
...@@ -38,7 +38,8 @@ INSTALLED_APPS = [ ...@@ -38,7 +38,8 @@ INSTALLED_APPS = [
'django.contrib.messages', 'django.contrib.messages',
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'uploads.core' 'uploads.core',
'uploads'
] ]
MIDDLEWARE_CLASSES = [ MIDDLEWARE_CLASSES = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment