From fa8a8f871776c2a417cbe9b942a754ef23ded460 Mon Sep 17 00:00:00 2001
From: Jomaro Rodrigues <jomaro.rodrigues@gmail.com>
Date: Thu, 2 Nov 2017 23:49:35 +0100
Subject: [PATCH] =?UTF-8?q?reestrutura=C3=A7=C3=A3o=20de=20como=20as=20ana?=
 =?UTF-8?q?lises=20s=C3=A3o=20rodadas=20e=20algumas=20analises=20de=20uma?=
 =?UTF-8?q?=20linha?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 requirements.txt                    |   3 +-
 script/analysis/degree_analysis.py  |   1 -
 script/analysis/student_analysis.py | 264 ++++++++++++++--------------
 script/build_cache.py               | 128 ++++++++++----
 script/main.py                      |   2 +
 script/utils/situations.py          |  19 ++
 script/utils/utils.py               |  30 ++++
 7 files changed, 274 insertions(+), 173 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 498d21b..f08277c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
-django==1.8
+django==1.11
 django-widget-tweaks
 pandas==0.18.1
 django-extension
 psycopg2
+xlrd
diff --git a/script/analysis/degree_analysis.py b/script/analysis/degree_analysis.py
index 5f01427..4855ba4 100644
--- a/script/analysis/degree_analysis.py
+++ b/script/analysis/degree_analysis.py
@@ -1,5 +1,4 @@
 import pandas as pd
-import numpy as np
 import math
 from utils.situations import Situation, EvasionForm
 
diff --git a/script/analysis/student_analysis.py b/script/analysis/student_analysis.py
index ac23042..ba017dc 100644
--- a/script/analysis/student_analysis.py
+++ b/script/analysis/student_analysis.py
@@ -1,161 +1,155 @@
-import pandas as pd
-from utils.situations import *
 
+from utils.situations import *
 
 ANO_ATUAL = 2017
 SEMESTRE_ATUAL = 2
 
+
 def listagem_evasao(df):
-	#~ print(df["FORMA_EVASAO"].drop_duplicates())
-	#~ print(df)
-	#~ print(Situation.SITUATION_AFFECT_IRA)
-	#~ print(df)
-	aux = df[df.FORMA_EVASAO != 1]
-	print(aux)
-	#~ print(aux.where(aux.SITUACAO != 1)["SITUACAO"])
-	#~ print(df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)])
-	#~ print(df.where(df["SITUACAO"] in Situation.SITUATION_AFFECT_IRA))
-	#~ aux = df.drop_duplicates(['MATR_ALUNO'], keep='last')
-	#~ print(aux["FORMA_EVASAO"].drop_duplicates())
+    # ~ print(df["FORMA_EVASAO"].drop_duplicates())
+    # ~ print(df)
+    # ~ print(Situation.SITUATION_AFFECT_IRA)
+    # ~ print(df)
+    aux = df[df.FORMA_EVASAO != 1]
+    print(aux)
+
+
+# ~ print(aux.where(aux.SITUACAO != 1)["SITUACAO"])
+# ~ print(df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)])
+# ~ print(df.where(df["SITUACAO"] in Situation.SITUATION_AFFECT_IRA))
+# ~ aux = df.drop_duplicates(['MATR_ALUNO'], keep='last')
+# ~ print(aux["FORMA_EVASAO"].drop_duplicates())
 
 def average_ira(d):
     temp = d.dropna(subset=['MEDIA_FINAL'])
     temp = temp[temp['MEDIA_FINAL'] <= 100]
     if not temp.empty:
-        #print(temp[['MEDIA_FINAL', 'CH_TOTAL']])
-        aux = np.sum(temp['MEDIA_FINAL']*temp['CH_TOTAL'])
+        # print(temp[['MEDIA_FINAL', 'CH_TOTAL']])
+        aux = np.sum(temp['MEDIA_FINAL'] * temp['CH_TOTAL'])
         ch_total = np.sum(temp['CH_TOTAL']) * 100
-        return(aux/ch_total)
+        return (aux / ch_total)
+
 
 def posicao_turmaIngresso_semestral(df):
-	iras = ira_semestra(df)
-	iraMax = {}
-	for matr in iras:
-		for semestreAno in iras[matr]:
-			if not(semestreAno in iraMax):
-				iraMax[semestreAno] = iras[matr][semestreAno]
-			else:
-				if(iras[matr][semestreAno] > iraMax[semestreAno]):
-					iraMax[semestreAno] = iras[matr][semestreAno]
-	for matr in iras:
-		for semestreAno in iras[matr]:
-			iras[matr][semestreAno]/=iraMax[semestreAno]
-	
-	return iras
+    iras = ira_semestra(df)
+    iraMax = {}
+    for matr in iras:
+        for semestreAno in iras[matr]:
+            if not (semestreAno in iraMax):
+                iraMax[semestreAno] = iras[matr][semestreAno]
+            else:
+                if (iras[matr][semestreAno] > iraMax[semestreAno]):
+                    iraMax[semestreAno] = iras[matr][semestreAno]
+    for matr in iras:
+        for semestreAno in iras[matr]:
+            iras[matr][semestreAno] /= iraMax[semestreAno]
+
+    return iras
+
 
 def periodo_real(df):
-	aux = df.groupby(["MATR_ALUNO"])
-	students = {}
-	for x in aux:
-		students[x[0]] = None
-	return students
+    aux = df.groupby(["MATR_ALUNO"])
+    students = {}
+    for x in aux:
+        students[x[0]] = None
+    return students
+
 
 def periodo_pretendido(df):
-	aux = df.groupby(["MATR_ALUNO","ANO_INGRESSO","SEMESTRE_INGRESSO"])
-	students = {}
-	for x in aux:
-		print(x[0][0] + " : "+x[0][1]+" "+x[0][2]) 
-		students[x[0][0]] = (ANO_ATUAL - int(x[0][1]))*2 + SEMESTRE_ATUAL - int(x[0][2]) + 1
-	return students
+    aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
+    students = {}
+    for x in aux:
+        print(x[0][0] + " : " + x[0][1] + " " + x[0][2])
+        students[x[0][0]] = (ANO_ATUAL - int(x[0][1])) * 2 + SEMESTRE_ATUAL - int(x[0][2]) + 1
+    return students
+
 
 def ira_semestra(df):
-	aux = ira_por_quantidade_disciplinas(df)
-	for matr in aux:
-		for periodo in aux[matr]:
-			aux[matr][periodo] = aux[matr][periodo][0]
-	return aux
+    aux = ira_por_quantidade_disciplinas(df)
+    for matr in aux:
+        for periodo in aux[matr]:
+            aux[matr][periodo] = aux[matr][periodo][0]
+    return aux
+
 
 def ira_por_quantidade_disciplinas(df):
-	students = {}
-	df = df.dropna(subset=["MEDIA_FINAL"])
-	#~ print(df["MATR_ALUNO"][178])
-	#~ print(df["NOME_ATIV_CURRIC"][178])
-	#~ print(df["PERIODO"][178])
-	#~ print(df["ANO"][178])
-	#~ print(df["SITUACAO"][178])
-	
-	total_students = len(df["MATR_ALUNO"])
-	for i in range(total_students):
-		matr = (df["MATR_ALUNO"][i])
-		if(not (matr in students)):
-			students[matr] = {}
-		
-		
-		
-		ano = str(int(df["ANO"][i]))
-		semestre = str(df["PERIODO"][i])
-		situacao = int(df["SITUACAO"][i])
-		nota = float(df["MEDIA_FINAL"][i])
-		media_credito = int(df["MEDIA_CREDITO"][i])
-		
-		
-		if(situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0):
-			
-			
-			
-			if not(ano+"/"+semestre in students[matr]):
-				students[matr][ano+"/"+semestre] = [0,0]
-			students[matr][ano+"/"+semestre][0]+=nota
-			students[matr][ano+"/"+semestre][1]+=1
-			
-			
-	for matr in students:
-		for periodo in students[matr]:
-			if(students[matr][periodo][1] != 0):
-				students[matr][periodo][0]/=students[matr][periodo][1]*100
-	return(students)
+    students = {}
+    df = df.dropna(subset=["MEDIA_FINAL"])
+    # ~ print(df["MATR_ALUNO"][178])
+    # ~ print(df["NOME_ATIV_CURRIC"][178])
+    # ~ print(df["PERIODO"][178])
+    # ~ print(df["ANO"][178])
+    # ~ print(df["SITUACAO"][178])
+
+    total_students = len(df["MATR_ALUNO"])
+    for i in range(total_students):
+        matr = (df["MATR_ALUNO"][i])
+        if (not (matr in students)):
+            students[matr] = {}
+
+        ano = str(int(df["ANO"][i]))
+        semestre = str(df["PERIODO"][i])
+        situacao = int(df["SITUACAO"][i])
+        nota = float(df["MEDIA_FINAL"][i])
+        media_credito = int(df["MEDIA_CREDITO"][i])
+
+        if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0):
+
+            if not (ano + "/" + semestre in students[matr]):
+                students[matr][ano + "/" + semestre] = [0, 0]
+            students[matr][ano + "/" + semestre][0] += nota
+            students[matr][ano + "/" + semestre][1] += 1
+
+    for matr in students:
+        for periodo in students[matr]:
+            if (students[matr][periodo][1] != 0):
+                students[matr][periodo][0] /= students[matr][periodo][1] * 100
+    return (students)
+
 
 def indice_aprovacao_semestral(df):
-	students = {}
-	df = df.dropna(subset=['MEDIA_FINAL'])
-	total_students = len(df["MATR_ALUNO"])
-	for i in range(total_students):
-		matr = (df["MATR_ALUNO"][i])
-		if(not (matr in students)):
-			students[matr] = {}
-		
-		
-		ano = str(int(df["ANO"][i]))
-		semestre = str(df["PERIODO"][i])
-		situacao = int(df["SITUACAO"][i])
-		
-		
-		if not(ano+"/"+semestre in students[matr]):
-			students[matr][ano+"/"+semestre] = [0,0]
-		
-		if(situacao in Situation.SITUATION_PASS):
-			students[matr][ano+"/"+semestre][0]+=1
-			students[matr][ano+"/"+semestre][1]+=1
-		if(situacao in Situation.SITUATION_FAIL):
-			students[matr][ano+"/"+semestre][1]+=1
-	return(students)
-		
+    students = {}
+    df = df.dropna(subset=['MEDIA_FINAL'])
+    total_students = len(df["MATR_ALUNO"])
+    for i in range(total_students):
+        matr = (df["MATR_ALUNO"][i])
+        if (not (matr in students)):
+            students[matr] = {}
+
+        ano = str(int(df["ANO"][i]))
+        semestre = str(df["PERIODO"][i])
+        situacao = int(df["SITUACAO"][i])
+
+        if not (ano + "/" + semestre in students[matr]):
+            students[matr][ano + "/" + semestre] = [0, 0]
+
+        if situacao in Situation.SITUATION_PASS:
+            students[matr][ano + "/" + semestre][0] += 1
+            students[matr][ano + "/" + semestre][1] += 1
+        if situacao in Situation.SITUATION_FAIL:
+            students[matr][ano + "/" + semestre][1] += 1
+    return (students)
+
 
 def aluno_turmas(df):
-	students = {}
-	df = df.dropna(subset=['MEDIA_FINAL'])
-	total_students = len(df["MATR_ALUNO"])
-	for i in range(total_students):
-		matr = (df["MATR_ALUNO"][i])
-		if(not (matr in students)):
-			students[matr] = []
-		
-		for s in Situation.SITUATIONS:
-			if(s[0] == df["SITUACAO"][i]):
-				situacao = s[1]
-				break
-		ano = str(int(df["ANO"][i]))
-		codigo = (df["COD_ATIV_CURRIC"][i])
-		nome = (df["NOME_ATIV_CURRIC"][i])
-		nota = (df["MEDIA_FINAL"][i])
-		semestre = (df["PERIODO"][i])
-		
-		students[matr].append({
-			"ano": ano,
-			"codigo": codigo,
-			"nome": nome,
-			"nota": nota,
-			"semestre": semestre,
-			"situacao": situacao
-		})
-	return(students)
+    students = {}
+    df = df.dropna(subset=['MEDIA_FINAL'])
+
+    situations = dict(Situation.SITUATIONS)
+
+    for matr, hist in df.groupby('MATR_ALUNO'):
+        students[matr] = []
+
+        for _, row in hist.iterrows():
+            data = {
+                'ano': str(int(row["ANO"])),
+                'codigo': row["COD_ATIV_CURRIC"],
+                'nome': row["NOME_ATIV_CURRIC"],
+                'nota': row["MEDIA_FINAL"],
+                'semestre': row["PERIODO"],
+                'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS)
+            }
+
+            students[matr].append(data)
+
+    return students
diff --git a/script/build_cache.py b/script/build_cache.py
index 553b94c..8091390 100644
--- a/script/build_cache.py
+++ b/script/build_cache.py
@@ -1,14 +1,12 @@
-import sys
-import os
-import time
-import math
-
-from datetime import timedelta
-from pathlib import Path
-from utils.utils import build_path
+
+
+from utils.utils import *
+from utils.situations import *
 from analysis.degree_analysis import *
 from analysis.student_analysis import *
 
+
+
 try:
     to_unicode = unicode
 except NameError:
@@ -17,37 +15,95 @@ except NameError:
 
 def build_cache(dataframe):
 #    os.chdir("../src")
-    path = "cache"
-    build_path(path)   
-    path += "/curso"
-    build_path(path)
-
-#    generate_degree_data(path, dataframe)
-    generate_student_data(path,dataframe)
-#    generate_student_list(path)
-#    generate_admission_data(path)
-#    generate_admission_list(path)
-#    generate_course_data(path)
-#    generate_course_general_data(path)
+    path = 'cache/curso/'
+
+    ensure_path_exists(path)
+
+    for cod, df in dataframe.groupby('COD_CURSO'):
+        generate_degree_data(path+'/'+cod+'/', df)
+
+    #generate_degree_data(path, dataframe)
+    #generate_student_data(path, dataframe)
+    #generate_student_list(path)
+    #generate_admission_data(path)
+    #generate_admission_list(path)
+    #generate_course_data(path)
+    #generate_course_general_data(path)
 
 def generate_degree_data(path, dataframe):
-    average_graduation(dataframe)
-    general_failure(dataframe)
-    general_ira(dataframe)
-    pass
+    ensure_path_exists(path)
+    ensure_path_exists(path+'students')
+
+    students = dataframe[['MATR_ALUNO', 'FORMA_EVASAO']].drop_duplicates()
+
+    data = {
+        'average_graduation': average_graduation(dataframe),
+        'general_failure': general_failure(dataframe),
+        'general_ira': general_ira(dataframe),
+        'active_students': students[students.FORMA_EVASAO == EvasionForm.EF_ATIVO].shape[0],
+        'graduated_students': students[students.FORMA_EVASAO == EvasionForm.EF_FORMATURA].shape[0],
+    }
+
+    save_json(path+'/degree.json', data)
+
+    for ind, hist in dataframe.groupby('MATR_ALUNO'):
+        generate_student_data(path+'students/{}.json'.format(ind), hist)
+
+
+
+def historico(dataframe):
+    res = []
+
+    for _, row in dataframe.iterrows():
+        res.append(dict(row[['ANO', 'MEDIA_FINAL', 'PERIODO', 'SITUACAO', 'COD_ATIV_CURRIC', 'NOME_ATIV_CURRIC',
+                             'CREDITOS', 'CH_TOTAL', 'DESCR_ESTRUTURA', 'FREQUENCIA']]))
+
+    return res
+
+
+def process_semestre(per, df):
+    ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean()
+    completas = df[df.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0]
+    tentativas = df[df.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0]
+
+    return {
+        'semestre': per,
+        'ira': ira,
+        'completas': completas,
+        'tentativas': tentativas,
+        'aprovacao': completas/tentativas if tentativas else 0,
+        'ira_por_quantidade_disciplinas': ira/tentativas if tentativas else 0
+    }
+
+def generate_student_data(path, dataframe):
+    ensure_path_exists(os.path.dirname(path))
+
+    data = dict(dataframe.iloc[0][['MATR_ALUNO', 'NOME_ALUNO', 'SEXO', 'FORMA_INGRESSO', 'FORMA_EVASAO', 'ANO_INGRESSO',
+                                'SEMESTRE_INGRESSO', 'ANO_EVASAO', 'SEMESTRE_EVASAO']])
+
+    data.update({
+        'ira': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean(),
+        'completas': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0],
+        'tentativas': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0],
+        'semestres': [process_semestre(per, dataframe[dataframe.PERIODO == per]) for per in sorted(dataframe.PERIODO.unique())],
+        'historico': historico(dataframe)
+    })
+
+    save_json(path, data)
+
 
-def generate_student_data(path,dataframe):
-    #~ print(aluno_turmas(dataframe))
-    #~ print(indice_aprovacao_semestral(dataframe))
-    #~ print("2007/1" in ira_por_quantidade_disciplinas(dataframe)["GRR20066955"])
-    #~ print(ira_semestra(dataframe)["GRR20079775"])
-    #~ aluno_turmas(dataframe)
-    #~ indice_aprovacao_semestral(dataframe)
-    #~ ira_por_quantidade_disciplinas(dataframe)
-    #~ ira_semestra(dataframe)
-    #~ periodo_pretendido(dataframe)
-    #~ print(periodo_real(dataframe))
-    #~ print(posicao_turmaIngresso_semestral(dataframe))
+def generate_student_data_old(path, dataframe):
+    print(aluno_turmas(dataframe))
+    print(indice_aprovacao_semestral(dataframe))
+    print("2007/1" in ira_por_quantidade_disciplinas(dataframe)["GRR20066955"])
+    print(ira_semestra(dataframe)["GRR20079775"])
+    aluno_turmas(dataframe)
+    indice_aprovacao_semestral(dataframe)
+    ira_por_quantidade_disciplinas(dataframe)
+    ira_semestra(dataframe)
+    periodo_pretendido(dataframe)
+    print(periodo_real(dataframe))
+    print(posicao_turmaIngresso_semestral(dataframe))
     print(listagem_evasao(dataframe))
     pass
 
diff --git a/script/main.py b/script/main.py
index 8e3f306..9cec7b0 100644
--- a/script/main.py
+++ b/script/main.py
@@ -5,6 +5,8 @@ from build_cache import build_cache
 from datetime import timedelta
 from analysis.degree_analysis import *
 
+
+
 def main():
     start_time = time.clock()
     start_time_exec = time.time()
diff --git a/script/utils/situations.py b/script/utils/situations.py
index 80f2b17..e62c509 100644
--- a/script/utils/situations.py
+++ b/script/utils/situations.py
@@ -137,3 +137,22 @@ class Situation:
         SIT_REPROVADO_FREQ,
         SIT_CONHECIMENTO_REPROVADO
     )
+
+    """
+    isso deve ser pra filtrar fora coisas que não são disciplinas cumpridas
+    
+    como "trancamento administrativo" e "horas"
+    
+    importante pra saber quantas matérias um aluno REALMENTE fez em um semestre
+    """
+    SITUATION_COURSED = (
+        SIT_APROVADO,
+        SIT_REPROVADO,
+        SIT_REPROVADO_FREQ,
+        SIT_DISPENSA_COM_NOTA,
+        SIT_CONHECIMENTO_APROVADO,
+        SIT_CONHECIMENTO_REPROVADO,
+        SIT_REPROVADO_SEM_NOTA,
+        SIT_INCOMPLETO,
+        SIT_CANCELADO,
+    )
diff --git a/script/utils/utils.py b/script/utils/utils.py
index 871e935..f69d762 100644
--- a/script/utils/utils.py
+++ b/script/utils/utils.py
@@ -1,5 +1,35 @@
 import os
 
+import json
+
+try:
+    from django.conf import settings
+
+    DEBUG = settings.DEBUG
+except:
+    DEBUG = True
+
+
 def build_path(path):
     if not os.path.exists(path):
         os.mkdir(path)
+
+
+def ensure_path_exists(complete_path):
+    parts = complete_path.split('/')
+
+    for i in range(len(parts)):
+        if not os.path.exists('/'.join(parts[:i+1])):
+            os.mkdir('/'.join(parts[:i+1]))
+
+
+def save_json(path, data):
+
+    ensure_path_exists(os.path.dirname(path))
+
+    params = {} if not DEBUG else {'indent': 4}
+
+    with open(path, 'w') as f:
+        json.dump(data, f, **params)
+
+
-- 
GitLab