...
 
Commits (2)
......@@ -47,7 +47,7 @@ docker-production:
docker-compose --project-directory . -f docker_scripts/docker-production.yml -p adega up
docker-remove-all:
docker rm adega_web_1 adega_db_1
docker rm adega_web adega_db
docker rmi adega_web
......@@ -68,6 +68,6 @@ args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}`
docker-manage:
@echo $(call args,"")
docker exec -it adega_web_1 bash -c "cd src; python3 manage.py $(call args,'')"
docker exec -it adega_web bash -c "cd src; python3 manage.py $(call args,'')"
......@@ -2,19 +2,51 @@ import math
import json
import pandas as pd
from submission.analysis.utils.situations import Situation, EvasionForm
from submission.analysis.utils import IntervalCount
from submission.analysis.analysis.student_analysis import *
def average_graduation(df):
    """
    Return the number of graduation rows divided by the number of
    distinct students.

    Parameters
    ----------
    df : DataFrame
        Must expose the ``MATR_ALUNO`` and ``FORMA_EVASAO`` columns.

    Returns
    -------
    float
        ``rows whose FORMA_EVASAO equals EvasionForm.EF_FORMATURA`` divided
        by ``distinct MATR_ALUNO values``. The numerator counts rows, not
        distinct students.

    Raises
    ------
    ZeroDivisionError
        When the frame has no rows.
    """
    distinct_students = df['MATR_ALUNO'].drop_duplicates()
    graduation_rows = df[df.FORMA_EVASAO == EvasionForm.EF_FORMATURA]
    return len(graduation_rows) / len(distinct_students)
def general_failure(df):
"""
This function
Parameters
----------
df : DataFrame
Returns
-------
float
Examples
--------
13.395865237366003
"""
affect_ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)]
failures = affect_ira[affect_ira.SITUACAO.isin(Situation.SITUATION_FAIL)]
# average = reprovados/
average = failures.shape[0] / affect_ira.shape[0]
student_courses = affect_ira.groupby(['MATR_ALUNO'], as_index=False)\
......@@ -91,7 +123,7 @@ def average_graduation_time(df):
for index, row in graduates.iterrows():
if pd.notnull(row['ANO_EVASAO']):
year_end = int(row['ANO_EVASAO'])
try:
try:
semester_end = int(row['SEMESTRE_EVASAO'])
except ValueError:
try:
......@@ -128,11 +160,8 @@ def taxa_abandono(df):
return total_abandono / total_student
def average_ira_graph(student_analysis):
    """
    Build the students-per-IRA-interval histogram for every student.

    Parameters
    ----------
    student_analysis : object
        Must expose ``ira_alunos()``; its result is forwarded unchanged to
        ``build_dict_ira_medio_certo``.

    Returns
    -------
    dict
        Whatever ``build_dict_ira_medio_certo`` returns for those IRAs.
    """
    # Only this definition is kept: the earlier ``average_ira_graph(df)``
    # variant was shadowed by it and never returned its result.
    return build_dict_ira_medio_certo(student_analysis.ira_alunos())
def current_students_average_ira_graph(df):
......@@ -155,7 +184,7 @@ def period_evasion_graph(df):
di_qtd = {}
dic = {}
evasions_total = 0
# Discover the minimum and maximum values for year
year_start = int(df['ANO'].min())
year_end = int(df['ANO'].max()) + 1
......@@ -186,11 +215,11 @@ def period_evasion_graph(df):
# Name of string on dictionary generated
date = str(year) + ' {}º Período'.format(semester)
di_qtd[date] = evasions
# Count the total of evasions identified, that will be
# used to compute the rate
evasions_total += evasions
# If at least one evasion was computed
if evasions_total:
# Compute the ratio of evasion per
......@@ -198,46 +227,120 @@ def period_evasion_graph(df):
for di in di_qtd:
qtd = di_qtd[di]
dic[di] = {'qtd': qtd, 'taxa': (float(qtd)/evasions_total)*100}
return dic
def build_dict_ira_medio(alunos):
    """
    Count how many rows of ``alunos`` fall into each 5-point IRA interval.

    Parameters
    ----------
    alunos : DataFrame
        Only the ``MEDIA_FINAL`` column is read. Callers are expected to
        pass one row per student.

    Returns
    -------
    dict of {str: int}
        One entry per interval label, in ascending order.

    Example
    -------
    {'80-84.9': 64, '25-29.9': 41, ...}
    """
    labels = [
        "00-4.9", "05-9.9", "10-14.9", "15-19.9", "20-24.9",
        "25-29.9", "30-34.9", "35-39.9", "40-44.9", "45-49.9",
        "50-54.9", "55-59.9", "60-64.9", "65-69.9", "70-74.9",
        "75-79.9", "80-84.9", "85-89.9", "90-94.9", "95-100",
    ]
    bucket_width = 5.0
    lowest_counted = 0.01  # values below this are not counted at all
    highest_counted = 100
    last_index = len(labels) - 1

    dic = {label: 0 for label in labels}
    for value in alunos.get('MEDIA_FINAL', ()):
        if value is None:
            continue
        grade = float(value)
        # NaN fails this chained comparison, so missing grades are skipped.
        if not lowest_counted <= grade <= highest_counted:
            continue
        # Index by floor division instead of comparing against the label
        # text: the labels end in ".9", so a grade such as 4.95 matched no
        # interval, and 100 was excluded from "95-100".
        index = min(int(grade // bucket_width), last_index)
        dic[labels[index]] += 1
    return dic
def build_degree_json(path,df):
def merge_dicts(dict1, dict2, dict3):
    """
    Combine three per-interval dictionaries into one nested dictionary.

    Every key of ``dict1`` becomes a key of the result. Its value is a
    dictionary holding ``dict1``'s value under ``'ira_medio'``, ``dict2``'s
    under ``'sem_evasao'`` and ``dict3``'s under ``'formatura'``. A key
    missing from ``dict2`` or ``dict3`` yields ``None``. Keys present only
    in ``dict2`` or ``dict3`` are ignored.
    """
    return {
        interval: {
            'ira_medio': count,
            'sem_evasao': dict2.get(interval),
            'formatura': dict3.get(interval),
        }
        for interval, count in dict1.items()
    }
# NOTE(review): unfinished stub. It only binds an empty local dict and then
# implicitly returns None. No call to it is visible in this excerpt; confirm
# whether it should be completed or removed.
def generate_intervals():
intervals = {}
def build_dict_ira_medio_certo(iras):
    """
    Build the students-per-IRA-interval histogram from per-student IRAs.

    Parameters
    ----------
    iras : dict
        Maps a student id (GRR) to that student's IRA. Each value is
        converted with ``float`` before being counted.

    Returns
    -------
    dict
        The result of ``IntervalCount.to_dict()`` for a counter created as
        ``IntervalCount(1, 0, 0.05)``, i.e. slices of width 0.05 between
        0 and 1.
    """
    # NOTE(review): the interval bounds assume IRAs on a 0-1 scale; confirm
    # against the producer of ``iras``.
    icount = IntervalCount(1, 0, 0.05)
    # Counting is delegated to IntervalCount. The previous hand-written loop
    # referenced undefined names (``dic``, ``grade``), called a nonexistent
    # ``do_count`` and split integer keys as if they were strings.
    # NOTE(review): that loop also tried to skip values below 0.01; here a
    # zero IRA is handed to the counter like any other value.
    for grade in iras.values():
        icount.count_on_interval(float(grade))
    return icount.to_dict()
def merge_dicts(dict1, dict2, dict3):
    """
    Build the data for the "students per IRA" graph.

    Takes three dictionaries keyed by IRA interval and merges them. Each
    key of ``dict1`` maps, in the result, to a dictionary with three items:
        'ira_medio'  : the value from ``dict1``;
        'sem_evasao' : the value from ``dict2`` (None if the key is absent);
        'formatura'  : the value from ``dict3`` (None if the key is absent).

    Parameters
    ----------
    dict1, dict2, dict3 : dict
        Dictionaries keyed by IRA interval. Only keys of ``dict1`` appear
        in the output.

    Returns
    -------
    dict of dict

    Example
    --------
    {'05-9.9': {'ira_medio': 43,
                'sem_evasao': 9,
                'formatura': 3},
     '10-14.9': {'ira_medio': 37,
                 'sem_evasao': 12,
                 'formatura': 7},
     ...}
    """
    merged = {}
    for interval in dict1:
        merged[interval] = dict(
            ira_medio=dict1[interval],
            sem_evasao=dict2.get(interval),
            formatura=dict3.get(interval),
        )
    return merged
def build_degree_json(path,df,student_analysis):
dic = merge_dicts(
average_ira_graph(df),
average_ira_graph(student_analysis),
current_students_average_ira_graph(df),
graduates_average_ira_graph(df)
)
......
......@@ -17,7 +17,7 @@ class StudentAnalysis:
def __init__(self, df):
self.data_frame = df
@memoize
def student_info(self, df=None):
df = df if df is not None else self.data_frame
......@@ -34,7 +34,7 @@ class StudentAnalysis:
students = students.groups.keys()
iras = self.ira_alunos()
info = {}
for stnd in students:
grr = stnd[0]
if(stnd[0][-1] == 1):
......@@ -58,13 +58,13 @@ class StudentAnalysis:
situations = df.groupby(["MATR_ALUNO", "NOME_PESSOA", "FORMA_EVASAO"])
situations = list(pd.DataFrame({'count' : situations.size()}).reset_index().groupby(["FORMA_EVASAO"]))
iras = self.ira_alunos()
list_situations = defaultdict(list)
for sit in situations:
grrs = list(sit[1]["MATR_ALUNO"])
people_names = list(sit[1]["NOME_PESSOA"])
evasion_form_name = EvasionForm.code_to_str(sit[0])
for i, student in enumerate(grrs):
......@@ -80,29 +80,44 @@ class StudentAnalysis:
@memoize
def ira_alunos(self, df=None):
    """
    Compute one overall IRA per student from the per-semester figures.

    For every student the entries returned by
    ``self.ira_por_quantidade_disciplinas()`` are combined as
    ``sum(entry[0] * entry[2]) / sum(entry[2])`` over that student's
    semesters.

    Parameters
    ----------
    df : DataFrame, optional
        Accepted for signature parity with the sibling methods. It is not
        used: the per-semester data is always taken from
        ``self.ira_por_quantidade_disciplinas()``.

    Returns
    -------
    dict
        ``{GRR: number, ...}``. A student whose weights add up to zero
        gets 0.
    """
    # NOTE(review): entry[0] looks like the semester IRA and entry[2] the
    # semester workload; confirm in ira_por_quantidade_disciplinas.
    per_semester = self.ira_por_quantidade_disciplinas()

    # Results go into a fresh dict. The source mapping is neither overwritten
    # while it is iterated nor returned half-converted.
    ira_per_student = {}
    for grr, semesters in per_semester.items():
        weighted_sum = 0
        total_weight = 0
        for entry in semesters.values():
            weighted_sum += entry[0] * entry[2]
            total_weight += entry[2]
        if total_weight != 0:
            ira_per_student[grr] = weighted_sum / total_weight
        else:
            ira_per_student[grr] = 0
    return ira_per_student
@memoize
def taxa_aprovacao(self, df=None):
df = df if df is not None else self.data_frame
aprovacoes_semestres = self.indice_aprovacao_semestral()
for aluno in aprovacoes_semestres:
total = sum([aprovacoes_semestres[aluno][s][1] for s in aprovacoes_semestres[aluno]])
aprovacoes = sum([aprovacoes_semestres[aluno][s][0] for s in aprovacoes_semestres[aluno]])
......@@ -112,9 +127,9 @@ class StudentAnalysis:
aprovacoes_semestres[aluno] = aprovacoes/total
else:
aprovacoes_semestres[aluno] = None
return aprovacoes_semestres
def turma_ingresso(self, df=None):
df = df if df is not None else self.data_frame
......@@ -123,13 +138,13 @@ class StudentAnalysis:
for i,std in df.iterrows():
admissions[std["MATR_ALUNO"]] = std["ANO_INGRESSO_y"]+"/"+std["SEMESTRE_INGRESSO"]
return admissions
@memoize
def posicao_turmaIngresso_semestral(self, df=None):
df = df if df is not None else self.data_frame
grr_to_admissions = self.turma_ingresso()
admissions = defaultdict(list)
......@@ -138,14 +153,14 @@ class StudentAnalysis:
# By instance: {"2015/1":["GRR20151346","GRR20154562", ...], ...}
for grr in grr_to_admissions:
admissions[grr_to_admissions[grr]].append(grr)
iras_by_semester = self.ira_semestral()
positions = defaultdict(dict)
for grr in iras_by_semester:
for semester in iras_by_semester[grr]:
student_admission = admissions[grr_to_admissions[grr]]
competition = [matr for matr in student_admission if semester in iras_by_semester[matr]]
classifications = sorted(
......@@ -154,14 +169,14 @@ class StudentAnalysis:
)
positions[grr][semester] = (1+classifications.index(grr))/len(competition)
return positions
@memoize
def periodo_real(self, df=None):
df = df if df is not None else self.data_frame
aux = df.groupby(["MATR_ALUNO"])
students = {}
#TODO: Calculate the real value
......@@ -172,7 +187,7 @@ class StudentAnalysis:
@memoize
def periodo_pretendido(self, df=None):
df = df if df is not None else self.data_frame
aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
students = {}
for x in aux:
......@@ -182,7 +197,7 @@ class StudentAnalysis:
@memoize
def ira_semestral(self, df=None):
df = df if df is not None else self.data_frame
aux = self.ira_por_quantidade_disciplinas()
for matr in aux:
for periodo in aux[matr]:
......@@ -191,8 +206,25 @@ class StudentAnalysis:
@memoize
def ira_por_quantidade_disciplinas(self, df=None):
"""
Calculates the ira per year/semester
Parameters
----------
df : seila
Returns
-------
dict of dict of array
iras = { GRR: {year/semester: []},
...}
Example
--------
"""
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=["MEDIA_FINAL"])
......@@ -207,12 +239,12 @@ class StudentAnalysis:
situacao = int(df["SITUACAO"][i])
nota = float(df["MEDIA_FINAL"][i])
carga = float(df["CH_TOTAL"][i])
if (situacao in Situation.SITUATION_AFFECT_IRA):
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0, 0]
students[matr][ano + "/" + semestre][0] += nota*carga
students[matr][ano + "/" + semestre][1] += 1
students[matr][ano + "/" + semestre][2] += carga
......@@ -221,14 +253,14 @@ class StudentAnalysis:
for periodo in students[matr]:
if (students[matr][periodo][2] != 0):
students[matr][periodo][0] /= students[matr][periodo][2] * 100
return students
@memoize
def indice_aprovacao_semestral(self, df=None):
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
total_students = len(df["MATR_ALUNO"])
......@@ -249,14 +281,14 @@ class StudentAnalysis:
students[matr][ano + "/" + semestre][1] += 1
if situacao in Situation.SITUATION_FAIL:
students[matr][ano + "/" + semestre][1] += 1
return (students)
@memoize
def aluno_turmas(self, df=None):
df = df if df is not None else self.data_frame
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
......
import os
import copy
import numpy as np
import ujson as json
import pprint
try:
from django.conf import settings
......@@ -10,13 +12,72 @@ try:
except:
DEBUG = True
class IntervalCount:
    """
    Counter of values over equal-width intervals between ``inf`` and ``sup``.

    ``IntervalCount(1, 0, 0.05)`` creates 20 slices: [0.00, 0.05),
    [0.05, 0.10), ... and a last slice [0.95, 1.00] that also includes
    ``sup`` itself.

    Parameters
    ----------
    sup : number
        Upper bound of the counted range.
    inf : number
        Lower bound of the counted range.
    gap : number
        Width of each slice.
    digit : int
        Number of decimals used to build the ``digit`` format string.

    Attributes
    ----------
    dict : {int: int}
        Counter per slice, keyed by slice index (0 is the lowest slice).
    digit : str
        Format string such as ``"{:.3f}"``. It is kept as a public
        attribute; ``to_dict`` always formats labels with two decimals.

    ``count_on_interval(value)`` (alias ``do_count``) increments the slice
    holding ``value``; ``to_dict()`` returns the counters keyed by labels
    such as ``"0.30-0.35"``.
    """

    def __init__(self, sup, inf=0, gap=1, digit=3):
        self.sup = sup
        self.inf = inf
        self.gap = gap
        self.dict = {key: 0 for key in range(self.get_total_slices())}
        self.digit = "{:." + str(digit) + "f}"  # number of digits after .

    def get_total_slices(self):
        """Return how many slices of width ``gap`` fit between inf and sup."""
        return round((self.sup - self.inf) / self.gap)

    def projection_inf(self, key):
        """Return the lower bound of the slice with index ``key``."""
        return key * self.gap + self.inf

    def projection_sup(self, key):
        """Return the upper bound of the slice with index ``key``."""
        return key * self.gap + self.inf + self.gap

    def count_on_interval(self, value):
        """
        Increment the counter of the slice that contains ``value``.

        Values outside ``[inf, sup]`` (and NaN, which fails the range
        test) are ignored. ``value == sup`` is counted in the last slice.
        """
        if not self.dict or not (self.inf <= value <= self.sup):
            return
        # The slice index is computed directly. The tiny epsilon absorbs
        # binary rounding: 0.3 / 0.05 evaluates to 5.999999999999999 and
        # would otherwise land one slice too low.
        index = int((value - self.inf) / self.gap + 1e-9)
        self.dict[min(index, len(self.dict) - 1)] += 1

    # Name used by the original description of this class.
    do_count = count_on_interval

    def to_dict(self):
        """Return ``{"<inf>-<sup>": count}`` with bounds shown to 2 decimals."""
        dic = {}
        for key in self.dict:
            pinf = self.projection_inf(key)
            psup = self.projection_sup(key)
            convert_key = "{:.2f}".format(pinf) + "-" + "{:.2f}".format(psup)
            dic[convert_key] = self.dict[key]
        return dic
# Use this function as a decorator to cache a function's return value.
def memoize(f):
    """
    Cache the result of ``f(x)`` per ``str(x)`` and return deep copies.

    Only calls made with a single positional argument are cached, keyed by
    ``str(x)``. Calls that pass anything else (for example an explicit
    ``df``) go straight to ``f`` uncached, because the text form of extra
    arguments is not a reliable cache key.

    Every cached result is returned as a ``copy.deepcopy`` so callers can
    mutate what they receive without corrupting the cache.
    """
    # Local import keeps this block self-contained.
    import functools

    memo = {}

    # NOTE(review): the key is str(x), so for objects without a custom
    # __str__/__repr__ the cache is tied to the default repr. Confirm that
    # is acceptable for the decorated methods.
    @functools.wraps(f)  # keep the wrapped function's name and docstring
    def helper(x, *args, **kwargs):
        if args or kwargs:
            return f(x, *args, **kwargs)
        key = str(x)
        if key not in memo:
            memo[key] = f(x)
        return copy.deepcopy(memo[key])
    return helper
......@@ -48,3 +109,11 @@ def save_json(path, data):
with open(path, 'w') as f:
json.dump(data, f, **params, ensure_ascii=False)
if __name__ == "__main__":
    # Manual demo: feed a few sample values to an IntervalCount covering
    # 0..1 in steps of 0.05 and pretty-print the resulting dictionary.
    icount = IntervalCount(1, 0, 0.05)
    for sample in (0.3, 0.3, 0.9, 0.98):
        icount.count_on_interval(sample)
    printer = pprint.PrettyPrinter(indent=4)
    printer.pprint(icount.to_dict())