Commit 3d2a7cfa authored by Bruno Meyer's avatar Bruno Meyer
Browse files

Terminando as analises (falta fazer checagem e refatorar)

parent c71c62d7
import numpy as np
from utils.situations import *
......@@ -5,151 +6,126 @@ ANO_ATUAL = 2017
SEMESTRE_ATUAL = 2
def listagem_evasao(df):
# ~ print(df["FORMA_EVASAO"].drop_duplicates())
# ~ print(df)
# ~ print(Situation.SITUATION_AFFECT_IRA)
# ~ print(df)
aux = df[df.FORMA_EVASAO != 1]
print(aux)
def listagem_alunos_ativos(df):
return list(df["MATR_ALUNO"][df["FORMA_EVASAO"] == EvasionForm.EF_ATIVO].drop_duplicates())
# ~ print(aux.where(aux.SITUACAO != 1)["SITUACAO"])
# ~ print(df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)])
# ~ print(df.where(df["SITUACAO"] in Situation.SITUATION_AFFECT_IRA))
# ~ aux = df.drop_duplicates(['MATR_ALUNO'], keep='last')
# ~ print(aux["FORMA_EVASAO"].drop_duplicates())
def average_ira(d):
temp = d.dropna(subset=['MEDIA_FINAL'])
temp = temp[temp['MEDIA_FINAL'] <= 100]
if not temp.empty:
# print(temp[['MEDIA_FINAL', 'CH_TOTAL']])
aux = np.sum(temp['MEDIA_FINAL'] * temp['CH_TOTAL'])
ch_total = np.sum(temp['CH_TOTAL']) * 100
return (aux / ch_total)
def posicao_turmaIngresso_semestral(df):
iras = ira_semestra(df)
iraMax = {}
for matr in iras:
for semestreAno in iras[matr]:
if not (semestreAno in iraMax):
iraMax[semestreAno] = iras[matr][semestreAno]
else:
if (iras[matr][semestreAno] > iraMax[semestreAno]):
iraMax[semestreAno] = iras[matr][semestreAno]
for matr in iras:
for semestreAno in iras[matr]:
iras[matr][semestreAno] /= iraMax[semestreAno]
return iras
iras = ira_semestral(df)
iraMax = {}
for matr in iras:
for semestreAno in iras[matr]:
if not (semestreAno in iraMax):
iraMax[semestreAno] = iras[matr][semestreAno]
else:
if (iras[matr][semestreAno] > iraMax[semestreAno]):
iraMax[semestreAno] = iras[matr][semestreAno]
for matr in iras:
for semestreAno in iras[matr]:
iras[matr][semestreAno] /= iraMax[semestreAno]
return iras
def periodo_real(df):
aux = df.groupby(["MATR_ALUNO"])
students = {}
for x in aux:
students[x[0]] = None
return students
aux = df.groupby(["MATR_ALUNO"])
students = {}
for x in aux:
students[x[0]] = None
return students
def periodo_pretendido(df):
aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
students = {}
for x in aux:
print(x[0][0] + " : " + x[0][1] + " " + x[0][2])
students[x[0][0]] = (ANO_ATUAL - int(x[0][1])) * 2 + SEMESTRE_ATUAL - int(x[0][2]) + 1
return students
aux = df.groupby(["MATR_ALUNO", "ANO_INGRESSO", "SEMESTRE_INGRESSO"])
students = {}
for x in aux:
students[x[0][0]] = (ANO_ATUAL - int(x[0][1])) * 2 + SEMESTRE_ATUAL - int(x[0][2]) + 1
return students
def ira_semestra(df):
aux = ira_por_quantidade_disciplinas(df)
for matr in aux:
for periodo in aux[matr]:
aux[matr][periodo] = aux[matr][periodo][0]
return aux
def ira_semestral(df):
aux = ira_por_quantidade_disciplinas(df)
for matr in aux:
for periodo in aux[matr]:
aux[matr][periodo] = aux[matr][periodo][0]
return aux
def ira_por_quantidade_disciplinas(df):
students = {}
df = df.dropna(subset=["MEDIA_FINAL"])
# ~ print(df["MATR_ALUNO"][178])
# ~ print(df["NOME_ATIV_CURRIC"][178])
# ~ print(df["PERIODO"][178])
# ~ print(df["ANO"][178])
# ~ print(df["SITUACAO"][178])
total_students = len(df["MATR_ALUNO"])
for i in range(total_students):
matr = (df["MATR_ALUNO"][i])
if (not (matr in students)):
students[matr] = {}
ano = str(int(df["ANO"][i]))
semestre = str(df["PERIODO"][i])
situacao = int(df["SITUACAO"][i])
nota = float(df["MEDIA_FINAL"][i])
media_credito = int(df["MEDIA_CREDITO"][i])
if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0):
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0]
students[matr][ano + "/" + semestre][0] += nota
students[matr][ano + "/" + semestre][1] += 1
for matr in students:
for periodo in students[matr]:
if (students[matr][periodo][1] != 0):
students[matr][periodo][0] /= students[matr][periodo][1] * 100
return (students)
students = {}
df = df.dropna(subset=["MEDIA_FINAL"])
total_students = len(df["MATR_ALUNO"])
for i in range(total_students):
matr = (df["MATR_ALUNO"][i])
if (not (matr in students)):
students[matr] = {}
ano = str(int(df["ANO"][i]))
semestre = str(df["PERIODO"][i])
situacao = int(df["SITUACAO"][i])
nota = float(df["MEDIA_FINAL"][i])
media_credito = int(df["MEDIA_CREDITO"][i])
if (situacao in Situation.SITUATION_AFFECT_IRA and media_credito != 0):
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0]
students[matr][ano + "/" + semestre][0] += nota
students[matr][ano + "/" + semestre][1] += 1
for matr in students:
for periodo in students[matr]:
if (students[matr][periodo][1] != 0):
students[matr][periodo][0] /= students[matr][periodo][1] * 100
return (students)
def indice_aprovacao_semestral(df):
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
total_students = len(df["MATR_ALUNO"])
for i in range(total_students):
matr = (df["MATR_ALUNO"][i])
if (not (matr in students)):
students[matr] = {}
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
total_students = len(df["MATR_ALUNO"])
for i in range(total_students):
matr = (df["MATR_ALUNO"][i])
if (not (matr in students)):
students[matr] = {}
ano = str(int(df["ANO"][i]))
semestre = str(df["PERIODO"][i])
situacao = int(df["SITUACAO"][i])
ano = str(int(df["ANO"][i]))
semestre = str(df["PERIODO"][i])
situacao = int(df["SITUACAO"][i])
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0]
if not (ano + "/" + semestre in students[matr]):
students[matr][ano + "/" + semestre] = [0, 0]
if situacao in Situation.SITUATION_PASS:
students[matr][ano + "/" + semestre][0] += 1
students[matr][ano + "/" + semestre][1] += 1
if situacao in Situation.SITUATION_FAIL:
students[matr][ano + "/" + semestre][1] += 1
return (students)
if situacao in Situation.SITUATION_PASS:
students[matr][ano + "/" + semestre][0] += 1
students[matr][ano + "/" + semestre][1] += 1
if situacao in Situation.SITUATION_FAIL:
students[matr][ano + "/" + semestre][1] += 1
return (students)
def aluno_turmas(df):
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
students = {}
df = df.dropna(subset=['MEDIA_FINAL'])
situations = dict(Situation.SITUATIONS)
situations = dict(Situation.SITUATIONS)
for matr, hist in df.groupby('MATR_ALUNO'):
students[matr] = []
for matr, hist in df.groupby('MATR_ALUNO'):
students[matr] = []
for _, row in hist.iterrows():
data = {
'ano': str(int(row["ANO"])),
'codigo': row["COD_ATIV_CURRIC"],
'nome': row["NOME_ATIV_CURRIC"],
'nota': row["MEDIA_FINAL"],
'semestre': row["PERIODO"],
'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS)
}
for _, row in hist.iterrows():
data = {
'ano': str(int(row["ANO"])),
'codigo': row["COD_ATIV_CURRIC"],
'nome': row["NOME_ATIV_CURRIC"],
'nota': row["MEDIA_FINAL"],
'semestre': row["PERIODO"],
'situacao': situations.get(row["SITUACAO"], Situation.SIT_OUTROS)
}
students[matr].append(data)
students[matr].append(data)
return students
return students
from utils.utils import *
from utils.situations import *
from analysis.degree_analysis import *
from analysis.student_analysis import *
try:
to_unicode = unicode
to_unicode = unicode
except NameError:
to_unicode = str
to_unicode = str
def build_cache(dataframe):
# os.chdir("../src")
path = 'cache/curso/'
ensure_path_exists(path)
for cod, df in dataframe.groupby('COD_CURSO'):
generate_degree_data(path+'/'+cod+'/', df)
#generate_degree_data(path, dataframe)
#generate_student_data(path, dataframe)
#generate_student_list(path)
#generate_admission_data(path)
#generate_admission_list(path)
#generate_course_data(path)
#generate_course_general_data(path)
# os.chdir("../src")
path = 'cache/curso'
ensure_path_exists(path)
for cod, df in dataframe.groupby('COD_CURSO'):
generate_degree_data(path+'/'+cod+'/', df)
generate_student_data(path+'/'+cod+'/students/',df)
#generate_degree_data(path, dataframe)
#generate_student_data(path, dataframe)
#generate_student_list(path)
#generate_admission_data(path)
#generate_admission_list(path)
#generate_course_data(path)
#generate_course_general_data(path)
def generate_degree_data(path, dataframe):
ensure_path_exists(path)
ensure_path_exists(path+'students')
ensure_path_exists(path)
ensure_path_exists(path+'students')
students = dataframe[['MATR_ALUNO', 'FORMA_EVASAO']].drop_duplicates()
students = dataframe[['MATR_ALUNO', 'FORMA_EVASAO']].drop_duplicates()
data = {
'average_graduation': average_graduation(dataframe),
'general_failure': general_failure(dataframe),
'general_ira': general_ira(dataframe),
'active_students': students[students.FORMA_EVASAO == EvasionForm.EF_ATIVO].shape[0],
'graduated_students': students[students.FORMA_EVASAO == EvasionForm.EF_FORMATURA].shape[0],
}
data = {
'average_graduation': average_graduation(dataframe),
'general_failure': general_failure(dataframe),
'general_ira': general_ira(dataframe),
'active_students': students[students.FORMA_EVASAO == EvasionForm.EF_ATIVO].shape[0],
'graduated_students': students[students.FORMA_EVASAO == EvasionForm.EF_FORMATURA].shape[0],
}
save_json(path+'/degree.json', data)
for ind, hist in dataframe.groupby('MATR_ALUNO'):
generate_student_data(path+'students/{}.json'.format(ind), hist)
save_json(path+'/degree.json', data)
#~ for ind, hist in dataframe.groupby('MATR_ALUNO'):
#~ generate_student_data_old(path+'students/{}.json'.format(ind), dataframe)
def historico(dataframe):
res = []
res = []
for _, row in dataframe.iterrows():
res.append(dict(row[['ANO', 'MEDIA_FINAL', 'PERIODO', 'SITUACAO', 'COD_ATIV_CURRIC', 'NOME_ATIV_CURRIC',
'CREDITOS', 'CH_TOTAL', 'DESCR_ESTRUTURA', 'FREQUENCIA']]))
for _, row in dataframe.iterrows():
res.append(dict(row[['ANO', 'MEDIA_FINAL', 'PERIODO', 'SITUACAO', 'COD_ATIV_CURRIC', 'NOME_ATIV_CURRIC',
'CREDITOS', 'CH_TOTAL', 'DESCR_ESTRUTURA', 'FREQUENCIA']]))
return res
return res
def process_semestre(per, df):
ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean()
completas = df[df.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0]
tentativas = df[df.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0]
return {
'semestre': per,
'ira': ira,
'completas': completas,
'tentativas': tentativas,
'aprovacao': completas/tentativas if tentativas else 0,
'ira_por_quantidade_disciplinas': ira/tentativas if tentativas else 0
}
ira = df[df.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean()
completas = df[df.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0]
tentativas = df[df.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0]
return {
'semestre': per,
'ira': ira,
'completas': completas,
'tentativas': tentativas,
'aprovacao': completas/tentativas if tentativas else 0,
'ira_por_quantidade_disciplinas': ira/tentativas if tentativas else 0
}
def generate_student_data(path, dataframe):
ensure_path_exists(os.path.dirname(path))
data = dict(dataframe.iloc[0][['MATR_ALUNO', 'NOME_ALUNO', 'SEXO', 'FORMA_INGRESSO', 'FORMA_EVASAO', 'ANO_INGRESSO',
'SEMESTRE_INGRESSO', 'ANO_EVASAO', 'SEMESTRE_EVASAO']])
data.update({
'ira': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_AFFECT_IRA)].MEDIA_FINAL.mean(),
'completas': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_PASS)].shape[0],
'tentativas': dataframe[dataframe.SITUACAO.isin(Situation.SITUATION_COURSED)].shape[0],
'semestres': [process_semestre(per, dataframe[dataframe.PERIODO == per]) for per in sorted(dataframe.PERIODO.unique())],
'historico': historico(dataframe)
})
save_json(path, data)
def generate_student_data_old(path, dataframe):
print(aluno_turmas(dataframe))
print(indice_aprovacao_semestral(dataframe))
print("2007/1" in ira_por_quantidade_disciplinas(dataframe)["GRR20066955"])
print(ira_semestra(dataframe)["GRR20079775"])
aluno_turmas(dataframe)
indice_aprovacao_semestral(dataframe)
ira_por_quantidade_disciplinas(dataframe)
ira_semestra(dataframe)
periodo_pretendido(dataframe)
print(periodo_real(dataframe))
print(posicao_turmaIngresso_semestral(dataframe))
print(listagem_evasao(dataframe))
pass
def generate_student_data(path, dataframe):
student_data = dict()
all_grrs = list(dataframe["MATR_ALUNO"].drop_duplicates())
for x in all_grrs:
student_data[x] = dict()
analises = [
# tupla que contem no primeiro elemento a funcao que retorna um dicionario com {"GRR": valor}
# e na segunda posicao o nome que esta analise tera no json
(posicao_turmaIngresso_semestral(dataframe),
"posicao_turmaIngresso_semestral"),
(periodo_real(dataframe),
"periodo_real"),
(periodo_pretendido(dataframe),
"periodo_pretendido"),
(ira_semestral(dataframe),
"ira_semestral"),
(ira_por_quantidade_disciplinas(dataframe),
"ira_por_quantidade_disciplinas"),
(indice_aprovacao_semestral(dataframe),
"indice_aprovacao_semestral"),
(aluno_turmas(dataframe),
"aluno_turmas"),
]
for x in student_data:
#~ for a in analises: # Usar para fazer a verificacao de
#~ student_data[x][a[1]] = a[0][x] # analises nulas para um GRR
save_json(path+x, student_data[x])
#Falta verificar se alguem nao recebeu algumas analises
def generate_student_list(path):
pass
pass
def generate_admission_data(path):
pass
pass
def generate_admission_list(path):
pass
pass
def generate_course_data(path):
pass
pass
def generate_course_general_data(path):
pass
pass
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
{}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment