Commit 237a1119 authored by Odair M.'s avatar Odair M.
Browse files

Merge branch 'pandas-0.22' into 'master'

Pandas 0.22

See merge request pet/A.D.E.G.A!13
parents a9c8a584 06c46fe5
......@@ -15,7 +15,7 @@ ipython = "*"
django = "==1.11.10"
django-widget-tweaks = "*"
pandas = "==0.18.1"
pandas = "==0.22"
"psycopg2" = "*"
xlrd = "*"
django-extensions = "*"
......
{
"_meta": {
"hash": {
"sha256": "0b62cd0d5cd72fee71fa1f56dde87cabb220300bca052e3c9006ba8da5edeca6"
"sha256": "36924d8179f463a93a6998ff08f3840a6e3125ae52f0dbf7a2a82a3563d8e337"
},
"host-environment-markers": {
"implementation_name": "cpython",
......@@ -36,10 +36,10 @@
},
"django-extensions": {
"hashes": [
"sha256:24c24bbc6ef6dd36fe6b2b7c48d171a9d22fe76895610fe19087af657fa27930",
"sha256:1f424a7f87974c2e2602b8b41cae52eb08105523f0c70320203abf58bcb84404"
"sha256:37a543af370ee3b0721ff50442d33c357dd083e6ea06c5b94a199283b6f9e361",
"sha256:bc9f2946c117bb2f49e5e0633eba783787790ae810ea112fe7fd82fa64de2ff1"
],
"version": "==2.0.5"
"version": "==2.0.6"
},
"django-widget-tweaks": {
"hashes": [
......@@ -77,26 +77,8 @@
"version": "==1.14.2"
},
"pandas": {
"hashes": [
"sha256:2aeebd55027eb1fcb5020ec141696be47fff65fb86c276e46bae42f04b3bfeaf",
"sha256:6f31b4510da92f8beec17fe9ecb3f386984a4b35e1d1dee062b3463f63e70bbc",
"sha256:b7a6ce196452bf9a020074b68c184b174c12a22c285603ceebb09c645cf001d1",
"sha256:42c933501341263194926d00c1039d314039f6fbe763e13d983918d273a0ad68",
"sha256:aa50475fafbc689dead2e9a4e98b96fc43f1190f6661d1daf560f8c05ac26496",
"sha256:9984b284ab6d7672c720ea960f4d19b9dd0bea061c2ccd641b0c20d34ce03f7a",
"sha256:a0af231d6bf20d3f94a4d694bb3cd26c1b330aa4ed124ea99eff49a583ed10ff",
"sha256:9b1a7834e10c5a2afacaae8ba10054dc2ee5ae81eeaecf44d9eaa4d726962817",
"sha256:6621db235422aa48d7513a7f332a7bfc6e9a54b0283bac145cccec7c4c0ccd7d",
"sha256:c39dbc38bc031f099bcfa408a93c801f0141ee49a7d4e0df09cdf9dcf01f27e6",
"sha256:fccbc771a23d51b366182c136cd735cf1642744270fee964f5b1fe9103d66239",
"sha256:80bf0d32432fe588a0e94ff6b216aa5c61ddba2348ca904bda240218f9cbe122",
"sha256:931d25b391eb01c52239a41e2b1c29c8337a6789852ecc0d4ce39ce2491424e6",
"sha256:563720b6302a4e2b513471c16bd7e89db2ae44d3f6b0745896b9c289f3c6b2fb",
"sha256:c850d8c41b5417ba361967d3e2b6119c681b9f0bd5eb77f4c013c46dbf0ebe95",
"sha256:d2e483692c7915916dffd1b83256ea9761b4224c8d45646ceddf48b977ee77b2",
"sha256:de8661d3a71bac8b5100c2a85fdb1b55c9b41534aba7a9671d1130d43ab2de59"
],
"version": "==0.18.1"
"hashes": [],
"version": "==0.22"
},
"psycopg2": {
"hashes": [
......
......@@ -3,112 +3,134 @@ import os
import pandas as pd
import numpy as np
from script.utils.situations import *
from script.utils.utils import invert_dict
class DataframeHolder:
def __init__(self, dataframe):
self.students = dataframe.groupby('MATR_ALUNO')
self.courses = dataframe.groupby('COD_ATIV_CURRIC')
self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
def __init__(self, dataframe):
self.students = dataframe.groupby('MATR_ALUNO')
self.courses = dataframe.groupby('COD_ATIV_CURRIC')
self.admission = dataframe.groupby(['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
def load_dataframes(cwd='.'):
dataframes = []
for path, dirs, files in os.walk(cwd):
for f in files:
file_path = path + '/' + f
dh = {'name': f, 'dataframe': None}
if 'csv' in f:
dh['dataframe'] = read_csv(file_path)
if 'xls' in f:
dh['dataframe'] = read_excel(file_path)
if dh['dataframe'] is not None:
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
dataframes = []
for path, dirs, files in os.walk(cwd):
for f in files:
file_path = path + '/' + f
dh = {'name': f, 'dataframe': None}
if 'csv' in f:
dh['dataframe'] = read_csv(file_path)
if 'xls' in f:
dh['dataframe'] = read_excel(file_path)
if dh['dataframe'] is not None:
dataframes.append(dh)
dataframe = fix_dataframes(dataframes)
dh = DataframeHolder(dataframe)
#~ dh.students.aggregate(teste)
# print(dh.students['MEDIA_FINAL'].aggregate(teste))
return dataframe
return dataframe
def read_excel(path, planilha='Planilha1'):
return pd.read_excel(path)
return pd.read_excel(path)
def read_csv(path):
return pd.read_csv(path)
return pd.read_csv(path)
def fix_dataframes(dataframes):
for df in dataframes:
if df['name'] == 'historico.xls' or df['name'] == 'historico.csv':
history = df['dataframe']
history.rename(columns={'DESCR_SITUACAO': 'SITUACAO'}, inplace=True)
if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv':
register = df['dataframe']
for df in dataframes:
if df['name'] == 'historico.xls' or df['name'] == 'historico.csv':
history = df['dataframe']
history.rename(columns={'DESCR_SITUACAO': 'SITUACAO'}, inplace=True)
if df['name'] == 'matricula.xls' or df['name'] == 'matricula.csv':
register = df['dataframe']
#~ clean_history(history)
clean_register(register)
#~ df.dropna(axis=0, how='all')
history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
history = history[np.isfinite(history['MEDIA_FINAL'])]
#~ clean_history(history)
clean_register(register)
#~ df.dropna(axis=0, how='all')
history["MEDIA_FINAL"] = pd.to_numeric(history["MEDIA_FINAL"], errors='coerce')
history = history[np.isfinite(history['MEDIA_FINAL'])]
# inner = the student exists in both reports, which is what we want
# what to do with students missing from one of the two reports is still an open question
merged = pd.merge(history, register, how='inner', on=['MATR_ALUNO'])
merged = merged.rename(index=str, columns={"ANO_INGRESSO_x": "ANO_INGRESSO", "SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO", "FORMA_INGRESSO_x": "FORMA_INGRESSO"})
merged = pd.merge(history, register, how='outer', on=['MATR_ALUNO'])
merged = merged.rename(index=str, columns={"ANO_INGRESSO_x": "ANO_INGRESSO", "SEMESTRE_INGRESSO_x": "SEMESTRE_INGRESSO", "FORMA_INGRESSO_x": "FORMA_INGRESSO"})
fix_situation(merged)
fix_admission(merged)
fix_evasion(merged)
fix_carga(merged)
fix_situation(merged)
fix_admission(merged)
fix_evasion(merged)
fix_carga(merged)
return merged
return merged
def clean_history(df):
df.drop(['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR', 'NUM_VERSAO'
], axis=1, inplace=True)
print(df.columns)
drop_columns = ['ID_NOTA', 'CONCEITO', 'ID_LOCAL_DISPENSA', 'SITUACAO_CURRICULO',
'ID_CURSO_ALUNO', 'ID_VERSAO_CURSO', 'ID_CURRIC_ALUNO',
'ID_ATIV_CURRIC', 'SITUACAO_ITEM', 'ID_ESTRUTURA_CUR'
]
drop_columns = [x for x in drop_columns if x in df.columns]
df.drop(drop_columns, axis=1, inplace=True)
df['PERIODO'] = df['PERIODO'].str.split('o').str[0]
def clean_register(df):
df_split = df['PERIODO_INGRESSO'].str.split('/')
df['ANO_INGRESSO'] = df_split.str[0]
df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0]
df_split = df['PERIODO_EVASAO'].str.split('/')
df['ANO_EVASAO'] = df_split.str[0]
df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0]
df_split = df['PERIODO_INGRESSO'].str.split('/')
df['ANO_INGRESSO'] = df_split.str[0]
df['SEMESTRE_INGRESSO'] = df_split.str[1].str.split('o').str[0]
df_split = df['PERIODO_EVASAO'].str.split('/')
df['ANO_EVASAO'] = df_split.str[0]
df['SEMESTRE_EVASAO'] = df_split.str[1].str.split('o').str[0]
drop_columns = ['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE','COD_CURSO',
'PERIODO_INGRESSO', 'PERIODO_EVASAO']
drop_columns = [x for x in drop_columns if x in df.columns]
df.drop(drop_columns, axis=1, inplace=True)
df.drop(['ID_PESSOA', 'NOME_PESSOA', 'DT_NASCIMENTO', 'NOME_UNIDADE','COD_CURSO', 'PERIODO_INGRESSO', 'PERIODO_EVASAO'],axis=1, inplace=True)
def get_situation(d, default):
    """Build a lookup function that maps a value back to its key in ``d``.

    ``d`` is inverted with ``invert_dict`` exactly once, when this factory is
    called, instead of on every lookup. The returned function is applied
    element-wise over DataFrame columns, so rebuilding the inverted dict per
    element was wasted work.

    Args:
        d: the collection accepted by ``invert_dict``.
        default: value returned by the getter when the looked-up value has no
            entry in the inverted dict.

    Returns:
        A one-argument function ``getter(x)`` returning the key associated
        with ``x`` in ``d``, or ``default`` when ``x`` is not present.

    Note:
        Because the inversion happens up front, changes made to ``d`` after
        this call are not seen by the returned getter.
    """
    inverted = invert_dict(d)

    def getter(x):
        return inverted.get(x, default)

    return getter
def fix_situation(df):
for situation in Situation.SITUATIONS:
df.loc[df.SITUACAO == situation[1], 'SITUACAO'] = situation[0]
df.rename(columns={"SITUACAO": "SITUACAO2"}, inplace=True)
df['SITUACAO'] = df.SITUACAO2.apply(get_situation(Situation.SITUATIONS, Situation.SIT_OUTROS))
df.drop(['SITUACAO2'], axis=1, inplace=True)
def fix_admission(df):
for adm in AdmissionType.ADMISSION_FORM:
df.loc[df.FORMA_INGRESSO == adm[1], 'FORMA_INGRESSO'] = adm[0]
df.rename(columns={'FORMA_INGRESSO': 'FORMA_INGRESSO2'}, inplace=True)
df['FORMA_INGRESSO'] = df.FORMA_INGRESSO2.apply(get_situation(AdmissionType.ADMISSION_FORM,
AdmissionType.AT_OUTROS))
df.drop(['FORMA_INGRESSO2'], axis=1, inplace=True)
def fix_carga(df):
df["CH_TOTAL"] = df["CH_TEORICA"]+df["CH_PRATICA"]
df["CH_TOTAL"] = df["CH_TEORICA"]+df["CH_PRATICA"]
def fix_evasion(df):
evasionForms = [x[1] for x in EvasionForm.EVASION_FORM]
df.loc[~df.FORMA_EVASAO.isin(evasionForms), 'FORMA_EVASAO'] = 100
for evasion in EvasionForm.EVASION_FORM:
#~ df.loc[df.FORMA_EVASAO.str.contains(evasion[1]).fillna(1.0), 'FORMA_EVASAO'] = evasion[0]
df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
#~ if(evasion[0] == 100):
#~ for x in df.FORMA_EVASAO.str.contains(evasion[1]).fillna(False):
#~ if(x != 0.0):
#~ print(x)
#~ print(df.FORMA_EVASAO.str.contains(evasion[1]).fillna(5))
#~ print(df[['MATR_ALUNO','FORMA_EVASAO']])
df.rename(columns={'FORMA_EVASAO': 'FORMA_EVASAO2'}, inplace=True)
df['FORMA_EVASAO'] = df.FORMA_EVASAO2.apply(get_situation(EvasionForm.EVASION_FORM,
EvasionForm.EF_OUTROS))
df.drop(['FORMA_EVASAO2'], axis=1, inplace=True)
......@@ -31,6 +31,7 @@ def main():
start_time_exec = time.time()
dataframe = load_dataframes(os.getcwd() + '/script/' + 'base')
build_cache(dataframe)
cpu_time = timedelta(seconds=round(time.clock() - start_time))
analises_disciplinas(dataframe)
......
......@@ -74,6 +74,7 @@ class EvasionForm:
# guideline: check whether media_final is greater than 100; if so, assign 0,
# otherwise assign media_final
class Situation:
SIT_DESCONHECIDA = 0
......
......@@ -10,6 +10,10 @@ except:
DEBUG = True
def invert_dict(d):
    """Return a new dict mapping each value of ``d`` back to its key.

    Args:
        d: either a mapping (anything exposing ``.items()``) or an iterable
            of ``(key, value)`` pairs such as a tuple of 2-tuples. Values
            must be hashable because they become the keys of the result.

    Returns:
        A dict ``{value: key}``. When several keys share the same value, the
        pair iterated last wins. An empty input yields an empty dict.
    """
    # Iterating a real dict yields only its keys, which cannot be unpacked
    # into (key, value); go through .items() when it is available.
    pairs = d.items() if hasattr(d, 'items') else d
    return {v: k for k, v in pairs}
def build_path(path):
if not os.path.exists(path):
os.mkdir(path)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment