Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Odair M.
adega
Commits
9ff21bbd
Commit
9ff21bbd
authored
Aug 21, 2017
by
João Denis Rodrigues
Browse files
Fix dataframe merge + media formatura
parent
4a615b20
Changes
5
Hide whitespace changes
Inline
Side-by-side
script/analysis/degree_analysis.py
View file @
9ff21bbd
import
pandas
as
pd
import
numpy
as
np
import
math
from
utils.situations
import
Situation
from
utils.situations
import
Situation
,
EvasionForm
def average_graduation(df):
    """Return the fraction of distinct students in ``df`` who graduated.

    Reads the ``MATR_ALUNO`` (student id) and ``FORMA_EVASAO`` columns.
    A student counts as graduated when at least one of their rows has
    ``FORMA_EVASAO == EvasionForm.EF_FORMATURA``.

    Both counts are taken over distinct ``MATR_ALUNO`` values, so a frame
    holding several rows per student gives the same ratio as a frame with
    one row per student.
    NOTE(review): the caller appears to pass the history/registration
    merge, which has one row per history entry -- confirm.

    Returns:
        float: graduates / students, or ``0.0`` when ``df`` has no students.
    """
    total_student = df['MATR_ALUNO'].drop_duplicates().shape[0]
    if total_student == 0:
        # Nothing to average over; avoid ZeroDivisionError on an empty frame.
        return 0.0

    graduated_rows = df.FORMA_EVASAO == EvasionForm.EF_FORMATURA
    # Count students, not rows: a graduate may appear on many rows.
    total_graduate = (
        df.loc[graduated_rows, 'MATR_ALUNO'].drop_duplicates().shape[0]
    )

    # float() keeps true division even if the module runs under Python 2.
    return float(total_graduate) / total_student
def
general_failure
(
df
):
not_nan
=
df
.
dropna
(
axis
=
0
)
affect_ira
=
not_nan
[
not_nan
.
SITUACAO
.
isin
(
Situation
.
SITUATION_AFFECT_IRA
)]
...
...
@@ -28,6 +29,7 @@ def general_failure(df):
standard_deviation
=
math
.
sqrt
(
variance
)
return
(
average
,
standard_deviation
)
def
general_ira
(
df
):
fixed
=
df
.
dropna
(
axis
=
0
)[
df
.
SITUACAO
.
isin
(
Situation
.
SITUATION_AFFECT_IRA
)]
fixed
=
fixed
[
fixed
.
MEDIA_FINAL
<=
100
]
...
...
script/base/dataframe_base.py
View file @
9ff21bbd
...
...
@@ -22,44 +22,67 @@ def load_dataframes(cwd='.'):
if
dh
[
'dataframe'
]
is
not
None
:
dataframes
.
append
(
dh
)
return
dataframes
dataframe
=
fix_dataframes
(
dataframes
)
return
dataframe
def read_excel(path, planilha='Planilha1'):
    """Load the spreadsheet at ``path`` into a DataFrame.

    ``planilha`` is accepted but not forwarded to pandas, so the sheet
    that ``pd.read_excel`` picks by default is the one that gets read.
    """
    sheet_frame = pd.read_excel(path)
    return sheet_frame
def read_csv(path):
    """Load the CSV at ``path`` into a DataFrame using pandas defaults."""
    csv_frame = pd.read_csv(path)
    return csv_frame
def fix_dataframes(dataframes):
    """Combine the history and registration sheets into one cleaned frame.

    Args:
        dataframes: iterable of dicts with a ``'name'`` key (file name) and
            a ``'dataframe'`` key (the loaded DataFrame). The entries named
            ``'historico.xls'`` and ``'matricula.xls'`` are used; any other
            entry is ignored.

    Returns:
        The right merge (on ``MATR_ALUNO``) of the cleaned history onto the
        cleaned registration frame, with situation and evasion labels
        replaced by their numeric codes.

    Raises:
        ValueError: if either required sheet is absent from ``dataframes``.

    Note:
        ``clean_history`` and ``clean_register`` modify the input frames in
        place.
    """
    history = None
    register = None
    for entry in dataframes:
        if entry['name'] == 'historico.xls':
            history = entry['dataframe']
        elif entry['name'] == 'matricula.xls':
            register = entry['dataframe']

    required = (('historico.xls', history), ('matricula.xls', register))
    missing = [name for name, frame in required if frame is None]
    if missing:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            'missing required dataframe(s): ' + ', '.join(missing)
        )

    clean_history(history)
    clean_register(register)

    # how='right' keeps every registered student, including those with no
    # history rows.
    merged = pd.merge(history, register, how='right', on=['MATR_ALUNO'])

    fix_situation(merged)
    # NOTE: fix_admission(merged) is deliberately not applied here; it was
    # disabled in the original code.
    fix_evasion(merged)
    return merged
def clean_history(df):
    """Strip unused columns from the history frame and shorten ``PERIODO``.

    Works in place on ``df`` and returns ``None``. ``PERIODO`` is replaced
    by the text that precedes its first ``'o'`` character.
    """
    unused_columns = [
        'ID_NOTA',
        'CONCEITO',
        'ID_LOCAL_DISPENSA',
        'SITUACAO_CURRICULO',
        'ID_CURSO_ALUNO',
        'ID_VERSAO_CURSO',
        'ID_CURRIC_ALUNO',
        'ID_ATIV_CURRIC',
        'SITUACAO_ITEM',
        'ID_ESTRUTURA_CUR',
    ]
    df.drop(unused_columns, axis=1, inplace=True)

    period_pieces = df['PERIODO'].str.split('o')
    df['PERIODO'] = period_pieces.str[0]
def clean_register(df):
    """Split the admission/evasion periods and drop unused columns.

    Works in place on ``df`` and returns ``None``. For each of
    ``PERIODO_INGRESSO`` and ``PERIODO_EVASAO`` the value is split on
    ``'/'``: the first piece becomes the ``ANO_*`` column, and the second
    piece, cut at its first ``'o'``, becomes the ``SEMESTRE_*`` column.
    """
    period_columns = (
        ('PERIODO_INGRESSO', 'ANO_INGRESSO', 'SEMESTRE_INGRESSO'),
        ('PERIODO_EVASAO', 'ANO_EVASAO', 'SEMESTRE_EVASAO'),
    )
    for source, year_column, semester_column in period_columns:
        pieces = df[source].str.split('/')
        df[year_column] = pieces.str[0]
        semester_text = pieces.str[1]
        df[semester_column] = semester_text.str.split('o').str[0]

    unused_columns = [
        'ID_PESSOA',
        'NOME_PESSOA',
        'DT_NASCIMENTO',
        'NOME_UNIDADE',
        'COD_CURSO',
        'NUM_VERSAO',
        'PERIODO_INGRESSO',
        'PERIODO_EVASAO',
    ]
    df.drop(unused_columns, axis=1, inplace=True)
def fix_situation(df):
    """Replace textual ``SITUACAO`` labels in ``df`` with numeric codes.

    Each entry of ``Situation.SITUATIONS`` is read as ``(code, label)``;
    rows whose ``SITUACAO`` equals ``label`` get ``code`` written to that
    column only. The frame is modified in place. Values that match no
    label are left untouched, and a frame without a ``SITUACAO`` column is
    returned unchanged.
    """
    if 'SITUACAO' not in df.columns:
        # Nothing to translate (e.g. a frame that holds no course results).
        return

    for situation in Situation.SITUATIONS:
        code, label = situation[0], situation[1]
        # Always name the target column: a bare ``df.loc[mask] = code``
        # would overwrite every column of the matching rows.
        df.loc[df.SITUACAO == label, 'SITUACAO'] = code
def fix_admission(df):
    """Replace textual ``FORMA_INGRESSO`` labels in ``df`` with their codes.

    Each entry of ``AdmissionType.ADMISSION_FORM`` is read as
    ``(code, label)``; rows whose ``FORMA_INGRESSO`` equals ``label``
    exactly get ``code`` written to that column. Modifies ``df`` in place.
    """
    for adm in AdmissionType.ADMISSION_FORM:
        code, label = adm[0], adm[1]
        df.loc[df.FORMA_INGRESSO == label, 'FORMA_INGRESSO'] = code


def fix_evasion(df):
    """Replace textual ``FORMA_EVASAO`` labels in ``df`` with their codes.

    Each entry of ``EvasionForm.EVASION_FORM`` is read as ``(code, label)``;
    rows whose ``FORMA_EVASAO`` text contains ``label`` get ``code`` written
    to that column. Entries are applied in tuple order and a row that has
    already been converted never matches again, so the first matching label
    wins. Modifies ``df`` in place.
    """
    for evasion in EvasionForm.EVASION_FORM:
        code, label = evasion[0], evasion[1]
        # The column gradually fills with int codes. Cast to str on every
        # pass so the ``.str`` accessor keeps working once no string values
        # remain; digit strings and 'nan' never contain a label.
        current_text = df.FORMA_EVASAO.astype(str)
        matches = current_text.str.contains(label, regex=False)
        df.loc[matches, 'FORMA_EVASAO'] = code
script/build_cache.py
View file @
9ff21bbd
...
...
@@ -13,14 +13,14 @@ try:
except
NameError
:
to_unicode
=
str
def
build_cache
(
registry
,
history
):
def
build_cache
(
dataframe
):
# os.chdir("../src")
path
=
"cache"
build_path
(
path
)
path
+=
"/curso"
build_path
(
path
)
generate_degree_data
(
path
,
registry
,
history
)
generate_degree_data
(
path
,
dataframe
)
generate_student_data
(
path
)
generate_student_list
(
path
)
generate_admission_data
(
path
)
...
...
@@ -28,10 +28,10 @@ def build_cache(registry, history):
generate_course_data
(
path
)
generate_course_general_data
(
path
)
def generate_degree_data(path, dataframe):
    """Run the degree-level analyses over the merged ``dataframe``.

    Only ``average_graduation`` is currently run, and its result is
    discarded. ``path`` is accepted but not used here.
    """
    average_graduation(dataframe)
    # NOTE: general_failure(dataframe) and general_ira(dataframe) are
    # disabled in the original code and are intentionally not called.
def
generate_student_data
(
path
):
...
...
script/main.py
View file @
9ff21bbd
import
os
import
time
from
base.dataframe_base
import
load_dataframes
,
fix_dataframes
from
base.dataframe_base
import
load_dataframes
from
build_cache
import
build_cache
from
datetime
import
timedelta
...
...
@@ -8,15 +8,9 @@ def main():
start_time
=
time
.
clock
()
start_time_exec
=
time
.
time
()
dataframes
=
load_dataframes
(
os
.
getcwd
()
+
'/'
+
'base'
)
fix_dataframes
(
dataframes
)
for
df
in
dataframes
:
if
'historico'
in
df
[
'name'
]:
history
=
df
[
'dataframe'
]
if
'matricula.xls'
in
df
[
'name'
]:
registry
=
df
[
'dataframe'
]
dataframe
=
load_dataframes
(
os
.
getcwd
()
+
'/'
+
'base'
)
build_cache
(
registry
,
history
)
build_cache
(
dataframe
)
cpu_time
=
timedelta
(
seconds
=
round
(
time
.
clock
()
-
start_time
))
run_time
=
timedelta
(
seconds
=
round
(
time
.
time
()
-
start_time_exec
))
...
...
script/utils/situations.py
View file @
9ff21bbd
...
...
@@ -38,22 +38,30 @@ class EvasionForm:
EF_REOPCAO
=
9
EF_DESISTENCIA
=
10
EF_JUBILAMENTO
=
11
EF_DESCUMPRIMENTO_EDITAL
=
12
EF_FALECIMENTO
=
13
EF_TERMINO_REG_TEMP
=
14
EF_REINTEGRACAO
=
15
EF_OUTROS
=
100
EVASION_FORM
=
(
(
EF_DESCONHECIDO
,
'Desconhecido'
),
(
EF_ATIVO
,
'
Ativ
o'
),
(
EF_FORMATURA
,
'Forma
do
'
),
(
EF_ATIVO
,
'
Sem evasã
o'
),
(
EF_FORMATURA
,
'Forma
tura
'
),
(
EF_ABANDONO
,
'Abandono'
),
(
EF_DESISTENCIA_VESTIBULAR
,
'Desist
e
ncia
v
estibular'
),
(
EF_DESISTENCIA_VESTIBULAR
,
'Desist
ê
ncia
V
estibular'
),
(
EF_CANCELAMENTO
,
'Cancelamento'
),
(
EF_NAO_CONFIRMACAO_VAGA
,
'Não
c
onfirmação de
v
aga'
),
(
EF_NOVO_VESTIBULAR
,
'Novo
v
estibular'
),
(
EF_TRANSFERENCIA_EXTERNA
,
'Transferência
e
xterna'
),
(
EF_REOPCAO
,
'Reopção
de curso
'
),
(
EF_NAO_CONFIRMACAO_VAGA
,
'Não
C
onfirmação de
V
aga'
),
(
EF_NOVO_VESTIBULAR
,
'Novo
V
estibular'
),
(
EF_TRANSFERENCIA_EXTERNA
,
'Transferência
E
xterna'
),
(
EF_REOPCAO
,
'Reopção'
),
(
EF_DESISTENCIA
,
'Desistência'
),
(
EF_JUBILAMENTO
,
'Jubilado'
),
(
EF_OUTROS
,
'Outros'
),
(
EF_JUBILAMENTO
,
'Jubilamento'
),
(
EF_DESCUMPRIMENTO_EDITAL
,
'Descumprimento Edital'
),
(
EF_FALECIMENTO
,
'Falecimento'
),
(
EF_TERMINO_REG_TEMP
,
'Término de Registro Temporário'
),
(
EF_REINTEGRACAO
,
'Reintegração'
),
(
EF_OUTROS
,
'Outro'
),
)
# == Situation Courses == #
...
...
@@ -77,7 +85,10 @@ class Situation:
SIT_TRANCAMENTO_TOTAL
=
11
SIT_TRANCAMENTO_ADMINISTRATIVO
=
12
SIT_REPROVADO_SEM_NOTA
=
13
SIT_HORAS
=
13
SIT_HORAS
=
14
SIT_APROV_ADIANTAMENTO
=
15
SIT_INCOMPLETO
=
16
SIT_OUTROS
=
100
...
...
@@ -101,6 +112,8 @@ class Situation:
(
SIT_HORAS
,
'Horas'
),
(
SIT_APROV_ADIANTAMENTO
,
'Aprov Adiantamento'
),
(
SIT_INCOMPLETO
,
'Incompleto'
),
(
SIT_OUTROS
,
'Outro'
),
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment