Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Odair M.
adega
Commits
2d0f7ecf
Commit
2d0f7ecf
authored
Mar 06, 2018
by
Jomaro Rodrigues
Browse files
analises feitas de maneira modular
parent
f6389c86
Changes
7
Hide whitespace changes
Inline
Side-by-side
src/script/base/dataframe_base.py
View file @
2d0f7ecf
import
re
import
os
import
sys
import
pandas
as
pd
import
numpy
as
np
from
glob
import
glob
from
json
import
load
as
json_load
from
script.utils.situations
import
*
class DataframeHolder:
    """Hold pre-built groupings of a dataframe.

    The constructor groups the given dataframe three ways and stores the
    resulting pandas GroupBy objects:

    * ``students``  -- grouped by 'MATR_ALUNO'
    * ``courses``   -- grouped by 'COD_ATIV_CURRIC'
    * ``admission`` -- grouped by 'ANO_INGRESSO' and 'SEMESTRE_INGRESSO'
    """

    def __init__(self, dataframe):
        """Build the three groupings from ``dataframe``.

        ``dataframe`` must contain the four columns named above; pandas
        raises ``KeyError`` otherwise.
        """
        self.students = dataframe.groupby('MATR_ALUNO')
        self.courses = dataframe.groupby('COD_ATIV_CURRIC')
        self.admission = dataframe.groupby(
            ['ANO_INGRESSO', 'SEMESTRE_INGRESSO'])
def load_dataframes(cwd='.'):
    """Load every spreadsheet found under ``cwd`` and return the merged data.

    The directory tree rooted at ``cwd`` is walked recursively. Each file
    whose extension is ``.csv`` is read with ``read_csv`` and each file whose
    extension is ``.xls`` or ``.xlsx`` is read with ``read_excel``; all other
    files are ignored. The loaded frames are passed, as a list of
    ``{'name': file_name, 'dataframe': frame}`` dicts, to ``fix_dataframes``,
    whose result is returned.
    """
    dataframes = []
    for path, _dirs, files in os.walk(cwd):
        for f in files:
            file_path = os.path.join(path, f)
            # Decide by real extension: a substring test such as
            # "'csv' in f" would also match names like "csv_notes.txt".
            extension = os.path.splitext(f)[1].lower()
            if extension == '.csv':
                loaded = read_csv(file_path)
            elif extension in ('.xls', '.xlsx'):
                loaded = read_excel(file_path)
            else:
                continue
            dataframes.append({'name': f, 'dataframe': loaded})

    return fix_dataframes(dataframes)
def read_excel(path, planilha='Planilha1'):
    """Read the Excel file at ``path`` into a pandas DataFrame.

    NOTE: ``planilha`` is accepted but not used; the sheet that is read is
    whatever ``pd.read_excel`` selects by default. It is kept in the
    signature so existing callers keep working.
    """
    return pd.read_excel(path)
def read_csv(path):
    """Read the CSV at ``path`` into a pandas DataFrame.

    Thin wrapper over ``pd.read_csv`` using pandas' default options.
    """
    return pd.read_csv(path)
def fix_dataframes(dataframes):
    """Clean the history and register frames and merge them into one.

    ``dataframes`` is a list of ``{'name': ..., 'dataframe': ...}`` dicts.
    The entry named 'historico.xls' or 'historico.csv' is used as the history
    frame and the one named 'matricula.xls' or 'matricula.csv' as the
    register frame; other entries are ignored.

    Steps: both frames are cleaned in place (``clean_history`` /
    ``clean_register``), 'MEDIA_FINAL' is coerced to numeric and history rows
    whose value is not finite are dropped, then history is right-joined onto
    register by 'MATR_ALUNO'. ``fix_situation`` and ``fix_evasion`` are
    applied to the merged frame, which is returned.

    Raises:
        ValueError: if the history or the register frame is missing.
    """
    history = None
    register = None
    for entry in dataframes:
        if entry['name'] in ('historico.xls', 'historico.csv'):
            history = entry['dataframe']
        elif entry['name'] in ('matricula.xls', 'matricula.csv'):
            register = entry['dataframe']

    # Fail with an explicit message instead of an UnboundLocalError when one
    # of the two expected files was not loaded.
    if history is None or register is None:
        raise ValueError(
            'expected both a historico and a matricula spreadsheet '
            '(.xls or .csv)')

    clean_history(history)
    clean_register(register)

    # Non-numeric grades become NaN and are then filtered out.
    history["MEDIA_FINAL"] = pd.to_numeric(
        history["MEDIA_FINAL"], errors='coerce')
    history = history[np.isfinite(history['MEDIA_FINAL'])]

    merged = pd.merge(history, register, how='right', on=['MATR_ALUNO'])
    fix_situation(merged)
    fix_evasion(merged)
    return merged
def clean_history(df):
    """Drop unused columns from the history frame and shorten 'PERIODO'.

    Works in place on ``df`` and returns ``None``. 'PERIODO' is replaced by
    the text that precedes its first letter 'o' (for example
    '1o. Semestre' becomes '1').
    """
    unused_columns = [
        'ID_NOTA',
        'CONCEITO',
        'ID_LOCAL_DISPENSA',
        'SITUACAO_CURRICULO',
        'ID_CURSO_ALUNO',
        'ID_VERSAO_CURSO',
        'ID_CURRIC_ALUNO',
        'ID_ATIV_CURRIC',
        'SITUACAO_ITEM',
        'ID_ESTRUTURA_CUR',
    ]
    df.drop(unused_columns, axis=1, inplace=True)
    df['PERIODO'] = df['PERIODO'].str.split('o').str[0]
def clean_register(df):
    """Split the period columns of the register frame and drop unused ones.

    Works in place on ``df`` and returns ``None``. For each of
    'PERIODO_INGRESSO' and 'PERIODO_EVASAO' the value is split on '/': the
    first part is stored in 'ANO_INGRESSO' / 'ANO_EVASAO' and the second
    part, cut at its first letter 'o', in 'SEMESTRE_INGRESSO' /
    'SEMESTRE_EVASAO' (for example '2010/1o. Semestre' gives '2010' and '1').
    The original period columns and other unused columns are then dropped.
    """
    for suffix in ('INGRESSO', 'EVASAO'):
        parts = df['PERIODO_' + suffix].str.split('/')
        df['ANO_' + suffix] = parts.str[0]
        df['SEMESTRE_' + suffix] = parts.str[1].str.split('o').str[0]

    unused_columns = [
        'ID_PESSOA',
        'NOME_PESSOA',
        'DT_NASCIMENTO',
        'NOME_UNIDADE',
        'COD_CURSO',
        'NUM_VERSAO',
        'PERIODO_INGRESSO',
        'PERIODO_EVASAO',
    ]
    df.drop(unused_columns, axis=1, inplace=True)
def fix_situation(df):
    """Recode the 'SITUACAO' column of ``df`` in place.

    For every entry of ``Situation.SITUATIONS``, rows whose 'SITUACAO' equals
    the entry's second element are overwritten with its first element.
    Values that match no entry are left untouched.
    """
    for situation in Situation.SITUATIONS:
        matches = df.SITUACAO == situation[1]
        df.loc[matches, 'SITUACAO'] = situation[0]
def fix_admission(df):
    """Recode the 'FORMA_INGRESSO' column of ``df`` in place.

    For every entry of ``AdmissionType.ADMISSION_FORM``, rows whose
    'FORMA_INGRESSO' equals the entry's second element are overwritten with
    its first element. Values that match no entry are left untouched.
    """
    for adm in AdmissionType.ADMISSION_FORM:
        matches = df.FORMA_INGRESSO == adm[1]
        df.loc[matches, 'FORMA_INGRESSO'] = adm[0]
def fix_evasion(df):
    """Recode the 'FORMA_EVASAO' column of ``df`` in place.

    Rows whose value is not the second element of any
    ``EvasionForm.EVASION_FORM`` entry are first set to 100
    (NOTE(review): presumably the catch-all "other" code -- confirm against
    ``EvasionForm``). Then, for each entry, rows equal to its second element
    are overwritten with its first element.
    """
    known_forms = [form[1] for form in EvasionForm.EVASION_FORM]
    df.loc[~df.FORMA_EVASAO.isin(known_forms), 'FORMA_EVASAO'] = 100

    for evasion in EvasionForm.EVASION_FORM:
        df.loc[df.FORMA_EVASAO == evasion[1], 'FORMA_EVASAO'] = evasion[0]
src/script/main.py
View file @
2d0f7ecf
...
...
@@ -3,25 +3,38 @@ import time
from
datetime
import
timedelta
from
script.base.dataframe_base
import
load_dataframes
from
script.build_cache
import
build_cache
from
script.analysis.degree_analysis
import
*
def analyze(submission):
    """Run the analysis for one submission and print how long it took.

    Loads the dataframes from ``submission.path()``, passes the result to
    ``build_cache`` and prints the CPU time and the wall-clock time spent,
    both rounded to whole seconds.
    """
    # time.clock() was removed in Python 3.8; process_time() is the
    # supported way to measure CPU time.
    start_time = time.process_time()
    start_time_exec = time.time()

    dataframe = load_dataframes(submission.path())
    build_cache(dataframe)

    cpu_time = timedelta(seconds=round(time.process_time() - start_time))
    run_time = timedelta(seconds=round(time.time() - start_time_exec))
    print("--- Tempo de CPU: {} ---".format(cpu_time))
    print("--- Tempo total: {} ---".format(run_time))
def main():
    """Run the analysis over ``script/base`` under the current directory.

    Loads the dataframes from ``<cwd>/script/base``, passes the result to
    ``build_cache`` and prints the CPU time and the wall-clock time spent,
    both rounded to whole seconds.
    """
    # time.clock() was removed in Python 3.8; process_time() is the
    # supported way to measure CPU time.
    start_time = time.process_time()
    start_time_exec = time.time()

    dataframe = load_dataframes(os.path.join(os.getcwd(), 'script', 'base'))
    build_cache(dataframe)

    cpu_time = timedelta(seconds=round(time.process_time() - start_time))
    run_time = timedelta(seconds=round(time.time() - start_time_exec))
    print("--- Tempo de CPU: {} ---".format(cpu_time))
    print("--- Tempo total: {} ---".format(run_time))
# Run the analysis only when this module is executed as a script.
if __name__ == "__main__":
    main()
src/uploads/admin.py
0 → 100644
View file @
2d0f7ecf
from
django.contrib
import
admin
from
.models
import
Submission
from
script.main
import
analyze
from
traceback
import
print_exc
def make_analysis(modeladmin, request, queryset):
    """Admin action: analyze each selected submission and mark it processed.

    Each submission in ``queryset`` is passed to ``analyze``; on success its
    ``processed`` flag is set and it is saved. A failure in one submission
    is reported (message plus traceback) and does not stop the remaining
    ones. ``modeladmin`` and ``request`` are not used.
    """
    for submission in queryset:
        try:
            print('analisando: ' + str(submission))
            analyze(submission)
            submission.processed = True
            print('salvando')
            submission.save()
            print('OK')
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        except Exception:
            print('Análise falhou')
            print_exc()
class SubmissionAdmin(admin.ModelAdmin):
    """Admin configuration for Submission.

    Lists author, course, processed, last and timestamp, offers date
    drill-down on 'timestamp', and exposes ``make_analysis`` as an action.
    """

    actions = [make_analysis]
    list_display = ('author', 'course', 'processed', 'last', 'timestamp')
    date_hierarchy = 'timestamp'


# Make Submission manageable through the admin site with the options above.
admin.site.register(Submission, SubmissionAdmin)
\ No newline at end of file
src/uploads/core/views.py
View file @
2d0f7ecf
from
django.shortcuts
import
render
,
redirect
from
django.conf
import
settings
from
django.core.files.storage
import
FileSystemStorage
from
uploads.models
import
Submission
from
uploads.core.models
import
Document
from
uploads.core.forms
import
DocumentForm
from
script.main
import
main
as
analysis
import
os
def home(request):
    """Render 'core/home.html' with all Document objects as ``documents``."""
    context = {'documents': Document.objects.all()}
    return render(request, 'core/home.html', context)
def simple_upload(request):
    """Store an uploaded historico/matricula pair as a new Submission.

    On a POST that carries both the 'historico' and the 'matricula' files, a
    Submission is created for ``request.user``, both files are saved under
    ``submission.path()`` and the submission's file fields are pointed at
    the saved files. In every case 'core/simple_upload.html' is rendered.
    """
    # .get() avoids the error that request.FILES[...] raises when a POST
    # arrives without one of the two files.
    historico = request.FILES.get('historico')
    matricula = request.FILES.get('matricula')

    if request.method == 'POST' and historico and matricula:
        submission = Submission.objects.create(author=request.user)
        submission.course = '21A'

        fs = FileSystemStorage(location=submission.path())
        # save() returns the name actually used, which can differ from the
        # requested one if a file with that name already exists.
        historico_name = fs.save('historico.xls', historico)
        matricula_name = fs.save('matricula.xls', matricula)

        submission.historico.name = os.path.join(
            submission.path(), historico_name)
        submission.matricula.name = os.path.join(
            submission.path(), matricula_name)
        submission.save()

    return render(request, 'core/simple_upload.html')
def model_form_upload(request):
    """Show the Document upload form and save it when submitted.

    On POST the form is bound to the submitted data and files; if it is
    valid it is saved and the user is redirected to 'home'. Otherwise (GET,
    or an invalid POST) 'core/model_form_upload.html' is rendered with the
    form, so validation errors can be displayed.
    """
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            form.save()
            return redirect('home')
    else:
        form = DocumentForm()

    return render(request, 'core/model_form_upload.html', {'form': form})
src/uploads/forms.py
0 → 100644
View file @
2d0f7ecf
from
django
import
forms
from
.models
import
Submission
class SubmissionForm(forms.ModelForm):
    """ModelForm for Submission exposing all of its editable fields."""

    class Meta:
        model = Submission
        # Django 1.8+ refuses a ModelForm that declares neither 'fields' nor
        # 'exclude'; '__all__' keeps the "every field" behaviour that the
        # omission used to mean.
        fields = '__all__'
\ No newline at end of file
src/uploads/models.py
0 → 100644
View file @
2d0f7ecf
from
django.db
import
models
from
django.contrib.auth.models
import
User
from
django.utils
import
timezone
from
os
import
path
from
django.conf
import
settings
def get_path(instance, filename):
    """Return the upload path '<course>/<id>/<filename>' for ``instance``.

    Used as the ``upload_to`` callable of the Submission file fields.
    """
    segments = (instance.course, instance.id, filename)
    return '{}/{}/{}'.format(*segments)
class Submission(models.Model):
    """A pair of uploaded spreadsheets (historico and matricula) for a course."""

    # on_delete is mandatory from Django 2.0 on; CASCADE is what older
    # versions applied implicitly, so behaviour is unchanged.
    author = models.ForeignKey(User, on_delete=models.CASCADE)
    historico = models.FileField(upload_to=get_path)
    matricula = models.FileField(upload_to=get_path)
    course = models.CharField(max_length=10, default='21A')

    timestamp = models.DateTimeField(default=timezone.now)
    last = models.BooleanField(default=True)
    processed = models.BooleanField(default=False)

    def path(self):
        """Return the directory of this submission: MEDIA_ROOT/<course>/<id>."""
        # Inside the method body, 'path' resolves to the module-level
        # 'from os import path' import, not to this method: class-level
        # names are not visible from function bodies.
        return path.join(settings.MEDIA_ROOT, self.course, str(self.id))

    def __str__(self):
        """Return a description with the author's first name, course and time."""
        return 'Submission (from: {}, to: {}, on: {})'.format(
            self.author.first_name, self.course, self.timestamp)
src/uploads/settings.py
View file @
2d0f7ecf
...
...
@@ -38,7 +38,8 @@ INSTALLED_APPS = [
'django.contrib.messages'
,
'django.contrib.staticfiles'
,
'uploads.core'
'uploads.core'
,
'uploads'
]
MIDDLEWARE_CLASSES
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment