From 25502f9cded48dc8d78aa976c2a2b10b664fe5a7 Mon Sep 17 00:00:00 2001
From: Cristian Weiland <cw14@inf.ufpr.br>
Date: Tue, 14 Feb 2017 11:59:12 -0200
Subject: [PATCH] Improve directory tree, fix README and scripts and improve
 documentation

Signed-off-by: Cristian Weiland <cw14@inf.ufpr.br>
---
 .gitignore                                    |   4 +-
 README                                        |  42 ++++--
 insert_register_payment.sh                    |  11 --
 resumo_cadastro.sh                            |  31 -----
 scripts/curl/get.sh                           |  11 ++
 scripts/{ => curl}/rename_index.sh            |   2 +
 scripts/expenses/add_expenses.sh              |  58 --------
 scripts/expenses/create_expenses_config.py    |   8 +-
 scripts/expenses/insert_expenses.sh           |  13 +-
 scripts/expenses/process_expenses.sh          | 130 ++++++++----------
 scripts/expenses/resume_expenses.sh           |  16 ++-
 scripts/expenses/unzip.sh                     |  26 +++-
 scripts/expenses/unzipCaller.sh               | 122 ++++++++--------
 .../workers/add_registers.sh                  |   5 +
 .../workers/config.json.example               |   0
 .../workers/create_config.py                  |  16 ++-
 scripts/workers/insert_register_payment.sh    |  20 +++
 .../workers/logstash_config.example           |   0
 .../workers/logstash_config_2013.example      |   0
 .../workers/merge_files_es.py                 |  39 +++---
 scripts/workers/resume_register.sh            |  39 ++++++
 21 files changed, 306 insertions(+), 287 deletions(-)
 delete mode 100755 insert_register_payment.sh
 delete mode 100755 resumo_cadastro.sh
 create mode 100755 scripts/curl/get.sh
 rename scripts/{ => curl}/rename_index.sh (77%)
 delete mode 100755 scripts/expenses/add_expenses.sh
 rename add_registers.sh => scripts/workers/add_registers.sh (87%)
 rename config.json.example => scripts/workers/config.json.example (100%)
 rename create_config.py => scripts/workers/create_config.py (51%)
 create mode 100755 scripts/workers/insert_register_payment.sh
 rename logstash_config.example => scripts/workers/logstash_config.example (100%)
 rename logstash_config_2013.example => scripts/workers/logstash_config_2013.example (100%)
 rename merge_files_es.py => scripts/workers/merge_files_es.py (67%)
 create mode 100755 scripts/workers/resume_register.sh

diff --git a/.gitignore b/.gitignore
index 074d019..4f351d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,2 @@
-Dados_Servidores
-logstash_configs
+data
 configs
-Favorecidos
diff --git a/README b/README
index ee645d2..29b3761 100644
--- a/README
+++ b/README
@@ -1,17 +1,33 @@
 Projeto utilizando ElasticSearch + Kibana na tentativa de auxiliar a transparÃªncia da Universidade Federal do ParanÃ¡ (UFPR).
 
-Ãrvore de DiretÃ³rios:
+O projeto contém dados referentes a:
+- Servidores da UFPR
+- Gastos Diretos da UFPR
 
+Árvore de Diretórios:
 .
-â”œâ”€â”€ Dados_Servidores - ContÃ©m uma diretÃ³rio para cada mÃªs.
-â”‚Â Â  â”œâ”€â”€ 2016-12 - DiretÃ³rio que contÃ©m os CSVs referentes a Dezembro de 2016.
-â”‚Â Â  â”œâ”€â”€ 2016-11 - DiretÃ³rio que contÃ©m os CSVs referentes a Novembro de 2016.
-â”‚Â Â  â”œâ”€â”€ ...
-â”‚Â Â  â””â”€â”€ Processados - DiretÃ³rio que contÃ©m CSVs resultantes da uniÃ£o de CSVs do portal transparÃªncia.
-â”œâ”€â”€ config.json.example - Exemplo de arquivo do diretÃ³rio 'configs'.
-â”œâ”€â”€ logstash_configs - DiretÃ³rio com arquivos de configuraÃ§Ã£o do Logstash para inserÃ§Ã£o de dados no Kibana/ElasticSearch.
-â”œâ”€â”€ logstash_config.example - Exemplo de arquivo do diretÃ³rio 'logstash_configs'. Ã‰ usado pelo script 'create_config.py' para gerar o arquivo de configuraÃ§Ã£o do logstash.
-â”œâ”€â”€ create_config.py - Script que cria arquivos de configuraÃ§Ã£o que ficam contidos nos diretÃ³rios 'configs' e 'logstash_configs'.
-â”œâ”€â”€ resumo_cadastro.sh - Script que filtra dados do CSV de Cadastro do Portal TransparÃªncia, selecionando dados das Universidades interessantes para este projeto.
-â”œâ”€â”€ merge_files_es.py - Script que usa um arquivo de configuraÃ§Ã£o do diretÃ³rio 'configs' para unir dois CSVs (Cadastro e RemuneraÃ§Ã£o) do portal transparÃªncia em um sÃ³ e salvÃ¡-lo no diretÃ³rio Dados_Servidores/Processados
-â””â”€â”€ insert_data.sh - Script que gerencia os outros scripts.
+├── data - Contém os dados do projeto, separados por tipo (cada tipo contém um diretório para cada mês).
+│   ├── workers - Contém dados de Servidores do Portal Transparência (obtidos em http://www.portaldatransparencia.gov.br/downloads/servidores.asp#exercicios2016).
+│   │   ├── 2016-12 - Diretório que contém os CSVs referentes a Dezembro de 2016.
+│   │   ├── 2016-11 - Diretório que contém os CSVs referentes a Novembro de 2016.
+│   │   ├── ...
+│   │   └── processed - Diretório que contém CSVs resultantes da união de CSVs do portal transparência.
+│   │
+│   └── expenses - Contém dados de Gastos do Portal Transparência (obtidos em http://transparencia.gov.br/downloads/mensal.asp?c=GastosDiretos).
+│       ├── 2016-12 - Diretório que contém os CSVs referentes a Dezembro de 2016.
+│       ├── 2016-11 - Diretório que contém os CSVs referentes a Novembro de 2016.
+│       ├── ...
+│       └── processed - Diretório que contém CSVs filtrados a partir dos CSVs do portal transparência (apenas dados da UFPR).
+│
+├── configs - Contém arquivos de configuração gerados por scripts.
+│   ├── workers - Contém arquivos de configuração de Servidores.
+│   │   ├── json - Contém arquivos de configuração no formato JSON, usados pelo script merge_files_es.py.
+│   │   └── logstash - Contém arquivos de configuração para o logstash.
+│   │
+│   └── expenses - Contém arquivos de configuração de Gastos.
+│       └── logstash - Contém arquivos de configuração para o logstash.
+│
+└── scripts - Contém scripts que auxiliam no projeto.
+    ├── workers - Contém scripts para gerenciar dados de Servidores.
+    ├── expenses - Contém scripts para gerenciar dados de Gastos.
+    └── curl - Contém scripts que auxiliam na utilização do curl.
diff --git a/insert_register_payment.sh b/insert_register_payment.sh
deleted file mode 100755
index 342fe79..0000000
--- a/insert_register_payment.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-if [ "$#" -ne 5 ]; then
-	echo "Usage: $0 <year> <month> <day> <user> <password>"
-	echo "Example: $0 2016 12 01"
-	exit
-fi
-
-./create_config.py $1 $2 $3 $4 $5
-./merge_files_es.py configs/config-${1}-${2}.json
-logstash -f logstash_configs/config-${1}-${2} < ~/transparencia/Dados_Servidores/Processados/${1}${2}.csv
diff --git a/resumo_cadastro.sh b/resumo_cadastro.sh
deleted file mode 100755
index 532d791..0000000
--- a/resumo_cadastro.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-# Setembro 2016
-path=$1
-date=$2
-
-if [ "$#" -ne 2 ]; then
-	echo "Usage: $0 <path> <date>"
-	exit
-fi
-
-echo "Processing data with args = ${path} and ${date}"
-
-input="${path}${date}_Cadastro.csv"
-output="${path}${date}_Cadastro_Ufpr_Unique.csv"
-
-columns="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42"
-
-# About this command:
-# - Sed wraps fields in double quotes.
-# - Grep removes everyone that does not work in UFPR.
-# - Cut selects the important columns.
-# - Uniq removes repeated values.
-# - Tr removes null characters (ctrl + @).
-
-# Get data from all universities.
-#cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL	NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output
-
-# Get data only from UFPR, and wraps it in double quotes (").
-# cat $input | egrep --binary-files=text "(UNIVERSIDADE FEDERAL DO PARANA|Id_SERVIDOR_PORTAL	NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output
-
-# Same as above, but does not wrap data in double quotes (").
-cat $input | egrep --binary-files=text "(UNIVERSIDADE FEDERAL DO PARANA|Id_SERVIDOR_PORTAL	NOME)" | tr -d '\000' > $output
diff --git a/scripts/curl/get.sh b/scripts/curl/get.sh
new file mode 100755
index 0000000..37afe71
--- /dev/null
+++ b/scripts/curl/get.sh
@@ -0,0 +1,11 @@
+# Input: The name of a file containing a curl query.
+# Output: The curl command for that query, printed to stdout (this script only echoes the command; it does not run curl).
+# NOTE(review): the credentials below are hard-coded; consider taking <user:password> as an argument, as rename_index.sh does.
+if [ "$#" -ne 1 ]; then
+	echo "Usage: $0 <query-file>"
+	exit
+fi
+
+query=$(cat "$1")
+
+echo curl -u cw14:123mudar -XGET 'node1.c3sl.ufpr.br:9200/ufpr-servidores-*/_search?pretty' -d \'"$query"\'
diff --git a/scripts/rename_index.sh b/scripts/curl/rename_index.sh
similarity index 77%
rename from scripts/rename_index.sh
rename to scripts/curl/rename_index.sh
index 5fcdbc2..3afc4fb 100755
--- a/scripts/rename_index.sh
+++ b/scripts/curl/rename_index.sh
@@ -1,3 +1,5 @@
+# Input: Kibana/ElasticSearch's user and password and two index names: the script will rename the index with the first name to the second one.
+
 if [ "$#" -ne 3 ]; then
     echo "Usage: $0 <user:password> <old-index> <new-index>"
     echo "Example: $0 myuser:mypass ufpr-csv-2016-11 ufpr-servidores-2016-11"
diff --git a/scripts/expenses/add_expenses.sh b/scripts/expenses/add_expenses.sh
deleted file mode 100755
index 2435a53..0000000
--- a/scripts/expenses/add_expenses.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-if [ "$#" -ne 2 ]; then
-	echo "Usage: $0 <user> <password>"
-	echo "Example: $0 myuser mypass"
-	exit
-fi
-
-./insert_expenses.sh 2016 11 30 $1 $2
-./insert_expenses.sh 2016 10 31 $1 $2
-./insert_expenses.sh 2016 09 30 $1 $2
-./insert_expenses.sh 2016 08 31 $1 $2
-./insert_expenses.sh 2016 07 31 $1 $2
-./insert_expenses.sh 2016 06 30 $1 $2
-./insert_expenses.sh 2016 05 31 $1 $2
-./insert_expenses.sh 2016 04 30 $1 $2
-./insert_expenses.sh 2016 03 31 $1 $2
-./insert_expenses.sh 2016 02 29 $1 $2
-./insert_expenses.sh 2016 01 31 $1 $2
-
-./insert_expenses.sh 2015 12 31 $1 $2
-./insert_expenses.sh 2015 11 30 $1 $2
-./insert_expenses.sh 2015 10 31 $1 $2
-./insert_expenses.sh 2015 09 30 $1 $2
-./insert_expenses.sh 2015 08 31 $1 $2
-./insert_expenses.sh 2015 07 31 $1 $2
-./insert_expenses.sh 2015 06 30 $1 $2
-./insert_expenses.sh 2015 05 31 $1 $2
-./insert_expenses.sh 2015 04 30 $1 $2
-./insert_expenses.sh 2015 03 31 $1 $2
-./insert_expenses.sh 2015 02 28 $1 $2
-./insert_expenses.sh 2015 01 31 $1 $2
-
-./insert_expenses.sh 2014 12 31 $1 $2
-./insert_expenses.sh 2014 11 30 $1 $2
-./insert_expenses.sh 2014 10 31 $1 $2
-./insert_expenses.sh 2014 09 30 $1 $2
-./insert_expenses.sh 2014 08 31 $1 $2
-./insert_expenses.sh 2014 07 31 $1 $2
-./insert_expenses.sh 2014 06 30 $1 $2
-./insert_expenses.sh 2014 05 31 $1 $2
-./insert_expenses.sh 2014 04 30 $1 $2
-./insert_expenses.sh 2014 03 31 $1 $2
-./insert_expenses.sh 2014 02 28 $1 $2
-./insert_expenses.sh 2014 01 31 $1 $2
-
-./insert_expenses.sh 2013 12 31 $1 $2
-./insert_expenses.sh 2013 11 30 $1 $2
-./insert_expenses.sh 2013 10 31 $1 $2
-./insert_expenses.sh 2013 09 30 $1 $2
-./insert_expenses.sh 2013 08 31 $1 $2
-./insert_expenses.sh 2013 07 31 $1 $2
-./insert_expenses.sh 2013 06 30 $1 $2
-./insert_expenses.sh 2013 05 31 $1 $2
-./insert_expenses.sh 2013 04 30 $1 $2
-./insert_expenses.sh 2013 03 31 $1 $2
-./insert_expenses.sh 2013 02 28 $1 $2
-./insert_expenses.sh 2013 01 31 $1 $2
diff --git a/scripts/expenses/create_expenses_config.py b/scripts/expenses/create_expenses_config.py
index 3f6ac7f..a40f907 100755
--- a/scripts/expenses/create_expenses_config.py
+++ b/scripts/expenses/create_expenses_config.py
@@ -1,5 +1,11 @@
 #!/usr/bin/env python3
 
+# WARNING: This script should not be called directly. Look at 'insert_expenses.sh' before calling this script.
+
+# This script is used to create a Logstash Config file.
+
+# Input: year, month and day, ElasticSearch's username and password.
+
 import sys, csv, json, math, subprocess
 from pathlib import Path
 from subprocess import call
@@ -16,5 +22,5 @@ output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.ar
 					 , "user": sys.argv[4]
 					 , "password": sys.argv[5] }
 
-with open('logstash_configs/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
+with open('../../configs/expenses/logstash/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
 	outfile.write(output)
diff --git a/scripts/expenses/insert_expenses.sh b/scripts/expenses/insert_expenses.sh
index 900c967..862b9ae 100755
--- a/scripts/expenses/insert_expenses.sh
+++ b/scripts/expenses/insert_expenses.sh
@@ -1,5 +1,15 @@
 #!/bin/bash
 
+# This script is the one that should be called to insert data from one month.
+
+# Input: Year, month and day from the data to be inserted, ElasticSearch's user and password.
+# Example: ./insert_expenses.sh 2016 10 31 myuser mypass
+# It has 3 steps:
+#   1- Generate logstash config file via create_expenses_config.py.
+#   2- Generate CSV with only UFPR data via resume_expenses.sh, which is stored in transparencia/data/expenses/processed/year-month.csv
+#   3- Insert data in ElasticSearch via logstash, using the config file created and the CSV created by resume_expenses.sh.
+# Output: The commands/scripts outputs.
+
 if [ "$#" -ne 5 ]; then
 	echo "Usage: $0 <year> <month> <day> <user> <password>"
 	echo "Example: $0 2016 12 31 myuser mypass"
@@ -7,4 +17,5 @@ if [ "$#" -ne 5 ]; then
 fi
 
 ./create_expenses_config.py $1 $2 $3 $4 $5
-logstash -f logstash_configs/config-${1}-${2} < ~/transparencia/Favorecidos/Processados/${1}${2}.csv
+./resume_expenses.sh ../../data/expenses/ ${1}-${2}
+logstash -f ../../configs/expenses/logstash/config-${1}-${2} < ../../data/expenses/processed/${1}${2}.csv
diff --git a/scripts/expenses/process_expenses.sh b/scripts/expenses/process_expenses.sh
index 1e3ca45..b158da8 100755
--- a/scripts/expenses/process_expenses.sh
+++ b/scripts/expenses/process_expenses.sh
@@ -1,76 +1,64 @@
-./resumo_gastos.sh ../../Favorecidos/ 2016-11
-./resumo_gastos.sh ../../Favorecidos/ 2016-10
-./resumo_gastos.sh ../../Favorecidos/ 2016-09
-./resumo_gastos.sh ../../Favorecidos/ 2016-08
-./resumo_gastos.sh ../../Favorecidos/ 2016-07
-./resumo_gastos.sh ../../Favorecidos/ 2016-06
-./resumo_gastos.sh ../../Favorecidos/ 2016-05
-./resumo_gastos.sh ../../Favorecidos/ 2016-04
-./resumo_gastos.sh ../../Favorecidos/ 2016-03
-./resumo_gastos.sh ../../Favorecidos/ 2016-02
-./resumo_gastos.sh ../../Favorecidos/ 2016-01
+#!/bin/bash
 
-./resumo_gastos.sh ../../Favorecidos/ 2015-12
-./resumo_gastos.sh ../../Favorecidos/ 2015-11
-./resumo_gastos.sh ../../Favorecidos/ 2015-10
-./resumo_gastos.sh ../../Favorecidos/ 2015-09
-./resumo_gastos.sh ../../Favorecidos/ 2015-08
-./resumo_gastos.sh ../../Favorecidos/ 2015-07
-./resumo_gastos.sh ../../Favorecidos/ 2015-06
-./resumo_gastos.sh ../../Favorecidos/ 2015-05
-./resumo_gastos.sh ../../Favorecidos/ 2015-04
-./resumo_gastos.sh ../../Favorecidos/ 2015-03
-./resumo_gastos.sh ../../Favorecidos/ 2015-02
-./resumo_gastos.sh ../../Favorecidos/ 2015-01
+# WARNING: This script should not be called unless the database is erased. It's still here for 2 reasons:
+# 1- Log: To know what months of data have been inserted.
+# 2- Example: To give example of how to call script insert_expenses.sh.
 
-./resumo_gastos.sh ../../Favorecidos/ 2014-12
-./resumo_gastos.sh ../../Favorecidos/ 2014-11
-./resumo_gastos.sh ../../Favorecidos/ 2014-10
-./resumo_gastos.sh ../../Favorecidos/ 2014-09
-./resumo_gastos.sh ../../Favorecidos/ 2014-08
-./resumo_gastos.sh ../../Favorecidos/ 2014-07
-./resumo_gastos.sh ../../Favorecidos/ 2014-06
-./resumo_gastos.sh ../../Favorecidos/ 2014-05
-./resumo_gastos.sh ../../Favorecidos/ 2014-04
-./resumo_gastos.sh ../../Favorecidos/ 2014-03
-./resumo_gastos.sh ../../Favorecidos/ 2014-02
-./resumo_gastos.sh ../../Favorecidos/ 2014-01
+# This script only calls insert_expenses for all years and months.
 
-./resumo_gastos.sh ../../Favorecidos/ 2013-12
-./resumo_gastos.sh ../../Favorecidos/ 2013-11
-./resumo_gastos.sh ../../Favorecidos/ 2013-10
-./resumo_gastos.sh ../../Favorecidos/ 2013-09
-./resumo_gastos.sh ../../Favorecidos/ 2013-08
-./resumo_gastos.sh ../../Favorecidos/ 2013-07
-./resumo_gastos.sh ../../Favorecidos/ 2013-06
-./resumo_gastos.sh ../../Favorecidos/ 2013-05
-./resumo_gastos.sh ../../Favorecidos/ 2013-04
-./resumo_gastos.sh ../../Favorecidos/ 2013-03
-./resumo_gastos.sh ../../Favorecidos/ 2013-02
-./resumo_gastos.sh ../../Favorecidos/ 2013-01
+if [ "$#" -ne 2 ]; then
+	echo "Usage: $0 <user> <password>"
+	echo "Example: $0 myuser mypass"
+	exit
+fi
 
-./resumo_gastos.sh ../../Favorecidos/ 2012-12
-./resumo_gastos.sh ../../Favorecidos/ 2012-11
-./resumo_gastos.sh ../../Favorecidos/ 2012-10
-./resumo_gastos.sh ../../Favorecidos/ 2012-09
-./resumo_gastos.sh ../../Favorecidos/ 2012-08
-./resumo_gastos.sh ../../Favorecidos/ 2012-07
-./resumo_gastos.sh ../../Favorecidos/ 2012-06
-./resumo_gastos.sh ../../Favorecidos/ 2012-05
-./resumo_gastos.sh ../../Favorecidos/ 2012-04
-./resumo_gastos.sh ../../Favorecidos/ 2012-03
-./resumo_gastos.sh ../../Favorecidos/ 2012-02
-./resumo_gastos.sh ../../Favorecidos/ 2012-01
+./insert_expenses.sh 2016 11 30 $1 $2
+./insert_expenses.sh 2016 10 31 $1 $2
+./insert_expenses.sh 2016 09 30 $1 $2
+./insert_expenses.sh 2016 08 31 $1 $2
+./insert_expenses.sh 2016 07 31 $1 $2
+./insert_expenses.sh 2016 06 30 $1 $2
+./insert_expenses.sh 2016 05 31 $1 $2
+./insert_expenses.sh 2016 04 30 $1 $2
+./insert_expenses.sh 2016 03 31 $1 $2
+./insert_expenses.sh 2016 02 29 $1 $2
+./insert_expenses.sh 2016 01 31 $1 $2
 
-./resumo_gastos.sh ../../Favorecidos/ 2011-12
-./resumo_gastos.sh ../../Favorecidos/ 2011-11
-./resumo_gastos.sh ../../Favorecidos/ 2011-10
-./resumo_gastos.sh ../../Favorecidos/ 2011-09
-./resumo_gastos.sh ../../Favorecidos/ 2011-08
-./resumo_gastos.sh ../../Favorecidos/ 2011-07
-./resumo_gastos.sh ../../Favorecidos/ 2011-06
-./resumo_gastos.sh ../../Favorecidos/ 2011-05
-./resumo_gastos.sh ../../Favorecidos/ 2011-04
-./resumo_gastos.sh ../../Favorecidos/ 2011-03
-./resumo_gastos.sh ../../Favorecidos/ 2011-02
-./resumo_gastos.sh ../../Favorecidos/ 2011-01
+./insert_expenses.sh 2015 12 31 $1 $2
+./insert_expenses.sh 2015 11 30 $1 $2
+./insert_expenses.sh 2015 10 31 $1 $2
+./insert_expenses.sh 2015 09 30 $1 $2
+./insert_expenses.sh 2015 08 31 $1 $2
+./insert_expenses.sh 2015 07 31 $1 $2
+./insert_expenses.sh 2015 06 30 $1 $2
+./insert_expenses.sh 2015 05 31 $1 $2
+./insert_expenses.sh 2015 04 30 $1 $2
+./insert_expenses.sh 2015 03 31 $1 $2
+./insert_expenses.sh 2015 02 28 $1 $2
+./insert_expenses.sh 2015 01 31 $1 $2
+
+./insert_expenses.sh 2014 12 31 $1 $2
+./insert_expenses.sh 2014 11 30 $1 $2
+./insert_expenses.sh 2014 10 31 $1 $2
+./insert_expenses.sh 2014 09 30 $1 $2
+./insert_expenses.sh 2014 08 31 $1 $2
+./insert_expenses.sh 2014 07 31 $1 $2
+./insert_expenses.sh 2014 06 30 $1 $2
+./insert_expenses.sh 2014 05 31 $1 $2
+./insert_expenses.sh 2014 04 30 $1 $2
+./insert_expenses.sh 2014 03 31 $1 $2
+./insert_expenses.sh 2014 02 28 $1 $2
+./insert_expenses.sh 2014 01 31 $1 $2
+
+./insert_expenses.sh 2013 12 31 $1 $2
+./insert_expenses.sh 2013 11 30 $1 $2
+./insert_expenses.sh 2013 10 31 $1 $2
+./insert_expenses.sh 2013 09 30 $1 $2
+./insert_expenses.sh 2013 08 31 $1 $2
+./insert_expenses.sh 2013 07 31 $1 $2
+./insert_expenses.sh 2013 06 30 $1 $2
+./insert_expenses.sh 2013 05 31 $1 $2
+./insert_expenses.sh 2013 04 30 $1 $2
+./insert_expenses.sh 2013 03 31 $1 $2
+./insert_expenses.sh 2013 02 28 $1 $2
+./insert_expenses.sh 2013 01 31 $1 $2
diff --git a/scripts/expenses/resume_expenses.sh b/scripts/expenses/resume_expenses.sh
index 319a7e0..fa89c3e 100755
--- a/scripts/expenses/resume_expenses.sh
+++ b/scripts/expenses/resume_expenses.sh
@@ -1,5 +1,13 @@
-# Setembro 2016
-# Path example: ../../Favorecidos/
+#!/bin/bash
+
+# WARNING: This script should not be called directly. Look at 'insert_expenses.sh' before calling this script.
+
+# Input: First parameter is the path to data files and the second one is the date in the name of the files. Data files can be found in: http://transparencia.gov.br/downloads/mensal.asp?c=GastosDiretos
+# Example: ./resume_expenses.sh ../../data/expenses/ 2016-11
+
+# Output: A CSV file in folder processed, filtering the data to get only relevant data (in our case, from UFPR).
+
+# Path example: ../../data/expenses/
 path=$1
 # Date example: 2016-11
 date=$2
@@ -13,8 +21,8 @@ fi
 
 echo "Processing data with args = $path and ${date}"
 
-input="${path}${date}/${dateWithoutHyphen}_GastosDiretos.csv"
-output="${path}/Processados/${dateWithoutHyphen}.csv"
+input="${path}${date}/AplicacoesDiretasDespesaOrgao.csv"
+output="${path}processed/${dateWithoutHyphen}.csv"
 
 # About this command:
 # - Grep removes everyone that does not work in UFPR.
diff --git a/scripts/expenses/unzip.sh b/scripts/expenses/unzip.sh
index 922585b..eb5696e 100755
--- a/scripts/expenses/unzip.sh
+++ b/scripts/expenses/unzip.sh
@@ -1,9 +1,21 @@
-echo Running with args $1 and $2
+#!/bin/bash
 
-path="../../Favorecidos/"
+# This script gets a zip file in ~/Downloads, moves it to a folder in path (probably transparencia/data/expenses), unzips it and removes the zip file.
 
-mkdir ${path}$1
-mv ~/Downloads/$2_GastosDiretos.zip ${path}$1
-unzip ${path}$1/$2_GastosDiretos.zip
-mv ${path}$2_GastosDiretos.csv ${path}$1
-rm ${path}$1/$2_GastosDiretos.zip
+# Input: Date (year and month, separated by hyphen). The zip file name is derived from it by dropping the hyphen.
+# Ex: ./unzip.sh 2015-12
+
+if [ "$#" -ne 1 ]; then
+	echo "Usage: $0 <date>"
+	exit
+fi
+
+date=$1
+path="../../data/expenses/"
+dateWithoutHyphen=${date//-} # e.g. 2015-12 -> 201512; always expand as ${dateWithoutHyphen} so the following "_" is not read as part of the name
+
+mkdir -p "${path}${date}"
+mv ~/Downloads/"${dateWithoutHyphen}_GastosDiretos.zip" "${path}${date}"
+unzip "${path}${date}/${dateWithoutHyphen}_GastosDiretos.zip"
+mv "${path}${dateWithoutHyphen}_GastosDiretos.csv" "${path}${date}"
+rm "${path}${date}/${dateWithoutHyphen}_GastosDiretos.zip"
diff --git a/scripts/expenses/unzipCaller.sh b/scripts/expenses/unzipCaller.sh
index 4d49afe..ec6088a 100755
--- a/scripts/expenses/unzipCaller.sh
+++ b/scripts/expenses/unzipCaller.sh
@@ -1,66 +1,68 @@
 #!/bin/bash
 
-./unzip.sh 2015-12 201512
-./unzip.sh 2015-11 201511
-./unzip.sh 2015-10 201510
-./unzip.sh 2015-09 201509
-./unzip.sh 2015-08 201508
-./unzip.sh 2015-07 201507
-./unzip.sh 2015-06 201506
-./unzip.sh 2015-05 201505
-./unzip.sh 2015-04 201504
-./unzip.sh 2015-03 201503
-./unzip.sh 2015-02 201502
-./unzip.sh 2015-01 201501
+# This script only calls unzip.sh for all months.
 
-./unzip.sh 2014-12 201412
-./unzip.sh 2014-11 201411
-./unzip.sh 2014-10 201410
-./unzip.sh 2014-09 201409
-./unzip.sh 2014-08 201408
-./unzip.sh 2014-07 201407
-./unzip.sh 2014-06 201406
-./unzip.sh 2014-05 201405
-./unzip.sh 2014-04 201404
-./unzip.sh 2014-03 201403
-./unzip.sh 2014-02 201402
-./unzip.sh 2014-01 201401
+./unzip.sh 2015-12
+./unzip.sh 2015-11
+./unzip.sh 2015-10
+./unzip.sh 2015-09
+./unzip.sh 2015-08
+./unzip.sh 2015-07
+./unzip.sh 2015-06
+./unzip.sh 2015-05
+./unzip.sh 2015-04
+./unzip.sh 2015-03
+./unzip.sh 2015-02
+./unzip.sh 2015-01
 
-./unzip.sh 2013-12 201312
-./unzip.sh 2013-11 201311
-./unzip.sh 2013-10 201310
-./unzip.sh 2013-09 201309
-./unzip.sh 2013-08 201308
-./unzip.sh 2013-07 201307
-./unzip.sh 2013-06 201306
-./unzip.sh 2013-05 201305
-./unzip.sh 2013-04 201304
-./unzip.sh 2013-03 201303
-./unzip.sh 2013-02 201302
-./unzip.sh 2013-01 201301
+./unzip.sh 2014-12
+./unzip.sh 2014-11
+./unzip.sh 2014-10
+./unzip.sh 2014-09
+./unzip.sh 2014-08
+./unzip.sh 2014-07
+./unzip.sh 2014-06
+./unzip.sh 2014-05
+./unzip.sh 2014-04
+./unzip.sh 2014-03
+./unzip.sh 2014-02
+./unzip.sh 2014-01
 
-./unzip.sh 2012-12 201212
-./unzip.sh 2012-11 201211
-./unzip.sh 2012-10 201210
-./unzip.sh 2012-09 201209
-./unzip.sh 2012-08 201208
-./unzip.sh 2012-07 201207
-./unzip.sh 2012-06 201206
-./unzip.sh 2012-05 201205
-./unzip.sh 2012-04 201204
-./unzip.sh 2012-03 201203
-./unzip.sh 2012-02 201202
-./unzip.sh 2012-01 201201
+./unzip.sh 2013-12
+./unzip.sh 2013-11
+./unzip.sh 2013-10
+./unzip.sh 2013-09
+./unzip.sh 2013-08
+./unzip.sh 2013-07
+./unzip.sh 2013-06
+./unzip.sh 2013-05
+./unzip.sh 2013-04
+./unzip.sh 2013-03
+./unzip.sh 2013-02
+./unzip.sh 2013-01
 
-./unzip.sh 2011-12 201112
-./unzip.sh 2011-11 201111
-./unzip.sh 2011-10 201110
-./unzip.sh 2011-09 201109
-./unzip.sh 2011-08 201108
-./unzip.sh 2011-07 201107
-./unzip.sh 2011-06 201106
-./unzip.sh 2011-05 201105
-./unzip.sh 2011-04 201104
-./unzip.sh 2011-03 201103
-./unzip.sh 2011-02 201102
-./unzip.sh 2011-01 201101
+./unzip.sh 2012-12
+./unzip.sh 2012-11
+./unzip.sh 2012-10
+./unzip.sh 2012-09
+./unzip.sh 2012-08
+./unzip.sh 2012-07
+./unzip.sh 2012-06
+./unzip.sh 2012-05
+./unzip.sh 2012-04
+./unzip.sh 2012-03
+./unzip.sh 2012-02
+./unzip.sh 2012-01
+
+./unzip.sh 2011-12
+./unzip.sh 2011-11
+./unzip.sh 2011-10
+./unzip.sh 2011-09
+./unzip.sh 2011-08
+./unzip.sh 2011-07
+./unzip.sh 2011-06
+./unzip.sh 2011-05
+./unzip.sh 2011-04
+./unzip.sh 2011-03
+./unzip.sh 2011-02
+./unzip.sh 2011-01
diff --git a/add_registers.sh b/scripts/workers/add_registers.sh
similarity index 87%
rename from add_registers.sh
rename to scripts/workers/add_registers.sh
index 57be767..90a236e 100755
--- a/add_registers.sh
+++ b/scripts/workers/add_registers.sh
@@ -1,5 +1,10 @@
 #!/bin/bash
 
+# WARNING: This was used to insert data, and should not be executed again (unless someone deleted the whole database).
+
+# Input: Kibana/ElasticSearch's user and password.
+# Output: Nothing, if it executes correctly. It will insert csv's from http://www.portaldatransparencia.gov.br/downloads/servidores.asp, from 2013-01 to 2016-11.
+
 if [ "$#" -ne 2 ]; then
 	echo "Usage: $0 <user> <password>"
 	exit
diff --git a/config.json.example b/scripts/workers/config.json.example
similarity index 100%
rename from config.json.example
rename to scripts/workers/config.json.example
diff --git a/create_config.py b/scripts/workers/create_config.py
similarity index 51%
rename from create_config.py
rename to scripts/workers/create_config.py
index 7aa5709..5e5f74f 100755
--- a/create_config.py
+++ b/scripts/workers/create_config.py
@@ -1,5 +1,13 @@
 #!/usr/bin/env python3
 
+# WARNING: This script should not be called if you don't know what you're doing! Look for 'insert_register_payment.sh'.
+
+# Input: Year, month and day from a CSV file, username and password.
+# Ex (inserting data from file 20130930_Cadastro.csv): ./create_config.py 2013 09 30 myuser mypassword
+# Output: This script will create two config files:
+#    - JSON: This config will be used by script merge_files_es.py, and will be stored in transparencia/configs/workers/json, with its name being config-year-month.json.
+#    - Logstash: This config will be used by logstash to insert the resulting CSV from merge_files_es.py into ElasticSearch.
+
 import sys, csv, json, math, subprocess
 from pathlib import Path
 from subprocess import call
@@ -9,7 +17,7 @@ if len(sys.argv) != 6:
     sys.exit()
 
 data = {
-	"path": "Dados_Servidores/" + sys.argv[1] + "-" + sys.argv[2] + "/"
+	"path": "../../data/workers/" + sys.argv[1] + "-" + sys.argv[2] + "/"
 	, "date": sys.argv[1] + sys.argv[2] + sys.argv[3]
 	, "file1": "_Remuneracao.csv"
 	, "file2": "_Cadastro_Ufpr_Unique.csv"
@@ -18,10 +26,10 @@ data = {
 	, "quotechar": "\""
 	, "delimiter": "\t"
 	, "lineterminator": "\n"
-	, "outputFile": "Dados_Servidores/Processados/" + sys.argv[1] + sys.argv[2] + ".csv"
+	, "outputFile": "../../data/workers/processed/" + sys.argv[1] + sys.argv[2] + ".csv"
 }
 
-with open('configs/config-' + sys.argv[1] + '-' + sys.argv[2] + '.json', 'w') as outfile:
+with open('../../configs/workers/json/config-' + sys.argv[1] + '-' + sys.argv[2] + '.json', 'w') as outfile:
     json.dump(data, outfile, indent=4, sort_keys=True)
 
 if int(sys.argv[1]) <= 2014 or (int(sys.argv[1]) == 2015 and int(sys.argv[2]) <= 3):
@@ -36,5 +44,5 @@ output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.ar
 					 , "user": sys.argv[4]
 					 , "password": sys.argv[5] }
 
-with open('logstash_configs/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
+with open('../../configs/workers/logstash/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
 	outfile.write(output)
diff --git a/scripts/workers/insert_register_payment.sh b/scripts/workers/insert_register_payment.sh
new file mode 100755
index 0000000..24c3cf0
--- /dev/null
+++ b/scripts/workers/insert_register_payment.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Helper script that drives the other worker scripts. Note that running it for data that has already been inserted will DUPLICATE it (which we probably don't want).
+# This script does 3 things:
+#   1- Create config files via create_config.py
+#   2- Merge CSV data and create a new CSV file via merge_files_es.py.
+#   3- Insert CSV file generated in step 2 into ElasticSearch via Logstash.
+# Input: Year, Month and Day from CSV file, ElasticSearch's user and password.
+# Example (inserting data from file 20130930_Cadastro.csv): ./insert_register_payment.sh 2013 09 30 myuser mypassword
+# Output: The same output as the scripts and commands called.
+
+if [ "$#" -ne 5 ]; then
+	echo "Usage: $0 <year> <month> <day> <user> <password>"
+	echo "Example: $0 2016 12 01 myuser mypassword"
+	exit
+fi
+
+./create_config.py "$1" "$2" "$3" "$4" "$5"
+./merge_files_es.py "../../configs/workers/json/config-${1}-${2}.json"
+logstash -f "../../configs/workers/logstash/config-${1}-${2}" < "../../data/workers/processed/${1}${2}.csv"
diff --git a/logstash_config.example b/scripts/workers/logstash_config.example
similarity index 100%
rename from logstash_config.example
rename to scripts/workers/logstash_config.example
diff --git a/logstash_config_2013.example b/scripts/workers/logstash_config_2013.example
similarity index 100%
rename from logstash_config_2013.example
rename to scripts/workers/logstash_config_2013.example
diff --git a/merge_files_es.py b/scripts/workers/merge_files_es.py
similarity index 67%
rename from merge_files_es.py
rename to scripts/workers/merge_files_es.py
index 8cb0015..6dccc24 100755
--- a/merge_files_es.py
+++ b/scripts/workers/merge_files_es.py
@@ -1,27 +1,22 @@
 #!/usr/bin/env python3
 
-"""
-VersÃ£o feita visando inserÃ§Ã£o no ElasticSearch.
-Isso significa que eu vou escrever TODOS os dados que eu achar do segundo arquivo, mas do primeiro arquivo sÃ³ escrevo os que estiverem no segundo.
+# WARNING: This script should not be called if you don't know what you're doing! Look for 'insert_register_payment.sh'.
 
+# Script made to create a CSV that will be inserted in ElasticSearch.
+# This script is being used to merge two files: a Remuneration report (ex: 20161031_Remuneracao.csv) with a file that contains the Portal ID from UFPR people
+# (ex: 20161031_Cadastro_Ufpr_Unique.csv). This second file can be obtained filtering a Register report (ex: 20161031_Cadastro.csv) using resume_register.sh.
 
-Recebe como parÃ¢metro um arquivo de configuraÃ§Ã£o, no mesmo formato que o exemplo.
+# Input: A configuration file, in the same format as the example. This configuration file can be generated by create_config.py.
 
-DocumentaÃ§Ã£o do config.json.example:
-file1 and file2 are the files that will be merged.
-The variables that end with number 1 represent something in the first file and the ones that one with 2 represent the same thing in the second file.
-idColumn represent the common column in both files.
-columnsToAdd1 are the ids of the columns that will be printed in the output file.
-	We might want to add columns 4, 13, 16 and 22 in columnsToAdd2, but this does not work right now.
-delimiter is the CSV's delimiter.
-lineterminator is the CSV's line terminator.
-outputFile is the name of the output file.
-notFoundFile is the name of a file with errors: they represent columns that were in one file but not in the other. In this case, notFoundFile1 are the columns that are in the second file but not in t    he first file.
+# Documentation of config.json.example:
+# - Variables ending with number 1 represent something from the first file, while the ones that end with number 2 represent the same thing in the second file.
+# - File1 and File2 are the files that will be merged. File1 name is "*_Remuneracao.csv", File2 name is "*_Cadastro_Ufpr_Unique.csv".
+# - IdColumn1 and IdColumn2 represent the common column for each CSV (Ex: an ID column).
+# - Quotechar, Delimiter and LineTerminator are the CSV's quote char, delimiter and line terminator, respectively.
+# - OutputFile is the name of the output file (the result CSV).
 
-
-Nesse momento, ele tÃ¡ sendo usado pra unir dois arquivos: um relatÃ³rio de RemuneraÃ§Ã£o (ex: 201610_Remuneracao.csv) com um arquivo que contÃ©m o ID do portal das pessoas da UFPR.
-Esse segundo arquivo pode ser obtido a partir da filtragem do arquivo de Cadastros (ex: 201610_Cadastros.csv). A filtragem Ã© feita com o resumo_cadastro.sh.
-"""
+# Output: A CSV that will contain every row from the second file (*_Cadastro_Ufpr_Unique.csv). From the first file (*_Remuneracao.csv),
+# only the rows that are also in the second file are kept. This means some people in our data will not have data from Remuneracao.csv.
 
 import sys, csv, json, math, subprocess
 from pathlib import Path
@@ -52,8 +47,8 @@ title1 = csv_1.pop(0)
 
 file_exists = Path(file2)
 if not file_exists.is_file():
-	print("File2 does not exist. Calling script to create it...")
-	call(["./resumo_cadastro.sh " +  params['path'] + " " + params['date']], shell=True)
+	print("File2 does not exist. Calling script resume_register to create it...")
+	call(["./resume_register.sh " +  params['path'] + " " + params['date']], shell=True)
 
 with open(file2, newline='', encoding='Windows-1252') as f:
     csv_2 = [ i for i in csv.reader(f, 'dialect') ]
@@ -96,7 +91,6 @@ print("Preparing data...")
 columns1 = len(csv_1[0])
 
 # Separate id_point from useless data in file 2 and append points in result array.
-# This for takes about 50% of the total time.
 
 for row2 in csv_2:
     count += 1
@@ -129,8 +123,7 @@ count = 0
 const = 50 / len(csv_1)
 
 print("Number of rows in file 2 but not in file 1: " + str(errors))
-
-print("Saving data to file result.csv...")
+print("Saving data...")
 
 with open(params['outputFile'], 'w', newline='') as csvfile:
     writer = csv.writer(csvfile, delimiter='\t')
diff --git a/scripts/workers/resume_register.sh b/scripts/workers/resume_register.sh
new file mode 100755
index 0000000..eb5cf9d
--- /dev/null
+++ b/scripts/workers/resume_register.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# WARNING: This script should not be called if you don't know what you're doing! Look for 'merge_files_es.py'.
+
+# This script's purpose is to filter data and get only data related to UFPR.
+
+# Input: Path to data files (must end with a slash, it is concatenated as-is) and date from data files.
+# Example (inserting data from 2016-10): ./resume_register.sh ../../data/workers/2016-10/ 20161031
+
+# Output: CSV file named YearMonthDay_Cadastro_Ufpr_Unique.csv, in the $path folder.
+# Example of CSV location (using same parameters as input): ../../data/workers/2016-10/20161031_Cadastro_Ufpr_Unique.csv
+
+if [ "$#" -ne 2 ]; then
+	echo "Usage: $0 <path> <date>" >&2
+	exit 1
+fi
+
+path=$1
+date=$2
+input="${path}${date}_Cadastro.csv"
+output="${path}${date}_Cadastro_Ufpr_Unique.csv"
+
+# Fail early: an empty output file would stop merge_files_es.py from ever regenerating it.
+if [ ! -f "$input" ]; then
+	echo "Input file not found: ${input}" >&2
+	exit 1
+fi
+
+echo "Processing data with args = ${path} and ${date}"
+
+# About the commands below:
+# - Grep (reading the input as text despite its null bytes) keeps the header line and drops everyone that does not work in UFPR.
+# - Tr removes null characters (ctrl + @).
+
+# Get data from all universities (in this variant sed also wraps every field in double quotes).
+# grep -E --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL	NOME)" "$input" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > "$output"
+
+# Get only data from UFPR.
+grep -E --binary-files=text "(UNIVERSIDADE FEDERAL DO PARANA|Id_SERVIDOR_PORTAL	NOME)" "$input" | tr -d '\000' > "$output"
-- 
GitLab