Skip to content
Snippets Groups Projects
Commit 8c27487e authored by Cristian Weiland's avatar Cristian Weiland
Browse files

Merge branch 'issue/14' into 'master'

Issue #19: Add scripts to insert travel allowance data

See merge request !2
parents de18d131 5d2b38e9
No related branches found
No related tags found
No related merge requests found
The easiest way to insert travel allowance data is to use 'insert_travel_allowance.sh'.
Script's input: year and month of the data to be inserted, followed by ElasticSearch's user and password.
Example: ./insert_travel_allowance.sh 2016 10 myuser mypass
Example 2: ./insert_travel_allowance.sh 2014 11 myuser mypass
The other scripts will be called correctly by 'insert_travel_allowance.sh'.
#!/usr/bin/env python3
# WARNING: This script should not be called directly. Look at 'insert_travel_allowance.sh' before calling this script.
# This script is used to create a Logstash Config file.
# Input: year, month and day, ElasticSearch's username and password.
import sys, csv, json, math, subprocess
from pathlib import Path
from subprocess import call
def render_config(template, year, month, day, user, password):
    """Fill the Logstash config template for one month of data.

    ``template`` is a %-style format string using the named placeholders
    ``timestamp``, ``date``, ``user`` and ``password``. The remaining
    arguments are strings and are inserted verbatim:

    * ``timestamp`` becomes ``<day>/<month>/<year> 00:00:00``
    * ``date`` becomes ``<year>-<month>``

    Returns the rendered config text.
    """
    return template % {
        "timestamp": day + '/' + month + '/' + year + ' 00:00:00',
        "date": year + '-' + month,
        "user": user,
        "password": password,
    }


def main(argv):
    """Render 'logstash_config.example' into the config file for one month.

    ``argv`` has the same layout as ``sys.argv``:
    program name, year, month, day, username, password.

    Both the template and the output path are relative, so the script must
    be run from the directory that contains 'logstash_config.example'.

    Returns the process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) != 6:
        # Report misuse on stderr with a non-zero status so that the calling
        # shell script can tell that no config file was produced.
        print("Usage: " + argv[0] + " <year (2016)> <month (01)> <day (31)> <username> <password>",
              file=sys.stderr)
        return 1

    year, month, day, user, password = argv[1:6]

    with open('logstash_config.example') as infile:
        example = infile.read()

    output = render_config(example, year, month, day, user, password)

    with open('../../configs/travel_allowance/logstash/config-' + year + '-' + month, 'w') as outfile:
        outfile.write(output)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
# Print the last day (two digits) of the current month.
# Start from the 15th of this month and move one month ahead, keeping only
# year and month; anchoring on the 15th keeps "next month" inside the
# following calendar month whatever today's day is.
next_month=$(date -d "$(date +%Y%m15) next month" +%Y%m)
# First day of that following month.
first_of_next=$(date -d "${next_month}01")
# One day earlier is the last day of the current month; keep only the day.
last_day=$(date -d "${first_of_next} - 1 day" +%d)
echo "$last_day"
#!/bin/bash
# This script is the one that should be called to insert data from one month.
# Input: Year and month of the data to be inserted, ElasticSearch's user and password.
# Example: ./insert_travel_allowance.sh 2016 10 myuser mypass
# It has 4 steps:
# 1- Download files and put them in the right location.
# 2- Generate logstash config file via create_travel_allowance_config.py.
# 3- Generate a CSV with only UFPR data via resume_travel_allowance.sh, which is stored in transparencia/data/travel_allowance/processed/yearmonth.csv
# 4- Insert data in ElasticSearch via logstash, using the config file created and the CSV created by resume_travel_allowance.sh.
# Output: The commands/scripts outputs.
# All paths are relative, so the script must be run from its own directory.

if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <year> <month> <user> <password>"
    echo "Example: $0 2016 12 myuser mypass"
    # Non-zero status so that callers can detect the misuse.
    exit 1
fi

year=$1
month=$2
user=$3
password=$4

# Last day of the REQUESTED month (not of the month the script is run in):
# first day of that month, plus one month, minus one day (GNU date syntax).
# Example: 2016-02 -> 2016-02-01 +1 month -1 day -> 2016-02-29 -> 29.
day=$(date -d "${year}-${month}-01 +1 month -1 day" "+%d") || exit 1

ym="${year}-${month}"
path="../../data/travel_allowance/"
configPath="../../configs/travel_allowance/logstash/"
zipFile="${path}${ym}/${year}${month}_Diarias.zip"

# mkdir -p creates missing parents and succeeds when the directory already
# exists, so re-running the script for the same month does not fail here.
mkdir -p "${path}processed" "$configPath" "${path}${ym}"

# Step 1:
# Download files
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${year}'&m='${month}'&consulta=Diarias'
if ! curl -o "$zipFile" "$request" -H 'Accept-Encoding: gzip, deflate, sdch' -H 'Accept-Language: en-US,en;q=0.8' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' -H 'Referer: http://transparencia.gov.br/downloads/mensal.asp?c=GastosDiretos' -H 'Cookie: ASPSESSIONIDAQRABSAD=OJDLNBCANLIDINCHJHELHHFB; ASPSESSIONIDAQSDCQAD=BOKBKPNCDKOBJKGAMMEKADFL; _ga=GA1.3.1927288562.1481545643; ASPSESSIONIDSCSBBTCD=IGJLJBBCEEJBGLOOJKGNMHBH' -H 'Connection: keep-alive' --compressed; then
    # Without the archive none of the following steps can do useful work.
    echo "Download failed: $request" >&2
    exit 1
fi
# Unzip them
unzip "$zipFile" -d "${path}${ym}/"
# Remove zip file
rm "$zipFile"

# Step 2:
# Arguments are quoted so that a password containing spaces or glob
# characters reaches the script as a single, unmodified argument.
./create_travel_allowance_config.py "$year" "$month" "$day" "$user" "$password"

# Step 3:
./resume_travel_allowance.sh "$path" "$ym"

# Step 4:
logstash -f "${configPath}config-${ym}" < "${path}processed/${year}${month}.csv"
# Logstash pipeline for travel allowance (Diarias) CSV data.
# NOTE(review): this appears to be the template that
# create_travel_allowance_config.py fills in with Python percent-style
# formatting (the timestamp, date, user and password placeholders below).
# If so, no literal percent sign may be added to this file, comments included.
input {
# Rows are read from standard input and decoded as Windows-1252.
stdin {
codec => plain {
charset => "Windows-1252"
}
}
}
filter {
# Split each row into the named columns and attach the placeholder timestamp.
csv {
columns => ["Código Órgão Superior","Nome Órgão Superior","Código Órgão","Nome Órgao","Código Unidade Gestora","Nome Unidade Gestora","Código Função","Nome Função","Código Subfunção","Nome Subfunção","Código Programa","Nome Programa","Código Ação","Nome Ação","Linguagem Cidadã","CPF Favorecido","Nome Favorecido","Número Documento","Gestão Pagamento","Data Pagamento","Valor"]
separator => " "
add_field => { "timestamp" => "%(timestamp)s" }
}
# Cast code columns to integers and the amount column to a float.
# NOTE(review): "Código Grupo Despesa", "Código Elemento Despesa" and
# "Código Favorecido" are converted here but are not in the columns list
# above - confirm whether these entries are still needed.
mutate {
convert => { "Código Órgão Superior" => "integer" }
convert => { "Código Órgão" => "integer" }
convert => { "Código Unidade Gestora" => "integer" }
convert => { "Código Grupo Despesa" => "integer" }
convert => { "Código Elemento Despesa" => "integer" }
convert => { "Código Função" => "integer" }
convert => { "Código Subfunção" => "integer" }
convert => { "Código Programa" => "integer" }
convert => { "Código Ação" => "integer" }
convert => { "Código Favorecido" => "integer" }
convert => { "Gestão Pagamento" => "integer" }
convert => { "Valor" => "float" }
}
# Parse the added "timestamp" field into the event's @timestamp.
date {
match => [ "timestamp", "dd/MM/YYYY HH:mm:ss", "ISO8601" ]
target => [ "@timestamp" ]
}
# Parse the payment date column into a separate date field.
date {
match => [ "Data Pagamento", "dd/MM/YYYY" ]
target => [ "Data Pagamento Timestamp" ]
}
}
output {
# Index every event into an ElasticSearch index named after the date placeholder.
elasticsearch {
action => "index"
user => "%(user)s"
password => "%(password)s"
hosts => "localhost:9200"
index => "ufpr-gastos-diarias-%(date)s"
workers => 1
}
# Also echo every event to standard output.
stdout {}
}
#!/bin/bash
# WARNING: This script should not be called unless the database is erased. It's still here for 2 reasons:
# 1- Log: To know what months of data have been inserted.
# 2- Example: To give example of how to call script insert_travel_allowance.sh.
# This script only calls insert_travel_allowance for all years and months.
# Input: ElasticSearch's user and password.

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <user> <password>"
    echo "Example: $0 myuser mypass"
    # Non-zero status so that callers can detect the misuse.
    exit 1
fi

user=$1
password=$2

# Months inserted so far, newest first: 2016-11 down to 2013-01.
for year in 2016 2015 2014 2013; do
    for month in 12 11 10 09 08 07 06 05 04 03 02 01; do
        # 2016-12 is not part of the inserted data.
        if [ "$year" = "2016" ] && [ "$month" = "12" ]; then
            continue
        fi
        # Quoted so that credentials containing spaces or glob characters
        # are passed through as single, unmodified arguments.
        ./insert_travel_allowance.sh "$year" "$month" "$user" "$password"
    done
done
#!/bin/bash
# WARNING: This script should not be called directly. Look at 'insert_travel_allowance.sh' before calling this script.
# Input: First parameter is the path to data files and the second one is the date in the name of the files. Data files can be found in: http://transparencia.gov.br/downloads/mensal.asp?c=Diarias
# Example: ./resume_travel_allowance.sh ../../data/travel_allowance/ 2016-11
# Output: A CSV file in folder processed, filtering the data to get only relevant data (in our case, from UFPR).

# Validate the arguments before using them.
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <path> <date>"
    # Non-zero status so that callers can detect the misuse.
    exit 1
fi

# Path example: ../../data/travel_allowance/
path=$1
# Date example: 2016-11
date=$2
# dateWithoutHyphen example: 201611
dateWithoutHyphen=${date//-}

echo "Processing data with args = $path and ${date}"

input="${path}${date}/${dateWithoutHyphen}_Diarias.csv"
output="${path}processed/${dateWithoutHyphen}.csv"

if [ ! -f "$input" ]; then
    echo "Input file not found: $input" >&2
    exit 1
fi

# About these commands:
# - Head -n1 writes the first line (column names) to the output file.
# - Grep keeps only the lines containing "UNIVERSIDADE FEDERAL DO PARANA";
#   --binary-files=text makes it treat the input as text even though it
#   contains null characters.
# - Tr removes null characters (ctrl + @) from the kept lines, which are then
#   appended after the header.
head -n1 "$input" > "$output"
grep -E --binary-files=text "UNIVERSIDADE FEDERAL DO PARANA" "$input" | tr -d '\000' >> "$output"
#!/bin/bash
# This script gets a zip file in ~/Downloads, moves it to a folder in path (probably transparencia/data/travel_allowance), unzips it there and removes the zip file.
# Input: Date (year and month, separated by hyphen).
# Ex: ./unzip.sh 2015-12

# Exactly one argument (the date) is expected, as shown in the example above.
if [ "$#" -ne 1 ]; then
    echo "Usage $0 <date>"
    exit 1
fi

date=$1
path="../../data/travel_allowance/"
# Example: 2015-12 -> 201512
dateWithoutHyphen=${date//-}
targetDir="${path}${date}"
zipFile="${targetDir}/${dateWithoutHyphen}_Diarias.zip"

# -p: do not fail when the month's directory already exists.
mkdir -p "$targetDir"

if ! mv ~/Downloads/"${dateWithoutHyphen}_Diarias.zip" "$targetDir"; then
    echo "Could not move ${dateWithoutHyphen}_Diarias.zip from ~/Downloads" >&2
    exit 1
fi

# Extract straight into the month's directory (-d) instead of the current
# working directory, so the CSV ends up next to where the zip was placed.
if ! unzip "$zipFile" -d "$targetDir"; then
    # Keep the archive so that the extraction can be retried.
    echo "Could not unzip $zipFile" >&2
    exit 1
fi

rm "$zipFile"
#!/bin/bash
# Runs unzip.sh once per month, newest first, from 2015-12 back to 2011-01.
for year in 2015 2014 2013 2012 2011; do
    for month in 12 11 10 09 08 07 06 05 04 03 02 01; do
        ./unzip.sh "${year}-${month}"
    done
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment