Commit d5242c8f authored by Luiza Wille

Issue #63: Changed the download calls to the minimum parameters necessary

Signed-off-by: Luiza Wille <lmwc14@inf.ufpr.br>
parent 3851a7c7
......@@ -6,10 +6,10 @@ if [ "$#" -ne 3 ]; then
exit
fi
node1c3sl='http://node1.c3sl.ufpr.br:9200/'
dbHostname='http://node1.c3sl.ufpr.br:9200/'
# Copy old index to new index...
curl -u $1 -XPOST "${node1c3sl}_reindex?pretty" -H 'Content-Type: application/json' -d'
curl -u $1 -XPOST "${dbHostname}_reindex?pretty" -H 'Content-Type: application/json' -d'
{
"source": {
"index": "'$2'"
......@@ -21,4 +21,4 @@ curl -u $1 -XPOST "${node1c3sl}_reindex?pretty" -H 'Content-Type: application/js
'
# Delete old index...
curl -u $1 -XDELETE "${node1c3sl}$2?pretty"
curl -u $1 -XDELETE "${dbHostname}$2?pretty"
......@@ -12,6 +12,11 @@
# 5- Insert data in ElasticSearch via logstash, using the config file created and the CSV created by resume_expenses.sh.
# Output: The commands/scripts outputs.
# Report that a required configuration variable is unset.
# Arguments:
#   $1 - name of the variable that is unset
#   $2 - path of the config file where it should be set
# Outputs: one line on stdout.
# Returns: always 0; the caller decides whether to exit afterwards.
inputError() {
  local message="Var ${1} is unset. Set in file '${2}'."
  printf '%s\n' "$message"
  return 0
}
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <year> <month> <user> <password>"
echo "Example: $0 2016 12 myuser mypass"
......@@ -21,23 +26,23 @@ fi
source ./config.sh
# Check if all variables in config file are set:
setInFile="Set it in file 'scripts/expenses/config.sh'."
setInFile='scripts/expenses/config.sh'
if [ -z "${index}" ]; then
echo "Var 'index' is unset. ${setInFile}";
exit;
inputError "index" $setInFile
exit;
fi
if [ -z "${host}" ]; then
echo "Var 'host' is unset. ${setInFile}";
inputError "host" $setInFile
exit;
fi
if [ -z "${columnName}" ]; then
echo "Var 'columnName' is unset. ${setInFile}";
inputError "columnName" $setInFile
exit;
fi
size=${#filter[@]}
if [ "$size" -lt 1 ]; then
echo "Var 'filter' is unset. ${setInFile}";
inputError "filter" $setInFile
exit;
fi
......@@ -58,21 +63,14 @@ mkdir -p "$path"
# Download files
downloadLink='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='
acceptedEncoding='Accept-Encoding: gzip, deflate, sdch'
userAgent='User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)'
acceptedFormat='Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
connection='Connection: keep-alive'
expensesCookie='Cookie: ASPSESSIONIDAQRABSAD=OJDLNBCANLIDINCHJHELHHFB; ASPSESSIONIDAQSDCQAD=BOKBKPNCDKOBJKGAMMEKADFL; _ga=GA1.3.1927288562.1481545643; ASPSESSIONIDSCSBBTCD=IGJLJBBCEEJBGLOOJKGNMHBH'
expensesReferer='Referer: http://transparencia.gov.br/downloads/mensal.asp?c=GastosDiretos'
companyCookie='Cookie: ASPSESSIONIDSCBRBBTT=KPBDKGCAENJIEFBMMPOACBHJ'
companyReferer='Referer: http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=FavorecidosGastosDiretos'
# Download expenses file:
request="${downloadLink}${1}&m=${2}&consulta=GastosDiretos"
curl -o $path/${1}${2}_GastosDiretos.zip $request -H "${acceptedEncoding}" -H 'Accept-Language: en-US,en;q=0.8' -H 'Upgrade-Insecure-Requests: 1' -H "${userAgent} Chrome/53.0.2785.143 Safari/537.36" -H "${acceptedFormat}" -H "${expensesReferer}" -H "${expensesCookie}" -H "${connection}" --compressed
curl -o $path/${1}${2}_GastosDiretos.zip $request --compressed
# Download file with information about company:
request="${downloadLink}${1}&m=${2}&consulta=FavorecidosGastosDiretos"
curl -o $path/${1}${2}_Favorecidos.zip $request -H "${acceptedEncoding}" -H 'Accept-Language: en-US,en;q=0.8,pt;q=0.6' -H 'Upgrade-Insecure-Requests: 1' -H "${userAgent} Ubuntu Chromium/56.0.2924.76 Chrome/56.0.2924.76 Safari/537.36" -H "${acceptedFormat}" -H "${companyReferer}" -H "${companyCookie}" -H "${connection}" --compressed
curl -o $path/${1}${2}_Favorecidos.zip $request --compressed
# Unzip them
unzip -o $path/${1}${2}_GastosDiretos.zip -d $path/
......
......@@ -11,6 +11,11 @@
# 4- Insert data in ElasticSearch via logstash, using the config file created and the CSV created by resume_travel_allowance.sh.
# Output: The commands/scripts outputs.
# Print a notice that a config variable has not been set.
#   $1: the variable's name
#   $2: the config file in which the variable is expected to be defined
# Writes the notice to stdout and returns success unconditionally, so the
# caller remains responsible for aborting the script.
inputError() {
  local var_name=$1
  local config_file=$2
  printf '%s\n' "Var ${var_name} is unset. Set in file '${config_file}'."
  return 0
}
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <year> <month> <user> <password>"
echo "Example: $0 2016 12 myuser mypass"
......@@ -20,22 +25,23 @@ fi
source ./config.sh
# Check if all variables in config file are set:
setInFile='scripts/travel_allowance/config.sh'
if [ -z "${index}" ]; then
echo "Var 'index' is unset. Set it in file 'scripts/travel_allowance/config.sh'.";
inputError "index" $setInFile
exit;
fi
if [ -z "${host}" ]; then
echo "Var 'host' is unset. Set it in file 'scripts/travel_allowance/config.sh'.";
inputError "host" $setInFile
exit;
fi
if [ -z "${columnName}" ]; then
echo "Var 'host' is unset. Set it in file 'scripts/travel_allowance/config.sh'.";
inputError "columnName" $setInFile
exit;
fi
size=${#filter[@]}
if [ "$size" -lt 1 ]; then
echo "Var 'filter' is unset. Set it in file 'scripts/expenses/config.sh'.";
inputError "filter" $setInFile
exit;
fi
......@@ -55,16 +61,9 @@ path="./tmp_$ym"
mkdir -p "$path"
# Download files
acceptedEncoding='Accept-Encoding: gzip, deflate, sdch'
acceptedLanguage='Accept-Language: en-US,en;q=0.8'
userAgent='User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
acceptedFormat='Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
referer='Referer: http://transparencia.gov.br/downloads/mensal.asp?c=GastosDiretos'
cookie='Cookie: ASPSESSIONIDAQRABSAD=OJDLNBCANLIDINCHJHELHHFB; ASPSESSIONIDAQSDCQAD=BOKBKPNCDKOBJKGAMMEKADFL; _ga=GA1.3.1927288562.1481545643; ASPSESSIONIDSCSBBTCD=IGJLJBBCEEJBGLOOJKGNMHBH'
connection='Connection: keep-alive'
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=Diarias'
curl $request -H "${acceptedEncoding}" -H "${acceptedLanguage}" -H 'Upgrade-Insecure-Requests: 1' -H "${userAgent}" -H "${acceptedFormat}" -H "${referer}" -H "${cookie}" -H "${connection}" --compressed > $path/${1}${2}_Diarias.zip
curl $request --compressed > $path/${1}${2}_Diarias.zip
# Unzip them
unzip -o $path/${1}${2}_Diarias.zip -d $path/
......
......@@ -15,6 +15,12 @@
# WARNING: We get the day from the CSV file by using cut in characters 7 and 8. This means we assume they will write something like 01 as day 1. If they change it to 1, this script will not work!
# inputError <variable-name> <config-file>
# Emit a one-line message on stdout naming the unset variable and the file
# where it should be defined. Always returns 0; it does not exit the script.
inputError() {
  local unset_var=$1 where=$2
  local text
  text="Var ${unset_var} is unset. Set in file '${where}'."
  printf '%s\n' "$text"
  return 0
}
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <year> <month> <user> <password>"
echo "Example: $0 2016 12 myuser mypassword"
......@@ -22,23 +28,23 @@ if [ "$#" -ne 4 ]; then
fi
source ./config.sh
setInFile='scripts/workers/config.sh'
if [ -z "${index}" ]; then
echo "Var 'index' is unset. Set it in file 'scripts/workers/config.sh'.";
inputError "index" $setInFile
exit;
fi
if [ -z "${host}" ]; then
echo "Var 'host' is unset. Set it in file 'scripts/workers/config.sh'.";
inputError "host" $setInFile
exit;
fi
if [ -z "${columnName}" ]; then
echo "Var 'columnName' is unset. Set it in file 'scripts/workers/config.sh'.";
inputError "columnName" $setInFile
exit;
fi
size=${#filter[@]}
if [ "$size" -lt 1 ]; then
echo "Var 'filter' is unset. Set it in file 'scripts/expenses/config.sh'.";
inputError "filter" $setInFile
exit;
fi
......@@ -50,16 +56,9 @@ path="./tmp_$ym"
mkdir -p "$path"
# Download files
acceptedEncoding='Accept-Encoding: gzip, deflate, sdch'
acceptedLanguage='Accept-Language: en-US,en;q=0.8'
userAgent='User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
acceptedFormat='Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
referer='Referer: http://www.portaldatransparencia.gov.br/downloads/servidores.asp'
cookie='Cookie: ASPSESSIONIDAQRABSAD=OJDLNBCANLIDINCHJHELHHFB; ASPSESSIONIDAQSDCQAD=BOKBKPNCDKOBJKGAMMEKADFL; _ga=GA1.3.1927288562.1481545643; ASPSESSIONIDSCSBBTCD=IGJLJBBCEEJBGLOOJKGNMHBH'
connection='Connection: keep-alive'
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&d=C&consulta=Servidores'
curl $request -H "${acceptedEncoding}" -H "${acceptedLanguage}" -H 'Upgrade-Insecure-Requests: 1' -H "${userAgent}" -H "${acceptedFormat}" -H "${referer}" -H "${cookie}" -H "${connection}" --compressed > $path/${1}${2}_Servidores.zip
curl $request --compressed > $path/${1}${2}_Servidores.zip
# Unzip them
unzip -o $path/${1}${2}_Servidores.zip -d $path/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment