Commit 00c96f1f authored by Luiza Wille

Issue #58: Changed the variable names to more intuitive names

Signed-off-by: Luiza Wille <lmwc14@inf.ufpr.br>
parent 2c861934
# Delete the ElasticSearch index whose name is given as the only argument.
# Usage: $0 <index-name>
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <index-name>"
    exit
fi
indexName=$1
# Issue a single DELETE; the stale pre-rename line using "$1" duplicated
# this request and has been removed.
curl -XDELETE "localhost:9200/$indexName?pretty"
# Input: Kibana/ElasticSearch's user and password and two index names: the script will rename the index with the first name to the second one.
userAndPasswd=$1
sourceIndex=$2
destIndex=$3
if [ "$#" -ne 3 ]; then
    echo "Usage: $0 <user:password> <old-index> <new-index>"
    echo "Example: $0 myuser:mypass ufpr-csv-2016-11 ufpr-servidores-2016-11"
    exit
fi
# config.sh provides dbHostname (the ElasticSearch base URL).
source ./config.sh
# Copy old index to new index...
curl -XPOST -u $userAndPasswd "${dbHostname}_reindex?pretty" -H 'Content-Type: application/json' -d'
{
"source": {
"index": "'$sourceIndex'"
},
"dest": {
"index": "'$destIndex'"
}
}
'
# Delete old index...
curl -XDELETE -u $userAndPasswd "${dbHostname}$sourceIndex?pretty"
......@@ -10,14 +10,24 @@ import sys, csv, json, math, subprocess
from pathlib import Path
from subprocess import call
# Validate the argument count BEFORE indexing sys.argv; otherwise a call
# with too few arguments raises IndexError instead of printing usage.
if len(sys.argv) != 10:
    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <index> <host> <entity> <username> <password> <path>")
    sys.exit()

# Unpack the command-line arguments into named variables.
year = sys.argv[1]
month = sys.argv[2]
day = sys.argv[3]
index = sys.argv[4]
host = sys.argv[5]
entity = sys.argv[6]
username = sys.argv[7]
passwd = sys.argv[8]
path = sys.argv[9]
# Generate JSON CNPJ merge config file:
data = {
"path": sys.argv[9]
, "date": sys.argv[1] + sys.argv[2]
"path": path
, "date": "" + year + month
, "file1": "_CNPJ.csv"
, "file2": "_GastosDiretosFiltered.csv"
, "encoding1": "Windows-1252"
......@@ -29,16 +39,16 @@ data = {
, "quotechar": "\""
, "delimiter": "\t"
, "lineterminator": "\n"
, "outputFile": sys.argv[9] + '/' + sys.argv[1] + sys.argv[2] + "_merged_by_cnpj.csv"
, "outputFile": path + '/' + year + month + "_merged_by_cnpj.csv"
}
with open(sys.argv[9] + '/config-cnpj-' + sys.argv[1] + '-' + sys.argv[2] + '.json', 'w') as outfile:
json.dump(data, outfile, indent=4, sort_keys=True)
with open(path + '/config-cnpj-' + year + '-' + month + '.json', 'w') as outfile:
json.dump(data, outfile, indent = 4, sort_keys=True)
# Generate JSON CNAE merge config file:
data = {
"path": sys.argv[9]
, "date": sys.argv[1] + sys.argv[2]
"path": path
, "date": "" + year + month
, "file1": "_CNAE.csv"
, "file2": "_merged_by_cnpj.csv"
, "encoding1": "Windows-1252"
......@@ -50,16 +60,16 @@ data = {
, "quotechar": "\""
, "delimiter": "\t"
, "lineterminator": "\n"
, "outputFile": sys.argv[9] + '/' + sys.argv[1] + sys.argv[2] + "_merged_by_cnae.csv"
, "outputFile": path + '/' + year + month + "_merged_by_cnae.csv"
}
with open(sys.argv[9] + '/config-cnae-' + sys.argv[1] + '-' + sys.argv[2] + '.json', 'w') as outfile:
with open(path + '/config-cnae-' + year + '-' + month + '.json', 'w') as outfile:
json.dump(data, outfile, indent=4, sort_keys=True)
# Generate JSON config file to get data from Natureza Juridica:
data = {
"path": sys.argv[9]
, "date": sys.argv[1] + sys.argv[2]
"path": path
, "date": "" + year + month
, "file1": "_NaturezaJuridica.csv"
, "file2": "_merged_by_cnae.csv"
, "encoding1": "Windows-1252"
......@@ -71,23 +81,23 @@ data = {
, "quotechar": "\""
, "delimiter": "\t"
, "lineterminator": "\n"
, "outputFile": sys.argv[9] + '/' + sys.argv[1] + sys.argv[2] + ".csv"
, "outputFile": path + '/' + year + month + ".csv"
}
with open(sys.argv[9] + '/config-natjur-' + sys.argv[1] + '-' + sys.argv[2] + '.json', 'w') as outfile:
with open(path + '/config-natjur-' + year + '-' + month + '.json', 'w') as outfile:
json.dump(data, outfile, indent=4, sort_keys=True)
# Generate logstash config file:
with open('logstash_config.example') as infile:
example = infile.read()
output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.argv[1] + ' 00:00:00'
, "date": sys.argv[1] + '-' + sys.argv[2]
, "index": sys.argv[4] + '-' + sys.argv[6]
, "host": sys.argv[5]
, "user": sys.argv[7]
, "password": sys.argv[8] }
output = example % { "timestamp": day + '/' + month + '/' + year + ' 00:00:00'
, "date": year + '-' + month
, "index": index + '-' + entity
, "host": host
, "user": username
, "password": passwd }
date = sys.argv[1] + '-' + sys.argv[2]
with open(sys.argv[9] + '/config-' + date, 'w') as outfile:
date = year + '-' + month
with open(path + '/config-' + date, 'w') as outfile:
outfile.write(output)
#!/bin/bash
# This script is the one that should be called to insert data from one month.
# Input: Year, month and day from the data to be inserted, ElasticSearch's user and password. The day should be the last day of the month.
# Example: ./insert_expenses.sh 2016 10 myuser mypass
# It has 5 steps:
# 1- Download files and put them in the right location (a temporary directory, inside this directory).
# 2- Generate logstash config file and config files to merge downloaded CSVs, via create_expenses_config.py.
# 3- Generate a CSV with the filtered data via resume_expenses.sh.
# 4- Merge CSVs using merge_files.py, based on config files created by create_expenses_config.py.
# 5- Insert data in ElasticSearch via logstash, using the config file created and the CSV created by resume_expenses.sh.
# Output: The commands/scripts outputs.
function inputError(){
echo "Var ${1} is unset. Set in file '${2}'."
return 0
}
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <year> <month> <user> <password>"
echo "Example: $0 2016 12 myuser mypass"
exit
fi
source ./config.sh
# Check if all variables in config file are set:
configFile='scripts/expenses/config.sh'
if [ -z "${index}" ]; then
inputError "index" $configFile
exit;
fi
if [ -z "${host}" ]; then
inputError "host" $configFile
exit;
fi
if [ -z "${columnName}" ]; then
inputError "columnName" $configFile
exit;
fi
size=${#filter[@]}
if [ "$size" -lt 1 ]; then
inputError "filter" $configFile
exit;
fi
# Getting the Last day of this month (Using date 2016-05-15 as example):
# First, get next month (201606).
nxtMonth=$(date +%Y%m -d "$(date +${1}${2}15) next month")
# Append day 01 (20160601).
tempDate=$(date +%Y%m%d -d "${nxtMonth}01")
# Remove 1 day: 20160531, get only day: 31.
day=$(date -d "$tempDate - 1 day" "+%d")
ym=$1-$2
path="./tmp_$ym"
# Step 1:
# Create directory to store files
mkdir -p "$path"
# Download files
downloadLink='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='
# Download expenses file:
request="${downloadLink}${1}&m=${2}&consulta=GastosDiretos"
curl -o $path/${1}${2}_GastosDiretos.zip $request --compressed
# Download file with information about company:
request="${downloadLink}${1}&m=${2}&consulta=FavorecidosGastosDiretos"
curl -o $path/${1}${2}_Favorecidos.zip $request --compressed
# Unzip them
unzip -o $path/${1}${2}_GastosDiretos.zip -d $path/
unzip -o $path/${1}${2}_Favorecidos.zip -d $path/
# Remove zip file
rm $path/${1}${2}_GastosDiretos.zip
rm $path/${1}${2}_Favorecidos.zip
# Remove null bytes from the downloaded CSVs.
# Fixed "{$2}" brace typo (was "${1}{$2}_..."), which produced temp file
# names containing literal braces, e.g. "2016{10}_CNPJ_NotNull.csv".
tr -d '\000' < $path/${1}${2}_CNPJ.csv > $path/${1}${2}_CNPJ_NotNull.csv
tr -d '\000' < $path/${1}${2}_NaturezaJuridica.csv > $path/${1}${2}_NatJur_NotNull.csv
mv $path/${1}${2}_CNPJ_NotNull.csv $path/${1}${2}_CNPJ.csv
mv $path/${1}${2}_NatJur_NotNull.csv $path/${1}${2}_NaturezaJuridica.csv
for key in "${!filter[@]}"
do
# Step 2:
./create_expenses_config.py $1 $2 "$day" "$index" "$host" "$key" $3 $4 "${path}"
#!/bin/bash
# This script is the one that should be called to insert data from one month.
# Input: Year, month and day from the data to be inserted, ElasticSearch's user and password. The day should be the last day of the month.
# Example: ./insert_expenses.sh 2016 10 myuser mypass
# It has 5 steps:
# 1- Download files and put them in the right location (a temporary directory, inside this directory).
# 2- Generate logstash config file and config files to merge downloaded CSVs, via create_expenses_config.py.
# 3- Generate a CSV with the filtered data via resume_expenses.sh.
# 4- Merge CSVs using merge_files.py, based on config files created by create_expenses_config.py.
# 5- Insert data in ElasticSearch via logstash, using the config file created and the CSV created by resume_expenses.sh.
# Output: The commands/scripts outputs.
# Report an unset configuration variable.
# $1: variable name, $2: config file where it must be set.
function inputError(){
echo "Var ${1} is unset. Set in file '${2}'."
return 0
}
year=$1
month=$2
user=$3
passwd=$4
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <year> <month> <user> <password>"
echo "Example: $0 2016 12 myuser mypass"
exit
fi
source ./config.sh
# Check if all variables in config file are set:
configFile='scripts/expenses/config.sh'
if [ -z "${index}" ]; then
inputError "index" $configFile
exit;
fi
if [ -z "${host}" ]; then
inputError "host" $configFile
exit;
fi
if [ -z "${columnName}" ]; then
inputError "columnName" $configFile
exit;
fi
size=${#filter[@]}
if [ "$size" -lt 1 ]; then
inputError "filter" $configFile
exit;
fi
# Getting the Last day of this month (Using date 2016-05-15 as example):
# First, get next month (201606).
nxtMonth=$(date +%Y%m -d "$(date +${year}${month}15) next month")
# Append day 01 (20160601).
tempDate=$(date +%Y%m%d -d "${nxtMonth}01")
# Remove 1 day: 20160531, get only day: 31.
day=$(date -d "$tempDate - 1 day" "+%d")
ym=$year-$month
path="./tmp_$ym"
# Step 1:
# Create directory to store files
mkdir -p "$path"
# Download files
downloadLink='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='
# Download expenses file:
request="${downloadLink}${year}&m=${month}&consulta=GastosDiretos"
curl -o $path/${year}${month}_GastosDiretos.zip $request --compressed
# Download file with information about company:
request="${downloadLink}${year}&m=${month}&consulta=FavorecidosGastosDiretos"
curl -o $path/${year}${month}_Favorecidos.zip $request --compressed
# Unzip them
unzip -o $path/${year}${month}_GastosDiretos.zip -d $path/
unzip -o $path/${year}${month}_Favorecidos.zip -d $path/
# Remove zip file
rm $path/${year}${month}_GastosDiretos.zip
rm $path/${year}${month}_Favorecidos.zip
# Remove null bytes from the downloaded CSVs.
# Fixed "{$month}" brace typo (was "${year}{$month}_..."), which produced
# temp file names containing literal braces, e.g. "2016{10}_CNPJ_NotNull.csv".
tr -d '\000' < $path/${year}${month}_CNPJ.csv > $path/${year}${month}_CNPJ_NotNull.csv
tr -d '\000' < $path/${year}${month}_NaturezaJuridica.csv > $path/${year}${month}_NatJur_NotNull.csv
mv $path/${year}${month}_CNPJ_NotNull.csv $path/${year}${month}_CNPJ.csv
mv $path/${year}${month}_NatJur_NotNull.csv $path/${year}${month}_NaturezaJuridica.csv
# Process each configured filter entity. The diff had retained the
# pre-rename ($1/$2) copies of most lines, which would run every step twice;
# only the renamed versions are kept.
for key in "${!filter[@]}"
do
    # Step 2: generate the logstash and merge config files for this entity.
    ./create_expenses_config.py $year $month "$day" "$index" "$host" "$key" $user $passwd "${path}"
    # Step 3: generate a CSV with only this entity's rows.
    ./resume_expenses.sh "${path}" ${year}-${month} "${filter[$key]}" "${columnName}"
    # Escape spaces in the filter string before passing it to merge_files.py.
    strReplacement=$( echo "${filter[$key]}" | sed 's/ /\\ /g' )
    # Step 4: merge the CSVs using the configs created in step 2.
    ./merge_files.py $path/config-cnpj-${year}-${month}.json "$strReplacement" "${columnName}"
    ./merge_files.py $path/config-cnae-${year}-${month}.json "$strReplacement" "${columnName}"
    ./merge_files.py $path/config-natjur-${year}-${month}.json "$strReplacement" "${columnName}"
    # Step 5: insert the data into ElasticSearch via logstash.
    logstash -f ${path}/config-${year}-${month} < ${path}/${year}${month}.csv
    # Data inserted, we can now remove it.
    rm ${path}/${year}${month}.csv
    rm ${path}/${year}${month}_merged_by_cnpj.csv
    rm ${path}/${year}${month}_merged_by_cnae.csv
    rm ${path}/config-${year}-${month}
    rm ${path}/config-cnae-${year}-${month}.json
    rm ${path}/config-cnpj-${year}-${month}.json
    rm ${path}/config-natjur-${year}-${month}.json
done
# Remove downloaded csvs.
rm -f $path/${year}${month}_GastosDiretos.csv
rm -f $path/${year}${month}_GastosDiretosFiltered.csv
rm -f $path/${year}${month}_CNAE.csv
rm -f $path/${year}${month}_CNPJ.csv
rm -f $path/${year}${month}_NaturezaJuridica.csv
rmdir $path
......@@ -46,11 +46,13 @@ def getDataWithEmptyRow(columns, row):
newRow.append('')
return newRow
# Validate the argument count BEFORE reading sys.argv; otherwise a call
# with no arguments raises IndexError instead of printing usage.
if len(sys.argv) != 4:
    print("Usage: " + sys.argv[0] + " <config.json> <filter> <columnId>")
    sys.exit()
configFile = sys.argv[1]
# Load the merge parameters from the JSON config file (single open; the
# stale pre-rename "with open(sys.argv[1])" duplicate has been removed).
with open(configFile) as f:
    params = json.load(f)
# Which files should be merged?
......
#!/bin/bash
# Filter one month's GastosDiretos CSV down to the rows matching <filter>.
# Usage: $0 <path> <date> <filter> <column-name>
if [ "$#" -ne 4 ]; then
    echo "Usage $0 <path> <date> <filter> <column-name>"
    echo "Example: $0 ./tmp_201612 201612 MEC 2"
    exit
fi
# Assign once; the diff had kept a second, pre-rename copy of these
# assignments after the check, which has been removed.
path=$1
date=$2
filter=$3
column=$4
# File names use the date without the hyphen (YYYYMM).
dateWithoutHyphen=${date//-}
input="${path}/${dateWithoutHyphen}_GastosDiretos.csv"
......@@ -16,7 +18,7 @@ output="${path}/${dateWithoutHyphen}_GastosDiretosFiltered.csv"
head -n1 "${input}" > "$output"
iconv -f WINDOWS-1252 -t UTF-8 -o $path/tmp.csv "$output"
columnId=$(sed s/"${4}".*$/"${4}"/ $path/tmp.csv | sed -e 's/\t/\n/g' | wc -l)
columnId=$(sed s/"${column}".*$/"${column}"/ $path/tmp.csv | sed -e 's/\t/\n/g' | wc -l)
rm -f $path/tmp.csv
cmd="\$$columnId == \"${filter}\""
......
......@@ -7,27 +7,33 @@
# - Filter: An array of n values, that will create n indexes in ElasticSearch, each one filtering data from Portal Transparencia using its corresponding string. Ex: "UNIVERSIDADE FEDERAL DO PARANA"
# - University: An array of n values, with n being the same n as Filter's array. This array should contain the initials from Universities declared in Filter array, in the same order.
# Dispatch the requested insertions. The diff had kept both the pre-rename
# ($1..$5) and renamed copies of each conditional/command; only the renamed
# versions are kept so each insertion runs once.
year=$1
month=$2
user=$3
passwd=$4
insertions=$5
if [[ "$#" -ne 5 || "$year" == "-help" || "$year" == "--help" ]]; then
    echo " Usage: $0 <year> <month> <user> <password> <insertions>"
    echo " Insertions must be one between: expenses, travel_allowances, workers, ministry_of_health, all"
    echo " Example: $0 2016 12 myuser mypass all"
    exit
fi
if [[ "$insertions" == "all" || "$insertions" == "expenses" ]]; then
    # First, insert Expenses data.
    echo "Inserting Expenses from ${year}-${month}..."
    (cd expenses && ./insert_expenses.sh $year $month $user $passwd)
fi
if [[ "$insertions" == "all" || "$insertions" == "travel_allowances" ]]; then
    # We should now insert Travel allowance data.
    echo "Inserting Travel Allowances from ${year}-${month}..."
    (cd travel_allowances && ./insert_travel_allowances.sh $year $month $user $passwd)
fi
if [[ "$insertions" == "all" || "$insertions" == "workers" ]]; then
    # Now, insert Workers data.
    echo "Inserting Workers from ${year}-${month}..."
    (cd workers && ./insert_register_payment.sh $year $month $user $passwd)
fi
......@@ -10,6 +10,16 @@ import sys, csv, json, math, subprocess
from pathlib import Path
from subprocess import call
# Validate the argument count BEFORE indexing sys.argv; otherwise a call
# with too few arguments raises IndexError instead of printing usage.
if len(sys.argv) != 10:
    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <index> <host> <entity> <username> <password> <path>")
    sys.exit()

# Unpack the command-line arguments into named variables.
year = sys.argv[1]
month = sys.argv[2]
day = sys.argv[3]
index = sys.argv[4]
host = sys.argv[5]
entity = sys.argv[6]
username = sys.argv[7]
passwd = sys.argv[8]
path = sys.argv[9]
......@@ -17,13 +27,13 @@ if len(sys.argv) != 10:
# Generate logstash config file from the example template. The diff had
# retained the pre-rename (sys.argv[...]) template-fill block alongside the
# renamed one; only the renamed version is kept.
with open('logstash_config.example') as infile:
    example = infile.read()
# Fill the template placeholders with this run's parameters.
output = example % { "timestamp": day + '/' + month + '/' + year + ' 00:00:00'
    , "date": year + '-' + month
    , "index": index + '-' + entity
    , "host": host
    , "user": username
    , "password": passwd }
date = year + '-' + month
with open(path + '/config-' + date, 'w') as outfile:
    outfile.write(output)
......@@ -12,10 +12,15 @@
# Output: The commands/scripts outputs.
# Report an unset configuration variable.
# $1: variable name, $2: config file where it must be set.
# Bug fix: the rename had replaced the function's positional parameters with
# ${year}/${month} (and kept both echo lines), printing a wrong message with
# the script's year as the variable name. Only the ${1}/${2} form is correct.
function inputError(){
    echo "Var ${1} is unset. Set in file '${2}'."
    return 0
}
year=$1
month=$2
user=$3
passwd=$4
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <year> <month> <user> <password>"
echo "Example: $0 2016 12 myuser mypass"
......@@ -47,13 +52,13 @@ fi
# Getting the Last day of this month (Using date 2016-05-15 as example):
# First, get next month (201606).
nxtMonth=$(date +%Y%m -d "$(date +${1}${2}15) next month")
nxtMonth=$(date +%Y%m -d "$(date +${year}${month}15) next month")
# Append day 01 (20160601).
tempDate=$(date -d "${nxtMonth}01")
# Remove 1 day: 20160531, get only day: 31.
day=$(date -d "$tempDate - 1 day" "+%d")
ym=$1-$2
ym=$year-$month
path="./tmp_$ym"
# Step 1:
......@@ -61,28 +66,28 @@ path="./tmp_$ym"
mkdir -p "$path"
# Download files. The diff had kept the pre-rename (${1}/${2}) copy of each
# command alongside the renamed one; only the renamed versions are kept so
# nothing runs twice.
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${year}'&m='${month}'&consulta=Diarias'
curl $request --compressed > $path/${year}${month}_Diarias.zip
# Unzip them
unzip -o $path/${year}${month}_Diarias.zip -d $path/
# Remove zip file
rm $path/${year}${month}_Diarias.zip
for key in "${!filter[@]}"
do
    # Step 2: generate the config files for this entity.
    ./create_travel_allowance_config.py $year $month "$day" "$index" "$host" "$key" $user $passwd "${path}"
    # Step 3: generate a CSV with only this entity's rows.
    ./resume_travel_allowance.sh "$path" ${year}-${month} "${filter[$key]}" "${columnName}"
    # Step 4: insert the data into ElasticSearch via logstash.
    logstash -f ${path}/config-${year}-${month} < ${path}/${year}${month}.csv
    # Remove processed file
    rm ${path}/${year}${month}.csv
    rm ${path}/config-${year}-${month}
done
rm $path/${year}${month}_Diarias.csv
rmdir $path
#!/bin/bash
# Filter one month's Diarias CSV down to the rows matching <filter>.
# Usage: $0 <path> <date> <filter> <column-name>
if [ "$#" -ne 4 ]; then
    echo "Usage $0 <path> <date> <filter> <column-name>"
    echo "Example: $0 ./tmp_201612 201612 MEC 2"
    exit
fi
# Assign once; the diff had kept a second, pre-rename copy of these
# assignments after the check, which has been removed.
path=$1
date=$2
filter=$3
column=$4
# File names use the date without the hyphen (YYYYMM).
dateWithoutHyphen=${date//-}
input="${path}/${dateWithoutHyphen}_Diarias.csv"
......@@ -16,7 +18,7 @@ output="${path}/${dateWithoutHyphen}.csv"
head -n1 ${input} > $path/header.csv
iconv -f WINDOWS-1252 -t UTF-8 -o $path/tmp.csv $path/header.csv
columnId=$(sed s/"${4}".*$/"${4}"/ $path/tmp.csv | sed -e 's/\t/\n/g' | wc -l)
columnId=$(sed s/"${column}".*$/"${column}"/ $path/tmp.csv | sed -e 's/\t/\n/g' | wc -l)
rm -f $path/tmp.csv $path/header.csv
cmd="\$$columnId == \"${filter}\""
......
......@@ -12,13 +12,23 @@ import sys, csv, json, math, subprocess
from pathlib import Path
from subprocess import call
# Validate the argument count BEFORE indexing sys.argv; otherwise a call
# with too few arguments raises IndexError instead of printing usage.
if len(sys.argv) != 10:
    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <index> <host> <entity> <username> <password> <path>")
    sys.exit()

# Unpack the command-line arguments into named variables.
year = sys.argv[1]
month = sys.argv[2]
day = sys.argv[3]
index = sys.argv[4]
host = sys.argv[5]
entity = sys.argv[6]
username = sys.argv[7]
passwd = sys.argv[8]
path = sys.argv[9]
data = {
"path": sys.argv[9]
, "date": sys.argv[1] + sys.argv[2] + sys.argv[3]
"path": path
, "date": year + month + day
, "file1": "_Remuneracao.csv"
, "file2": "_Cadastro_Unique.csv"
, "idColumn1": 2
......@@ -26,25 +36,25 @@ data = {
, "quotechar": "\""
, "delimiter": "\t"
, "lineterminator": "\n"
, "outputFile": sys.argv[9] + '/' + sys.argv[1] + sys.argv[2] + sys.argv[3] + ".csv"
, "outputFile": path + '/' + year + month + day + ".csv"
}
with open(sys.argv[9] + '/config-' + sys.argv[1] + '-' + sys.argv[2] + '.json', 'w') as outfile:
with open(path + '/config-' + year + '-' + month + '.json', 'w') as outfile:
json.dump(data, outfile, indent=4, sort_keys=True)
if int(sys.argv[1]) <= 2014 or (int(sys.argv[1]) == 2015 and int(sys.argv[2]) <= 3):
if int(year) <= 2014 or (int(year) == 2015 and int(month) <= 3):
with open('previous_logstash_config.example') as infile:
example = infile.read()
else:
with open('logstash_config.example') as infile:
example = infile.read()
output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.argv[1] + ' 00:00:00'
, "date": sys.argv[1] + '-' + sys.argv[2]
, "index": sys.argv[4] + '-' + sys.argv[6]
, "host": sys.argv[5]
, "user": sys.argv[7]
, "password": sys.argv[8] }
output = example % { "timestamp": day + '/' + month + '/' + year + ' 00:00:00'
, "date": year + '-' + month
, "index": index + '-' + entity
, "host": host
, "user": username
, "password": passwd }
with open(sys.argv[9] + '/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
with open(path + '/config-' + year + '-' + month, 'w') as outfile:
outfile.write(output)
......@@ -16,10 +16,14 @@
# WARNING: We get the day from the CSV file by using cut in characters 7 and 8. This means we assume they will write something like 01 as day 1. If they change it to 1, this script wil