Skip to content
Snippets Groups Projects
Commit c43721c2 authored by Cristian Weiland's avatar Cristian Weiland
Browse files

Issue #25: Fix more small bugs in the scripts

parent 5f970c9e
No related branches found
No related tags found
No related merge requests found
......@@ -28,17 +28,18 @@ day=$(date -d "$temp - 1 day" "+%d")
ym=$1-$2
dataPath="../../data/"
path="../../data/expenses/"
configPath="../../configs/expenses/logstash/"
if [ ! -d "$dataPath" ]; then
mkdir "$dataPath"
fi
if [ ! -d "$path" ]; then
mkdir "$path"
mkdir -p "$path"
fi
if [ ! -d "$configPath" ]; then
mkdir -p "$configPath"
fi
# Step 1:
# Create directory to store files
mkdir $path$ym
mkdir -p $path$ym
# Download files
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=GastosDiretos'
......@@ -50,12 +51,14 @@ unzip $path$ym/${1}${2}_GastosDiretos.zip -d $path$ym/
# Remove zip file
rm $path$ym/${1}${2}_GastosDiretos.zip
source config.sh
source ./config.sh
echo $filter
# Step 2:
./create_expenses_config.py $1 $2 $day $index $host $3 $4
# Step 3:
./resume_expenses.sh ../../data/expenses/ ${1}-${2} $filter
./resume_expenses.sh ../../data/expenses/ ${1}-${2} "$filter"
# Step 4:
logstash -f ../../configs/expenses/logstash/config-${1}-${2} < ../../data/expenses/processed/${1}${2}.csv
# Data inserted, we can now remove it.
......
......@@ -21,11 +21,13 @@ filter=$3
# dateWithoutHyphen example: 201611
dateWithoutHyphen=${date//-}
echo "Processing data with args = $path and ${date}"
input="${path}${date}/${dateWithoutHyphen}_GastosDiretos.csv"
output="${path}processed/${dateWithoutHyphen}.csv"
# Ensure the directory that will hold the filtered CSV ($output) exists.
# The test must name the same directory that mkdir creates ("processed").
if [ ! -d "${path}processed" ]; then
  mkdir -p "${path}processed"
fi
# About this command:
# - Grep removes everyone that does not work in UFPR.
# - Tr removes null characters (ctrl + @).
......
......@@ -9,8 +9,8 @@ fi
# First, insert Expenses data.
(cd expenses && ./insert_expenses.sh $1 $2 $3 $4)
# Now, insert Workers data.
(cd workers && ./insert_register_payment.sh $1 $2 $3 $4)
# We should now insert Travel allowance data.
(cd travel_allowances && ./insert_travel_allowances.sh $1 $2 $3 $4)
# Now, insert Workers data.
(cd workers && ./insert_register_payment.sh $1 $2 $3 $4)
......@@ -32,9 +32,6 @@ configPath="../../configs/travel_allowance/logstash/"
source config.sh
if [ ! -d "$dataPath" ]; then
mkdir "$dataPath"
fi
if [ ! -d "$path/processed" ]; then
mkdir -p "$path/processed"
fi
......@@ -44,7 +41,7 @@ fi
# Step 1:
# Create directory to store files
mkdir $path$ym
mkdir -p $path$ym
# Download files
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=Diarias'
......@@ -57,9 +54,9 @@ unzip $path$ym/${1}${2}_Diarias.zip -d $path$ym/
rm $path$ym/${1}${2}_Diarias.zip
# Step 2:
./create_travel_allowance_config.py $1 $2 $day $index $host $3 $4
./create_travel_allowance_config.py $1 $2 "$day" "$index" "$host" $3 $4
# Step 3:
./resume_travel_allowance.sh $path ${1}-${2} $filter
./resume_travel_allowance.sh $path ${1}-${2} "$filter"
# Step 4:
logstash -f ../../configs/travel_allowance/logstash/config-${1}-${2} < ${path}processed/${1}${2}.csv
......
......@@ -19,11 +19,13 @@ if [ "$#" -ne 3 ]; then
exit
fi
echo "Processing data with args = $path and ${date}"
input="${path}${date}/${dateWithoutHyphen}_Diarias.csv"
output="${path}processed/${dateWithoutHyphen}.csv"
# Ensure the directory that will hold the filtered CSV ($output) exists.
# The test must name the same directory that mkdir creates ("processed").
if [ ! -d "${path}processed" ]; then
  mkdir -p "${path}processed"
fi
# About this command:
# - Grep removes everyone that does not work in UFPR.
# - Tr removes null characters (ctrl + @).
......
......@@ -25,20 +25,24 @@ fi
ym=$1-$2
dataPath="../../data/"
path="../../data/workers/"
configPath="../../configs/workers/"
source config.sh
# Check if Data and Workers directories already exist:
if [ ! -d "$dataPath" ]; then
mkdir "$dataPath"
fi
if [ ! -d "$path" ]; then
mkdir "$path"
mkdir -p "$path"
fi
if [ ! -d "$configPath/json" ]; then
mkdir -p "$configPath/json"
fi
if [ ! -d "$configPath/logstash" ]; then
mkdir -p "$configPath/logstash"
fi
# Step 1:
# Create directory to store files
mkdir $path$ym
mkdir -p $path$ym
# Download files
request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&d=C&consulta=Servidores'
......@@ -55,11 +59,11 @@ day=$(ls $path$ym | grep -m 1 $1$2 | cut -c 7,8)
# Step 2:
# Create config files
./create_config.py $1 $2 $day $index $host $3 $4
./create_config.py $1 $2 "$day" "$index" "$host" $3 $4
# Step 3:
# Start processing
./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json $filter
./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json "$filter"
# Step 4:
# Insert data in ElasticSearch
......
......@@ -19,22 +19,21 @@ path=$1
date=$2
filter=$3
echo "Processing data with args = ${path} and ${date}"
input="${path}${date}_Cadastro.csv"
output="${path}${date}_Cadastro_Ufpr_Unique.csv"
columns="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42"
if [ ! -d "${path}" ]; then
mkdir -p "${path}"
fi
# About this command:
# - Sed wraps fields in double quotes.
# - Sed wraps fields in double quotes. It's not needed anymore.
# - Grep removes everyone that does not work in UFPR.
# - Cut selects the important columns.
# - Uniq removes repeated values.
# - Tr removes null characters (ctrl + @).
# Get data from all universities.
# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output
# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output
# Get only data from UFPR.
# Keep only the lines of $input that match the extended regex in $filter,
# then delete NUL bytes from the result and write it to $output.
# - Expansions are quoted so paths containing spaces or globs stay intact.
# - grep -E replaces the deprecated egrep alias and reads the file directly.
# - "--" stops option parsing so a pattern starting with "-" is still a pattern.
grep -E --binary-files=text -- "$filter" "$input" | tr -d '\000' > "$output"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment