From c43721c2173d1d0952cef4d610e990b77496eb52 Mon Sep 17 00:00:00 2001 From: Cristian Weiland <cw14@inf.ufpr.br> Date: Thu, 16 Mar 2017 13:25:21 -0300 Subject: [PATCH] Issue #25: Fix another scripts small bugs Signed-off-by: Cristian Weiland <cw14@inf.ufpr.br> --- scripts/expenses/insert_expenses.sh | 17 ++++++++++------- scripts/expenses/resume_expenses.sh | 6 ++++-- scripts/insert_data.sh | 6 +++--- .../insert_travel_allowances.sh | 9 +++------ .../resume_travel_allowance.sh | 6 ++++-- scripts/workers/insert_register_payment.sh | 18 +++++++++++------- scripts/workers/resume_register.sh | 11 +++++------ 7 files changed, 40 insertions(+), 33 deletions(-) diff --git a/scripts/expenses/insert_expenses.sh b/scripts/expenses/insert_expenses.sh index 1878082..877b683 100755 --- a/scripts/expenses/insert_expenses.sh +++ b/scripts/expenses/insert_expenses.sh @@ -28,17 +28,18 @@ day=$(date -d "$temp - 1 day" "+%d") ym=$1-$2 dataPath="../../data/" path="../../data/expenses/" +configPath="../../configs/expenses/logstash/" -if [ ! -d "$dataPath" ]; then - mkdir "$dataPath" -fi if [ ! -d "$path" ]; then - mkdir "$path" + mkdir -p "$path" +fi +if [ ! -d "$configPath" ]; then + mkdir -p "$configPath" fi # Step 1: # Create directory to store files -mkdir $path$ym +mkdir -p $path$ym # Download files request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=GastosDiretos' @@ -50,12 +51,14 @@ unzip $path$ym/${1}${2}_GastosDiretos.zip -d $path$ym/ # Remove zip file rm $path$ym/${1}${2}_GastosDiretos.zip -source config.sh +source ./config.sh + +echo $filter # Step 2: ./create_expenses_config.py $1 $2 $day $index $host $3 $4 # Step 3: -./resume_expenses.sh ../../data/expenses/ ${1}-${2} $filter +./resume_expenses.sh ../../data/expenses/ ${1}-${2} "$filter" # Step 4: logstash -f ../../configs/expenses/logstash/config-${1}-${2} < ../../data/expenses/processed/${1}${2}.csv # Data inserted, we can now remove it. 
diff --git a/scripts/expenses/resume_expenses.sh b/scripts/expenses/resume_expenses.sh index a0edea1..a389304 100755 --- a/scripts/expenses/resume_expenses.sh +++ b/scripts/expenses/resume_expenses.sh @@ -21,11 +21,13 @@ filter=$3 # dateWithoutHyphen example: 201611 dateWithoutHyphen=${date//-} -echo "Processing data with args = $path and ${date}" - input="${path}${date}/${dateWithoutHyphen}_GastosDiretos.csv" output="${path}processed/${dateWithoutHyphen}.csv" +if [ ! -d "${path}processed" ]; then + mkdir -p "${path}processed" +fi + # About this command: # - Grep removes everyone that does not work in UFPR. # - Tr removes null characters (ctrl + @). diff --git a/scripts/insert_data.sh b/scripts/insert_data.sh index 55dd4a9..f50741e 100755 --- a/scripts/insert_data.sh +++ b/scripts/insert_data.sh @@ -9,8 +9,8 @@ fi # First, insert Expenses data. (cd expenses && ./insert_expenses.sh $1 $2 $3 $4) -# Now, insert Workers data. -(cd workers && ./insert_register_payment.sh $1 $2 $3 $4) - # We should now insert Travel allowance data. (cd travel_allowances && ./insert_travel_allowances.sh $1 $2 $3 $4) + +# Now, insert Workers data. +(cd workers && ./insert_register_payment.sh $1 $2 $3 $4) diff --git a/scripts/travel_allowances/insert_travel_allowances.sh b/scripts/travel_allowances/insert_travel_allowances.sh index 1fb48b6..844870e 100755 --- a/scripts/travel_allowances/insert_travel_allowances.sh +++ b/scripts/travel_allowances/insert_travel_allowances.sh @@ -32,9 +32,6 @@ configPath="../../configs/travel_allowance/logstash/" source config.sh -if [ ! -d "$dataPath" ]; then - mkdir "$dataPath" -fi if [ ! 
-d "$path/processed" ]; then mkdir -p "$path/processed" fi @@ -44,7 +41,7 @@ fi # Step 1: # Create directory to store files -mkdir $path$ym +mkdir -p $path$ym # Download files request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=Diarias' @@ -57,9 +54,9 @@ unzip $path$ym/${1}${2}_Diarias.zip -d $path$ym/ rm $path$ym/${1}${2}_Diarias.zip # Step 2: -./create_travel_allowance_config.py $1 $2 $day $index $host $3 $4 +./create_travel_allowance_config.py $1 $2 "$day" "$index" "$host" $3 $4 # Step 3: -./resume_travel_allowance.sh $path ${1}-${2} $filter +./resume_travel_allowance.sh $path ${1}-${2} "$filter" # Step 4: logstash -f ../../configs/travel_allowance/logstash/config-${1}-${2} < ${path}processed/${1}${2}.csv diff --git a/scripts/travel_allowances/resume_travel_allowance.sh b/scripts/travel_allowances/resume_travel_allowance.sh index a64699e..ecac412 100755 --- a/scripts/travel_allowances/resume_travel_allowance.sh +++ b/scripts/travel_allowances/resume_travel_allowance.sh @@ -19,11 +19,13 @@ if [ "$#" -ne 3 ]; then exit fi -echo "Processing data with args = $path and ${date}" - input="${path}${date}/${dateWithoutHyphen}_Diarias.csv" output="${path}processed/${dateWithoutHyphen}.csv" +if [ ! -d "${path}processed" ]; then + mkdir -p "${path}processed" +fi + # About this command: # - Grep removes everyone that does not work in UFPR. # - Tr removes null characters (ctrl + @). diff --git a/scripts/workers/insert_register_payment.sh b/scripts/workers/insert_register_payment.sh index 63fba59..cdc4ee3 100755 --- a/scripts/workers/insert_register_payment.sh +++ b/scripts/workers/insert_register_payment.sh @@ -25,20 +25,24 @@ fi ym=$1-$2 dataPath="../../data/" path="../../data/workers/" +configPath="../../configs/workers/" source config.sh # Check if Data and Workers directories already exist: -if [ ! -d "$dataPath" ]; then - mkdir "$dataPath" -fi if [ ! -d "$path" ]; then - mkdir "$path" + mkdir -p "$path" +fi +if [ ! 
-d "$configPath/json" ]; then + mkdir -p "$configPath/json" +fi +if [ ! -d "$configPath/logstash" ]; then + mkdir -p "$configPath/logstash" fi # Step 1: # Create directory to store files -mkdir $path$ym +mkdir -p $path$ym # Download files request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&d=C&consulta=Servidores' @@ -55,11 +59,11 @@ day=$(ls $path$ym | grep -m 1 $1$2 | cut -c 7,8) # Step 2: # Create config files -./create_config.py $1 $2 $day $index $host $3 $4 +./create_config.py $1 $2 "$day" "$index" "$host" $3 $4 # Step 3: # Start processing -./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json $filter +./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json "$filter" # Step 4: # Insert data in ElasticSearch diff --git a/scripts/workers/resume_register.sh b/scripts/workers/resume_register.sh index 71f132c..ea36653 100755 --- a/scripts/workers/resume_register.sh +++ b/scripts/workers/resume_register.sh @@ -19,22 +19,21 @@ path=$1 date=$2 filter=$3 -echo "Processing data with args = ${path} and ${date}" - input="${path}${date}_Cadastro.csv" output="${path}${date}_Cadastro_Ufpr_Unique.csv" -columns="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42" +if [ ! -d "${path}" ]; then + mkdir -p "${path}" +fi # About this command: -# - Sed wraps fields in double quotes. +# - Sed wraps fields in double quotes. It's not needed anymore. # - Grep removes everyone that does not work in UFPR. -# - Cut selects the important columns. # - Uniq removes repeated values. # - Tr removes null characters (ctrl + @). # Get data from all universities. 
-# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output +# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output # Get only data from UFPR. cat $input | egrep --binary-files=text "$filter" | tr -d '\000' > $output -- GitLab