Issue #25: Fix other small bugs in the scripts

Signed-off-by: Cristian Weiland <cw14@inf.ufpr.br>

Issue #25: Fix other small bugs in the scripts
c43721c2 · Cristian Weiland · 5f970c9e · c43721c2 · c43721c2 · c43721c2
Commit c43721c2 authored 8 years ago by Cristian Weiland
--- a/scripts/expenses/insert_expenses.sh
+++ b/scripts/expenses/insert_expenses.sh
@@ -28,17 +28,18 @@ day=$(date -d "$temp - 1 day" "+%d")
 ym=$1-$2
 dataPath="../../data/"
 path="../../data/expenses/"
+configPath="../../configs/expenses/logstash/"

-if [ ! -d "$dataPath" ]; then
-	mkdir "$dataPath"
-fi
 if [ ! -d "$path" ]; then
-	mkdir "$path"
+	mkdir -p "$path"
+fi
+if [ ! -d "$configPath" ]; then
+	mkdir -p "$configPath"
 fi

 # Step 1:
 # Create directory to store files
-mkdir $path$ym
+mkdir -p "$path$ym"

 # Download files
 request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=GastosDiretos'
@@ -50,12 +51,14 @@ unzip $path$ym/${1}${2}_GastosDiretos.zip -d $path$ym/
 # Remove zip file
 rm $path$ym/${1}${2}_GastosDiretos.zip

-source config.sh
+source ./config.sh
+
+printf '%s\n' "$filter"

 # Step 2:
 ./create_expenses_config.py $1 $2 $day $index $host $3 $4
 # Step 3:
-./resume_expenses.sh ../../data/expenses/ ${1}-${2} $filter
+./resume_expenses.sh ../../data/expenses/ ${1}-${2} "$filter"
 # Step 4:
 logstash -f ../../configs/expenses/logstash/config-${1}-${2} < ../../data/expenses/processed/${1}${2}.csv
 # Data inserted, we can now remove it.

--- a/scripts/expenses/resume_expenses.sh
+++ b/scripts/expenses/resume_expenses.sh
@@ -21,11 +21,13 @@ filter=$3
 # dateWithoutHyphen example: 201611
 dateWithoutHyphen=${date//-}

-echo "Processing data with args = $path and ${date}"
-
 input="${path}${date}/${dateWithoutHyphen}_GastosDiretos.csv"
 output="${path}processed/${dateWithoutHyphen}.csv"

+if [ ! -d "${path}processed" ]; then
+    mkdir -p "${path}processed"
+fi
+
 # About this command:
 # - Grep removes everyone that does not work in UFPR.
 # - Tr removes null characters (ctrl + @).

--- a/scripts/insert_data.sh
+++ b/scripts/insert_data.sh
@@ -9,8 +9,8 @@ fi
 # First, insert Expenses data.
 (cd expenses && ./insert_expenses.sh $1 $2 $3 $4)

-# Now, insert Workers data.
-(cd workers && ./insert_register_payment.sh $1 $2 $3 $4)
-
 # We should now insert Travel allowance data.
 (cd travel_allowances && ./insert_travel_allowances.sh $1 $2 $3 $4)
+
+# Now, insert Workers data.
+(cd workers && ./insert_register_payment.sh $1 $2 $3 $4)
--- a/scripts/travel_allowances/insert_travel_allowances.sh
+++ b/scripts/travel_allowances/insert_travel_allowances.sh
@@ -32,9 +32,6 @@ configPath="../../configs/travel_allowance/logstash/"

 source config.sh

-if [ ! -d "$dataPath" ]; then
-	mkdir "$dataPath"
-fi
 if [ ! -d "$path/processed" ]; then
 	mkdir -p "$path/processed"
 fi
@@ -44,7 +41,7 @@ fi

 # Step 1:
 # Create directory to store files
-mkdir $path$ym
+mkdir -p "$path$ym"

 # Download files
 request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=Diarias'
@@ -57,9 +54,9 @@ unzip $path$ym/${1}${2}_Diarias.zip -d $path$ym/
 rm $path$ym/${1}${2}_Diarias.zip

 # Step 2:
-./create_travel_allowance_config.py $1 $2 $day $index $host $3 $4
+./create_travel_allowance_config.py $1 $2 "$day" "$index" "$host" $3 $4
 # Step 3:
-./resume_travel_allowance.sh $path ${1}-${2} $filter
+./resume_travel_allowance.sh $path ${1}-${2} "$filter"
 # Step 4:
 logstash -f ../../configs/travel_allowance/logstash/config-${1}-${2} < ${path}processed/${1}${2}.csv


--- a/scripts/travel_allowances/resume_travel_allowance.sh
+++ b/scripts/travel_allowances/resume_travel_allowance.sh
@@ -19,11 +19,13 @@ if [ "$#" -ne 3 ]; then
 	exit
 fi

-echo "Processing data with args = $path and ${date}"
-
 input="${path}${date}/${dateWithoutHyphen}_Diarias.csv"
 output="${path}processed/${dateWithoutHyphen}.csv"

+if [ ! -d "${path}processed" ]; then
+    mkdir -p "${path}processed"
+fi
+
 # About this command:
 # - Grep removes everyone that does not work in UFPR.
 # - Tr removes null characters (ctrl + @).

--- a/scripts/workers/insert_register_payment.sh
+++ b/scripts/workers/insert_register_payment.sh
@@ -25,20 +25,24 @@ fi
 ym=$1-$2
 dataPath="../../data/"
 path="../../data/workers/"
+configPath="../../configs/workers/"

 source config.sh

 # Check if Data and Workers directories already exist:
-if [ ! -d "$dataPath" ]; then
-	mkdir "$dataPath"
-fi
 if [ ! -d "$path" ]; then
-	mkdir "$path"
+	mkdir -p "$path"
+fi
+if [ ! -d "$configPath/json" ]; then
+	mkdir -p "$configPath/json"
+fi
+if [ ! -d "$configPath/logstash" ]; then
+	mkdir -p "$configPath/logstash"
 fi

 # Step 1:
 # Create directory to store files
-mkdir $path$ym
+mkdir -p "$path$ym"

 # Download files
 request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&d=C&consulta=Servidores'
@@ -55,11 +59,11 @@ day=$(ls $path$ym | grep -m 1 $1$2 | cut -c 7,8)

 # Step 2:
 # Create config files
-./create_config.py $1 $2 $day $index $host $3 $4
+./create_config.py $1 $2 "$day" "$index" "$host" $3 $4

 # Step 3:
 # Start processing
-./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json $filter
+./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json "$filter"

 # Step 4:
 # Insert data in ElasticSearch

--- a/scripts/workers/resume_register.sh
+++ b/scripts/workers/resume_register.sh
@@ -19,22 +19,21 @@ path=$1
 date=$2
 filter=$3

-echo "Processing data with args = ${path} and ${date}"
-
 input="${path}${date}_Cadastro.csv"
 output="${path}${date}_Cadastro_Ufpr_Unique.csv"

-columns="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42"
+if [ ! -d "${path}" ]; then
+    mkdir -p "${path}"
+fi

 # About this command:
-# - Sed wraps fields in double quotes.
+# - Sed wraps fields in double quotes. It is no longer needed.
 # - Grep removes everyone that does not work in UFPR.
-# - Cut selects the important columns.
 # - Uniq removes repeated values.
 # - Tr removes null characters (ctrl + @).

 # Get data from all universities.
-# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL	NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output
+# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output

 # Get only data from UFPR.
 cat $input | egrep --binary-files=text "$filter" | tr -d '\000' > $output