diff --git a/scripts/expenses/insert_expenses.sh b/scripts/expenses/insert_expenses.sh
index 1878082d9eb6fce75281a61f9c188dbc49e23709..877b683dc9d8ad995756296ba1ee9783b28f4174 100755
--- a/scripts/expenses/insert_expenses.sh
+++ b/scripts/expenses/insert_expenses.sh
@@ -28,17 +28,18 @@ day=$(date -d "$temp - 1 day" "+%d")
 ym=$1-$2
 dataPath="../../data/"
 path="../../data/expenses/"
+configPath="../../configs/expenses/logstash/"
 
-if [ ! -d "$dataPath" ]; then
-    mkdir "$dataPath"
-fi
 if [ ! -d "$path" ]; then
-    mkdir "$path"
+    mkdir -p "$path"
+fi
+if [ ! -d "$configPath" ]; then
+    mkdir -p "$configPath"
 fi
 
 # Step 1:
 # Create directory to store files
-mkdir $path$ym
+mkdir -p $path$ym
 
 # Download files
 request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=GastosDiretos'
@@ -50,12 +51,14 @@ unzip $path$ym/${1}${2}_GastosDiretos.zip -d $path$ym/
 # Remove zip file
 rm $path$ym/${1}${2}_GastosDiretos.zip
 
-source config.sh
+source ./config.sh
+
+echo "$filter"
 
 # Step 2:
 ./create_expenses_config.py $1 $2 $day $index $host $3 $4
 # Step 3:
-./resume_expenses.sh ../../data/expenses/ ${1}-${2} $filter
+./resume_expenses.sh ../../data/expenses/ ${1}-${2} "$filter"
 # Step 4:
 logstash -f ../../configs/expenses/logstash/config-${1}-${2} < ../../data/expenses/processed/${1}${2}.csv
 # Data inserted, we can now remove it.
diff --git a/scripts/expenses/resume_expenses.sh b/scripts/expenses/resume_expenses.sh
index a0edea1d06c8e7e02b8a1d4d40856f928b7e6a59..a389304e070ada79745669626a244e262b73f8d5 100755
--- a/scripts/expenses/resume_expenses.sh
+++ b/scripts/expenses/resume_expenses.sh
@@ -21,11 +21,13 @@ filter=$3
 # dateWithoutHyphen example: 201611
 dateWithoutHyphen=${date//-}
 
-echo "Processing data with args = $path and ${date}"
-
 input="${path}${date}/${dateWithoutHyphen}_GastosDiretos.csv"
 output="${path}processed/${dateWithoutHyphen}.csv"
 
+if [ ! -d "${path}processed" ]; then
+    mkdir -p "${path}processed"
+fi
+
 # About this command:
 # - Grep removes everyone that does not work in UFPR.
 # - Tr removes null characters (ctrl + @).
diff --git a/scripts/insert_data.sh b/scripts/insert_data.sh
index 55dd4a9246c142703833c0b46aa4970b81b083ce..f50741ea2d080bf64b436f29b0524b3f705757f4 100755
--- a/scripts/insert_data.sh
+++ b/scripts/insert_data.sh
@@ -9,8 +9,8 @@ fi
 # First, insert Expenses data.
 (cd expenses && ./insert_expenses.sh $1 $2 $3 $4)
 
-# Now, insert Workers data.
-(cd workers && ./insert_register_payment.sh $1 $2 $3 $4)
-
 # We should now insert Travel allowance data.
 (cd travel_allowances && ./insert_travel_allowances.sh $1 $2 $3 $4)
+
+# Now, insert Workers data.
+(cd workers && ./insert_register_payment.sh $1 $2 $3 $4)
diff --git a/scripts/travel_allowances/insert_travel_allowances.sh b/scripts/travel_allowances/insert_travel_allowances.sh
index 1fb48b6e5b50e76e82f80291f6fbf35b61270b0c..844870e5fbfd2476261a866ee33d33b3001653b6 100755
--- a/scripts/travel_allowances/insert_travel_allowances.sh
+++ b/scripts/travel_allowances/insert_travel_allowances.sh
@@ -32,9 +32,6 @@ configPath="../../configs/travel_allowance/logstash/"
 
 source config.sh
 
-if [ ! -d "$dataPath" ]; then
-    mkdir "$dataPath"
-fi
-d "$path/processed" ]; then mkdir -p "$path/processed" fi @@ -44,7 +41,7 @@ fi # Step 1: # Create directory to store files -mkdir $path$ym +mkdir -p $path$ym # Download files request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&consulta=Diarias' @@ -57,9 +54,9 @@ unzip $path$ym/${1}${2}_Diarias.zip -d $path$ym/ rm $path$ym/${1}${2}_Diarias.zip # Step 2: -./create_travel_allowance_config.py $1 $2 $day $index $host $3 $4 +./create_travel_allowance_config.py $1 $2 "$day" "$index" "$host" $3 $4 # Step 3: -./resume_travel_allowance.sh $path ${1}-${2} $filter +./resume_travel_allowance.sh $path ${1}-${2} "$filter" # Step 4: logstash -f ../../configs/travel_allowance/logstash/config-${1}-${2} < ${path}processed/${1}${2}.csv diff --git a/scripts/travel_allowances/resume_travel_allowance.sh b/scripts/travel_allowances/resume_travel_allowance.sh index a64699e0b02249803f485a3a884323e33729779b..ecac41260db8bf8dc5ef84c144d9400ee0b8c30c 100755 --- a/scripts/travel_allowances/resume_travel_allowance.sh +++ b/scripts/travel_allowances/resume_travel_allowance.sh @@ -19,11 +19,13 @@ if [ "$#" -ne 3 ]; then exit fi -echo "Processing data with args = $path and ${date}" - input="${path}${date}/${dateWithoutHyphen}_Diarias.csv" output="${path}processed/${dateWithoutHyphen}.csv" +if [ ! -d "${path}processsed" ]; then + mkdir -p "${path}processed" +fi + # About this command: # - Grep removes everyone that does not work in UFPR. # - Tr removes null characters (ctrl + @). diff --git a/scripts/workers/insert_register_payment.sh b/scripts/workers/insert_register_payment.sh index 63fba591e92f1c802f762328bbc2aaacdf999898..cdc4ee323c3a7de30bcde69caa5ad37c5a7f063e 100755 --- a/scripts/workers/insert_register_payment.sh +++ b/scripts/workers/insert_register_payment.sh @@ -25,20 +25,24 @@ fi ym=$1-$2 dataPath="../../data/" path="../../data/workers/" +configPath="../../configs/workers/" source config.sh # Check if Data and Workers directories already exist: -if [ ! -d "$dataPath" ]; then - mkdir "$dataPath" -fi if [ ! -d "$path" ]; then - mkdir "$path" + mkdir -p "$path" +fi +if [ ! -d "$configPath/json" ]; then + mkdir -p "$configPath/json" +fi +if [ ! -d "$configPath/logstash" ]; then + mkdir -p "$configPath/logstash" fi # Step 1: # Create directory to store files -mkdir $path$ym +mkdir -p $path$ym # Download files request='http://arquivos.portaldatransparencia.gov.br/downloads.asp?a='${1}'&m='${2}'&d=C&consulta=Servidores' @@ -55,11 +59,11 @@ day=$(ls $path$ym | grep -m 1 $1$2 | cut -c 7,8) # Step 2: # Create config files -./create_config.py $1 $2 $day $index $host $3 $4 +./create_config.py $1 $2 "$day" "$index" "$host" $3 $4 # Step 3: # Start processing -./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json $filter +./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json "$filter" # Step 4: # Insert data in ElasticSearch diff --git a/scripts/workers/resume_register.sh b/scripts/workers/resume_register.sh index 71f132c296d396b92126eb09894b0bf750ebf37c..ea36653ea152363bb7c44ef88cad1769787cca0d 100755 --- a/scripts/workers/resume_register.sh +++ b/scripts/workers/resume_register.sh @@ -19,22 +19,21 @@ path=$1 date=$2 filter=$3 -echo "Processing data with args = ${path} and ${date}" - input="${path}${date}_Cadastro.csv" output="${path}${date}_Cadastro_Ufpr_Unique.csv" -columns="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42" +if [ ! 
-d "${path}" ]; then + mkdir -p "${path}" +fi # About this command: -# - Sed wraps fields in double quotes. +# - Sed wraps fields in double quotes. Its not needed anymore. # - Grep removes everyone that does not work in UFPR. -# - Cut selects the important columns. # - Uniq removes repeated values. # - Tr removes null characters (ctrl + @). # Get data from all universities. -# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output +# cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output # Get only data from UFPR. cat $input | egrep --binary-files=text "$filter" | tr -d '\000' > $output