diff --git a/scripts/expenses/config.sh b/scripts/expenses/config.sh
new file mode 100644
index 0000000000000000000000000000000000000000..970088920ef966d3bce9b5d68538b02600f0baa2
--- /dev/null
+++ b/scripts/expenses/config.sh
@@ -0,0 +1,19 @@
+# This file only contains some config variables:
+
+# Index prefix: The prefix of the index in elasticsearch. Ex: gastos
+
+index="gastos-pagamentos"
+
+# Filter: The string that will be used on 'egrep' to filter data to get only relevant universities.
+# Ex: Getting only UFPR:
+# filter="UNIVERSIDADE FEDERAL DO PARANA"
+# Getting UFPR and UFMG:
+# filter="UNIVERSIDADE FEDERAL DO PARANA|UNIVERSIDADE FEDERAL DE MINAS GERAIS"
+# Getting all universities:
+# filter="UNIVERSIDADE FEDERAL*"
+
+filter="UNIVERSIDADE FEDERAL DO PARANA|UNIVERSIDADE FEDERAL DE MINAS GERAIS|UNIVERSIDADE FEDERAL DE SANTA CATARINA|UNIVERSIDADE FEDERAL DE PERNAMBUCO|UNIVERSIDADE FEDERAL DE SANTA MARIA"
+
+# Host: ElasticSearch's host. Ex: "localhost"
+
+host="localhost"
diff --git a/scripts/expenses/create_expenses_config.py b/scripts/expenses/create_expenses_config.py
index a40f90735e2863be17e56236dd755098b29b9adf..0c86e6a41477c07151c2a3bf6ce7dca3ea55d48d 100755
--- a/scripts/expenses/create_expenses_config.py
+++ b/scripts/expenses/create_expenses_config.py
@@ -10,8 +10,8 @@ import sys, csv, json, math, subprocess
 from pathlib import Path
 from subprocess import call
 
-if len(sys.argv) != 6:
-    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <username> <password>")
+if len(sys.argv) != 8:
+    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <index> <host> <username> <password>")
     sys.exit()
 
 with open('logstash_config.example') as infile:
@@ -19,8 +19,10 @@ with open('logstash_config.example') as infile:
 
 output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.argv[1] + ' 00:00:00'
                    , "date": sys.argv[1] + '-' + sys.argv[2]
-                   , "user": sys.argv[4]
-                   , "password": sys.argv[5] }
+                   , "index": sys.argv[4]
+                   , "host": sys.argv[5]
+                   , "user": sys.argv[6]
+                   , "password": sys.argv[7] }
 
 with open('../../configs/expenses/logstash/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
     outfile.write(output)
diff --git a/scripts/expenses/insert_expenses.sh b/scripts/expenses/insert_expenses.sh
index a05e7a72584142e9db7ba98811a67caf687f058b..1878082d9eb6fce75281a61f9c188dbc49e23709 100755
--- a/scripts/expenses/insert_expenses.sh
+++ b/scripts/expenses/insert_expenses.sh
@@ -19,7 +19,7 @@ fi
 
 # Getting the Last day of this month (Using date 2016-05-15 as example):
 # First, get next month (201606).
-aux=$(date +%Y%m -d "$(date +%Y%m15) next month")
+aux=$(date +%Y%m -d "$(date +${1}${2}15) next month")
 # Append day 01 (20160601).
 temp=$(date -d "${aux}01")
 # Remove 1 day: 20160531, get only day: 31.
@@ -50,9 +50,13 @@ unzip $path$ym/${1}${2}_GastosDiretos.zip -d $path$ym/
 # Remove zip file
 rm $path$ym/${1}${2}_GastosDiretos.zip
 
+source config.sh
+
 # Step 2:
-./create_expenses_config.py $1 $2 $day $3 $4
+./create_expenses_config.py $1 $2 $day $index $host $3 $4
 # Step 3:
-./resume_expenses.sh ../../data/expenses/ ${1}-${2}
+./resume_expenses.sh ../../data/expenses/ ${1}-${2} "$filter"
 # Step 4:
 logstash -f ../../configs/expenses/logstash/config-${1}-${2} < ../../data/expenses/processed/${1}${2}.csv
+# Data inserted, we can now remove it.
+rm ../../data/expenses/processed/${1}${2}.csv
diff --git a/scripts/expenses/logstash_config.example b/scripts/expenses/logstash_config.example
index c5b0ee6c8f3f1f5957dee510c2cf5dcbb767d602..b1f680f832409f4a5bac56aabe7fe24406e3a2fd 100644
--- a/scripts/expenses/logstash_config.example
+++ b/scripts/expenses/logstash_config.example
@@ -41,8 +41,8 @@ output {
     action => "index"
     user => "%(user)s"
     password => "%(password)s"
-    hosts => "localhost:9200"
-    index => "ufpr-gastos-pagamentos-%(date)s"
+    hosts => "http://%(host)s:9200"
+    index => "ufpr-%(index)s-%(date)s"
     workers => 1
   }
   stdout {}
diff --git a/scripts/expenses/resume_expenses.sh b/scripts/expenses/resume_expenses.sh
index 7ffe50f2f52d6f80d82e6ee74d4d10bdab387f2d..a0edea1d06c8e7e02b8a1d4d40856f928b7e6a59 100755
--- a/scripts/expenses/resume_expenses.sh
+++ b/scripts/expenses/resume_expenses.sh
@@ -7,18 +7,20 @@
 
 # Output: A CSV file in folder processed, filtering the data to get only relevant data (in our case, from UFPR).
 
+if [ "$#" -ne 3 ]; then
+    echo "Usage: $0 <path> <date> <filter>"
+    exit
+fi
+
 # Path example: ../../data/expenses/
 path=$1
 # Date example: 2016-11
 date=$2
+# Filter example: UNIVERSIDADE FEDERAL DO PARANA
+filter=$3
 # dateWithoutHyphen example: 201611
 dateWithoutHyphen=${date//-}
 
-if [ "$#" -ne 2 ]; then
-    echo "Usage: $0 <path> <date>"
-    exit
-fi
-
 echo "Processing data with args = $path and ${date}"
 
 input="${path}${date}/${dateWithoutHyphen}_GastosDiretos.csv"
@@ -27,7 +29,6 @@ output="${path}processed/${dateWithoutHyphen}.csv"
 
 # About this command:
 # - Grep removes everyone that does not work in UFPR.
 # - Tr removes null characters (ctrl + @).
-# - Head -n1 gets first line (column names). Then, I append the data.
-head -n1 $input > $output
-cat $input | egrep --binary-files=text "UNIVERSIDADE FEDERAL DO PARANA" | tr -d '\000' >> $output
+cat $input | egrep --binary-files=text "$filter" | tr -d '\000' > $output
+rm $input
diff --git a/scripts/travel_allowances/config.sh b/scripts/travel_allowances/config.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a1897f5da8f7d6dabac198352f7631ddf2b6faf4
--- /dev/null
+++ b/scripts/travel_allowances/config.sh
@@ -0,0 +1,19 @@
+# This file only contains some config variables:
+
+# Index prefix: The prefix of the index in elasticsearch. Ex: gastos
+
+index="gastos-diarias"
+
+# Filter: The string that will be used on 'egrep' to filter data to get only relevant universities.
+# Ex: Getting only UFPR:
+# filter="UNIVERSIDADE FEDERAL DO PARANA"
+# Getting UFPR and UFMG:
+# filter="UNIVERSIDADE FEDERAL DO PARANA|UNIVERSIDADE FEDERAL DE MINAS GERAIS"
+# Getting all universities:
+# filter="UNIVERSIDADE FEDERAL*"
+
+filter="UNIVERSIDADE FEDERAL DO PARANA|UNIVERSIDADE FEDERAL DE MINAS GERAIS|UNIVERSIDADE FEDERAL DE SANTA CATARINA|UNIVERSIDADE FEDERAL DE PERNAMBUCO|UNIVERSIDADE FEDERAL DE SANTA MARIA"
+
+# Host: ElasticSearch's host. Examples: "localhost"
+
+host="localhost"
diff --git a/scripts/travel_allowances/create_travel_allowance_config.py b/scripts/travel_allowances/create_travel_allowance_config.py
index 10e00d090feff213bbbf2b0ed3d79c6b8d2b63c2..57434484eb935d692418a751286e87010dcacdab 100755
--- a/scripts/travel_allowances/create_travel_allowance_config.py
+++ b/scripts/travel_allowances/create_travel_allowance_config.py
@@ -10,8 +10,8 @@ import sys, csv, json, math, subprocess
 from pathlib import Path
 from subprocess import call
 
-if len(sys.argv) != 6:
-    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <username> <password>")
+if len(sys.argv) != 8:
+    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <index> <host> <username> <password>")
     sys.exit()
 
 with open('logstash_config.example') as infile:
@@ -19,8 +19,10 @@ with open('logstash_config.example') as infile:
 
 output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.argv[1] + ' 00:00:00'
                    , "date": sys.argv[1] + '-' + sys.argv[2]
-                   , "user": sys.argv[4]
-                   , "password": sys.argv[5] }
+                   , "index": sys.argv[4]
+                   , "host": sys.argv[5]
+                   , "user": sys.argv[6]
+                   , "password": sys.argv[7] }
 
 with open('../../configs/travel_allowance/logstash/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
     outfile.write(output)
diff --git a/scripts/travel_allowances/insert_travel_allowances.sh b/scripts/travel_allowances/insert_travel_allowances.sh
index d850b385edba13a1c6f98e513d63cbde428998d1..1fb48b6e5b50e76e82f80291f6fbf35b61270b0c 100755
--- a/scripts/travel_allowances/insert_travel_allowances.sh
+++ b/scripts/travel_allowances/insert_travel_allowances.sh
@@ -19,7 +19,7 @@ fi
 
 # Getting the Last day of this month (Using date 2016-05-15 as example):
 # First, get next month (201606).
-aux=$(date +%Y%m -d "$(date +%Y%m15) next month")
+aux=$(date +%Y%m -d "$(date +${1}${2}15) next month")
 # Append day 01 (20160601).
 temp=$(date -d "${aux}01")
 # Remove 1 day: 20160531, get only day: 31.
@@ -30,6 +30,8 @@ dataPath="../../data/"
 path="../../data/travel_allowance/"
 configPath="../../configs/travel_allowance/logstash/"
 
+source config.sh
+
 if [ ! -d "$dataPath" ]; then
     mkdir "$dataPath"
 fi
@@ -55,8 +57,11 @@ unzip $path$ym/${1}${2}_Diarias.zip -d $path$ym/
 rm $path$ym/${1}${2}_Diarias.zip
 
 # Step 2:
-./create_travel_allowance_config.py $1 $2 $day $3 $4
+./create_travel_allowance_config.py $1 $2 $day $index $host $3 $4
 # Step 3:
-./resume_travel_allowance.sh $path ${1}-${2}
+./resume_travel_allowance.sh $path ${1}-${2} "$filter"
 # Step 4:
 logstash -f ../../configs/travel_allowance/logstash/config-${1}-${2} < ${path}processed/${1}${2}.csv
+
+# Remove processed file
+rm ${path}processed/${1}${2}.csv
diff --git a/scripts/travel_allowances/logstash_config.example b/scripts/travel_allowances/logstash_config.example
index 50afa9de04a11630f14672bd244323be8170eeb4..20dc7e9163f884756bb30cf9458b08e1513d219a 100644
--- a/scripts/travel_allowances/logstash_config.example
+++ b/scripts/travel_allowances/logstash_config.example
@@ -41,8 +41,8 @@ output {
     action => "index"
     user => "%(user)s"
     password => "%(password)s"
-    hosts => "localhost:9200"
-    index => "ufpr-gastos-diarias-%(date)s"
+    hosts => "http://%(host)s:9200"
+    index => "ufpr-%(index)s-%(date)s"
     workers => 1
   }
   stdout {}
diff --git a/scripts/travel_allowances/resume_travel_allowance.sh b/scripts/travel_allowances/resume_travel_allowance.sh
index 22c302aceb349a5be0e6b3efd7b1def7225b3978..a64699e0b02249803f485a3a884323e33729779b 100755
--- a/scripts/travel_allowances/resume_travel_allowance.sh
+++ b/scripts/travel_allowances/resume_travel_allowance.sh
@@ -14,8 +14,10 @@ date=$2
 # dateWithoutHyphen example: 201611
 dateWithoutHyphen=${date//-}
 
+# Filter example: UNIVERSIDADE FEDERAL DO PARANA
+filter=$3
-if [ "$#" -ne 2 ]; then
-    echo "Usage: $0 <path> <date>"
+if [ "$#" -ne 3 ]; then
+    echo "Usage: $0 <path> <date> <filter>"
     exit
 fi
 
@@ -27,7 +29,5 @@ output="${path}processed/${dateWithoutHyphen}.csv"
 
 # About this command:
 # - Grep removes everyone that does not work in UFPR.
 # - Tr removes null characters (ctrl + @).
-# - Head -n1 gets first line (column names). Then, I append the data.
-head -n1 $input > $output
-cat $input | egrep --binary-files=text "UNIVERSIDADE FEDERAL DO PARANA" | tr -d '\000' >> $output
+cat $input | egrep --binary-files=text "$filter" | tr -d '\000' > $output
diff --git a/scripts/workers/config.sh b/scripts/workers/config.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b3fe041cb596b3a562e62ef1448356f37502522e
--- /dev/null
+++ b/scripts/workers/config.sh
@@ -0,0 +1,19 @@
+# This file only contains some config variables:
+
+# Index prefix: The prefix of the index in elasticsearch. Ex: gastos
+
+index="servidores"
+
+# Filter: The string that will be used on 'egrep' to filter data to get only relevant universities.
+# Ex: Getting only UFPR:
+# filter="UNIVERSIDADE FEDERAL DO PARANA"
+# Getting UFPR and UFMG:
+# filter="UNIVERSIDADE FEDERAL DO PARANA|UNIVERSIDADE FEDERAL DE MINAS GERAIS"
+# Getting all universities:
+# filter="UNIVERSIDADE FEDERAL*"
+
+filter="UNIVERSIDADE FEDERAL DO PARANA|UNIVERSIDADE FEDERAL DE MINAS GERAIS|UNIVERSIDADE FEDERAL DE SANTA CATARINA|UNIVERSIDADE FEDERAL DE PERNAMBUCO|UNIVERSIDADE FEDERAL DE SANTA MARIA"
+
+# Host: ElasticSearch's host. Examples: "localhost"
+
+host="localhost"
diff --git a/scripts/workers/create_config.py b/scripts/workers/create_config.py
index 5e5f74f7a5bedda9d46c1b460703e14831708bf1..9bc0b15dae124b78e73ccf50dc8abdb1f8810a46 100755
--- a/scripts/workers/create_config.py
+++ b/scripts/workers/create_config.py
@@ -12,8 +12,8 @@ import sys, csv, json, math, subprocess
 from pathlib import Path
 from subprocess import call
 
-if len(sys.argv) != 6:
-    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <username> <password>")
+if len(sys.argv) != 8:
+    print("Usage: " + sys.argv[0] + " <year (2016)> <month (01)> <day (31)> <index> <host> <username> <password>")
     sys.exit()
 
 data = {
@@ -41,8 +41,10 @@ else:
 
 output = example % { "timestamp": sys.argv[3] + '/' + sys.argv[2] + '/' + sys.argv[1] + ' 00:00:00'
                    , "date": sys.argv[1] + '-' + sys.argv[2]
-                   , "user": sys.argv[4]
-                   , "password": sys.argv[5] }
+                   , "index": sys.argv[4]
+                   , "host": sys.argv[5]
+                   , "user": sys.argv[6]
+                   , "password": sys.argv[7] }
 
 with open('../../configs/workers/logstash/config-' + sys.argv[1] + '-' + sys.argv[2], 'w') as outfile:
     outfile.write(output)
diff --git a/scripts/workers/insert_register_payment.sh b/scripts/workers/insert_register_payment.sh
index 3b06efd8e53160492d57faadec13e8117694edc5..63fba591e92f1c802f762328bbc2aaacdf999898 100755
--- a/scripts/workers/insert_register_payment.sh
+++ b/scripts/workers/insert_register_payment.sh
@@ -26,6 +26,8 @@ ym=$1-$2
 dataPath="../../data/"
 path="../../data/workers/"
 
+source config.sh
+
 # Check if Data and Workers directories already exist:
 if [ ! -d "$dataPath" ]; then
     mkdir "$dataPath"
@@ -51,15 +53,17 @@ rm $path$ym/${1}${2}_Servidores.zip
 
 # Get day
 day=$(ls $path$ym | grep -m 1 $1$2 | cut -c 7,8)
-
 # Step 2:
 # Create config files
-./create_config.py $1 $2 $day $3 $4
+./create_config.py $1 $2 $day $index $host $3 $4
 
 # Step 3:
 # Start processing
-./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json
+./merge_files_es.py ../../configs/workers/json/config-${1}-${2}.json "$filter"
 
 # Step 4:
 # Insert data in ElasticSearch
 logstash -f ../../configs/workers/logstash/config-${1}-${2} < ../../data/workers/processed/${1}${2}.csv
+
+# Remove data
+rm ../../data/workers/processed/${1}${2}.csv
diff --git a/scripts/workers/logstash_config.example b/scripts/workers/logstash_config.example
index 568ac5a3af1596954e21378c9fdf29df24304b83..0383a39101a6e78c4e2bf12ddc6f7ebc8a05ebcf 100644
--- a/scripts/workers/logstash_config.example
+++ b/scripts/workers/logstash_config.example
@@ -58,8 +58,8 @@ output {
     action => "index"
     user => "%(user)s"
     password => "%(password)s"
-    hosts => "http://node1.c3sl.ufpr.br:9200"
-    index => "ufpr-servidores-%(date)s"
+    hosts => "http://%(host)s:9200"
+    index => "ufpr-%(index)s-%(date)s"
     workers => 1
   }
   stdout {}
diff --git a/scripts/workers/logstash_config_2013.example b/scripts/workers/logstash_config_2013.example
index f8c90318e3c207a4cd7c442ac40bab3744114194..b7af5c02e6d2429417148f80e85ee5414c09e0d4 100644
--- a/scripts/workers/logstash_config_2013.example
+++ b/scripts/workers/logstash_config_2013.example
@@ -58,8 +58,8 @@ output {
     action => "index"
     user => "%(user)s"
     password => "%(password)s"
-    hosts => "http://node1.c3sl.ufpr.br:9200"
-    index => "ufpr-servidores-%(date)s"
+    hosts => "http://%(host)s:9200"
+    index => "ufpr-%(index)s-%(date)s"
     workers => 1
   }
   stdout {}
diff --git a/scripts/workers/merge_files_es.py b/scripts/workers/merge_files_es.py
index 6dccc2471b8a343402ffa278f76a5a8a4a68f81a..68b3175186b6577493b519d3ce3a0748eb3dbb3b 100755
--- a/scripts/workers/merge_files_es.py
+++ b/scripts/workers/merge_files_es.py
@@ -22,8 +22,8 @@ import sys, csv, json, math, subprocess
 from pathlib import Path
 from subprocess import call
 
-if len(sys.argv) != 2:
-    print("Usage: " + sys.argv[0] + " <config.json>")
+if len(sys.argv) != 3:
+    print("Usage: " + sys.argv[0] + " <config.json> <filter>")
     sys.exit()
 
 with open(sys.argv[1]) as f:
@@ -48,7 +48,7 @@ title1 = csv_1.pop(0)
 file_exists = Path(file2)
 if not file_exists.is_file():
     print("File2 does not exist. Calling script resume_register to create it...")
-    call(["./resume_register.sh " + params['path'] + " " + params['date']], shell=True)
+    call(["./resume_register.sh " + params['path'] + " " + params['date'] + " '" + sys.argv[2] + "'"], shell=True)
 
 with open(file2, newline='', encoding='Windows-1252') as f:
     csv_2 = [ i for i in csv.reader(f, 'dialect') ]
diff --git a/scripts/workers/resume_register.sh b/scripts/workers/resume_register.sh
index eb5cf9de6396cc3742b19278149587caafd332e6..71f132c296d396b92126eb09894b0bf750ebf37c 100755
--- a/scripts/workers/resume_register.sh
+++ b/scripts/workers/resume_register.sh
@@ -10,14 +10,15 @@
 # Output: CSV file named YearMonthDay_Cadastro_Ufpr_Unique.csv, in the $path folder.
 # Example of CSV location (using same parameters as input): ../../data/workers/2016-10/20161031_Cadastro_Ufpr_Unique.csv
 
-path=$1
-date=$2
-
-if [ "$#" -ne 2 ]; then
-    echo "Usage: $0 <path> <date>"
+if [ "$#" -ne 3 ]; then
+    echo "Usage: $0 <path> <date> <filter>"
     exit
 fi
 
+path=$1
+date=$2
+filter=$3
+
 echo "Processing data with args = ${path} and ${date}"
 
 input="${path}${date}_Cadastro.csv"
@@ -36,4 +37,4 @@ columns="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
 # cat $input | egrep --binary-files=text "(UNIVERSIDADE FED*|Id_SERVIDOR_PORTAL NOME)" | sed -e 's/"//g' -e 's/^\|$/"/g' -e 's/\t/"\t"/g' | tr -d '\000' > $output
 
 # Get only data from UFPR.
-cat $input | egrep --binary-files=text "(UNIVERSIDADE FEDERAL DO PARANA|Id_SERVIDOR_PORTAL NOME)" | tr -d '\000' > $output
+cat $input | egrep --binary-files=text "$filter" | tr -d '\000' > $output
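
Usage sketch (assumed invocation; the argument check at the top of each insert script is outside the hunks above, and the credential placeholders are hypothetical): after adjusting index, host and filter in the sibling config.sh, a month of data is loaded with the corresponding insert script, e.g. for May 2016 expenses:

    cd scripts/expenses
    # Edit config.sh first: index prefix, ElasticSearch host, egrep filter.
    ./insert_expenses.sh 2016 05 <elasticsearch_user> <elasticsearch_password>

config.sh is sourced by the insert script, which forwards $index and $host to create_expenses_config.py, passes "$filter" to resume_expenses.sh for the egrep step, feeds the processed CSV to logstash, and then removes it.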