'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR

This file is part of HOTMapper.

HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with HOTMapper.  If not, see <https://www.gnu.org/licenses/>.
'''

import sys
import pandas as pd


# Protocol CSV that receives the new year's columns (relative to the CWD).
PROTOCOL_PATH = 'mapping_protocols/pnad.csv'

# Header labels of the PNAD dictionary spreadsheet that this script reads.
CODE_COLUMN = 'Código de variável'
START_COLUMN = 'Posição Inicial'
SIZE_COLUMN = 'Tamanho'


def load_dictionary_layout(dic):
    '''
    Reduce a raw dictionary sheet to its layout rows, indexed by variable code.

    The sheet is expected to carry its real header in the row labelled 0 and
    its data from the row labelled 4 onwards. Rows without a 'Tamanho' value
    are dropped, and only the first three columns are kept; they must include
    'Código de variável', 'Posição Inicial' and 'Tamanho'.

    Parameters:
        dic: DataFrame as returned by pd.read_excel for the dictionary file.
            It is not modified.

    Returns:
        DataFrame indexed by 'Código de variável' (the column is kept too).
        When a code appears more than once, only its first row is kept, so
        that every code resolves to a single position/size pair.
    '''
    layout = dic.copy()
    layout.columns = list(layout.loc[0])
    layout = layout.loc[4:].fillna('')
    layout = layout[layout[SIZE_COLUMN] != '']
    layout = layout.iloc[:, :3]
    # A repeated code would make a label lookup return several rows, which
    # cannot be written into a single protocol cell.
    layout = layout.drop_duplicates(subset=CODE_COLUMN, keep='first')
    return layout.set_index(CODE_COLUMN, drop=False)


def fill_positions(protocol, layout, year, base_year='2015'):
    '''
    Return a copy of the protocol with the columns for `year` added.

    Three columns are written: `year` (copied from `base_year`), 'p0' + year
    (the 'Posição Inicial' of the matching dictionary row) and 'pf' + year
    (the 'Tamanho' of the matching dictionary row). Protocol rows whose code
    is not in the dictionary get an empty string in both derived columns.

    Parameters:
        protocol: DataFrame of the mapping protocol; must contain `base_year`.
        layout: DataFrame indexed by variable code, as produced by
            load_dictionary_layout.
        year: name of the column to create, as a string.
        base_year: existing protocol column holding the variable codes.

    Returns:
        A new DataFrame; `protocol` itself is left untouched.
    '''
    result = protocol.copy()
    codes = result[base_year]

    # Build the lookups once instead of scanning a list for every row.
    start_by_code = layout[START_COLUMN].to_dict()
    size_by_code = layout[SIZE_COLUMN].to_dict()

    result[year] = codes
    result['p0' + year] = [start_by_code.get(code, '') for code in codes]
    # NOTE(review): 'pf' receives the field size ('Tamanho'), not an end
    # position. Kept exactly as the script always did -- confirm with the
    # consumer of the protocol that this is the intended meaning.
    result['pf' + year] = [size_by_code.get(code, '') for code in codes]
    return result


def main(argv=None):
    '''
    Add the columns for one PNAD year to the protocol and print it as CSV.

    Parameters:
        argv: argument vector; defaults to sys.argv. argv[1] is the path of
            the dictionary spreadsheet and argv[2] the year to add. Extra
            arguments are ignored.

    Raises:
        SystemExit: with a usage message when fewer than two arguments are
            given.
    '''
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else 'pnad_protocol_from_dic.py'
        sys.exit('usage: {} <dictionary spreadsheet> <year>'.format(prog))
    dic_path, year = argv[1], argv[2]

    protocol = pd.read_csv(PROTOCOL_PATH)
    layout = load_dictionary_layout(pd.read_excel(dic_path))
    fill_positions(protocol, layout, year).to_csv(sys.stdout)


if __name__ == '__main__':
    main()