'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR

This file is part of HOTMapper.

HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with HOTMapper.  If not, see <https://www.gnu.org/licenses/>.
'''

import sys
import pandas as pd


# Mapping protocol that receives the new year's columns.
PROTOCOL_PATH = 'mapping_protocols/pnad.csv'
# Protocol column whose variable codes are copied into the new year's column.
BASE_YEAR = '2015'

# Column names expected in the header row of the dictionary spreadsheet.
CODE_COLUMN = 'Código de variável'
START_COLUMN = 'Posição Inicial'
SIZE_COLUMN = 'Tamanho'


def _layout_from_dictionary(dic):
    '''Return the start-position/size table of a raw dictionary sheet.

    The sheet is expected to carry its real column names in the row labelled
    0 and its data from the row labelled 4 onwards. Rows without a
    'Tamanho' value are discarded. When a variable code occurs more than
    once, only its first occurrence is kept so that every code maps to a
    single position and size.

    The input frame is not modified. The result is indexed by variable code
    and has the columns 'Posição Inicial' and 'Tamanho'.
    '''
    dic = dic.copy()
    dic.columns = list(dic.loc[0])
    dic = dic.loc[4:]
    dic = dic.fillna('')
    dic = dic[dic[SIZE_COLUMN] != '']
    layout = dic[[CODE_COLUMN, START_COLUMN, SIZE_COLUMN]]
    layout = layout.drop_duplicates(subset=CODE_COLUMN, keep='first')
    return layout.set_index(CODE_COLUMN)


def add_year_columns(protocol, dic, year, base_year=BASE_YEAR):
    '''Add the columns for ``year`` to ``protocol`` using the dictionary.

    Three columns are written to ``protocol`` (in place):

    * ``year``        -- a copy of the ``base_year`` column (variable codes);
    * ``'p0' + year`` -- the 'Posição Inicial' of each code;
    * ``'pf' + year`` -- the 'Tamanho' of each code.

    Codes that are not present in the dictionary get an empty string in both
    position columns.

    NOTE(review): the 'pf' prefix may suggest a final position, but the
    value stored is the field size ('Tamanho'), as in the original script --
    confirm against whatever consumes the protocol.

    Args:
        protocol: DataFrame of the mapping protocol; must have ``base_year``.
        dic: raw DataFrame of the dictionary spreadsheet.
        year: name (string) of the year column to create.
        base_year: protocol column the variable codes are copied from.

    Returns:
        The same ``protocol`` object, for convenience.

    Raises:
        KeyError: if ``base_year`` or an expected dictionary column is absent.
    '''
    layout = _layout_from_dictionary(dic)
    # Plain dicts give one O(1) lookup per protocol row.
    starts = layout[START_COLUMN].to_dict()
    sizes = layout[SIZE_COLUMN].to_dict()

    protocol[year] = protocol[base_year]
    codes = list(protocol[year])
    protocol['p0' + year] = [starts.get(code, '') for code in codes]
    protocol['pf' + year] = [sizes.get(code, '') for code in codes]
    return protocol


def main(argv=None):
    '''Command-line entry point: ``<script> <dictionary.xlsx> <year>``.

    Reads the protocol CSV and the dictionary spreadsheet, adds the columns
    for the requested year and writes the resulting protocol as CSV to
    standard output. Arguments beyond the first two are ignored.
    '''
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        # Exit with a readable message instead of a bare IndexError.
        sys.exit('usage: python %s <dictionary.xlsx> <year>' % argv[0])

    protocol = pd.read_csv(PROTOCOL_PATH)
    dic = pd.read_excel(argv[1])
    add_year_columns(protocol, dic, argv[2])
    protocol.to_csv(sys.stdout)


if __name__ == '__main__':
    main()