protocols_comparison.py 1.67 KB
Newer Older
1 2
'''
Copyright (C) 2016 Centro de Computacao Cientifica e Software Livre
Departamento de Informatica - Universidade Federal do Parana - C3SL/UFPR

This file is part of HOTMapper.

HOTMapper is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

HOTMapper is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with HOTMapper.  If not, see <https://www.gnu.org/licenses/>.
'''
20

21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
import pandas as pd
import os

from database.protocol import Protocol


# Cross-check the mapping protocols: for every '2015' key, collect the
# 'Var.Lab' value each protocol CSV assigns to it and write the keys whose
# labels disagree between protocols to output.csv.
protocols_dir = './mapping_protocols'

# sorted() keeps the report's column order stable between runs;
# os.listdir() returns directory entries in arbitrary order.
file_list = sorted(
    f for f in os.listdir(protocols_dir)
    if os.path.isfile(os.path.join(protocols_dir, f))
)

# One single-column frame per protocol, indexed by the '2015' value and
# named after the CSV file it came from.
label_frames = []
for f in file_list:
    if not f.endswith('.csv'):
        continue
    p = Protocol()
    p.load_csv(os.path.join(protocols_dir, f))

    df = p._dataframe
    # Rows with an empty '2015' cell are left out of the comparison.
    df = df[df['2015'] != '']
    label_frames.append(df.set_index('2015')['Var.Lab'].to_frame(name=f))

if label_frames:
    # Align every protocol side by side in one concat instead of re-copying
    # a growing frame once per file.
    full_frame = pd.concat(label_frames, axis=1)
    # 'unique' is True when the protocols that define the key agree on a
    # single label (missing values are ignored). Computed as a whole column:
    # chained assignment (frame[col][i] = v) is unreliable and does nothing
    # under pandas copy-on-write.
    full_frame['unique'] = full_frame.nunique(axis=1) <= 1
else:
    # No protocol CSV found: keep the report's shape (just the 'unique'
    # column) so the output file is still written.
    full_frame = pd.DataFrame({'unique': pd.Series(dtype=bool)})

# Only the conflicting keys are reported.
full_frame = full_frame[~full_frame['unique']]

# Context manager guarantees the report is flushed and the handle closed.
with open('output.csv', 'w') as output:
    output.write(full_frame.to_csv())