Commit 7e71639e authored by Alexandre Peres Arias
Browse files

add distancia hamming


Signed-off-by: alexandre <apa13@inf.ufpr.br>
parent 360f936a
Pipeline #6918 failed with stage
in 15 seconds
# Default target: compiles hamming.c into the `ham` executable with gcc.
# NOTE(review): make requires the recipe line to start with a TAB; the leading
# whitespace appears to have been lost in this copy -- confirm against the repo.
all:
gcc -o ham hamming.c -std=c99 -lm -Wall -Wno-unused-result -g -O3 -lpthread
# TODO: tidy up the makefile
/*
 * Scores every row of M against `user` and keeps the K best-scoring rows in
 * the file-level table hamm[] (fields .dis = score, .ind = row index).
 *
 * The score of a row is the sum over all COL columns of user[j] & row[j]; for
 * 0/1 vectors that is the number of positions set in both. A higher score
 * replaces a stored entry, so hamm[] must be pre-filled by the caller with
 * scores lower than any real one.
 *
 * M    : LIN rows of COL ints each.
 * user : COL ints.
 *
 * Returns NULL; the result is delivered through hamm[].
 * NOTE(review): the declared int* return type has no evident meaning in this
 * block. NULL is returned so callers never read an indeterminate pointer --
 * confirm what the caller expects.
 */
int *hamming(int **M, int *user ){
    for (int i = 0; i < LIN; ++i) {
        buf = M[i];

        /* The score is per row, so it must start from zero for every row. */
        int dist = 0;
        for (int j = 0; j < COL; ++j)
            dist += user[j] & buf[j];

        /* Replace only the weakest stored entry; overwriting every smaller
         * slot would fill the table with copies of the same row. */
        int pior = 0;
        for (int s = 1; s < K; ++s)
            if (hamm[s].dis < hamm[pior].dis)
                pior = s;

        if (K > 0 && dist > hamm[pior].dis) {
            hamm[pior].dis = dist;
            hamm[pior].ind = i;
        }
    }
    return NULL;
}
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <float.h>
#include <pthread.h>
#include <unistd.h>
// TODO: switch from Euclidean distance to cosine or Mahalanobis distance; better still, use Hamming distance.
// TODO: drop the training base.
/* One nearest-neighbour candidate: a training row and its distance to the
 * test row currently being classified. */
typedef struct eucl{
int ind; /* row index into the training set; calc_dist writes -1 when it clears a slot */
float dist; /* distance produced by eucl_dist() for that row */
} eucl_vet;
/* Not referenced by the code visible here; alloc_data() has a parameter of
 * the same name that shadows it. */
int lab_flag;
/* N_THREADS: worker-thread count (argv[4]). N: number of classes, set to 10
 * in main(). DOM: H1 / N_THREADS, the number of test rows each thread handles
 * (the last thread also takes the remainder). */
int N_THREADS,N,DOM;
/* H1 / H2: row counts of the test / training files. W: values per row.
 * k: number of neighbours that vote (argv[3]). */
int H1,H2,W,k;
/* Count of test rows whose predicted label differs from the file label;
 * updated by conf_matrix(). */
long unsigned int erros;
/* label[0][i] / label[1][i]: label read from the test / training file for
 * row i. label_res[i]: label predicted for test row i. */
int **label, *label_res;
/* min_dists[t] is thread t's private buffer of k candidate neighbours. */
eucl_vet **min_dists;
/* Flat row-major matrices: element (i, j) lives at index i*W + j. */
float *test_base, *train_base;
/* Forward declarations of the functions defined below.
 * NOTE(review): calc_dist is declared twice (harmless, but redundant), and
 * the parameter names given for insere_dist differ from its definition. */
extern inline float *alloc_data(FILE *f, int h, int lab_flag);
extern inline void* calc_dist(void *in);
extern inline void insere_dist(float dist, int ind, int j);
extern inline float eucl_dist(float *v1, int i1, float *v2, int i2);
extern inline void cmp_dists(int lin, int ind_th);
extern inline void *calc_dist(void *in);
int **conf_matrix(int N);
/* ########################################################## */
/*-----------------------------------------------------------------------------
 * Reads h rows from f into a newly allocated row-major matrix of h*W floats.
 * Each row in the stream is W float values followed by one integer label; the
 * label of row i is stored in label[lab_flag][i].
 *
 * f        : open text stream positioned at the first row.
 * h        : number of rows to read.
 * lab_flag : which label array to fill (main uses 0 for test, 1 for train).
 *
 * Returns the matrix; the caller owns it and releases it with free().
 * Prints a message and exits with status 1 if memory cannot be obtained or a
 * value cannot be parsed, so callers never see a partially filled matrix. */
extern inline float *alloc_data(FILE *f, int h, int lab_flag){
    const size_t rows = h > 0 ? (size_t)h : 0;
    const size_t cols = W > 0 ? (size_t)W : 0;

    float *data = malloc(rows * cols * sizeof *data);
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * that fails is an error. */
    if (data == NULL && rows != 0 && cols != 0) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }

    for (int i = 0; i < h; ++i) {
        for (int j = 0; j < W; ++j) {
            /* fscanf returns 0 on a matching failure, not EOF. */
            if (fscanf(f, "%f", &data[i*W + j]) != 1) {
                puts("ERRO DE LEITURA DOS DADOS");
                exit(1);
            }
        }
        if (fscanf(f, "%d", &label[lab_flag][i]) != 1) {
            puts("ERRO DE LEITURA DO LABEL");
            exit(1);
        }
    }
    return data;
}
/*-----------------------------------------------------------------------------
 * Thread entry point. Thread number ind_th classifies the test rows in
 * [ind_th*DOM, ind_th*DOM + DOM); the last thread runs up to H1 so it also
 * takes the rows left over by the integer division.
 *
 * For every test row it measures the distance to each of the H2 training
 * rows, keeps the k closest in min_dists[ind_th], and lets cmp_dists() pick
 * the label.
 *
 * in : pointer to an int holding this thread's index.
 * Returns NULL. */
extern inline void *calc_dist(void *in){
    const int ind_th = *((int*)in);
    const int first = ind_th*DOM;
    int max = first + DOM;
    if(ind_th == (N_THREADS-1) ) max = H1;
    printf("***** THREAD %d domini = [%d,%d]\n",ind_th, first,(max-1));

    for (int i = first; i < max; ++i) {
        /* Clear the candidate buffer before using it, so the first row does
         * not depend on how the caller initialised it. FLT_MAX is the
         * largest float, so any real distance displaces an empty slot. */
        for (int l = 0; l < k; ++l) {
            min_dists[ind_th][l].ind = -1;
            min_dists[ind_th][l].dist = FLT_MAX;
        }

        for (int j = 0; j < H2; ++j) {
            const float dist = eucl_dist(test_base, i*W, train_base, j*W);
            insere_dist(dist, j, ind_th);
        }
        cmp_dists(i, ind_th);
    }
    return NULL;
}
/*-----------------------------------------------------------------------------
 * Euclidean distance between two W-element rows stored in flat arrays.
 *
 * v1, i1 : first array and the index of the row's first element.
 * v2, i2 : second array and the index of the row's first element.
 *
 * Returns sqrt(sum over c in [0, W) of (v1[i1+c] - v2[i2+c])^2).
 * Works for any W; reads exactly W elements from each array. */
extern inline float eucl_dist(float *v1, int i1, float *v2, int i2){
    float dist = 0.0f;
    for (int c = 0; c < W; ++c) {
        const float diff = v1[i1 + c] - v2[i2 + c];
        dist += diff * diff;
    }
    return sqrtf(dist);
}
/*-----------------------------------------------------------------------------
 * Offers one distance to thread ind_th's candidate buffer.
 *
 * The buffer holds k entries. The entry with the largest stored distance is
 * located (the first one wins on ties) and, if the offered distance is
 * strictly smaller, that entry is overwritten with (dist, lin). Otherwise the
 * buffer is left untouched.
 *
 * dist   : distance to the training row.
 * lin    : index of that training row.
 * ind_th : index of the calling thread, selecting min_dists[ind_th]. */
extern inline void insere_dist(float dist, int lin, int ind_th){
    int worst = 0;
    int slot = 1;

    while (slot < k) {
        if (min_dists[ind_th][worst].dist < min_dists[ind_th][slot].dist)
            worst = slot;
        ++slot;
    }

    if (!(min_dists[ind_th][worst].dist > dist))
        return;

    min_dists[ind_th][worst].ind = lin;
    min_dists[ind_th][worst].dist = dist;
}
/* ----------------------------------------------------------------------------
* Compara qual o vizinho mais frequente do vetor temporario e atribui esse
* label para o dado atual */
extern inline void cmp_dists(int lin, int ind_th){
int max=0, count=1;
int tmp=-1, lab=-1;
for( int i=0; i<k; ++i ){
tmp = label[1][min_dists[ind_th][i].ind];
for( int j=i+1; j<k; ++j )
if( tmp == label[1][min_dists[ind_th][j].ind] )
++count;
if( count > max ){
max = count;
lab = tmp;
}
count=1;
}
label_res[lin] = lab;
}
/*-----------------------------------------------------------------------------
 * Builds the N x N confusion matrix for the H1 test rows: cell [a][b] counts
 * the rows whose file label is a and whose predicted label is b.
 *
 * Every row whose two labels differ also increments the global `erros`.
 * A row whose file label or predicted label lies outside [0, N) is still
 * counted in `erros` when the labels differ, but is left out of the matrix
 * because it has no cell.
 *
 * N : number of classes (matrix dimension).
 *
 * Returns the matrix; the caller frees each of the N rows and then the row
 * table. Prints a message and exits with status 1 if memory runs out. */
int **conf_matrix(int N){
    /* Ask for at least one slot so that NULL always means failure. */
    int **m = malloc(sizeof *m * (size_t)(N > 0 ? N : 1));
    if (m == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }
    for (int i = 0; i < N; ++i) {
        m[i] = calloc((size_t)N, sizeof **m);   /* calloc zero-fills the row */
        if (m[i] == NULL) {
            puts("ERRO DE ALOCACAO");
            exit(1);
        }
    }

    for (int i = 0; i < H1; ++i) {
        const int labi = label[0][i];
        const int labr = label_res[i];

        if (labi != labr)
            ++erros;

        /* Labels come from the input files and from the vote, so they are
         * not guaranteed to be valid matrix coordinates. */
        if (labi < 0 || labi >= N || labr < 0 || labr >= N)
            continue;
        ++m[labi][labr];
    }
    return m;
}
/* ##################################################### */
/* Prints msg on stdout, as the rest of the program does, and exits with 1. */
static void fatal(const char *msg){
    puts(msg);
    exit(1);
}

/* malloc that never returns NULL: exits through fatal() when memory runs out. */
static void *alloc_or_die(size_t bytes){
    void *p = malloc(bytes ? bytes : 1);
    if (p == NULL) fatal("ERRO DE ALOCACAO");
    return p;
}

/* Parses s as a strictly positive decimal int; returns -1 on any other input. */
static int parse_positive(const char *s){
    char *end;
    const long v = strtol(s, &end, 10);
    if (end == s || *end != '\0' || v <= 0 || v > INT_MAX) return -1;
    return (int)v;
}

/*
 * Usage: prog <test_file> <train_file> <k> <n_threads>
 *
 * Each file starts with two integers (row count, values per row) followed by
 * the rows, each row being the values and then an integer label. The test
 * rows are split among n_threads threads, each row is labelled by a vote of
 * its k nearest training rows, and the confusion matrix and error rate are
 * printed.
 *
 * Exits with status 1 on bad arguments, unreadable or malformed files, or
 * when memory or a thread cannot be obtained.
 */
int main(int argc, char *argv[]){
    if(argc < 5) fatal("ERRO ENTRADA");

    FILE *f1 = fopen(argv[1],"r");
    FILE *f2 = fopen(argv[2],"r");
    if (f1 == NULL || f2 == NULL) fatal("ERRO AO ABRIR ARQUIVO");

    k = parse_positive(argv[3]);          /* number of neighbours that vote */
    N = 10;                               /* number of classes */
    N_THREADS = parse_positive(argv[4]);
    /* N_THREADS divides H1 below and k sizes the candidate buffers, so both
     * must be at least 1. */
    if (k < 1 || N_THREADS < 1) fatal("ERRO ENTRADA");

    /* fscanf reports a matching failure as 0, not EOF, so compare with the
     * number of expected conversions. */
    int w_test = 0;
    if( fscanf(f1,"%d",&H1) != 1 ) fatal("ERRO DE LEITURA 1");
    if( fscanf(f1,"%d",&w_test) != 1 ) fatal("ERRO DE LEITURA 2");
    if( fscanf(f2,"%d",&H2) != 1 ) fatal("ERRO DE LEITURA 3");
    if( fscanf(f2,"%d",&W) != 1 ) fatal("ERRO DE LEITURA 4");
    if (H1 < 1 || H2 < 1 || W < 1) fatal("ERRO: DIMENSOES INVALIDAS");
    /* Both matrices are indexed with the same W, so the files must agree. */
    if (w_test != W) fatal("ERRO: LARGURAS DIFERENTES");

    label = alloc_or_die(2 * sizeof *label);
    label[0] = alloc_or_die((size_t)H1 * sizeof **label);
    label[1] = alloc_or_die((size_t)H2 * sizeof **label);
    label_res = alloc_or_die((size_t)H1 * sizeof *label_res);

    DOM = H1/N_THREADS;

    min_dists = alloc_or_die((size_t)N_THREADS * sizeof *min_dists);
    for (int i = 0; i < N_THREADS; ++i) {
        min_dists[i] = alloc_or_die((size_t)k * sizeof **min_dists);
        for (int j = 0; j < k; ++j) {
            min_dists[i][j].ind = -1;       /* empty slot */
            min_dists[i][j].dist = FLT_MAX;
        }
    }

    printf("\n***** Dominio Per Thread = %d \n",DOM);
    test_base = alloc_data(f1,H1,0);
    puts("***** Teste Alocado");
    train_base = alloc_data(f2,H2,1);
    puts("***** Treino Alocado ");
    printf("***** H1 = %d, H2= %d\n", H1, H2);
    fclose(f1);
    fclose(f2);

    /* Worker threads. Each one receives a pointer to its own index, which
     * must stay valid until the thread has been joined. */
    pthread_t *threads = alloc_or_die((size_t)N_THREADS * sizeof *threads);
    int *thread_ids = alloc_or_die((size_t)N_THREADS * sizeof *thread_ids);
    for (int i = 0; i < N_THREADS; ++i) {
        thread_ids[i] = i;
        if (pthread_create(&threads[i], NULL, calc_dist, &thread_ids[i]) != 0)
            fatal("ERRO AO CRIAR THREAD");
    }
    puts("***** PROCESSANDO...");
    /* pthread_join is the synchronisation point; no sleep is needed. */
    for (int i = 0; i < N_THREADS; ++i)
        pthread_join(threads[i], NULL);

    erros=0;
    puts("");
    puts("***** TERMINOU PROCESSAMENTO!");
    puts("");
    puts("***** Matriz de Confusão:");
    int** matr_conf = conf_matrix(N);
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j)
            printf(" %5d|",matr_conf[i][j]);
        printf("\n");
    }
    printf("\n##### ");
    printf("Taxa de erro comparada com a base de teste: %f\nFIM DE EXECUCAO\n",
           (float)((float)erros/(float)H1));

    /* Release everything that was allocated above. */
    free(test_base);
    free(train_base);
    for (int i = 0; i < N_THREADS; ++i) free(min_dists[i]);
    free(min_dists);
    free(label[0]);
    free(label[1]);
    free(label);
    free(label_res);
    for (int i = 0; i < N; ++i) free(matr_conf[i]);
    free(matr_conf);
    free(thread_ids);
    free(threads);
    return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment