Commit 9ceb03bc authored by Alexandre Peres Arias

add knn

Signed-off-by: Alexandre Peres Arias <apa13@inf.ufpr.br>
parent 81b1ef68
# Builds the threaded k-NN classifier. The recipe line must start with a TAB.
all:
	gcc -o knn knn_float_ok.c -std=c99 -lm -Wall -Wno-unused-result -g -O3 -lpthread
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <float.h>
#include <pthread.h>
#include <unistd.h>
// TODO: fix the confusion matrix; label_res is also wrong.
/* One neighbour candidate kept in a thread's scratch buffer. */
struct eucl {
    int ind;    /* index of the training row this candidate refers to */
    float dist; /* distance from that training row to the current test row */
};
typedef struct eucl eucl_vet;
/* ---- Global state shared by main() and the worker threads ---- */
/* Not referenced by the code visible here; the alloc_data() parameter of the
 * same name shadows it. */
int lab_flag;
/* N_THREADS: number of worker threads (argv[4]); N: number of classes (set to
 * 10 in main); DOM: test rows handled per thread (H1 / N_THREADS). */
int N_THREADS,N,DOM;
/* H1 / H2: row counts read from the test / training file headers; W: feature
 * values per row; k: number of nearest neighbours that vote (argv[3]). */
int H1,H2,W,k;
/* Count of misclassified test rows, incremented by conf_matrix(). */
long unsigned int erros;
/* label[0][i] / label[1][i]: label read for test / training row i;
 * label_res[i]: label predicted for test row i. */
int **label, *label_res;
/* min_dists[t]: scratch buffer of k neighbour candidates owned by thread t. */
eucl_vet **min_dists;
/* Row-major feature matrices: H1 x W (test) and H2 x W (training). */
float *test_base, *train_base;
/* ---- Forward declarations (calc_dist is declared twice; harmless) ---- */
extern inline float *alloc_data(FILE *f, int h, int lab_flag);
extern inline void* calc_dist(void *in);
extern inline void insere_dist(float dist, int ind, int j);
extern inline float eucl_dist(float *v1, int i1, float *v2, int i2);
extern inline void cmp_dists(int lin, int ind_th);
extern inline void *calc_dist(void *in);
int **conf_matrix(int N);
/* ########################################################## */
/*-----------------------------------------------------------------------------
 * Reads h rows from f into a newly allocated row-major h x W float matrix.
 * Each row consists of W feature values followed by one integer label, which
 * is stored in label[lab_flag][i].
 *
 *   f        : open text stream positioned at the first row
 *   h        : number of rows to read
 *   lab_flag : index of the label array to fill (main passes 0 for the test
 *              set and 1 for the training set)
 *
 * Returns the matrix; the caller must free() it. An allocation failure or a
 * malformed/truncated input is reported on stderr and terminates the program,
 * so a partially initialised matrix is never handed back. */
extern inline float *alloc_data(FILE *f, int h, int lab_flag){
    size_t count = (size_t)h * (size_t)W;
    float *data = malloc(sizeof *data * count);

    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * counts as a failure. */
    if (data == NULL && count != 0) {
        fputs("ERRO DE ALOCACAO\n", stderr);
        exit(1);
    }

    for (int i = 0; i < h; ++i) {
        float *row = data + (size_t)i * (size_t)W;
        for (int j = 0; j < W; ++j) {
            if (fscanf(f, "%f", &row[j]) != 1)
                goto bad_input;
        }
        if (fscanf(f, "%d", &label[lab_flag][i]) != 1)
            goto bad_input;
    }
    return data;

bad_input:
    free(data);
    fputs("ERRO DE LEITURA: arquivo de dados malformado ou truncado\n", stderr);
    exit(1);
}
/*-----------------------------------------------------------------------------
* Função principal da thread. Invoca e controla o processo. Cada thread tem um
* domínio para calcular e salvar as distâncias*/
extern inline void *calc_dist(void *in){
float dist;
int ind_th = *((int*)in);
int max = ind_th*DOM + DOM;
if(ind_th == (N_THREADS-1) ) max = H1;
printf("***** THREAD %d domini = [%d,%d]\n",ind_th, ind_th*DOM,(max-1));
for(int i=(ind_th*DOM); i<max; ++i){
for(int j=0; j<H2; ++j){
dist = eucl_dist(test_base, i*W, train_base, j*W);
insere_dist(dist, j, ind_th);
}
cmp_dists(i,ind_th);
// zera o buffer das distancias
for(int l=0;l<k;++l){
min_dists[ind_th][l].ind = -1;
min_dists[ind_th][l].dist = INT_MAX;
}
}
return NULL;
}
/*-----------------------------------------------------------------------------
 * Euclidean distance between two W-element rows.
 *
 *   v1, i1 : first matrix and the offset of the row's first element in it
 *   v2, i2 : second matrix and the offset of the row's first element in it
 *
 * Exactly W elements of each row are read, whatever the value of W. */
extern inline float eucl_dist(float *v1, int i1, float *v2, int i2){
    const float *a = v1 + i1;
    const float *b = v2 + i2;
    float sum = 0.0f;

    for (int c = 0; c < W; ++c) {
        float d = a[c] - b[c];
        sum += d * d;
    }
    return sqrtf(sum);
}
/*-----------------------------------------------------------------------------
* Insere as dist. euclidianas em vetor temporario para cada thread */
extern inline void insere_dist(float dist, int lin, int ind_th){
for(int i=0; i<k; ++i)
if(min_dists[ind_th][i].dist > dist){
min_dists[ind_th][i].dist = dist;
min_dists[ind_th][i].ind = lin;
return;
}
}
/* ----------------------------------------------------------------------------
* Compara qual o vizinho mais frequente do vetor temporario e atribui esse
* label para o dado atual */
extern inline void cmp_dists(int lin, int ind_th){
int max=0, count=1;
int tmp=-1, lab=-1;
for( int i=0; i<k; ++i ){
tmp = label[1][min_dists[ind_th][i].ind];
for( int j=i+1; j<k; ++j )
if( tmp == label[1][min_dists[ind_th][j].ind] )
++count;
if( count > max ){
max = count;
lab = tmp;
}
count=1;
}
label_res[lin] = lab;
}
/*-----------------------------------------------------------------------------
 * Builds the N x N confusion matrix for the H1 test rows: m[a][b] is the
 * number of rows whose file label is a and whose predicted label is b.
 *
 *   N : number of classes; valid labels are 0 .. N-1
 *
 * Every row whose predicted label differs from its file label increments the
 * global `erros`. A row whose file or predicted label lies outside [0, N) is
 * still counted in `erros` when the two differ, but is left out of the matrix
 * instead of indexing out of bounds.
 *
 * Returns the matrix (N row pointers, each to N ints); the caller frees every
 * row and then the row array. Returns NULL, without touching `erros`, if an
 * allocation fails. */
int **conf_matrix(int N){
    size_t n = N > 0 ? (size_t)N : 0;

    /* Request at least one element so a NULL result always means failure. */
    int **m = malloc(sizeof *m * (n ? n : 1));
    if (m == NULL)
        return NULL;

    for (size_t r = 0; r < n; ++r) {
        m[r] = calloc(n, sizeof **m);
        if (m[r] == NULL) {
            while (r-- > 0)
                free(m[r]);
            free(m);
            return NULL;
        }
    }

    for (int i = 0; i < H1; ++i) {
        int labi = label[0][i];
        int labr = label_res[i];

        if (labi != labr)
            ++erros;
        if (labi >= 0 && labi < N && labr >= 0 && labr < N)
            ++m[labi][labr];
    }
    return m;
}
/* ##################################################### */
/* Program entry point.
 *
 * Usage: knn <test_file> <train_file> <k> <n_threads>
 *
 * Each input file starts with its row count and its column count, followed by
 * the rows (see alloc_data). The test rows are split across n_threads worker
 * threads; each row receives the most frequent label among its k nearest
 * training rows. The confusion matrix and the error rate are then printed.
 *
 * Invalid arguments, an unreadable file, an inconsistent header, an allocation
 * failure or a thread-creation failure print a message and exit with status 1.
 * Returns 0 on success. */
int main(int argc, char *argv[]){
    if (argc < 5) { puts("ERRO ENTRADA"); exit(1); }

    FILE *f1 = fopen(argv[1], "r");
    FILE *f2 = fopen(argv[2], "r");
    if (f1 == NULL || f2 == NULL) { puts("ERRO AO ABRIR ARQUIVO"); exit(1); }

    k = atoi(argv[3]);          /* number of neighbours that vote */
    N = 10;                     /* number of classes */
    N_THREADS = atoi(argv[4]);
    /* k sizes the per-thread buffers and N_THREADS is used as a divisor. */
    if (k < 1 || N_THREADS < 1) { puts("ERRO ENTRADA"); exit(1); }

    /* fscanf returns the number of converted items: anything but 1 is an error,
     * not only EOF. */
    int w_train = 0;
    if (fscanf(f1, "%d", &H1) != 1)      { puts("ERRO DE LEITURA 1"); exit(1); }
    if (fscanf(f1, "%d", &W) != 1)       { puts("ERRO DE LEITURA 2"); exit(1); }
    if (fscanf(f2, "%d", &H2) != 1)      { puts("ERRO DE LEITURA 3"); exit(1); }
    if (fscanf(f2, "%d", &w_train) != 1) { puts("ERRO DE LEITURA 4"); exit(1); }
    /* Both files must describe rows of the same width. */
    if (H1 < 1 || H2 < 1 || W < 1 || w_train != W) {
        puts("ERRO: DIMENSOES INVALIDAS");
        exit(1);
    }

    label = malloc(sizeof *label * 2);
    label_res = malloc(sizeof *label_res * (size_t)H1);
    min_dists = malloc(sizeof *min_dists * (size_t)N_THREADS);
    if (label == NULL || label_res == NULL || min_dists == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }
    label[0] = malloc(sizeof *label[0] * (size_t)H1);
    label[1] = malloc(sizeof *label[1] * (size_t)H2);
    if (label[0] == NULL || label[1] == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }

    DOM = H1 / N_THREADS;

    for (int i = 0; i < N_THREADS; ++i) {
        min_dists[i] = malloc(sizeof *min_dists[i] * (size_t)k);
        if (min_dists[i] == NULL) { puts("ERRO DE ALOCACAO"); exit(1); }
        for (int j = 0; j < k; ++j) {
            min_dists[i][j].ind = -1;       /* marks the slot as empty */
            min_dists[i][j].dist = FLT_MAX;
        }
    }

    printf("\n***** Dominio Per Thread = %d \n", DOM);
    test_base = alloc_data(f1, H1, 0);
    puts("***** Teste Alocado");
    train_base = alloc_data(f2, H2, 1);
    puts("***** Treino Alocado ");
    printf("***** H1 = %d, H2= %d\n", H1, H2);
    fclose(f1);
    fclose(f2);
    if (test_base == NULL || train_base == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }

    /* Worker threads. Each one receives a pointer to its own index, so the
     * index array has to outlive every thread. */
    pthread_t *threads = malloc(sizeof *threads * (size_t)N_THREADS);
    int *thread_ids = malloc(sizeof *thread_ids * (size_t)N_THREADS);
    if (threads == NULL || thread_ids == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }
    for (int i = 0; i < N_THREADS; ++i) {
        thread_ids[i] = i;
        if (pthread_create(&threads[i], NULL, calc_dist, &thread_ids[i]) != 0) {
            puts("ERRO AO CRIAR THREAD");
            exit(1);
        }
    }
    puts("***** PROCESSANDO...");
    for (int i = 0; i < N_THREADS; ++i)
        pthread_join(threads[i], NULL);

    erros = 0;
    puts("");
    puts("***** TERMINOU PROCESSAMENTO!");
    puts("");
    puts("***** Matriz de Confusão:");
    int **matr_conf = conf_matrix(N);
    if (matr_conf == NULL) { puts("ERRO DE ALOCACAO"); exit(1); }
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j)
            printf(" %5d|", matr_conf[i][j]);
        printf("\n");
    }
    printf("\n##### ");
    printf("Taxa de erro: %f\nFIM DE EXECUCAO\n", (float)((float)erros / (float)H1));

    /* Release everything that was allocated above. */
    free(test_base);
    free(train_base);
    for (int i = 0; i < N_THREADS; ++i)
        free(min_dists[i]);
    free(min_dists);
    free(label[0]);
    free(label[1]);
    free(label);
    free(label_res);
    for (int i = 0; i < N; ++i)
        free(matr_conf[i]);
    free(matr_conf);
    free(thread_ids);
    free(threads);
    return 0;
}
File added
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <float.h>
#include <pthread.h>
#include <unistd.h>
// TODO: fix the confusion matrix; label_res is also wrong.
/* One neighbour candidate kept in a thread's scratch buffer. */
struct eucl {
    int ind;    /* index of the training row this candidate refers to */
    float dist; /* distance from that training row to the current test row */
};
typedef struct eucl eucl_vet;
/* ---- Global state shared by main() and the worker threads ---- */
/* Not referenced by the code visible here; the alloc_data() parameter of the
 * same name shadows it. */
int lab_flag;
/* N_THREADS: number of worker threads (argv[4]); N: number of classes (set to
 * 10 in main); DOM: test rows handled per thread (H1 / N_THREADS). */
int N_THREADS,N,DOM;
/* H1 / H2: row counts read from the test / training file headers; W: feature
 * values per row; k: number of nearest neighbours that vote (argv[3]). */
int H1,H2,W,k;
/* Count of misclassified test rows, incremented by conf_matrix(). */
long unsigned int erros;
/* label[0][i] / label[1][i]: label read for test / training row i;
 * label_res[i]: label predicted for test row i. */
int **label, *label_res;
/* min_dists[t]: scratch buffer of k neighbour candidates owned by thread t. */
eucl_vet **min_dists;
/* Row-major feature matrices: H1 x W (test) and H2 x W (training). */
float *test_base, *train_base;
/* ---- Forward declarations (calc_dist is declared twice; harmless) ---- */
extern inline float *alloc_data(FILE *f, int h, int lab_flag);
extern inline void* calc_dist(void *in);
extern inline void insere_dist(float dist, int ind, int j);
extern inline float eucl_dist(float *v1, int i1, float *v2, int i2);
extern inline void cmp_dists(int lin, int ind_th);
extern inline void *calc_dist(void *in);
int **conf_matrix(int N);
/* ########################################################## */
/*-----------------------------------------------------------------------------
 * Reads h rows from f into a newly allocated row-major h x W float matrix.
 * Each row consists of W feature values followed by one integer label, which
 * is stored in label[lab_flag][i].
 *
 *   f        : open text stream positioned at the first row
 *   h        : number of rows to read
 *   lab_flag : index of the label array to fill (main passes 0 for the test
 *              set and 1 for the training set)
 *
 * Returns the matrix; the caller must free() it. An allocation failure or a
 * malformed/truncated input is reported on stderr and terminates the program,
 * so a partially initialised matrix is never handed back. */
extern inline float *alloc_data(FILE *f, int h, int lab_flag){
    size_t count = (size_t)h * (size_t)W;
    float *data = malloc(sizeof *data * count);

    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * counts as a failure. */
    if (data == NULL && count != 0) {
        fputs("ERRO DE ALOCACAO\n", stderr);
        exit(1);
    }

    for (int i = 0; i < h; ++i) {
        float *row = data + (size_t)i * (size_t)W;
        for (int j = 0; j < W; ++j) {
            if (fscanf(f, "%f", &row[j]) != 1)
                goto bad_input;
        }
        if (fscanf(f, "%d", &label[lab_flag][i]) != 1)
            goto bad_input;
    }
    return data;

bad_input:
    free(data);
    fputs("ERRO DE LEITURA: arquivo de dados malformado ou truncado\n", stderr);
    exit(1);
}
/*-----------------------------------------------------------------------------
* Função principal da thread. Invoca e controla o processo. Cada thread tem um
* domínio para calcular e salvar as distâncias*/
extern inline void *calc_dist(void *in){
float dist;
int ind_th = *((int*)in);
int max = ind_th*DOM + DOM;
if(ind_th == (N_THREADS-1) ) max = H1;
printf("***** THREAD %d domini = [%d,%d]\n",ind_th, ind_th*DOM,(max-1));
for(int i=(ind_th*DOM); i<max; ++i){
for(int j=0; j<H2; ++j){
dist = eucl_dist(test_base, i*W, train_base, j*W);
insere_dist(dist, j, ind_th);
}
cmp_dists(i,ind_th);
// zera o buffer das distancias
for(int l=0;l<k;++l){
min_dists[ind_th][l].ind = -1;
min_dists[ind_th][l].dist = INT_MAX;
}
}
return NULL;
}
/*-----------------------------------------------------------------------------
 * Euclidean distance between two W-element rows.
 *
 *   v1, i1 : first matrix and the offset of the row's first element in it
 *   v2, i2 : second matrix and the offset of the row's first element in it
 *
 * Exactly W elements of each row are read, whatever the value of W. */
extern inline float eucl_dist(float *v1, int i1, float *v2, int i2){
    const float *a = v1 + i1;
    const float *b = v2 + i2;
    float sum = 0.0f;

    for (int c = 0; c < W; ++c) {
        float d = a[c] - b[c];
        sum += d * d;
    }
    return sqrtf(sum);
}
/*-----------------------------------------------------------------------------
* Insere as dist. euclidianas em vetor temporario para cada thread */
extern inline void insere_dist(float dist, int lin, int ind_th){
int maior=0;
for(int i=1; i<k; ++i)
if(min_dists[ind_th][i].dist > min_dists[ind_th][maior].dist)
maior = i;
if(min_dists[ind_th][maior].dist > dist){
min_dists[ind_th][maior].dist = dist;
min_dists[ind_th][maior].ind = lin;
return;
}
}
/* ----------------------------------------------------------------------------
* Compara qual o vizinho mais frequente do vetor temporario e atribui esse
* label para o dado atual */
extern inline void cmp_dists(int lin, int ind_th){
int max=0, count=1;
int tmp=-1, lab=-1;
for( int i=0; i<k; ++i ){
tmp = label[1][min_dists[ind_th][i].ind];
for( int j=i+1; j<k; ++j )
if( tmp == label[1][min_dists[ind_th][j].ind] )
++count;
if( count > max ){
max = count;
lab = tmp;
}
count=1;
}
label_res[lin] = lab;
}
/*-----------------------------------------------------------------------------
 * Builds the N x N confusion matrix for the H1 test rows: m[a][b] is the
 * number of rows whose file label is a and whose predicted label is b.
 *
 *   N : number of classes; valid labels are 0 .. N-1
 *
 * Every row whose predicted label differs from its file label increments the
 * global `erros`. A row whose file or predicted label lies outside [0, N) is
 * still counted in `erros` when the two differ, but is left out of the matrix
 * instead of indexing out of bounds.
 *
 * Returns the matrix (N row pointers, each to N ints); the caller frees every
 * row and then the row array. Returns NULL, without touching `erros`, if an
 * allocation fails. */
int **conf_matrix(int N){
    size_t n = N > 0 ? (size_t)N : 0;

    /* Request at least one element so a NULL result always means failure. */
    int **m = malloc(sizeof *m * (n ? n : 1));
    if (m == NULL)
        return NULL;

    for (size_t r = 0; r < n; ++r) {
        m[r] = calloc(n, sizeof **m);
        if (m[r] == NULL) {
            while (r-- > 0)
                free(m[r]);
            free(m);
            return NULL;
        }
    }

    for (int i = 0; i < H1; ++i) {
        int labi = label[0][i];
        int labr = label_res[i];

        if (labi != labr)
            ++erros;
        if (labi >= 0 && labi < N && labr >= 0 && labr < N)
            ++m[labi][labr];
    }
    return m;
}
/* ##################################################### */
/* Program entry point.
 *
 * Usage: knn <test_file> <train_file> <k> <n_threads>
 *
 * Each input file starts with its row count and its column count, followed by
 * the rows (see alloc_data). The test rows are split across n_threads worker
 * threads; each row receives the most frequent label among its k nearest
 * training rows. The confusion matrix and the error rate are then printed.
 *
 * Invalid arguments, an unreadable file, an inconsistent header, an allocation
 * failure or a thread-creation failure print a message and exit with status 1.
 * Returns 0 on success. */
int main(int argc, char *argv[]){
    if (argc < 5) { puts("ERRO ENTRADA"); exit(1); }

    FILE *f1 = fopen(argv[1], "r");
    FILE *f2 = fopen(argv[2], "r");
    if (f1 == NULL || f2 == NULL) { puts("ERRO AO ABRIR ARQUIVO"); exit(1); }

    k = atoi(argv[3]);          /* number of neighbours that vote */
    N = 10;                     /* number of classes */
    N_THREADS = atoi(argv[4]);
    /* k sizes the per-thread buffers and N_THREADS is used as a divisor. */
    if (k < 1 || N_THREADS < 1) { puts("ERRO ENTRADA"); exit(1); }

    /* fscanf returns the number of converted items: anything but 1 is an error,
     * not only EOF. */
    int w_train = 0;
    if (fscanf(f1, "%d", &H1) != 1)      { puts("ERRO DE LEITURA 1"); exit(1); }
    if (fscanf(f1, "%d", &W) != 1)       { puts("ERRO DE LEITURA 2"); exit(1); }
    if (fscanf(f2, "%d", &H2) != 1)      { puts("ERRO DE LEITURA 3"); exit(1); }
    if (fscanf(f2, "%d", &w_train) != 1) { puts("ERRO DE LEITURA 4"); exit(1); }
    /* Both files must describe rows of the same width. */
    if (H1 < 1 || H2 < 1 || W < 1 || w_train != W) {
        puts("ERRO: DIMENSOES INVALIDAS");
        exit(1);
    }

    label = malloc(sizeof *label * 2);
    label_res = malloc(sizeof *label_res * (size_t)H1);
    min_dists = malloc(sizeof *min_dists * (size_t)N_THREADS);
    if (label == NULL || label_res == NULL || min_dists == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }
    label[0] = malloc(sizeof *label[0] * (size_t)H1);
    label[1] = malloc(sizeof *label[1] * (size_t)H2);
    if (label[0] == NULL || label[1] == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }

    DOM = H1 / N_THREADS;

    for (int i = 0; i < N_THREADS; ++i) {
        min_dists[i] = malloc(sizeof *min_dists[i] * (size_t)k);
        if (min_dists[i] == NULL) { puts("ERRO DE ALOCACAO"); exit(1); }
        for (int j = 0; j < k; ++j) {
            min_dists[i][j].ind = -1;       /* marks the slot as empty */
            min_dists[i][j].dist = FLT_MAX;
        }
    }

    printf("\n***** Dominio Per Thread = %d \n", DOM);
    test_base = alloc_data(f1, H1, 0);
    puts("***** Teste Alocado");
    train_base = alloc_data(f2, H2, 1);
    puts("***** Treino Alocado ");
    printf("***** H1 = %d, H2= %d\n", H1, H2);
    fclose(f1);
    fclose(f2);
    if (test_base == NULL || train_base == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }

    /* Worker threads. Each one receives a pointer to its own index, so the
     * index array has to outlive every thread. */
    pthread_t *threads = malloc(sizeof *threads * (size_t)N_THREADS);
    int *thread_ids = malloc(sizeof *thread_ids * (size_t)N_THREADS);
    if (threads == NULL || thread_ids == NULL) {
        puts("ERRO DE ALOCACAO");
        exit(1);
    }
    for (int i = 0; i < N_THREADS; ++i) {
        thread_ids[i] = i;
        if (pthread_create(&threads[i], NULL, calc_dist, &thread_ids[i]) != 0) {
            puts("ERRO AO CRIAR THREAD");
            exit(1);
        }
    }
    puts("***** PROCESSANDO...");
    for (int i = 0; i < N_THREADS; ++i)
        pthread_join(threads[i], NULL);

    erros = 0;
    puts("");
    puts("***** TERMINOU PROCESSAMENTO!");
    puts("");
    puts("***** Matriz de Confusão:");
    int **matr_conf = conf_matrix(N);
    if (matr_conf == NULL) { puts("ERRO DE ALOCACAO"); exit(1); }
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j)
            printf(" %5d|", matr_conf[i][j]);
        printf("\n");
    }
    printf("\n##### ");
    printf("Taxa de erro comparada com a base de teste: %f\nFIM DE EXECUCAO\n",
           (float)((float)erros / (float)H1));

    /* Release everything that was allocated above. */
    free(test_base);
    free(train_base);
    for (int i = 0; i < N_THREADS; ++i)
        free(min_dists[i]);
    free(min_dists);
    free(label[0]);
    free(label[1]);
    free(label);
    free(label_res);
    for (int i = 0; i < N; ++i)
        free(matr_conf[i]);
    free(matr_conf);
    free(thread_ids);
    free(threads);
    return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment