Commit 527c8a74 authored by Armando Luiz Nicolini Delgado's avatar Armando Luiz Nicolini Delgado 🤓

Arquivos iniciais

parent 27ee2ddc
# Build rules for the matmult program.
#
# Targets:
#   all (default)             build $(PROG) with the base flags
#   debug                     build with -DDEBUG
#   likwid                    build with -DLIKWID_PERFMON and link -llikwid
#   avx                       likwid build plus the AVX optimisation/report flags
#   clean / limpa             remove editor backups and temporary files
#   purge / faxina / distclean  clean, then remove binaries, objects and outputs

PROG = matmult
MODULOS = matriz
CC = gcc -std=c11 -g
OBJS = $(addsuffix .o,$(MODULOS))

# LIKWID installation prefix and the flags derived from it
LIKWID = /home/soft/likwid
LIKWID_FLAGS = -I$(LIKWID)/include
LIKWID_LIBS = -L$(LIKWID)/lib

# Flags added only by the "avx" target
AVX_FLAGS = -march=native -mavx -O3 -falign-functions=32 -falign-loops=32 -fstrict-aliasing
AVX_LOG_FLAGS = -fopt-info-vec -fopt-info-vec-missed

CFLAGS = $(LIKWID_FLAGS)
LFLAGS = $(LIKWID_LIBS) -lm

.PHONY: all clean limpa purge faxina distclean debug avx likwid

# Modules that ship a matching header are rebuilt when either file changes
%.o: %.c %.h
	$(CC) $(CFLAGS) -c $<

all: $(PROG)

# Target-specific variables: they also apply to every prerequisite built
# on behalf of the selected target.
debug: CFLAGS += -DDEBUG
avx: CFLAGS += $(AVX_FLAGS) $(AVX_LOG_FLAGS)
avx likwid: CFLAGS += -DLIKWID_PERFMON
avx likwid: LFLAGS += -llikwid
likwid avx debug: $(PROG)

# Link the main object together with the module objects
$(PROG): $(PROG).o
$(PROG): $(OBJS)
	$(CC) $(CFLAGS) -o $@ $^ $(LFLAGS)

# "limpa" is declared phony above; make it an alias of "clean"
clean limpa:
	@echo "Limpando ...."
	@rm -f *~ *.bak *.tmp

# "faxina" is declared phony above; make it an alias of "purge"
purge faxina distclean: clean
	@echo "Faxina ...."
	@rm -f $(PROG) *.o core a.out
	@rm -f *.png marker.out *.log
O enunciado do exercício está <A HREF="https://moodle.c3sl.ufpr.br/mod/assign/view.php?id=13441">aqui</a>
O enunciado do exercício está <A HREF="https://moodle.c3sl.ufpr.br/mod/assign/view.php?id=13441">aqui</a><BR>
<BR>
O diretório <I>gnuplot</I> contém exemplos de utilização desta ferramenta. Os arquivos <B>.dat</B> contêm tabelas com valores de abcissas e ordenadas, e os arquivos <B>.gplt</B> contêm exemplos de uso.
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "utils.h"
#include "SistemasLineares.h"
//-------------------------------------------------------------------
//
// Não alterar esta parte do código.
//
/// Alocaçao de memória
SistLinear_t* alocaSistLinear (unsigned int n)
{
SistLinear_t *SL = (SistLinear_t *) malloc(sizeof(SistLinear_t));
if ( SL )
{
SL->A = (real_t *) malloc(n * n * sizeof(real_t));
SL->b = (real_t *) malloc(n * sizeof(real_t));
SL->x = (real_t *) malloc(n * sizeof(real_t));
if (!(SL->A) || !(SL->b) || !(SL->x))
liberaSistLinear(SL);
}
return (SL);
}
/**
 * Releases a linear system: its three arrays and the structure itself.
 *
 * @param SL system to release; NULL is accepted and ignored, mirroring
 *           free(NULL)
 */
void liberaSistLinear (SistLinear_t *SL)
{
  if (!SL)
    return;

  free(SL->A);
  free(SL->b);
  free(SL->x);
  free(SL);
}
//-------------------------------------------------------------------
/**
 * Fills a linear system with pseudo-random values drawn from rand().
 *
 * Every entry of A is rand()/RAND_MAX; entries on the main diagonal are
 * additionally multiplied by 2*DIAG.  Each b[i] is rand()/RAND_MAX and each
 * x[i] is set to zero.  A is addressed row by row as A[i*n + j].
 *
 * @param SL system to fill; its n field and arrays must already be set up
 */
void inicializaSistLinear (SistLinear_t *SL)
{
  static double invRandMax = 1.0 / (double)RAND_MAX;
  const unsigned int ordem = SL->n;

  /* coefficient matrix, in row order */
  for (unsigned int lin = 0; lin < ordem; ++lin)
  {
    for (unsigned int col = 0; col < ordem; ++col)
    {
      double peso;

      if (lin == col)
        peso = (double)(DIAG<<1);
      else
        peso = 1.0;

      SL->A[lin * ordem + col] = peso * (double)rand() * invRandMax;
    }
  }

  /* right-hand side and initial solution */
  for (unsigned int k = 0; k < ordem; ++k)
  {
    SL->x[k] = 0.0;
    SL->b[k] = (double)rand() * invRandMax;
  }
}
/**
 * Computes the L2 (Euclidean) norm of the residual r = b - A*x.
 *
 * A is read row by row as A[i*n + j], the same layout used by
 * inicializaSistLinear.
 *
 * @param SL linear system holding A, b, the current solution x and the order n
 * @return sqrt(sum_i r_i^2); 0.0 when n is 0
 */
double normaL2Residuo(SistLinear_t *SL)
{
  const unsigned int n = SL->n;
  double soma = 0.0;

  for (unsigned int i = 0; i < n; ++i)
  {
    /* size_t offset so that i*n cannot wrap in unsigned int */
    const real_t *linha = SL->A + (size_t) i * n;
    double r = SL->b[i];

    for (unsigned int j = 0; j < n; ++j)
      r -= linha[j] * SL->x[j];

    soma += r * r;
  }

  return sqrt(soma);
}
/**
 * Gauss-Seidel method.
 *
 * Placeholder: the body performs no computation.  None of the arguments is
 * read or written and the function always returns 0.
 *
 * @param SL      linear system (unused)
 * @param eps     tolerance (unused)
 * @param normaL2 output for the residual norm (left untouched)
 * @param iter    output for the iteration count (left untouched)
 * @return always 0
 */
int gaussSeidel (SistLinear_t *SL, double eps, double *normaL2, unsigned int *iter)
{
  (void) SL;
  (void) eps;
  (void) normaL2;
  (void) iter;

  return 0;
}
// Public interface for dense linear systems stored as flat arrays.
#ifndef __SISLINEAR_H__
#define __SISLINEAR_H__
// Parameters for the convergence test
// NOTE(review): MAXIT and EPS look like the iteration cap and the tolerance
// for the iterative solvers; no visible code uses them yet - confirm.
#define MAXIT 5
#define EPS 1.0e-4
// inicializaSistLinear multiplies the main-diagonal entries by 2*DIAG
#define DIAG 32
// Floating-point type used for all coefficients
typedef double real_t;
// A linear system A*x = b of order n
typedef struct {
real_t *A; // coefficients (n*n elements, addressed as A[i*n + j])
real_t *b; // independent terms (right-hand side)
real_t *x; // solution
unsigned int n; // order of the system
} SistLinear_t;
// Memory allocation and release
SistLinear_t* alocaSistLinear (unsigned int n);
void liberaSistLinear (SistLinear_t *SL);
// Fills A and b with pseudo-random values and zeroes x
void inicializaSistLinear (SistLinear_t *SL);
// Computes the L2 norm of the residual
double normaL2Residuo(SistLinear_t *SL);
// Gauss-Jordan elimination method
int eliminacaoGauss (SistLinear_t *SL, double *normaL2);
// Gauss-Jacobi method
int gaussJacobi (SistLinear_t *SL, double eps, double *normaL2, unsigned int *iter);
// Gauss-Seidel method
int gaussSeidel (SistLinear_t *SL, double eps, double *normaL2, unsigned int *iter);
#endif // __SISLINEAR_H__
@@@ DINF LAB12 - h18 ----------------------------------------
CPU name: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz
CPU type: Intel Kabylake processor
CPU clock: 3.41 GHz
CPU stepping: 9
-------------------------------------------------------------
Group name Description
--------------------------------------------------------------------------------
BRANCH Branch prediction miss rate/ratio
CLOCK Power and Energy consumption
CYCLE_ACTIVITY Cycle Activities
DATA Load to store ratio
ENERGY Power and Energy consumption
FALSE_SHARE False sharing
FLOPS_AVX Packed AVX MFLOP/s
FLOPS_DP Double Precision MFLOP/s
FLOPS_SP Single Precision MFLOP/s
ICACHE Instruction cache miss rate/ratio
L2CACHE L2 cache miss rate/ratio
L2 L2 cache bandwidth in MBytes/s
L3CACHE L3 cache miss rate/ratio
L3 L3 cache bandwidth in MBytes/s
PORT_USAGE Execution port utilization
RECOVERY Recovery duration
TLB_DATA L2 data TLB miss rate/ratio
TLB_INSTR L1 Instruction TLB miss rate/ratio
UOPS_EXEC UOPs execution
UOPS_ISSUE UOPs issueing
UOPS_RETIRE UOPs retirement
UOPS UOPs execution info
********************************************************************************
Graphical Topology
********************************************************************************
Socket 0:
+---------------------------------------------+
| +--------+ +--------+ +--------+ +--------+ |
| | 0 | | 1 | | 2 | | 3 | |
| +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ |
| | 32 kB | | 32 kB | | 32 kB | | 32 kB | |
| +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ |
| | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
| +--------+ +--------+ +--------+ +--------+ |
| +-----------------------------------------+ |
| | 6 MB | |
| +-----------------------------------------+ |
+---------------------------------------------+
********************************************************************************
Cache Topology
********************************************************************************
Level: 1
Size: 32 kB
Type: Data cache
Associativity: 8
Number of sets: 64
Cache line size: 64
Cache type: Non Inclusive
Shared by threads: 1
Cache groups: ( 0 ) ( 1 ) ( 2 ) ( 3 )
--------------------------------------------------------------------------------
Level: 2
Size: 256 kB
Type: Unified cache
Associativity: 4
Number of sets: 1024
Cache line size: 64
Cache type: Non Inclusive
Shared by threads: 1
Cache groups: ( 0 ) ( 1 ) ( 2 ) ( 3 )
--------------------------------------------------------------------------------
Level: 3
Size: 6 MB
Type: Unified cache
Associativity: 12
Number of sets: 8192
Cache line size: 64
Cache type: Inclusive
Shared by threads: 4
Cache groups: ( 0 1 2 3 )
--------------------------------------------------------------------------------
********************************************************************************
NUMA Topology
********************************************************************************
NUMA domains: 1
--------------------------------------------------------------------------------
Domain: 0
Processors: ( 0 1 2 3 )
Distances: 10
Free memory: 6460.39 MB
Total memory: 7871.02 MB
--------------------------------------------------------------------------------
********************************************************************************
Hardware Thread Topology
********************************************************************************
Sockets: 1
Cores per socket: 4
Threads per core: 1
--------------------------------------------------------------------------------
HWThread Thread Core Socket Available
0 0 0 0 *
1 0 1 0 *
2 0 2 0 *
3 0 3 0 *
--------------------------------------------------------------------------------
Socket 0: ( 0 1 2 3 )
--------------------------------------------------------------------------------
@@@ DINF LAB3 - i18 ------------------------------------------------------
CPU name: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz
CPU type: Intel Kabylake processor
CPU clock: 3.41 GHz
CPU stepping: 9
---------------------------------------------------------------------------
Group name Description
---------------------------------------------------------------------------
BRANCH Branch prediction miss rate/ratio
CLOCK Power and Energy consumption
CYCLE_ACTIVITY Cycle Activities
DATA Load to store ratio
ENERGY Power and Energy consumption
FALSE_SHARE False sharing
FLOPS_AVX Packed AVX MFLOP/s
FLOPS_DP Double Precision MFLOP/s
FLOPS_SP Single Precision MFLOP/s
ICACHE Instruction cache miss rate/ratio
L2CACHE L2 cache miss rate/ratio
L2 L2 cache bandwidth in MBytes/s
L3CACHE L3 cache miss rate/ratio
L3 L3 cache bandwidth in MBytes/s
PORT_USAGE Execution port utilization
RECOVERY Recovery duration
TLB_DATA L2 data TLB miss rate/ratio
TLB_INSTR L1 Instruction TLB miss rate/ratio
UOPS_EXEC UOPs execution
UOPS_ISSUE UOPs issueing
UOPS_RETIRE UOPs retirement
UOPS UOPs execution info
--------------------------------------------------------------------------------
********************************************************************************
Graphical Topology
********************************************************************************
Socket 0:
+---------------------------------------------+
| +--------+ +--------+ +--------+ +--------+ |
| | 0 | | 1 | | 2 | | 3 | |
| +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ |
| | 32 kB | | 32 kB | | 32 kB | | 32 kB | |
| +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ |
| | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
| +--------+ +--------+ +--------+ +--------+ |
| +-----------------------------------------+ |
| | 6 MB | |
| +-----------------------------------------+ |
+---------------------------------------------+
********************************************************************************
Cache Topology
********************************************************************************
Level: 1
Size: 32 kB
Type: Data cache
Associativity: 8
Number of sets: 64
Cache line size: 64
Cache type: Non Inclusive
Shared by threads: 1
Cache groups: ( 0 ) ( 1 ) ( 2 ) ( 3 )
--------------------------------------------------------------------------------
Level: 2
Size: 256 kB
Type: Unified cache
Associativity: 4
Number of sets: 1024
Cache line size: 64
Cache type: Non Inclusive
Shared by threads: 1
Cache groups: ( 0 ) ( 1 ) ( 2 ) ( 3 )
--------------------------------------------------------------------------------
Level: 3
Size: 6 MB
Type: Unified cache
Associativity: 12
Number of sets: 8192
Cache line size: 64
Cache type: Inclusive
Shared by threads: 4
Cache groups: ( 0 1 2 3 )
--------------------------------------------------------------------------------
********************************************************************************
NUMA Topology
********************************************************************************
NUMA domains: 1
--------------------------------------------------------------------------------
Domain: 0
Processors: ( 0 1 2 3 )
Distances: 10
Free memory: 6201.08 MB
Total memory: 7871.02 MB
--------------------------------------------------------------------------------
********************************************************************************
Hardware Thread Topology
********************************************************************************
Sockets: 1
Cores per socket: 4
Threads per core: 1
--------------------------------------------------------------------------------
HWThread Thread Core Socket Available
0 0 0 0 *
1 0 1 0 *
2 0 2 0 *
3 0 3 0 *
--------------------------------------------------------------------------------
Socket 0: ( 0 1 2 3 )
--------------------------------------------------------------------------------
@@@ DINF LAB3 - j22 --------------------------------------------------------
CPU name: Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz
CPU type: Intel Core Haswell processor
CPU clock: 3.39 GHz
CPU stepping: 3
----------------------------------------------------------------------------
Group name Description
--------------------------------------------------------------------------------
BRANCH Branch prediction miss rate/ratio
CACHES Cache bandwidth in MBytes/s
CLOCK Power and Energy consumption
CYCLE_ACTIVITY Cycle Activities
DATA Load to store ratio
ENERGY Power and Energy consumption
FALSE_SHARE False sharing
FLOPS_AVX Packed AVX MFLOP/s
ICACHE Instruction cache miss rate/ratio
L2CACHE L2 cache miss rate/ratio
L2 L2 cache bandwidth in MBytes/s
L3CACHE L3 cache miss rate/ratio
L3 L3 cache bandwidth in MBytes/s
RECOVERY Recovery duration
TLB_DATA L2 data TLB miss rate/ratio
TLB_INSTR L1 Instruction TLB miss rate/ratio
UOPS_EXEC UOPs execution
UOPS_ISSUE UOPs issueing
UOPS_RETIRE UOPs retirement
UOPS UOPs execution info
********************************************************************************
Graphical Topology
********************************************************************************
Socket 0:
+---------------------------------------------+
| +--------+ +--------+ +--------+ +--------+ |
| | 0 4 | | 1 5 | | 2 6 | | 3 7 | |
| +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ |
| | 32 kB | | 32 kB | | 32 kB | | 32 kB | |
| +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ |
| | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
| +--------+ +--------+ +--------+ +--------+ |
| +-----------------------------------------+ |
| | 8 MB | |
| +-----------------------------------------+ |
+---------------------------------------------+
********************************************************************************
Cache Topology
********************************************************************************
Level: 1
Size: 32 kB
Type: Data cache
Associativity: 8
Number of sets: 64
Cache line size: 64
Cache type: Non Inclusive
Shared by threads: 2
Cache groups: ( 0 4 ) ( 1 5 ) ( 2 6 ) ( 3 7 )
--------------------------------------------------------------------------------
Level: 2
Size: 256 kB
Type: Unified cache
Associativity: 8
Number of sets: 512
Cache line size: 64
Cache type: Non Inclusive
Shared by threads: 2
Cache groups: ( 0 4 ) ( 1 5 ) ( 2 6 ) ( 3 7 )
--------------------------------------------------------------------------------
Level: 3
Size: 8 MB
Type: Unified cache
Associativity: 16
Number of sets: 8192
Cache line size: 64
Cache type: Inclusive
Shared by threads: 8
Cache groups: ( 0 4 1 5 2 6 3 7 )
--------------------------------------------------------------------------------
********************************************************************************
NUMA Topology
********************************************************************************
NUMA domains: 1
--------------------------------------------------------------------------------
Domain: 0
Processors: ( 0 4 1 5 2 6 3 7 )
Distances: 10
Free memory: 6520.99 MB
Total memory: 7875.85 MB
--------------------------------------------------------------------------------
********************************************************************************
Hardware Thread Topology
********************************************************************************
Sockets: 1
Cores per socket: 4
Threads per core: 2
--------------------------------------------------------------------------------
HWThread Thread Core Socket Available
0 0 0 0 *
1 0 1 0 *
2 0 2 0 *
3 0 3 0 *
4 1 0 0 *
5 1 1 0 *
6 1 2 0 *
7 1 3 0 *
--------------------------------------------------------------------------------
Socket 0: ( 0 4 1 5 2 6 3 7 )
--------------------------------------------------------------------------------
#!/usr/bin/gnuplot -c
# Run with:
# ./plot.gp < data_file
#
# The data file received on STDIN must contain 2 columns:
# x axis = column 1 = size in bytes
# y axis = column 2 onward = indicators
# (only columns 1 and 2 are plotted by the command below)
#
# Plot the table read from STDIN as points in an interactive qt window
set ylabel "FLOPS_DP (MFlops/s)"
set xlabel "N (bytes)"
set style data point
set style function line
set style line 1 lc 3 pt 7 ps 0.3
set terminal qt title "N x FLOPS_DP"
plot '< cat -' using 1:2 ls 1 notitle
# Wait for a mouse click before continuing
pause mouse
# Generating a PNG figure: only the terminal is switched here; the output
# file and plot commands below are commented out.
set terminal png
# set output "NxFLOPS_DP.png"
## plot '< cat -' using 1:2 ls 1 notitle
## replot
# unset output
#!/usr/bin/gnuplot -c
# Example script: draws two figures, each one comparing two data files.
# The labels in angle brackets ("<metrica 1>", "<marker 1>", ...) appear to be
# placeholders to be replaced with real metric and marker names.

# Appearance settings shared by both figures
set grid
set style data point
set style function line
set style line 1 lc 3 pt 7 ps 0.3
set boxwidth 1
set xtics
set xrange ["0":]
set xlabel "N (Mbytes)"

# First figure (qt window 0): plot_exemplo-01.dat against plot_exemplo-02.dat
set ylabel "<metrica 1>"
set title "<campo[<metrica]>"
set terminal qt 0 title "<campo[<metrica]>"
plot 'plot_exemplo-01.dat' title "<marker 1>" with linespoints, \
'plot_exemplo-02.dat' title "<marker 2>" with linespoints

# Second figure (qt window 1): plot_exemplo-03.dat against plot_exemplo-04.dat
set ylabel "<metrica 2>"
set title "<campo[<metrica]>"
set terminal qt 1 title "<campo[<metrica]>"
plot 'plot_exemplo-03.dat' title "<marker 3>" with linespoints, \
'plot_exemplo-04.dat' title "<marker 4>" with linespoints

# Wait for a mouse click before exiting
pause mouse
0.5 0.5
1.000161322585 48.433210629261
2.000241249986 21.798359943835
3.0003206090227 21.337482595053
4.0004001520114 14.873424079086
5.0004813269837 7.8612681493985
0.5 0.5
2.000161322585 50.433210629261
3.000241249986 25.798359943835
4.0003206090227 27.337482595053
5.0004001520114 18.873424079086
6.0004813269837 10.8612681493985
0.7 0.7
1.000161322585 48.433210629261
2.000241249986 21.798359943835
3.0003206090227 21.337482595053
4.0004001520114 14.873424079086
5.0004813269837 7.8612681493985
0.7 0.7
2.000161322585 50.433210629261
3.000241249986 25.798359943835
4.0003206090227 27.337482595053
5.0004001520114 18.873424079086
6.0004813269837 10.8612681493985
#include <stdio.h>
#include <stdlib.h> /* exit, malloc, calloc, etc. */
#include <string.h>
#include <limits.h> /* INT_MAX */
#include <getopt.h> /* getopt */
#include <likwid.h>
#include "matriz.h"
/**
 * Prints the command-line synopsis to stderr and terminates the program
 * with exit status 1.  Does not return.
 *
 * @param progname name shown as the program name in the synopsis
 */
static void usage(char *progname)
{
  fprintf(stderr,
          "Forma de uso: %s [ -n <ordem> ] \n",
          progname);

  exit(1);
}
/**
* Programa principal
* Forma de uso: matmult [ -n <ordem> ]
* -n <ordem>: ordem da matriz quadrada e dos vetores
*
*/
int main (int argc, char *argv[])
{
int c, n=DEF_SIZE;
double norma;
MatPtr mPtr;
MatRow mRow;
MatCol mCol;
Vetor vet, resPtr, resRow, resCol;
/* =============== TRATAMENTO DE LINHA DE COMAANDO =============== */
char *opts = "n:";
c = getopt (argc, argv, opts);
while ( c != -1 ) {
switch (c) {
case 'n': n = atoi(optarg); break;
default: usage(argv[0]);
}
c = getopt (argc, argv, opts);
}
/* ================ FIM DO TRATAMENTO DE LINHA DE COMANDO ========= */
resPtr = (double *) malloc (n * sizeof(double));
resRow = (double *) malloc (n * sizeof(double));
resCol = (double *) malloc (n * sizeof(double));
srand(20191);
mPtr = geraMatPtr (n, n);
mRow = geraMatRow (n, n);
mCol = geraMatCol (n, n);
vet = geraVetor (n);
#ifdef DEBUG
prnMatPtr (mPtr, n, n);
prnMatRow (mRow, n, n);
prnMatCol (mCol, n, n);
prnVetor (vet, n);
printf ("=================================\n\n");
#endif /* DEBUG */
multMatPtrVet (mPtr, vet, n, n, resPtr);
multMatRowVet (mRow, vet, n, n, resRow);
multMatColVet (mCol, vet, n, n, resCol);
norma = normaMax(resRow, resPtr, n);
norma = normaEucl(resCol, n);
#ifdef DEBUG
prnVetor (resPtr, n);
prnVetor (resRow, n);
prnVetor (resCol, n);
#endif /* DEBUG */
liberaMatPtr (mPtr, n);
liberaVetor ((void*)mRow);
liberaVetor ((void*)mCol);
liberaVetor ((void*)vet);
free(resCol);
free(resRow);
free(resPtr);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "matriz.h"
/**
 * Generates one pseudo-random matrix coefficient.
 *
 * Draws rand()/RAND_MAX and multiplies it by 2*k when the element lies on
 * the main diagonal (i == j), or by 1 otherwise.
 *
 * @param i row index of the element
 * @param j column index of the element
 * @param k diagonal weighting factor (diagonal entries are scaled by 2*k)
 * @return the generated coefficient
 */
static inline double generateRandomA( unsigned int i, unsigned int j, unsigned int k )
{
  const double invRandMax = 1.0 / (double)RAND_MAX;
  double peso;

  if (i == j)
    peso = (double)(2u * k);
  else
    peso = 1.0;

  return peso * (double)rand() * invRandMax;
}
/**
 * Generates one pseudo-random independent term.
 *
 * Draws rand()/RAND_MAX and multiplies it by 4*k.
 *
 * @param k weighting factor (the result lies between 0 and 4*k)
 * @return the generated term
 */
static inline double generateRandomB( unsigned int k )
{
  const double invRandMax = 1.0 / (double)RAND_MAX;
  const double peso = (double)(4u * k);

  return peso * (double)rand() * invRandMax;
}
/* ----------- FUNÇÕES ---------------- */
/**
* Funcao geraMatPtr: gera matriz como vetor de ponteiros para as suas linhas
*
* @param m número de linhas da matriz
* @param n número de colunas da matriz
* @return ponteiro para a matriz gerada
*
*/
MatPtr geraMatPtr (int m, int n)
{
MatPtr matriz = (double **) malloc(m*sizeof(double));
if (matriz) {
for (int i=0; i < m; ++i) {
if (matriz[i] = (double *) malloc(n*sizeof(double)))
for (int j=0; matriz[i] && j < n; ++j)
matriz[i][j] = generateRandomA(i, j, DIAG);
else
return NULL;
}
}
return (matriz);
}
/**
* \brief: libera matriz alocada como vetor de ponteiros para as suas linhas
*
* @param ponteiro para matriz
*
*/