diff --git a/participantes/fmk/2010/smooth/.gitignore b/participantes/fmk/2010/smooth/.gitignore
index 279450fa33e577dd532259f8f9bad579c7905abf..6da8506654c7f4f67184faccab66cafd56c1eeac 100644
--- a/participantes/fmk/2010/smooth/.gitignore
+++ b/participantes/fmk/2010/smooth/.gitignore
@@ -4,3 +4,5 @@ smooth-generator
 movie.in
 smooth.out
 movie.out
+batch
+cuda
diff --git a/participantes/fmk/2010/smooth/Makefile.batch b/participantes/fmk/2010/smooth/Makefile.batch
new file mode 100644
--- /dev/null
+++ b/participantes/fmk/2010/smooth/Makefile.batch
@@ -0,0 +1,20 @@
+FLAGS=-O2 -fopenmp
+
+CC=gcc
+
+RM=rm -f
+
+EXEC=batch
+
+.PHONY: all run clean
+
+all: $(EXEC)
+
+$(EXEC): $(EXEC).c
+	$(CC) $(FLAGS) $(EXEC).c -o $(EXEC)
+
+run:
+	./$(EXEC)
+
+clean:
+	$(RM) *.o $(EXEC)
diff --git a/participantes/fmk/2010/smooth/Makefile.cuda b/participantes/fmk/2010/smooth/Makefile.cuda
new file mode 100644
--- /dev/null
+++ b/participantes/fmk/2010/smooth/Makefile.cuda
@@ -0,0 +1,20 @@
+FLAGS=-O2
+
+CC=nvcc
+
+RM=rm -f
+
+EXEC=cuda
+
+.PHONY: all run clean
+
+all: $(EXEC)
+
+$(EXEC): $(EXEC).cu
+	$(CC) $(FLAGS) $(EXEC).cu -o $(EXEC)
+
+run:
+	./$(EXEC)
+
+clean:
+	$(RM) *.o $(EXEC)
diff --git a/participantes/fmk/2010/smooth/answer.md5 b/participantes/fmk/2010/smooth/answer.md5.batch
similarity index 100%
rename from participantes/fmk/2010/smooth/answer.md5
rename to participantes/fmk/2010/smooth/answer.md5.batch
diff --git a/participantes/fmk/2010/smooth/answer.md5.cuda b/participantes/fmk/2010/smooth/answer.md5.cuda
new file mode 100644
index 0000000000000000000000000000000000000000..34961a41ccf9964d66638afc799b4507ce4d0def
--- /dev/null
+++ b/participantes/fmk/2010/smooth/answer.md5.cuda
@@ -0,0 +1 @@
+2bc7add1ce5e3ca0d48b6f956ae0f361  movie.out
diff --git a/participantes/fmk/2010/smooth/batch b/participantes/fmk/2010/smooth/batch
new file mode 100755
index 0000000000000000000000000000000000000000..ac6f7191b003f3091c812ad2e06edaaf0a2e360c
Binary files /dev/null and b/participantes/fmk/2010/smooth/batch differ
diff --git a/participantes/fmk/2010/smooth/batch.c b/participantes/fmk/2010/smooth/batch.c
new file mode 100644
--- /dev/null
+++ b/participantes/fmk/2010/smooth/batch.c
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <omp.h>
+
+/*
+ * 3x3 box smoothing of every frame in movie.in, written to movie.out.
+ * Frames are read in batches of BATCH_SIZE and the (frame, row, column)
+ * iteration space of each batch is processed by one OpenMP parallel loop.
+ */
+
+/* Pixel (row, col) of frame k inside a batch buffer; frames are row-major. */
+#define image(k,row,col) pixels[((size_t)(k)*framesize)+((size_t)(row)*width)+(col)]
+#define smooth(k,row,col) filtered[((size_t)(k)*framesize)+((size_t)(row)*width)+(col)]
+const int BATCH_SIZE = 10;
+
+int main(int argc, char *argv[]) {
+    (void) argc;
+    (void) argv;
+
+    FILE *in = fopen("movie.in", "rb");
+    if (in == NULL) {
+        perror("movie.in");
+        exit(EXIT_FAILURE);
+    }
+
+    FILE *out = fopen("movie.out", "wb");
+    if (out == NULL) {
+        perror("movie.out");
+        exit(EXIT_FAILURE);
+    }
+
+    int width, height;
+
+    /* The header is two native ints; without it the sizes below are garbage. */
+    if (fread(&width, sizeof(width), 1, in) != 1 ||
+        fread(&height, sizeof(height), 1, in) != 1 ||
+        width < 0 || height < 0) {
+        fprintf(stderr, "movie.in: missing or invalid header\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (fwrite(&width, sizeof(width), 1, out) != 1 ||
+        fwrite(&height, sizeof(height), 1, out) != 1) {
+        perror("movie.out");
+        exit(EXIT_FAILURE);
+    }
+
+    /* Sizes are computed in size_t so large frames do not overflow int. */
+    size_t framesize = (size_t) height * (size_t) width;
+    if (framesize > SIZE_MAX / sizeof(int) / (size_t) BATCH_SIZE) {
+        fprintf(stderr, "movie.in: frame size too large\n");
+        exit(EXIT_FAILURE);
+    }
+    size_t batch_bytes = framesize * sizeof(int) * (size_t) BATCH_SIZE;
+
+    int *pixels = malloc(batch_bytes);
+    int *filtered = malloc(batch_bytes);
+    if (batch_bytes != 0 && (pixels == NULL || filtered == NULL)) {
+        perror("malloc");
+        exit(EXIT_FAILURE);
+    }
+
+    int DY[] = { -1, -1, -1, +0, +0, +0, +1, +1, +1 };
+    int DX[] = { -1, +0, +1, -1, +0, +1, -1, +0, +1 };
+
+    for (;;) {
+        /* fread counts whole frames; a trailing partial frame is dropped. */
+        int read = (int) fread(pixels, framesize * sizeof(int), BATCH_SIZE, in);
+        if (read == 0) { break; }
+
+#pragma omp parallel for default(none) collapse(3) shared(DX, DY, framesize, height, width, pixels, filtered, read)
+        for (int k = 0; k < read; k++) {
+            for (int y = 0; y < height; y++) {
+                for (int x = 0; x < width; x++) {
+                    long long int sum = 0;
+                    for (int d = 0; d < 9; d++) {
+                        int dx = x + DX[d];
+                        int dy = y + DY[d];
+                        if (dx >= 0 && dx < width && dy >= 0 && dy < height) {
+                            sum += image(k, dy, dx);
+                        }
+                    }
+                    /* Border pixels still divide by 9: missing neighbours count as 0. */
+                    smooth(k, y, x) = sum / 9;
+                }
+            }
+        }
+
+        if (fwrite(filtered, framesize * sizeof(int), read, out) != (size_t) read) {
+            perror("movie.out");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    free(pixels);
+    free(filtered);
+
+    /* fclose flushes buffered output, so a failure here means lost frames. */
+    if (fclose(out) != 0) {
+        perror("movie.out");
+        exit(EXIT_FAILURE);
+    }
+    fclose(in);
+
+    return EXIT_SUCCESS;
+}
diff --git a/participantes/fmk/2010/smooth/cuda b/participantes/fmk/2010/smooth/cuda
new file mode 100755
index 0000000000000000000000000000000000000000..b7bab67fe0a51e37a000335134d90766c49a5416
Binary files /dev/null and b/participantes/fmk/2010/smooth/cuda differ
diff --git a/participantes/fmk/2010/smooth/cuda.cu b/participantes/fmk/2010/smooth/cuda.cu
new file mode 100644
--- /dev/null
+++ b/participantes/fmk/2010/smooth/cuda.cu
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <omp.h>
+#include <time.h>
+
+#define image(k,x,y) pixels[((k)*framesize)+((x)*width)+(y)]
+#define smooth(k,x,y) filtered[((k)*framesize)+((x)*width)+(y)]
+const int BATCH_SIZE = 500;
+
+/// Abort with the CUDA error string when a runtime call fails.
+#define CUDA_CHECK(call) do { \
+    cudaError_t cuda_check_err = (call); \
+    if (cuda_check_err != cudaSuccess) { \
+        fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, cudaGetErrorString(cuda_check_err)); \
+        exit(EXIT_FAILURE); \
+    } \
+} while (0)
+
+/// 3x3 box smoothing. One thread per pixel: the x/y thread index selects the
+/// column/row and blockIdx.z selects the frame inside the batch. Neighbours
+/// outside the frame are skipped, but the sum is always divided by 9.
+__global__ void blur(const int* pixels, int* filtered, const int width, const int height) {
+    int k = blockIdx.z;
+
+    int i = blockDim.x*blockIdx.x + threadIdx.x;
+    if (i >= width) { return; }
+    int j = blockDim.y*blockIdx.y + threadIdx.y;
+    if (j >= height) { return; }
+
+    // 64-bit offsets: k*width*height can overflow int for large batches.
+    size_t frame = (size_t)k * width * height;
+    long long int sum = 0;
+
+    for (int dj = -1; dj <= +1; dj++) {
+        if (!(0 <= j+dj && j+dj < height)) { continue; }
+        for (int di = -1; di <= +1; di++) {
+            if (!(0 <= i+di && i+di < width)) { continue; }
+            sum += pixels[frame + (size_t)(j+dj)*width + (i+di)];
+        }
+    }
+
+    filtered[frame + (size_t)j*width + i] = sum / 9;
+}
+
+
+/// Convert seconds to milliseconds
+#define SEC_TO_MS(sec) ((sec)*1000)
+/// Convert seconds to microseconds
+#define SEC_TO_US(sec) ((sec)*1000000)
+/// Convert seconds to nanoseconds
+#define SEC_TO_NS(sec) ((sec)*1000000000)
+
+/// Convert nanoseconds to seconds
+#define NS_TO_SEC(ns) ((ns)/1000000000)
+/// Convert nanoseconds to milliseconds
+#define NS_TO_MS(ns) ((ns)/1000000)
+/// Convert nanoseconds to microseconds
+#define NS_TO_US(ns) ((ns)/1000)
+
+/// CLOCK_MONOTONIC_RAW timestamp in seconds, truncated to whole milliseconds.
+double ts() {
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+    uint64_t ms = SEC_TO_MS((uint64_t)ts.tv_sec) + NS_TO_MS((uint64_t)ts.tv_nsec);
+    return ms/1000.0;
+}
+
+/// Integer division rounded up (used here with non-negative x and positive y).
+int ceil_div(int x, int y) {
+    return x/y + (x%y != 0);
+}
+
+int main(int argc, char *argv[]) {
+
+    FILE *in;
+    FILE *out;
+
+    in = fopen("movie.in", "rb");
+    if (in == NULL) {
+        perror("movie.in");
+        exit(EXIT_FAILURE);
+    }
+
+    out = fopen("movie.out", "wb");
+    if (out == NULL) {
+        perror("movie.out");
+        exit(EXIT_FAILURE);
+    }
+
+    int width, height;
+
+    // The header is two native ints; without it the sizes below are garbage.
+    if (fread(&width, sizeof(width), 1, in) != 1 ||
+        fread(&height, sizeof(height), 1, in) != 1 ||
+        width < 0 || height < 0) {
+        fprintf(stderr, "movie.in: missing or invalid header\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (fwrite(&width, sizeof(width), 1, out) != 1 ||
+        fwrite(&height, sizeof(height), 1, out) != 1) {
+        perror("movie.out");
+        exit(EXIT_FAILURE);
+    }
+
+    // Sizes are computed in size_t so large frames do not overflow int.
+    size_t framesize = (size_t)height * (size_t)width;
+    if (framesize > SIZE_MAX / sizeof(int) / (size_t)BATCH_SIZE) {
+        fprintf(stderr, "movie.in: frame size too large\n");
+        exit(EXIT_FAILURE);
+    }
+    size_t batch_bytes = framesize * sizeof(int) * (size_t)BATCH_SIZE;
+
+    int *pixels = (int *) malloc(batch_bytes);
+    int *filtered = (int *) malloc(batch_bytes);
+    if (batch_bytes != 0 && (pixels == NULL || filtered == NULL)) {
+        perror("malloc");
+        exit(EXIT_FAILURE);
+    }
+
+    int *pixelsGPU;
+    CUDA_CHECK(cudaMalloc(&pixelsGPU, batch_bytes));
+    int *filteredGPU;
+    CUDA_CHECK(cudaMalloc(&filteredGPU, batch_bytes));
+
+    // The x/y launch geometry depends only on the frame size, so compute it once.
+    int threads_per_block = 32;
+    int blocks_grid_x = ceil_div(width, threads_per_block);
+    int blocks_grid_y = ceil_div(height, threads_per_block);
+    dim3 dim_block (threads_per_block, threads_per_block, 1);
+
+    int read = fread(pixels, framesize*sizeof(int), BATCH_SIZE, in);
+    CUDA_CHECK(cudaMemcpy(pixelsGPU, pixels, framesize*sizeof(int)*read, cudaMemcpyHostToDevice));
+    // while, not do-while: an input without frames must not launch an empty grid.
+    while (read > 0) {
+        dim3 dim_grid (blocks_grid_x, blocks_grid_y, read);
+        blur<<<dim_grid, dim_block>>>(pixelsGPU, filteredGPU, width, height);
+        CUDA_CHECK(cudaGetLastError());
+        // The launch returns immediately, so the next batch is read from disk meanwhile.
+        int nextread = fread(pixels, framesize*sizeof(int), BATCH_SIZE, in);
+        // Both copies are ordered after the kernel, so pixelsGPU may be overwritten.
+        CUDA_CHECK(cudaMemcpy(filtered, filteredGPU, framesize*sizeof(int)*read, cudaMemcpyDeviceToHost));
+        CUDA_CHECK(cudaMemcpy(pixelsGPU, pixels, framesize*sizeof(int)*nextread, cudaMemcpyHostToDevice));
+        if (fwrite(filtered, framesize*sizeof(int), read, out) != (size_t)read) {
+            perror("movie.out");
+            exit(EXIT_FAILURE);
+        }
+        read = nextread;
+    }
+
+    CUDA_CHECK(cudaFree(pixelsGPU));
+    CUDA_CHECK(cudaFree(filteredGPU));
+    free(pixels);
+    free(filtered);
+
+    if (fclose(out) != 0) {
+        perror("movie.out");
+        exit(EXIT_FAILURE);
+    }
+    fclose(in);
+
+    return EXIT_SUCCESS;
+}
diff --git a/participantes/fmk/2010/smooth/run-batch.sh b/participantes/fmk/2010/smooth/run-batch.sh
new file mode 100644
index 0000000000000000000000000000000000000000..738945f590db96c8415d89ffb7e0348245910c5c
--- /dev/null
+++ b/participantes/fmk/2010/smooth/run-batch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --partition=maratona
+#SBATCH --job-name=smooth.batch
+#SBATCH --nodelist=pti
+#SBATCH --exclusive
+#SBATCH --cpu-freq=high
+#SBATCH -o smooth.out.batch
+#SBATCH -e time.batch
+
+TIME_COMMAND="/usr/bin/time "
+TIME_PARAMETERS="-f %e"
+make -B -f Makefile.batch > /dev/null
+/usr/bin/time -f %e ./batch
+md5sum movie.out > answer.md5.batch
diff --git a/participantes/fmk/2010/smooth/run-cuda.sh b/participantes/fmk/2010/smooth/run-cuda.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c427ac0c2a39825729ef1b2ca403103352d04b11
--- /dev/null
+++ b/participantes/fmk/2010/smooth/run-cuda.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --partition=maratona
+#SBATCH --job-name=smooth.cuda
+#SBATCH --exclusive
+#SBATCH --cpu-freq=high
+#SBATCH --gres=gpu:2
+#SBATCH -o smooth.out.cuda
+#SBATCH -e time.cuda
+
+TIME_COMMAND="/usr/bin/time "
+TIME_PARAMETERS="-f %e"
+make -B -f Makefile.cuda > /dev/null
+/usr/bin/time -f %e ./cuda
+md5sum movie.out > answer.md5.cuda
diff --git a/participantes/fmk/2010/smooth/smooth.c b/participantes/fmk/2010/smooth/smooth.c
deleted file mode 100644
index ed8777e6fffc39ba1070e1220940455a02f910b0..0000000000000000000000000000000000000000
--- a/participantes/fmk/2010/smooth/smooth.c
+++ /dev/null
@@ -1,66 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#define image(x,y) pixels[x*width+y]
-#define smooth(x,y) filtered[x*width+y]
-
-int main(int argc, char *argv[]) {
-
-    FILE *in;
-    FILE *out;
-
-    in = fopen("movie.in", "rb");
-    if (in == NULL) {
-        perror("movie.in");
-        exit(EXIT_FAILURE);
-    }
-
-    out = fopen("movie.out", "wb");
-    if (out == NULL) {
-        perror("movie.out");
-        exit(EXIT_FAILURE);
-    }
-
-    int width, height;
-
-    fread(&width, sizeof(width), 1, in);
-    fread(&height, sizeof(height), 1, in);
-
-    fwrite(&width, sizeof(width), 1, out);
-    fwrite(&height, sizeof(height), 1, out);
-
-    int *pixels = (int *) malloc(height * width * sizeof(int));
-    int *filtered = (int *) malloc(height * width * sizeof(int));
-
-    int DY[] = { -1, -1, -1, +0, +0, +0, +1, +1, +1 };
-    int DX[] = { -1, +0, +1, -1, +0, +1, -1, +0, +1 };
-    int x, y, d, dx, dy;
-
-    do {
-        if (!fread(pixels, height * width * sizeof(int), 1, in))
-            break;
-
-        for (y = 0; y < height; y++) {
-            for (x = 0; x < width; x++) {
-                long long int sum = 0;
-                for (d = 0; d < 9; d++) {
-                    dx = x + DX[d];
-                    dy = y + DY[d];
-                    if (dx >= 0 && dx < width && dy >= 0 && dy < height)
-                        sum += image(dy, dx);
-                }
-                smooth(y, x) = sum / 9;
-            }
-        }
-
-        fwrite(filtered, height * width * sizeof(int), 1, out);
-    } while (!feof(in));
-
-    free(pixels);
-    free(filtered);
-
-    fclose(out);
-    fclose(in);
-
-    return EXIT_SUCCESS;
-}
diff --git a/participantes/fmk/2010/smooth/smooth.out.batch b/participantes/fmk/2010/smooth/smooth.out.batch
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/participantes/fmk/2010/smooth/smooth.out.cuda b/participantes/fmk/2010/smooth/smooth.out.cuda
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/participantes/fmk/2010/smooth/smooth.sh b/participantes/fmk/2010/smooth/smooth.sh
deleted file mode 100644
index 9d10abdc4d86a4937a0179c9a489541c550e8614..0000000000000000000000000000000000000000
--- a/participantes/fmk/2010/smooth/smooth.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=maratona
-#SBATCH --job-name=smooth
-#SBATCH --nodelist=pti
-#SBATCH --exclusive
-#SBATCH --cpu-freq=high
-#SBATCH -o smoothg.out
-#SBATCH -e time
-
-TIME_COMMAND="/usr/bin/time "
-TIME_PARAMETERS="-f %e"
-make -B > /dev/null
-/usr/bin/time -f %e ./smooth
-md5sum movie.out > answer.md5
diff --git a/participantes/fmk/2010/smooth/time b/participantes/fmk/2010/smooth/time
deleted file mode 100644
index cc93c778fb4e821a67fca3d9bcc94f7ec45b7984..0000000000000000000000000000000000000000
--- a/participantes/fmk/2010/smooth/time
+++ /dev/null
@@ -1 +0,0 @@
-662.07
diff --git a/participantes/fmk/2010/smooth/time.batch b/participantes/fmk/2010/smooth/time.batch
new file mode 100644
index 0000000000000000000000000000000000000000..b01eaef913859ee2fa77a549a4af27ed2b1df0e7
--- /dev/null
+++ b/participantes/fmk/2010/smooth/time.batch
@@ -0,0 +1 @@
+342.35
diff --git a/participantes/fmk/2010/smooth/time.cuda b/participantes/fmk/2010/smooth/time.cuda
new file mode 100644
index 0000000000000000000000000000000000000000..999cae5c20b4fe089f44b833f5483d8e33be1d8c
--- /dev/null
+++ b/participantes/fmk/2010/smooth/time.cuda
@@ -0,0 +1 @@
+356.72