Skip to content
Snippets Groups Projects
Commit dc47c0af authored by Matei BARBU's avatar Matei BARBU
Browse files

cuda: practice: first easy exercises

parent 1c9696c7
No related branches found
No related tags found
1 merge request!8Lab7 support
mergesort
EXEC := $(notdir $(CURDIR))
include ../../../Makefile
#include <stdio.h>
#define NUM_ELEMENTS 16
__device__ void merge(int* arr, int* temp, int left, int middle, int right) {
int i = left;
int j = middle;
int k = left;
while (i < middle && j < right) {
// TODO 1: Get the smaller element
}
while (i < middle) {
// TODO 2: Copy any remaining elements from the left subarray
}
while (j < right) {
// TODO 3: Copy any remaining elements from the right subarray
}
for (int x = left; x < right; x++) {
// TODO 4: Copy the sorted elements
}
}
__global__ void mergeSortGPU(int* arr, int* temp, int n, int width) {
int tid = threadIdx.x + blockDim.x * blockIdx.x;
int left = tid * width;
int middle = left + width / 2;
int right = left + width;
if (left < n && middle < n) {
merge(arr, temp, left, middle, right);
}
}
void generateData(int* data, int size) {
srand(time(0));
for (int i = 0; i < size; i++) {
data[i] = rand() % 14 + 1;
}
}
int main() {
int* array = NULL;
array = (int*)malloc(NUM_ELEMENTS * sizeof(int));
generateData(array, NUM_ELEMENTS);
printf("Original Array: ");
for (int i = 0; i < NUM_ELEMENTS; i++) {
printf("%d ", array[i]);
}
printf("\n");
int *d_array, *d_tmp;
cudaMalloc(&d_array, NUM_ELEMENTS * sizeof(int));
cudaMemcpy(d_array, array, NUM_ELEMENTS * sizeof(int),
cudaMemcpyHostToDevice);
cudaMalloc(&d_tmp, NUM_ELEMENTS * sizeof(int));
int block_size = 256;
size_t blocks_no = NUM_ELEMENTS / block_size;
if (NUM_ELEMENTS % block_size) ++blocks_no;
for (int width = 1; width < NUM_ELEMENTS; width *= 2) {
// TODO 0: Call the mergeSortGPU kernel with the appropriate arguments
}
cudaDeviceSynchronize();
cudaMemcpy(array, d_array, NUM_ELEMENTS * sizeof(int),
cudaMemcpyDeviceToHost);
cudaFree(d_array);
printf("Sorted Array: ");
for (int i = 0; i < NUM_ELEMENTS; i++) {
printf("%d ", array[i]);
}
printf("\n");
return 0;
}
oets
EXEC := $(notdir $(CURDIR))
include ../../../Makefile
#include <stdio.h>
#include <stdlib.h>
#define NUM_ELEMENTS 16
__device__ void swap(int *a, int *b) {
int temp = *a;
*a = *b;
*b = temp;
}
// TODO 2: define parameters
__global__ void oddEvenTranspositionSort() {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = 0; i < n; i++) {
if (i % 2 == 0) { // Even phase
// TODO 2: Compare and swap elements if thread id is even
} else { // Odd phase
// TODO 3: Compare and swap elements if thread id is odd
}
// TODO 4: Sync threads
}
}
void generateData(int *data, int size) {
srand(time(0));
for (int i = 0; i < size; i++) {
data[i] = rand() % 14 + 1;
}
}
int main() {
int *array = NULL;
array = (int *)malloc(NUM_ELEMENTS * sizeof(int));
generateData(array, NUM_ELEMENTS);
printf("Original Array: ");
for (int i = 0; i < NUM_ELEMENTS; i++) {
printf("%d ", array[i]);
}
printf("\n");
int *d_array;
// TODO 0: Allocate device array and copy host elements to it
// TODO 1: Calculate blocks_no and block_size
oddEvenTranspositionSort<<<blocks_no, block_size>>>(d_array, NUM_ELEMENTS);
cudaDeviceSynchronize();
cudaMemcpy(array, d_array, NUM_ELEMENTS * sizeof(int),
cudaMemcpyDeviceToHost);
cudaFree(d_array);
printf("Sorted Array: ");
for (int i = 0; i < NUM_ELEMENTS; i++) {
printf("%d ", array[i]);
}
printf("\n");
free(array);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment