Professional Documents
Culture Documents
Pdsla 1
Pdsla 1
Pdsla 1
#include<stdio.h>
#define SIZE 4
/*
 * Dot-product kernel: every thread multiplies one pair of elements and adds
 * the product into the single accumulator *c.
 *
 * Launch layout: the element index is threadIdx.x only, so the kernel expects
 * one 1D block with one thread per element (e.g. <<<1, SIZE>>>).
 *
 * a, b : input vectors, readable from device code, at least blockDim.x
 *        elements each.
 * c    : accumulator, writable from device code. The kernel adds to whatever
 *        value *c already holds, so the caller must initialise it (e.g. to 0)
 *        before the launch.
 */
__global__ void dotProduct(int *a, int *b, int *c)
{
    int i = threadIdx.x;

    // All threads update the same address, so the update has to be atomic;
    // a plain "*c += ..." is a data race and must not be combined with the
    // atomic add (each product would then be counted more than once).
    atomicAdd(c, a[i] * b[i]);
}
int main()
{
int a[SIZE] = {1,2,3,4};
int b[SIZE] = {1,2,3,4};
int c = 0;
#include<stdio.h>
#include<sys/time.h>
#define N 4
#define M 4
#define BDIMX 2
#define BDIMY 2
/*
 * Host-side timer helper.
 *
 * Reads the current time with gettimeofday() and returns it as a single
 * double: whole seconds plus the microsecond field scaled to seconds.
 */
double cpuSecond() {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double wholeSeconds = (double)now.tv_sec;
    const double fraction = (double)now.tv_usec * 1.e-6;
    return wholeSeconds + fraction;
}
int main() {
int *a, *b;
int size = N * M * sizeof(int);
a = (int* )malloc(size);
b = (int* )malloc(size);
for(int i = 0; i < N * M; i++) {
a[i] = i;
}
printf("Initial Array: \n");
for(int i = 0; i < N; i++) {
for(int j = 0; j < M; j++) {
printf("%d ", a[i * M + j]);
}
printf("\n");
}
#include <cuda_runtime.h>
#include<stdio.h>
#include<sys/time.h>
#define RADIUS 4
#define BDIM 8
// constant memory
__constant__ float coef[RADIUS + 1];
/*
// FD coefficients
#define a0 0.00000f
#define a1 0.80000f
#define a2 -0.20000f
#define a3 0.03809f
#define a4 -0.00357f
*/
#define a0 0
#define a1 1
#define a2 2
#define a3 3
#define a4 4
/*
 * Returns the current gettimeofday() reading as seconds in a double
 * (tv_sec plus tv_usec converted from microseconds to seconds).
 * Intended for host-side interval timing: subtract two readings.
 */
double cpuSecond(){
    struct timeval stamp;
    gettimeofday(&stamp, NULL);

    double seconds = (double)stamp.tv_sec;
    double microAsSeconds = (double)stamp.tv_usec * 1.e-6;
    return seconds + microAsSeconds;
}
printf("\n");
}
bool iprint = 1;
// Copy to device
cudaMemcpy(d_in, h_in, nBytes, cudaMemcpyHostToDevice);
// launch configuration
cudaDeviceProp info;
cudaGetDeviceProperties(&info, 0);
dim3 block(BDIM, 1);
dim3 grid(info.maxGridSize[0] < isize / block.x ? info.maxGridSize[0] :
isize / block.x, 1);
printf("(grid, block) %d,%d \n ", grid.x, block.x);
double istart = cpuSecond();
// Launch stencil_1d() kernel on GPU
stencil_1d<<<1, 8>>>(d_in + RADIUS, d_out + RADIUS, isize);
double ielapsed = cpuSecond() - istart;
// Copy result back to host
cudaMemcpy(gpuRef, d_out, nBytes, cudaMemcpyDeviceToHost);
// reset device
cudaDeviceReset();
return EXIT_SUCCESS;
}
// Odd-even sort
#include<stdio.h>
#include<stdlib.h>
#define N 100
int main()
{
int *arr;
int *dev_arr;
arr = (int*)malloc(N*sizeof(int));
cudaMalloc((void**)&dev_arr, N*sizeof(int));
int ct = N;
printf("Input array is :\n");
for (int i=0; i<N; i++) {
arr[i] = ct;
ct--;
printf("%d ",arr[i]);
}
printf("\n");
cudaMemcpy(dev_arr, arr, N*sizeof(int), cudaMemcpyHostToDevice);
oddevensort<<<1, N>>>(dev_arr);
cudaMemcpy(arr, dev_arr, N*sizeof(int), cudaMemcpyDeviceToHost);
cudaDeviceReset();
#include<stdio.h>
#include<stdlib.h>
#define N 100
int main()
{
int *arr;
int *dev_arr;
arr = (int*)malloc(N*sizeof(int));
cudaMalloc((void**)&dev_arr, N*sizeof(int));
int ct = N;
printf("Input array is :\n");
for (int i=0; i<N; i++) {
arr[i] = ct;
ct--;
printf("%d ",arr[i]);
}
printf("\n");
#include<stdio.h>
#include<time.h>
#define N 20
cudaMalloc(&da, size);
cudaMemcpy(da, arr, size, cudaMemcpyHostToDevice);
cudaMalloc(&dl, size);
cudaMemcpy(dl, lstack, size, cudaMemcpyHostToDevice);
cudaMalloc(&dh, size);
cudaMemcpy(dh, hstack, size, cudaMemcpyHostToDevice);
while(ni > 0) {
partition<<<nb, nt>>>(da, dl, dh, ni);
int ans;
cudaMemcpyFromSymbol(&ans, d_size, sizeof(int),0,
cudaMemcpyDeviceToHost);
if(ans < N * nt) {
nt = ans;
}
else {
nt = N * nt;
nb = ans / nt + (ans % nt == 0 ? 0 : 1);
}
ni = ans;
cudaMemcpy(arr, da, (high - low + 1) * sizeof(int),
cudaMemcpyDeviceToHost);
}
}
/*
 * Driver for the quick-sort demo.
 *
 * Allocates an N-element int array on the host, hands it to initialize()
 * and then quickSort() (both defined elsewhere in this file; presumably they
 * fill and sort the array in place -- confirm against their definitions),
 * prints the N elements on one line and releases the array.
 *
 * Returns 0 on success, 1 if the host allocation fails.
 */
int main() {
    int *a = (int *)malloc(N * sizeof(int));
    if (a == NULL) {
        // Without this check initialize() would dereference a null pointer.
        fprintf(stderr, "malloc of %d ints failed\n", N);
        return 1;
    }

    initialize(a);
    quickSort(a);

    for (int i = 0; i < N; i++) {
        printf("%d ", a[i]);
    }
    printf("\n");

    free(a);
    return 0;
}
// Preorder traversal
#include<stdio.h>
#define N 8
/*
 * A pair of integers. The preorder kernel in this file stores one of these
 * per edge and prints it as "(x, y)", i.e. x and y hold the two node indices
 * of an edge.
 *
 * No memory-space qualifier is used here: __device__ applies to variables and
 * functions, not to type definitions, and a plain struct is usable from both
 * host and device code.
 */
struct point {
    int x;  // first node index of the pair
    int y;  // second node index of the pair
};
__global__ void preorder(int *parent, int *sibling, int *child, int *adj, int *preo) {
int i = threadIdx.x;
int j = threadIdx.y;
int gind = j*N+i;
if(adj[gind]==1) {
printf("Edge (%d, %d)\n",i, j);
if(parent[i] == j) {
if(sibling[i]!=(-1)) {
struct point pt;
pt.x = j;
pt.y = sibling[i];
succ[i][j] = pt;
}
else if(parent[j]!=(-1)) {
struct point pt;
pt.x = j;
pt.y = parent[j];
succ[i][j] = pt;
}
else {
struct point pt;
pt.x = i;
pt.y = j;
succ[i][j] = pt;
preo[j] = 1;
}
}
else {
if(child[j]!=(-1)) {
struct point pt;
pt.x = j;
pt.y = child[j];
succ[i][j] = pt;
}
else {
struct point pt;
pt.x = j;
pt.y = i;
succ[i][j] = pt;
}
}
__syncthreads();
if(parent[i]==j) position[i][j] = 0;
else position[i][j] = 1;
int logval = (int)ceil(log2((double)(2*(N-1))));
printf("Successor of (%d, %d) = (%d, %d)\n",i, j, succ[i][j].x, succ[i][j].y);
/* Reports a failed CUDA call on stderr. Returns 1 if err is cudaSuccess, else 0. */
static int cudaSucceeded(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return 1;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 0;
}

/*
 * Driver for the preorder demo on a fixed N-node tree.
 *
 * Builds a symmetric N x N adjacency matrix from the parent table, uploads
 * the parent / sibling / child tables and the matrix, launches preorder() as
 * one N x N thread block, copies the per-node result array back and prints
 * the node index stored at each rank.
 *
 * The kernel output is read as 1-based ranks: node i with rank r is printed
 * at position r - 1. Positions for which no valid rank (1..N) came back are
 * printed as -1.
 *
 * Returns 0 on success, 1 if a host allocation, a CUDA call or the kernel
 * launch fails.
 */
int main() {
    // parents[k] is the parent of node k (-1 for the root); this is what the
    // adjacency construction below relies on. sibling[] / children[] are
    // presumably "next sibling" and "first child" with -1 meaning none --
    // confirm against the kernel.
    int parents[] = {-1, 0, 0, 1, 1, 2, 4, 4};
    int sibling[] = {-1, 2, -1, 4, -1, -1, 7, -1};
    int children[] = {1, 3, 5, -1, 6, -1, -1, -1};

    const size_t nodeBytes = sizeof(int) * N;
    const size_t matrixBytes = sizeof(int) * N * N;

    int *parent = NULL, *sib = NULL, *child = NULL, *preo = NULL, *adj = NULL;
    int *ordered = (int *)malloc(nodeBytes);
    if (ordered == NULL) {
        fprintf(stderr, "malloc of %d ints failed\n", N);
        return 1;
    }

    // Undirected adjacency: an edge between every node and its parent.
    int adjacency[N][N];
    memset(adjacency, 0, sizeof(adjacency));
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (parents[j] != -1 && parents[j] == i) {
                adjacency[i][j] = 1;
                adjacency[j][i] = 1;
            }
        }
    }

    // && short-circuits, so the first failing step stops the chain.
    int ok =
        cudaSucceeded(cudaMalloc((void **)&parent, nodeBytes), "cudaMalloc(parent)") &&
        cudaSucceeded(cudaMalloc((void **)&sib, nodeBytes), "cudaMalloc(sib)") &&
        cudaSucceeded(cudaMalloc((void **)&child, nodeBytes), "cudaMalloc(child)") &&
        cudaSucceeded(cudaMalloc((void **)&preo, nodeBytes), "cudaMalloc(preo)") &&
        cudaSucceeded(cudaMalloc((void **)&adj, matrixBytes), "cudaMalloc(adj)") &&
        // cudaMalloc does not clear memory; zero the output so entries the
        // kernel never writes come back as 0 instead of garbage.
        cudaSucceeded(cudaMemset(preo, 0, nodeBytes), "cudaMemset(preo)") &&
        cudaSucceeded(cudaMemcpy(parent, parents, nodeBytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy(parent)") &&
        cudaSucceeded(cudaMemcpy(sib, sibling, nodeBytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy(sib)") &&
        cudaSucceeded(cudaMemcpy(child, children, nodeBytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy(child)") &&
        cudaSucceeded(cudaMemcpy(adj, adjacency, matrixBytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy(adj)");

    if (ok) {
        dim3 grid(1);
        dim3 block(N, N);
        preorder<<<grid, block>>>(parent, sib, child, adj, preo);
        // Launch errors are only visible through cudaGetLastError(); errors
        // raised while the kernel runs surface at the blocking copy below.
        ok = cudaSucceeded(cudaGetLastError(), "preorder launch") &&
             cudaSucceeded(cudaMemcpy(ordered, preo, nodeBytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(preo -> host)");
    }

    if (ok) {
        int preordered[N];
        for (int i = 0; i < N; i++) {
            preordered[i] = -1;
        }
        for (int i = 0; i < N; i++) {
            int rank = ordered[i];
            // Guard the index: a rank outside 1..N would write outside
            // preordered[].
            if (rank >= 1 && rank <= N) {
                preordered[rank - 1] = i;
            }
        }
        for (int i = 0; i < N; i++) {
            printf("%d ", preordered[i]);
        }
        printf("\n");
    }

    free(ordered);
    // cudaFree(NULL) is a no-op, so this is safe after a partial setup.
    cudaFree(parent);
    cudaFree(sib);
    cudaFree(child);
    cudaFree(preo);
    cudaFree(adj);
    cudaDeviceReset();
    return ok ? 0 : 1;
}