// Output its prefix sum = {lst[0], lst[0] + lst[1], ..., lst[0] + lst[1] + ... + lst[n-1]}
#include <wb.h>
#define BLOCK_SIZE 512 //@@ You can change this
#define wbCheck(stmt) do { cudaError_t err = stmt; if (err != cudaSuccess) { wbLog(ERROR, "Failed to run stmt ", #stmt); wbLog(ERROR, "Got CUDA error ... ", cudaGetErrorString(err)); return -1; } } while(0)

// Brent-Kung inclusive scan (prefix sum) of one segment per block.
//
// Launch layout: each block of BLOCK_SIZE threads scans one contiguous
// segment of 2*BLOCK_SIZE input elements, so the grid should have
// ceil(len / (2*BLOCK_SIZE)) blocks (extra blocks are harmless — their
// bounds guards make them no-ops). The result is a per-segment inclusive
// scan; a second pass over block sums is still needed to stitch segments
// into a global scan.
//
// input  : device pointer, len floats (read only)
// output : device pointer, len floats (per-segment inclusive scans)
// len    : number of valid elements
__global__ void scan(float *input, float *output, int len) {
    // Static shared-memory tile holding this block's 2*BLOCK_SIZE segment.
    __shared__ float XY[2 * BLOCK_SIZE];

    unsigned int tx = threadIdx.x;
    // First input index owned by this block's segment.
    unsigned int start = 2 * blockIdx.x * BLOCK_SIZE;

    // Load both halves of the segment, zero-padding past the end of the
    // input so the scan math needs no further bounds checks.
    XY[tx] = (start + tx < len) ? input[start + tx] : 0.0f;
    XY[BLOCK_SIZE + tx] =
        (start + BLOCK_SIZE + tx < len) ? input[start + BLOCK_SIZE + tx] : 0.0f;
    __syncthreads();

    // Up-sweep (reduction) phase: build partial sums at power-of-two
    // strides. The barrier is outside the divergent `if`, executed by all
    // threads each iteration.
    for (int stride = 1; stride <= BLOCK_SIZE; stride <<= 1) {
        int index = (tx + 1) * stride * 2 - 1;
        if (index < 2 * BLOCK_SIZE)
            XY[index] += XY[index - stride];
        __syncthreads();
    }

    // Down-sweep (post-reduction) phase: distribute the partial sums so
    // every slot holds the inclusive prefix of its segment.
    for (int stride = BLOCK_SIZE >> 1; stride >= 1; stride >>= 1) {
        int index = (tx + 1) * stride * 2 - 1;
        if (index + stride < 2 * BLOCK_SIZE)
            XY[index + stride] += XY[index];
        __syncthreads();
    }

    // BUG FIX: the original wrote only XY[tx] at
    // i = blockDim.x*blockIdx.x + tx, which (a) drops the second half of
    // every segment and (b) disagrees with the `start` used for loading,
    // so every block past block 0 stored to the wrong indices. Write both
    // halves back at their segment-relative positions.
    if (start + tx < len)
        output[start + tx] = XY[tx];
    if (start + BLOCK_SIZE + tx < len)
        output[start + BLOCK_SIZE + tx] = XY[BLOCK_SIZE + tx];
}
// Host driver: imports the input list, runs the device scan, copies the
// result back, and frees device buffers.
// NOTE(review): this definition continues past the visible end of the
// file (closing brace / return / host frees not shown here).
int main(int argc, char ** argv) {
    wbArg_t args;
    float * hostInput; // The input 1D list
    float * hostOutput; // The output list
    float * deviceInput;
    float * deviceOutput;
    float * deviceOutputemp; // scratch buffer for the first-pass per-block scan
    int numElements; // number of elements in the list
    args = wbArg_read(argc, argv);
    wbTime_start(Generic, "Importing data and creating memory on host");
    hostInput = (float *) wbImport(wbArg_getInputFile(args, 0), &numElements);
    hostOutput = (float*) malloc(numElements * sizeof(float));
    wbTime_stop(Generic, "Importing data and creating memory on host");
    wbLog(TRACE, "The number of input elements in the input is ", numElements);
    wbTime_start(GPU, "Allocating GPU memory.");
    wbCheck(cudaMalloc((void**)&deviceInput, numElements*sizeof(float)));
    wbCheck(cudaMalloc((void**)&deviceOutputemp, numElements*sizeof(float)));
    wbCheck(cudaMalloc((void**)&deviceOutput, numElements*sizeof(float)));
    wbTime_stop(GPU, "Allocating GPU memory.");
    wbTime_start(GPU, "Clearing output memory.");
    wbCheck(cudaMemset(deviceOutput, 0, numElements*sizeof(float)));
    wbTime_stop(GPU, "Clearing output memory.");
    wbTime_start(GPU, "Copying input memory to the GPU.");
    // NOTE(review): "cudaMe mcpyHostToDevice" is a line-wrap artifact — the
    // identifier should read cudaMemcpy(..., cudaMemcpyHostToDevice) on one
    // line; as written this does not compile.
    wbCheck(cudaMemcpy(deviceInput, hostInput, numElements*sizeof(float), cudaMe mcpyHostToDevice));
    wbTime_stop(GPU, "Copying input memory to the GPU.");
    //@@ Initialize the grid and block dimensions here
    // NOTE(review): the scan kernel consumes 2*BLOCK_SIZE elements per block,
    // so this should be (numElements-1)/(2*BLOCK_SIZE)+1; the current count
    // launches twice as many blocks as needed.
    int numOfBlocks =(numElements-1)/BLOCK_SIZE+1;
    dim3 gridSize(numOfBlocks);
    wbTime_start(Compute, "Performing CUDA computation");
    //@@ Modify this to complete the functionality of the scan
    //@@ on the deivce
    // NOTE(review): launches are unchecked — add cudaGetLastError() after
    // each <<<>>> (wbCheck cannot wrap a launch directly).
    scan<<<gridSize, BLOCK_SIZE>>>(deviceInput, deviceOutputemp, numElements );
    cudaDeviceSynchronize();
    // NOTE(review): several problems with this second launch:
    //  - "deviceO utput" is a line-wrap artifact (should be deviceOutput);
    //  - gridSize (a dim3 of numOfBlocks) is passed as the BLOCK dimension;
    //  - dynamic shared memory is supplied but the kernel declares a static
    //    __shared__ array, not extern __shared__, so it is unused;
    //  - it scans the first numOfBlocks elements of the per-block scan
    //    RESULTS, not an array of per-block SUMS — the aux-sums extraction
    //    step of the hierarchical scan is missing entirely.
    scan<<<1, gridSize,numOfBlocks*sizeof(float)>>>(deviceOutputemp, deviceO utput, numOfBlocks);
    cudaDeviceSynchronize();
    // NOTE(review): fixup, dimGrid, dimBlock, and deviceAuxScannedArray are
    // never declared or defined anywhere in this file — this line cannot
    // compile as written ("numElement s" is another line-wrap artifact).
    fixup<<<dimGrid, dimBlock>>>(deviceOutput, deviceAuxScannedArray, numElement s);
    wbTime_stop(Compute, "Performing CUDA computation");
    wbTime_start(Copy, "Copying output memory to the CPU");
    // NOTE(review): "cuda MemcpyDeviceToHost" is a line-wrap artifact —
    // should be cudaMemcpyDeviceToHost as one identifier.
    wbCheck(cudaMemcpy(hostOutput, deviceOutput, numElements*sizeof(float), 
    cuda MemcpyDeviceToHost));
    wbTime_stop(Copy, "Copying output memory to the CPU");
    wbTime_start(GPU, "Freeing GPU Memory");
    // NOTE(review): deviceOutputemp is allocated above but never freed —
    // device memory leak.
    cudaFree(deviceInput);
    cudaFree(deviceOutput);
    wbTime_stop(GPU, "Freeing GPU Memory");