Unverified Commit 8b8a40d0 authored by grzegorz's avatar grzegorz
Browse files

hello world

parent 3cc53961
%% Cell type:code id: tags:
``` python
def my_fun(x, y):
    """Return the result of ``x + y``.

    Works for any pair of operands that support the ``+`` operator
    (numbers, strings, lists, ...).
    """
    return x + y
```
%% Cell type:code id: tags:
``` python
my_fun(10,5)
```
%% Output
15
%% Cell type:code id: tags:
``` python
!pwd
!ls
```
%% Cell type:markdown id: tags:
# GPU is available
Runtime --> Change runtime type --> Hardware accelerator: GPU
Open the file `hello_world.cu`, compile it and run!
%% Cell type:code id: tags:
``` python
!cat hello_world.cu
```
%% Output
#include <stdio.h>
// Kernel: each thread prints its flattened index along the x dimension,
// followed by the block index, block size and in-block thread index that
// the flattened index was computed from.
__global__ void print_from_gpu(void) {
    const unsigned int block_id   = blockIdx.x;
    const unsigned int block_size = blockDim.x;
    const unsigned int local_id   = threadIdx.x;

    // Flattened index: all threads of earlier blocks come first.
    const int global_id = block_id * block_size + local_id;

    printf("Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> %d = %d * %d + %d \n",
           global_id, block_id, block_size, local_id);
}
// Host entry point: prints a greeting, launches print_from_gpu with
// 2 blocks of 3 threads each, then waits for the device to finish.
// Returns 0 on success and 1 if the launch or the kernel execution failed.
int main(void) {
    printf("Hello World from host!\n");

    print_from_gpu<<<2,3>>>(); // <<<blocks, threads_per_block>>>

    // A kernel launch does not return a status. A rejected launch (for
    // example a binary built for an architecture this GPU cannot run) is
    // only reported through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Wait for the kernel to complete; the return value carries any error
    // raised while it was running.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
%% Cell type:code id: tags:
``` python
!nvidia-smi
```
%% Cell type:code id: tags:
``` python
# If you were assigned an older GPU such as a Tesla K80, add the "-gencode arch=compute_35,code=sm_35" flags to the nvcc command line (as done below).
%env CUDA_SUFF=35
!nvcc -gencode arch=compute_${CUDA_SUFF},code=sm_${CUDA_SUFF} ./hello_world.cu -o hello_world
!./hello_world
```
%% Output
env: CUDA_SUFF=35
nvcc warning : The 'compute_35', 'compute_37', 'compute_50', 'sm_35', 'sm_37' and 'sm_50' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
Hello World from host!
Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> 0 = 0 * 3 + 0
Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> 1 = 0 * 3 + 1
Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> 2 = 0 * 3 + 2
Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> 3 = 1 * 3 + 0
Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> 4 = 1 * 3 + 1
Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> 5 = 1 * 3 + 2
#include <stdio.h>
// Kernel: each thread prints its flattened index along the x dimension,
// followed by the block index, block size and in-block thread index that
// the flattened index was computed from.
__global__ void print_from_gpu(void) {
    const unsigned int block_id   = blockIdx.x;
    const unsigned int block_size = blockDim.x;
    const unsigned int local_id   = threadIdx.x;

    // Flattened index: all threads of earlier blocks come first.
    const int global_id = block_id * block_size + local_id;

    printf("Hello from device! My threadId = blockIdx.x *blockDim.x + threadIdx.x <=> %d = %d * %d + %d \n",
           global_id, block_id, block_size, local_id);
}
// Host entry point: prints a greeting, launches print_from_gpu with
// 2 blocks of 3 threads each, then waits for the device to finish.
// Returns 0 on success and 1 if the launch or the kernel execution failed.
int main(void) {
    printf("Hello World from host!\n");

    print_from_gpu<<<2,3>>>(); // <<<blocks, threads_per_block>>>

    // A kernel launch does not return a status. A rejected launch (for
    // example a binary built for an architecture this GPU cannot run) is
    // only reported through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Wait for the kernel to complete; the return value carries any error
    // raised while it was running.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment