-
Notifications
You must be signed in to change notification settings - Fork 0
/
fp16_dev.cu
42 lines (37 loc) · 1.27 KB
/
fp16_dev.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/**
* Copyright 2014 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "error_util.h"
#include "fp16_dev.h"
#define BLOCK_SIZE 128
template <class value_type>
__global__ void float2half_rn_kernel(int size, const value_type *buffIn, half1 *buffOut)
{
const int idx = BLOCK_SIZE*blockIdx.x+threadIdx.x;
if (idx >= size) {
return;
}
#if CUDART_VERSION < 9000
half1 val;
val.x = __float2half_rn(float(buffIn[idx]));
#else
half1 val = __float2half_rn(float(buffIn[idx]));
#endif
buffOut[idx] = val;
}
template <class value_type>
void gpu_float2half_rn(int size, const value_type *buffIn, half1 *buffOut)
{
int grid_size = (size + BLOCK_SIZE - 1) / BLOCK_SIZE;
float2half_rn_kernel<value_type><<<grid_size, BLOCK_SIZE>>> (size, buffIn, buffOut);
checkCudaErrors(cudaDeviceSynchronize());
}
template void gpu_float2half_rn<float> (int, const float*, half1*);
template void gpu_float2half_rn<double> (int, const double*, half1*);