1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * This source code is licensed under the BSD-style license found in the |
5 | * LICENSE file in the root directory of this source tree. |
6 | */ |
7 | #define FBGEMM_EXPORTS |
8 | #include "fbgemm/FbgemmConvert.h" |
9 | |
10 | #include "./RefImplementations.h" |
11 | |
12 | #ifdef USE_MKL |
13 | #include <mkl.h> |
14 | #endif |
15 | |
16 | #ifdef USE_BLAS |
17 | #if __APPLE__ |
// Not sure whether we need to differentiate between TARGET_OS_MAC,
// TARGET_OS_IPHONE, etc.
20 | #include <Accelerate/Accelerate.h> |
21 | #else |
22 | #include <cblas.h> |
23 | #endif |
24 | #endif |
25 | |
26 | #include <cpuinfo.h> |
27 | #include <memory> |
28 | #include <utility> |
29 | #include <vector> |
30 | |
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
// Globals that exist only in builds defining FBGEMM_MEASURE_TIME_BREAKDOWN.
// Every one starts at zero; nothing in this part of the file reads or
// updates them.
// NOTE(review): judging by the names, these accumulate per-phase timings of
// a naive bf16 path (allocation, A/B/C conversions, compute, whole run) --
// confirm the units and the code that updates them.
double naive_malloc_time{0.0};
double naive_A_bf16_to_fp32_time{0.0};
double naive_B_bf16_to_fp32_time{0.0};
double naive_C_bf16_to_fp32_time{0.0};
double naive_computing_time{0.0};
double naive_C_fp32_to_bf16_time{0.0};
double naive_run_time{0.0};
#endif
40 | |
41 | namespace fbgemm { |
42 | |
43 | void FloatToBfloat16_simd(const float* src, bfloat16* dst, size_t size) { |
44 | // Run time CPU detection |
45 | if (cpuinfo_initialize()) { |
46 | #ifndef __aarch64__ |
47 | if (fbgemmHasAvx512Support()) { |
48 | FloatToBfloat16_avx512(src, dst, size); |
49 | } else |
50 | #endif |
51 | if (fbgemmHasAvx2Support()) { |
52 | FloatToBfloat16_avx2(src, dst, size); |
53 | } else { |
54 | FloatToBfloat16_ref(src, dst, size); |
55 | return; |
56 | } |
57 | } else { |
58 | throw std::runtime_error("Failed to initialize cpuinfo!" ); |
59 | } |
60 | } |
61 | |
62 | void Bfloat16ToFloat_simd(const bfloat16* src, float* dst, size_t size) { |
63 | // Run time CPU detection |
64 | if (cpuinfo_initialize()) { |
65 | #ifndef __aarch64__ |
66 | if (fbgemmHasAvx512Support()) { |
67 | Bfloat16ToFloat_avx512(src, dst, size); |
68 | } else |
69 | #endif |
70 | if (fbgemmHasAvx2Support()) { |
71 | Bfloat16ToFloat_avx2(src, dst, size); |
72 | } else { |
73 | Bfloat16ToFloat_ref(src, dst, size); |
74 | return; |
75 | } |
76 | } else { |
77 | throw std::runtime_error("Failed to initialize cpuinfo!" ); |
78 | } |
79 | } |
80 | |
81 | } // namespace fbgemm |
82 | |