1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <mutex> |
18 | #include <unordered_map> |
19 | |
20 | #include "gpu/compute/compute_engine.hpp" |
21 | |
22 | #include "common/utils.hpp" |
23 | |
24 | namespace dnnl { |
25 | namespace impl { |
26 | namespace gpu { |
27 | namespace compute { |
28 | |
// Cache for device_info_t objects. Reuse the already initialized
// device_info_t objects to save time on HW detection and nGEN binary
// check.
// Keyed by device_id_t, so all engines created on the same physical
// device share a single device_info_t instance.
using device_info_cache_t = std::unordered_map<device_id_t,
        std::shared_ptr<device_info_t>, device_id_hash_t>;
34 | |
35 | utils::rw_mutex_t &device_info_cache_mutex() { |
36 | static utils::rw_mutex_t m; |
37 | return m; |
38 | } |
39 | |
40 | device_info_cache_t &device_info_cache() { |
41 | static device_info_cache_t cache; |
42 | return cache; |
43 | } |
44 | |
45 | // Returns true if found, false otherwise. |
46 | bool device_info_cache_get( |
47 | std::shared_ptr<device_info_t> *result, engine_t *engine) { |
48 | utils::lock_read_t lock(device_info_cache_mutex()); |
49 | |
50 | auto it = device_info_cache().find(engine->device_id()); |
51 | if (it == device_info_cache().end()) return false; |
52 | if (result) *result = it->second; |
53 | return true; |
54 | } |
55 | |
56 | void device_info_cache_set( |
57 | engine_t *engine, const std::shared_ptr<device_info_t> &device_info) { |
58 | utils::lock_write_t lock(device_info_cache_mutex()); |
59 | |
60 | // Clear the cache to avoid hypothetically large growth. |
61 | const int cache_size_threshold = 1024; |
62 | if (device_info_cache().size() > cache_size_threshold) |
63 | device_info_cache().clear(); |
64 | |
65 | device_info_cache().insert({engine->device_id(), device_info}); |
66 | } |
67 | |
68 | status_t compute_engine_t::init() { |
69 | return init({}); |
70 | } |
71 | |
72 | status_t compute_engine_t::init(const std::vector<uint8_t> &cache_blob) { |
73 | if (device_info_cache_get(&device_info_, this)) return status::success; |
74 | // Since init_device_info that takes a cache blob is only defined for |
75 | // OpenCL we need to do manual dispatching here. |
76 | if (cache_blob.empty()) |
77 | CHECK(init_device_info()); |
78 | else |
79 | CHECK(init_device_info(cache_blob)); |
80 | device_info_cache_set(this, device_info_); |
81 | |
82 | return status::success; |
83 | } |
84 | |
85 | } // namespace compute |
86 | } // namespace gpu |
87 | } // namespace impl |
88 | } // namespace dnnl |
89 | |
90 | bool dnnl_impl_gpu_mayiuse_ngen_kernels(dnnl::impl::engine_t *engine) { |
91 | using namespace dnnl::impl; |
92 | using namespace dnnl::impl::gpu::compute; |
93 | |
94 | auto *compute_engine = utils::downcast<compute_engine_t *>(engine); |
95 | return compute_engine->mayiuse_ngen_kernels(); |
96 | } |
97 | |