/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <mutex>
#include <unordered_map>

#include "gpu/compute/compute_engine.hpp"

#include "common/utils.hpp"

namespace dnnl {
namespace impl {
namespace gpu {
namespace compute {

// Cache for device_info_t objects. Reuse the already initialized
// device_info_t objects to save time on HW detection and nGEN binary
// check.
using device_info_cache_t = std::unordered_map<device_id_t,
        std::shared_ptr<device_info_t>, device_id_hash_t>;

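// Guards all access to device_info_cache(). Using a function-local static
// avoids static initialization order issues across translation units.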
utils::rw_mutex_t &device_info_cache_mutex() {
    static utils::rw_mutex_t m;
    return m;
}

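// Returns the process-wide device_info_t cache.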
device_info_cache_t &device_info_cache() {
    static device_info_cache_t cache;
    return cache;
}

// Returns true if found, false otherwise.
bool device_info_cache_get(
        std::shared_ptr<device_info_t> *result, engine_t *engine) {
    utils::lock_read_t lock(device_info_cache_mutex());

    auto it = device_info_cache().find(engine->device_id());
    if (it == device_info_cache().end()) return false;
    if (result) *result = it->second;
    return true;
}

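// Caches the device_info for the engine's device under a write lock. An
// existing entry for the same device is kept (insert does not overwrite).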
void device_info_cache_set(
        engine_t *engine, const std::shared_ptr<device_info_t> &device_info) {
    utils::lock_write_t lock(device_info_cache_mutex());

    // Clear the cache if it ever grows unexpectedly large to bound memory use.
    const size_t cache_size_threshold = 1024;
    if (device_info_cache().size() > cache_size_threshold)
        device_info_cache().clear();

    device_info_cache().insert({engine->device_id(), device_info});
}

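// Initializes device_info_, reusing a previously cached instance for the
// same device when one is available.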
status_t compute_engine_t::init() {
    return init({});
}

status_t compute_engine_t::init(const std::vector<uint8_t> &cache_blob) {
    if (device_info_cache_get(&device_info_, this)) return status::success;
    // The init_device_info() overload that takes a cache blob is only defined
    // for OpenCL, so dispatch manually here.
    if (cache_blob.empty())
        CHECK(init_device_info());
    else
        CHECK(init_device_info(cache_blob));
    device_info_cache_set(this, device_info_);

    return status::success;
}

} // namespace compute
} // namespace gpu
} // namespace impl
} // namespace dnnl

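// Reports whether the given GPU engine can use kernels generated with nGEN.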
bool dnnl_impl_gpu_mayiuse_ngen_kernels(dnnl::impl::engine_t *engine) {
    using namespace dnnl::impl;
    using namespace dnnl::impl::gpu::compute;

    auto *compute_engine = utils::downcast<compute_engine_t *>(engine);
    return compute_engine->mayiuse_ngen_kernels();
}