1/*******************************************************************************
2* Copyright 2017-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include <memory>
18
19#include "engine.hpp"
20#include "utils.hpp"
21
22#if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
23#include "cpu/cpu_engine.hpp"
24#endif
25
26#include "scratchpad.hpp"
27
28namespace dnnl {
29namespace impl {
30
31namespace {
32
33memory_storage_t *create_scratchpad_memory_storage(
34 engine_t *engine, size_t size) {
35 // XXX: if engine is a non-native CPU engine (read: SYCL) then create
36 // scratchpad through other, native CPU engine.
37 //
38 // SYCL CPU engine has asynchronous execution, and the library has to
39 // extend (if needed) primitive lifetime until a kernel is completed.
40 // For that, the library implements a reference-counting mechanism for
41 // primitives (including internal scratchpads). In some cases a
42 // scratchpad has to be destroyed from inside a kernel. This doesn't
43 // play well with SYCL runtime, so switching to native CPU engine for such
44 // cases.
45 engine_t *mem_engine = nullptr;
46#if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
47 mem_engine = (engine->kind() == engine_kind::cpu
48 && !is_native_runtime(engine->runtime_kind()))
49 ? cpu::get_service_engine()
50 : engine;
51#else
52 mem_engine = engine;
53#endif
54
55 memory_storage_t *mem_storage = nullptr;
56 auto status = mem_engine->create_memory_storage(&mem_storage, size);
57 MAYBE_UNUSED(status);
58 return mem_storage;
59}
60
61} // namespace
62
63/*
64 Implementation of the scratchpad_t interface that is compatible with
65 a concurrent execution
66*/
67struct concurrent_scratchpad_t : public scratchpad_t {
68 concurrent_scratchpad_t(engine_t *engine, size_t size) {
69 auto *mem_storage = create_scratchpad_memory_storage(engine, size);
70 size_ = size;
71 if (mem_storage == nullptr) size_ = 0;
72
73 mem_storage_.reset(mem_storage);
74 }
75
76 const memory_storage_t *get_memory_storage() const override {
77 return mem_storage_.get();
78 }
79
80 size_t size() const override { return size_; }
81
82private:
83 std::unique_ptr<memory_storage_t> mem_storage_;
84 size_t size_;
85
86 DNNL_DISALLOW_COPY_AND_ASSIGN(concurrent_scratchpad_t);
87};
88
89/*
90 Implementation of the scratchpad_t interface that uses a global
91 scratchpad
92*/
93
94struct global_scratchpad_t : public scratchpad_t {
95 global_scratchpad_t(engine_t *engine, size_t size) {
96 // TODO: check if engine is the same
97 if (size > size_) {
98 delete mem_storage_;
99 // Try to expand the global scratchpad to the necessary size
100 mem_storage_ = create_scratchpad_memory_storage(engine, size);
101 if (mem_storage_ == nullptr) {
102 // Recreate scratchpad with original capacity
103 mem_storage_ = create_scratchpad_memory_storage(engine, size_);
104 if (mem_storage_ == nullptr) size_ = 0;
105 } else
106 size_ = size;
107 }
108 reference_count_++;
109 }
110
111 ~global_scratchpad_t() override {
112 reference_count_--;
113 if (reference_count_ == 0) {
114 delete mem_storage_;
115 mem_storage_ = nullptr;
116 size_ = 0;
117 }
118 }
119
120 const memory_storage_t *get_memory_storage() const override {
121 return mem_storage_;
122 }
123
124 size_t size() const override { return size_; }
125
126private:
127 thread_local static memory_storage_t *mem_storage_;
128 thread_local static size_t size_;
129 thread_local static unsigned int reference_count_;
130};
131
132// CAVEAT: avoid having non-trivially-constructed thread-local objects. Their
133// construction order may depends on the program execution and the final
134// destruction order may be such that a thread-local object is destroyed
135// before all its users are destroyed thus causing a crash at exit.
136// Tested by tests/gtests/test_global_scratchad.cpp
137thread_local memory_storage_t *global_scratchpad_t::mem_storage_ = nullptr;
138thread_local size_t global_scratchpad_t::size_ = 0;
139thread_local unsigned int global_scratchpad_t::reference_count_ = 0;
140
141/*
142 Scratchpad creation routine
143*/
144scratchpad_t *create_scratchpad(
145 engine_t *engine, size_t size, bool use_global_scratchpad) {
146#ifndef DNNL_ENABLE_CONCURRENT_EXEC
147 /*
148 * TODO: global scratchpad should be able to handle memory
149 * from different engines.
150 * lock global scratchpad to work with CPU engine only.
151 */
152 if (use_global_scratchpad && engine->kind() == engine_kind_t::dnnl_cpu)
153 return new global_scratchpad_t(engine, size);
154 else
155 return new concurrent_scratchpad_t(engine, size);
156#else
157 UNUSED(use_global_scratchpad);
158 return new concurrent_scratchpad_t(engine, size);
159#endif
160}
161
162} // namespace impl
163} // namespace dnnl
164