/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_RANDOM_OP_CPU_H_
#define TENSORFLOW_CORE_KERNELS_RANDOM_OP_CPU_H_

#define EIGEN_USE_THREADS

#include <algorithm>
#include <cmath>
#include <memory>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/random_op.h"
#include "tensorflow/core/kernels/random_ops_util.h"
#include "tensorflow/core/lib/hash/crc32c.h"
#include "tensorflow/core/lib/random/random_distributions.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/guarded_philox_random.h"
#include "tensorflow/core/util/work_sharder.h"

#if EIGEN_COMP_GNUC && __cplusplus > 199711L
#define DISABLE_FLOAT_EQUALITY_WARNING \
  _Pragma("GCC diagnostic push")       \
  _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop")
#else
#define DISABLE_FLOAT_EQUALITY_WARNING
#define ENABLE_FLOAT_EQUALITY_WARNING
#endif

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

namespace functor {
using random::PhiloxRandom;
using random::SingleSampleAdapter;

// The default implementation of the functor, which should never be invoked.
// We still need to provide an implementation for now so that the linker
// works, since we do not support all the distributions yet.
template <typename Device, class Distribution>
struct FillPhiloxRandom {
  typedef typename Distribution::ResultElementType T;
  void operator()(OpKernelContext* ctx, const Device&, const uint64* key,
                  const uint64* counter, random::PhiloxRandom gen, T* data,
                  int64_t size, Distribution dist) {
    OP_REQUIRES(
        ctx, false,
        errors::Internal(
            "Default `FillPhiloxRandom` implementation should not be executed. "
            "The cause of this error is probably that `FillPhiloxRandom` does "
            "not support this device or random distribution yet."));
  }
};

// A class to fill a specified range of random groups
template <class Distribution, bool VariableSamplesPerOutput>
struct FillPhiloxRandomTask;

// Specialization for a distribution that takes a fixed number of samples for
// each output.
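// For example (illustrative only), with kGroupSize == 4 and size == 10,
// groups 0 and 1 are full four-element groups and group 2 holds the two
// remaining elements; a task given [start_group, limit_group) fills only the
// groups it owns, so the overall result does not depend on the sharding.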
template <class Distribution>
struct FillPhiloxRandomTask<Distribution, false> {
  typedef typename Distribution::ResultElementType T;
  static void Run(random::PhiloxRandom gen, T* data, int64_t size,
                  int64_t start_group, int64_t limit_group, Distribution dist) {
    const int kGroupSize = Distribution::kResultElementCount;

    gen.Skip(start_group);
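    // A fixed-sample `dist` is expected to consume exactly one PhiloxRandom
    // sample group per call, so the Skip above positions `gen` at the first
    // group owned by this task, keeping results independent of sharding.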
    int64_t offset = start_group * kGroupSize;

    // First fill all the full-size groups
    int64_t limit_group_full = std::min(limit_group, size / kGroupSize);
    for (int64_t index = start_group; index < limit_group_full; ++index) {
      auto samples = dist(&gen);
      std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
      offset += kGroupSize;
    }

    // If there are any remaining elements that need to be filled, process them
    if (limit_group_full < limit_group) {
      int64_t remaining_size = size - limit_group_full * kGroupSize;
      auto samples = dist(&gen);
      std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
    }
  }
};

// Specialization for a distribution that takes a variable number of samples
// for each output. This will be slower due to the generality.
template <class Distribution>
struct FillPhiloxRandomTask<Distribution, true> {
  typedef typename Distribution::ResultElementType T;
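  // Budget of generator samples reserved for each output element; assumed to
  // be large enough that the distribution never consumes more than this many
  // samples to produce a single output.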
  static constexpr int64_t kReservedSamplesPerOutput = 256;

  static void Run(random::PhiloxRandom base_gen, T* data, int64_t size,
                  int64_t start_group, int64_t limit_group, Distribution dist) {
    const int kGroupSize = Distribution::kResultElementCount;

    static const int kGeneratorSkipPerOutputGroup =
        kGroupSize * kReservedSamplesPerOutput /
        PhiloxRandom::kResultElementCount;
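    // For example, with PhiloxRandom::kResultElementCount == 4, each output
    // group reserves kGroupSize * 256 / 4 generator samples, so group i is
    // always generated starting i * kGeneratorSkipPerOutputGroup samples into
    // the stream, regardless of which task processes it.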

    int64_t offset = start_group * kGroupSize;

    // First fill all the full-size groups
    int64_t limit_group_full = std::min(limit_group, size / kGroupSize);
    int64_t group_index;
    for (group_index = start_group; group_index < limit_group_full;
         ++group_index) {
      // Reset the generator to the beginning of this output group's region.
      // This is necessary so that the results are independent of how the
      // work is ordered across tasks.
      PhiloxRandom gen = base_gen;
      gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
      SingleSampleAdapter<PhiloxRandom> single_samples(&gen);

      auto samples = dist(&single_samples);
      std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
      offset += kGroupSize;
    }

    // If there are any remaining elements that need to be filled, process them
    if (limit_group_full < limit_group) {
      PhiloxRandom gen = base_gen;
      gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
      SingleSampleAdapter<PhiloxRandom> single_samples(&gen);

      int64_t remaining_size = size - limit_group_full * kGroupSize;
      auto samples = dist(&single_samples);
      std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
    }
  }
};

// Partial specialization for CPU to fill the entire region with random values.
// It splits the work into several tasks and runs them in parallel.
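//
// Illustrative usage sketch (a rough approximation of how a random kernel
// might invoke this functor; `ctx`, `gen`, and `output` are assumed to be a
// valid OpKernelContext*, a seeded PhiloxRandom, and a float Tensor*):
//
//   using Dist = random::UniformDistribution<random::PhiloxRandom, float>;
//   functor::FillPhiloxRandom<CPUDevice, Dist>()(
//       ctx, ctx->eigen_device<CPUDevice>(), /*key=*/nullptr,
//       /*counter=*/nullptr, gen, output->flat<float>().data(),
//       output->NumElements(), Dist());
//
// Passing non-null `key`/`counter` pointers overrides `gen`, as handled below.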
template <class Distribution>
void FillPhiloxRandom<CPUDevice, Distribution>::operator()(
    OpKernelContext* ctx, const CPUDevice&, const uint64* key,
    const uint64* counter, random::PhiloxRandom gen,
    typename Distribution::ResultElementType* data, int64_t size,
    Distribution dist) {
  const int kGroupSize = Distribution::kResultElementCount;

  auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());

  int64_t total_group_count = (size + kGroupSize - 1) / kGroupSize;

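  // Rough per-group cost estimate passed to Shard() below so it can pick a
  // reasonable block size: the number of elements in one sample group times
  // the per-element cost of generation plus the distribution transform.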
  const int kGroupCost =
      random::PhiloxRandom::kResultElementCount *
      (random::PhiloxRandom::kElementCost + Distribution::kElementCost);

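  // An explicit counter/key pair (e.g. supplied by a stateless random kernel)
  // takes precedence over the `gen` argument.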
  if (key != nullptr && counter != nullptr) {
    gen = GetPhiloxRandomFromCounterKeyMem(counter, key);
  }

  Shard(worker_threads.num_threads, worker_threads.workers, total_group_count,
        kGroupCost,
        [&gen, data, size, dist](int64_t start_group, int64_t limit_group) {
          FillPhiloxRandomTask<
              Distribution,
              Distribution::kVariableSamplesPerOutput>::Run(gen, data, size,
                                                            start_group,
                                                            limit_group, dist);
        });
}

}  // namespace functor

}  // end namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_RANDOM_OP_CPU_H_