1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/math_ops.cc. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
#include "tensorflow/core/kernels/histogram_op.h"

#include <cmath>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/platform/types.h"
26 | |
27 | namespace tensorflow { |
28 | |
29 | typedef Eigen::ThreadPoolDevice CPUDevice; |
30 | typedef Eigen::GpuDevice GPUDevice; |
31 | |
32 | namespace functor { |
33 | |
34 | template <typename T, typename Tout> |
35 | struct HistogramFixedWidthFunctor<CPUDevice, T, Tout> { |
36 | static Status Compute(OpKernelContext* context, |
37 | const typename TTypes<T, 1>::ConstTensor& values, |
38 | const typename TTypes<T, 1>::ConstTensor& value_range, |
39 | int32_t nbins, typename TTypes<Tout, 1>::Tensor& out) { |
40 | const CPUDevice& d = context->eigen_device<CPUDevice>(); |
41 | |
42 | Tensor index_to_bin_tensor; |
43 | |
44 | TF_RETURN_IF_ERROR(context->forward_input_or_allocate_temp( |
45 | {0}, DataTypeToEnum<int32>::value, TensorShape({values.size()}), |
46 | &index_to_bin_tensor)); |
47 | auto index_to_bin = index_to_bin_tensor.flat<int32>(); |
48 | |
49 | const double step = static_cast<double>(value_range(1) - value_range(0)) / |
50 | static_cast<double>(nbins); |
51 | const double nbins_minus_1 = static_cast<double>(nbins - 1); |
52 | |
53 | // We cannot handle NANs in the algorithm below (due to the case to int32) |
54 | const Eigen::Tensor<int32, 1, 1> nans_tensor = |
55 | values.isnan().template cast<int32>(); |
56 | const Eigen::Tensor<int32, 0, 1> reduced_tensor = nans_tensor.sum(); |
57 | const int num_nans = reduced_tensor(0); |
58 | if (num_nans > 0) { |
59 | return errors::InvalidArgument("Histogram values must not contain NaN" ); |
60 | } |
61 | |
62 | // The calculation is done by finding the slot of each value in `values`. |
63 | // With [a, b]: |
64 | // step = (b - a) / nbins |
65 | // (x - a) / step |
66 | // , then the entries are mapped to output. |
67 | |
68 | // Bug fix: Switch the order of cwiseMin and int32-casting to avoid |
69 | // producing a negative index when casting an big int64 number to int32 |
70 | index_to_bin.device(d) = |
71 | ((values.cwiseMax(value_range(0)) - values.constant(value_range(0))) |
72 | .template cast<double>() / |
73 | step) |
74 | .cwiseMin(nbins_minus_1) |
75 | .template cast<int32>(); |
76 | |
77 | out.setZero(); |
78 | for (int32_t i = 0; i < index_to_bin.size(); i++) { |
79 | out(index_to_bin(i)) += Tout(1); |
80 | } |
81 | return OkStatus(); |
82 | } |
83 | }; |
84 | |
85 | } // namespace functor |
86 | |
87 | template <typename Device, typename T, typename Tout> |
88 | class HistogramFixedWidthOp : public OpKernel { |
89 | public: |
90 | explicit HistogramFixedWidthOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} |
91 | |
92 | void Compute(OpKernelContext* ctx) override { |
93 | const Tensor& values_tensor = ctx->input(0); |
94 | const Tensor& value_range_tensor = ctx->input(1); |
95 | const Tensor& nbins_tensor = ctx->input(2); |
96 | |
97 | OP_REQUIRES(ctx, TensorShapeUtils::IsVector(value_range_tensor.shape()), |
98 | errors::InvalidArgument("value_range should be a vector." )); |
99 | OP_REQUIRES(ctx, (value_range_tensor.shape().num_elements() == 2), |
100 | errors::InvalidArgument( |
101 | "value_range should be a vector of 2 elements." )); |
102 | OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(nbins_tensor.shape()), |
103 | errors::InvalidArgument("nbins should be a scalar." )); |
104 | |
105 | const auto values = values_tensor.flat<T>(); |
106 | const auto value_range = value_range_tensor.flat<T>(); |
107 | const auto nbins = nbins_tensor.scalar<int32>()(); |
108 | |
109 | OP_REQUIRES( |
110 | ctx, value_range(0) < value_range(1), |
111 | errors::InvalidArgument("value_range should satisfy value_range[0] < " |
112 | "value_range[1], but got '[" , |
113 | value_range(0), ", " , value_range(1), "]'" )); |
114 | OP_REQUIRES( |
115 | ctx, nbins > 0, |
116 | errors::InvalidArgument("nbins should be a positive number, but got '" , |
117 | nbins, "'" )); |
118 | |
119 | Tensor* out_tensor; |
120 | OP_REQUIRES_OK(ctx, |
121 | ctx->allocate_output(0, TensorShape({nbins}), &out_tensor)); |
122 | auto out = out_tensor->flat<Tout>(); |
123 | |
124 | OP_REQUIRES_OK( |
125 | ctx, functor::HistogramFixedWidthFunctor<Device, T, Tout>::Compute( |
126 | ctx, values, value_range, nbins, out)); |
127 | } |
128 | }; |
129 | |
130 | #define REGISTER_KERNELS(type) \ |
131 | REGISTER_KERNEL_BUILDER(Name("HistogramFixedWidth") \ |
132 | .Device(DEVICE_CPU) \ |
133 | .TypeConstraint<type>("T") \ |
134 | .TypeConstraint<int32>("dtype"), \ |
135 | HistogramFixedWidthOp<CPUDevice, type, int32>) \ |
136 | REGISTER_KERNEL_BUILDER(Name("HistogramFixedWidth") \ |
137 | .Device(DEVICE_CPU) \ |
138 | .TypeConstraint<type>("T") \ |
139 | .TypeConstraint<int64_t>("dtype"), \ |
140 | HistogramFixedWidthOp<CPUDevice, type, int64>) |
141 | |
142 | TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS); |
143 | #undef REGISTER_KERNELS |
144 | |
// GPU kernels are only registered in CUDA or ROCm builds.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Registers the GPU kernel of "HistogramFixedWidth" for input type `type`.
// Only the int32 output dtype is registered here. `value_range` and `nbins`
// are marked as host-memory inputs; the kernel's Compute() reads their
// elements directly.
#define REGISTER_KERNELS(type)                                           \
  REGISTER_KERNEL_BUILDER(Name("HistogramFixedWidth")                    \
                              .Device(DEVICE_GPU)                        \
                              .HostMemory("value_range")                 \
                              .HostMemory("nbins")                       \
                              .TypeConstraint<type>("T")                 \
                              .TypeConstraint<int32>("dtype"),           \
                          HistogramFixedWidthOp<GPUDevice, type, int32>)

TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
159 | |
160 | } // end namespace tensorflow |
161 | |