1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #define EIGEN_USE_THREADS |
17 | |
18 | #include "tensorflow/core/kernels/dense_update_functor.h" |
19 | |
20 | #include "tensorflow/core/framework/register_types.h" |
21 | #include "tensorflow/core/framework/variant_op_registry.h" |
22 | #include "tensorflow/core/lib/core/errors.h" |
23 | #include "tensorflow/core/platform/mutex.h" |
24 | #include "tensorflow/core/platform/types.h" |
25 | |
26 | namespace tensorflow { |
27 | |
28 | typedef Eigen::ThreadPoolDevice CPUDevice; |
29 | typedef Eigen::GpuDevice GPUDevice; |
30 | |
31 | namespace functor { |
32 | |
33 | template <> |
34 | struct DenseUpdate<CPUDevice, string, ASSIGN> { |
35 | void operator()(const CPUDevice& d, typename TTypes<tstring>::Flat params, |
36 | typename TTypes<tstring>::ConstFlat update) { |
37 | if (params.dimension(0) == 1) { |
38 | params.data()->resize(update.data()->size()); |
39 | auto work = [¶ms, &update](int64_t start, int64_t end) { |
40 | memmove(const_cast<char*>(params.data()->data()) + start, |
41 | update.data()->data() + start, end - start); |
42 | }; |
43 | d.parallelFor(update.data()->size(), |
44 | Eigen::TensorOpCost(.1, // chosen to force large chunks |
45 | .1, 0), |
46 | work); |
47 | } else { |
48 | auto work = [¶ms, &update](int64_t start, int64_t end) { |
49 | for (int i = start; i < end; ++i) { |
50 | params.data()[i].resize(update.data()[i].size()); |
51 | memmove(const_cast<char*>(params.data()[i].data()), |
52 | update.data()[i].data(), update.data()[i].size()); |
53 | } |
54 | }; |
55 | int64_t estimated_string_size; |
56 | if (update.size() > 0) { |
57 | // first element of the tensor seems as good a guess as any of the sizes |
58 | // of the strings contained within... |
59 | estimated_string_size = |
60 | std::max(update.data()[0].size(), sizeof(tstring)); |
61 | } else { |
62 | estimated_string_size = sizeof(tstring); |
63 | } |
64 | d.parallelFor( |
65 | params.dimension(0), |
66 | Eigen::TensorOpCost(estimated_string_size, estimated_string_size, 0), |
67 | work); |
68 | } |
69 | } |
70 | }; |
71 | |
72 | } // namespace functor |
73 | |
// Expands to one `case` of a dtype switch: for element type T, runs the CPU
// ASSIGN functor to copy `from` into `tensor`. The expansion site must have
// `context`, `tensor` and `from` in scope.
#define CPU_DENSE_COPY(T)                                              \
  case DataTypeToEnum<T>::value: {                                     \
    const CPUDevice& cpu_device_ = context->eigen_device<CPUDevice>(); \
    functor::DenseUpdate<CPUDevice, T, ASSIGN> assign_on_cpu_;         \
    assign_on_cpu_(cpu_device_, tensor.flat<T>(), from.flat<T>());     \
    break;                                                             \
  }
81 | |
82 | #define INSTANTIATE_GET_VARIANT_COPY_FN(DEVICE, TYPE_CALLER, TYPE_DENSE_COPY) \ |
83 | template <> \ |
84 | Status VariantCopyFn<DEVICE>(OpKernelContext * context, const Tensor& from, \ |
85 | Tensor* to) { \ |
86 | Tensor tensor; \ |
87 | AllocatorAttributes attr; \ |
88 | attr.set_gpu_compatible(true); \ |
89 | attr.set_nic_compatible(true); \ |
90 | TF_RETURN_IF_ERROR( \ |
91 | context->allocate_temp(from.dtype(), from.shape(), &tensor, attr)); \ |
92 | switch (from.dtype()) { \ |
93 | TYPE_CALLER(TYPE_DENSE_COPY); \ |
94 | default: \ |
95 | return errors::InvalidArgument( \ |
96 | "VariantCopyFn: Could not perform a deep copy of variant " \ |
97 | "element of type: ", \ |
98 | DataTypeString(from.dtype()), \ |
99 | " using device: ", context->device()->name()); \ |
100 | } \ |
101 | *to = tensor; \ |
102 | return OkStatus(); \ |
103 | } |
104 | |
105 | INSTANTIATE_GET_VARIANT_COPY_FN(CPUDevice, TF_CALL_ALL_TYPES, CPU_DENSE_COPY); |
106 | |
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU counterpart of the CPU copy case: for element type T, runs the GPU
// ASSIGN functor to copy `from` into `tensor`. The expansion site must have
// `context`, `tensor` and `from` in scope.
#define GPU_DENSE_COPY(T)                                              \
  case DataTypeToEnum<T>::value: {                                     \
    const GPUDevice& gpu_device_ = context->eigen_device<GPUDevice>(); \
    functor::DenseUpdate<GPUDevice, T, ASSIGN> assign_on_gpu_;         \
    assign_on_gpu_(gpu_device_, tensor.flat<T>(), from.flat<T>());     \
    break;                                                             \
  }
// Type list for the GPU specialization: everything TF_CALL_GPU_ALL_TYPES
// covers, plus int32 and int64.
#define TF_CALL_GPU_AND_ADDITIONAL_TYPES(T) \
  TF_CALL_GPU_ALL_TYPES(T);                 \
  TF_CALL_int32(T);                         \
  TF_CALL_int64(T);
INSTANTIATE_GET_VARIANT_COPY_FN(GPUDevice, TF_CALL_GPU_AND_ADDITIONAL_TYPES,
                                GPU_DENSE_COPY);
// Both helpers exist only for the instantiation above.
#undef GPU_DENSE_COPY
#undef TF_CALL_GPU_AND_ADDITIONAL_TYPES
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
124 | |
// The helper macros have served their purpose once the specializations above
// are defined; undefine them so the names do not outlive this point.
#undef INSTANTIATE_GET_VARIANT_COPY_FN
#undef CPU_DENSE_COPY
127 | |
128 | } // namespace tensorflow |
129 | |