/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>
#include <cmath>
#include <cstring>

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/op_requires.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/platform/byte_order.h"

namespace tensorflow {

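// Decodes each string in the input tensor into a fixed-length record of
// `fixed_length / sizeof(T)` elements of type T. Strings shorter than
// `fixed_length` bytes are zero-padded; longer strings are truncated.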
template <typename T>
class DecodePaddedRawOp : public OpKernel {
 public:
  explicit DecodePaddedRawOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("out_type", &out_type_));

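    // The stored data only needs a byte swap when its declared endianness
    // differs from the endianness of the host running this kernel.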
    const bool host_is_little_endian = port::kLittleEndian;
    bool data_is_little_endian;
    OP_REQUIRES_OK(context,
                   context->GetAttr("little_endian", &data_is_little_endian));
    convert_data_endianness_ = host_is_little_endian != data_is_little_endian;
  }

  void Compute(OpKernelContext* context) override {
    const auto& input = context->input(0);
    auto flat_in = input.flat<tstring>();

    int fixed_length;
    const auto& length_input = context->input(1);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(length_input.shape()),
                errors::InvalidArgument(
                    "fixed_length must be a scalar, got shape ",
                    length_input.shape().DebugString()));
    fixed_length = length_input.scalar<int32>()();

    OP_REQUIRES(
        context, fixed_length % sizeof(T) == 0,
        errors::InvalidArgument(
            "fixed_length (", fixed_length,
            ") must be a multiple of the size of out_type (", sizeof(T), ")"));

    OP_REQUIRES(context, fixed_length > 0,
                errors::InvalidArgument("fixed_length (", fixed_length,
                                        ") must be greater than zero."));

    int width = fixed_length / sizeof(T);

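    // The output keeps the input's shape and appends one innermost dimension
    // holding the `width` decoded elements of each string.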
    TensorShape out_shape = input.shape();
    out_shape.AddDim(width);
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output("output", out_shape,
                                                     &output_tensor));

    if (flat_in.size() == 0) {  // Empty input
      return;
    }

    auto out = output_tensor->flat_inner_dims<T>();
    T* out_data = out.data();

    // Zero the whole output buffer up front. The input strings have variable
    // length, so any bytes the copies below do not overwrite must be defined
    // (zero) rather than left as uninitialized memory.
    memset(out_data, 0, fixed_length * flat_in.size());

    // If the data is already in the host's byte order, or if the width of the
    // output type is a single byte (meaning the ordering doesn't matter), we
    // can copy the memory directly.
    if (!convert_data_endianness_ || sizeof(T) == 1) {
      for (int64_t i = 0; i < flat_in.size(); ++i) {
        const auto to_copy =
            std::min(flat_in(i).size(), static_cast<size_t>(fixed_length));
        memcpy(out_data, flat_in(i).data(), to_copy);
        // Note: increase out_data by width since it's already of type T* so
        // each shift amount is implicitly multiplied by sizeof(T) according to
        // pointer arithmetic rules.
        out_data += width;
      }
    } else {
      // Otherwise, the data is not in the host's byte order, and rather than a
      // direct copy, we need to reverse the byte ordering of each element.
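      // For example, a 4-byte element stored as bytes b0 b1 b2 b3 is written
      // to the output as b3 b2 b1 b0.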
      for (int64_t i = 0; i < flat_in.size(); ++i) {
        const char* in_data_bytes =
            reinterpret_cast<const char*>(flat_in(i).data());
        char* out_data_bytes = reinterpret_cast<char*>(out_data);
        const char* p_in = in_data_bytes;
        char* p_out = out_data_bytes;
        // Only swap elements that are fully contained in the input string;
        // reading a full fixed_length from a shorter string would run past
        // the end of its buffer. The remaining output bytes stay zero from
        // the memset above.
        const size_t in_size =
            std::min(flat_in(i).size(), static_cast<size_t>(fixed_length));
        for (; p_in + sizeof(T) <= in_data_bytes + in_size;
             p_in += sizeof(T), p_out += sizeof(T)) {
          std::reverse_copy(p_in, p_in + sizeof(T), p_out);
        }
        // Note: increase out_data by width since it's already of type T* so
        // each shift amount is implicitly multiplied by sizeof(T) according to
        // pointer arithmetic rules.
        out_data += width;
      }
    }
  }

 private:
  // True if the endianness of the data and the endianness of the host are
  // different, and the data needs conversion.
  bool convert_data_endianness_;

  // Data type of the output tensor.
  DataType out_type_;
};

#define REGISTER(type)                                            \
  REGISTER_KERNEL_BUILDER(Name("DecodePaddedRaw")                 \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("out_type"),  \
                          DecodePaddedRawOp<type>)

REGISTER(float);
REGISTER(double);
REGISTER(int32);
REGISTER(uint16);
REGISTER(uint8);
REGISTER(int16);
REGISTER(int8);
REGISTER(int64_t);
REGISTER(bfloat16);

#undef REGISTER
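
// Example of the op's semantics (a sketch; in the Python API this kernel is
// reached through tf.io.decode_raw's `fixed_length` argument):
//   input = ["\x01\x02", "\x03"], fixed_length = 4, out_type = uint8
//   output shape = [2, 4]
//   output = [[1, 2, 0, 0], [3, 0, 0, 0]]
// Strings shorter than fixed_length are zero-padded; longer ones are
// truncated to fixed_length bytes.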

}  // namespace tensorflow