1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/string_ops.cc. |
17 | |
18 | #include <string> |
19 | |
20 | #include "absl/strings/ascii.h" |
21 | #include "unicode/unistr.h" // from @icu |
22 | #include "tensorflow/core/framework/kernel_def_builder.h" |
23 | #include "tensorflow/core/framework/op_kernel.h" |
24 | #include "tensorflow/core/framework/tensor.h" |
25 | #include "tensorflow/core/lib/core/errors.h" |
26 | #include "tensorflow/core/lib/core/status.h" |
27 | #include "tensorflow/core/lib/strings/str_util.h" |
28 | |
29 | namespace tensorflow { |
30 | |
31 | class StringUpperOp : public OpKernel { |
32 | public: |
33 | explicit StringUpperOp(OpKernelConstruction* context) : OpKernel(context) { |
34 | OP_REQUIRES_OK(context, context->GetAttr("encoding" , &encoding_)); |
35 | OP_REQUIRES(context, encoding_.empty() || encoding_ == "utf-8" , |
36 | errors::InvalidArgument( |
37 | "only utf-8 or '' (no encoding) is supported, received " , |
38 | encoding_)); |
39 | } |
40 | |
41 | void Compute(OpKernelContext* ctx) override { |
42 | const Tensor* input_tensor; |
43 | OP_REQUIRES_OK(ctx, ctx->input("input" , &input_tensor)); |
44 | Tensor* output_tensor; |
45 | OP_REQUIRES_OK( |
46 | ctx, ctx->allocate_output(0, input_tensor->shape(), &output_tensor)); |
47 | |
48 | const auto input = input_tensor->flat<tstring>(); |
49 | auto output = output_tensor->flat<tstring>(); |
50 | if (encoding_.empty()) { |
51 | for (int64_t i = 0; i < input.size(); ++i) { |
52 | StringPiece entry(input(i)); |
53 | output(i) = absl::AsciiStrToUpper(entry); |
54 | } |
55 | } else { |
56 | // The validation of utf-8 has already been done in GetAttr above. |
57 | for (int64_t i = 0; i < input.size(); ++i) { |
58 | icu::UnicodeString us(input(i).c_str(), "UTF-8" ); |
59 | us.toUpper(); |
60 | us.toUTF8String(output(i)); |
61 | } |
62 | } |
63 | } |
64 | |
65 | private: |
66 | string encoding_; |
67 | }; |
68 | |
// Registers StringUpperOp as the kernel for the "StringUpper" op on
// DEVICE_CPU.
REGISTER_KERNEL_BUILDER(Name("StringUpper" ).Device(DEVICE_CPU), StringUpperOp);
70 | |
71 | } // namespace tensorflow |
72 | |