1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/kernels/string_to_hash_bucket_op.h" |
17 | |
18 | #include "tensorflow/core/lib/hash/hash.h" |
19 | #include "tensorflow/core/platform/strong_hash.h" |
20 | |
21 | namespace tensorflow { |
22 | |
23 | // Deprecated class. It also uses `string_tensor` as Op argument instead of |
24 | // `input`. |
25 | class LegacyStringToHashBucketOp : public OpKernel { |
26 | public: |
27 | explicit LegacyStringToHashBucketOp(OpKernelConstruction* ctx) |
28 | : OpKernel(ctx) { |
29 | OP_REQUIRES_OK(ctx, ctx->GetAttr("num_buckets" , &num_buckets_)); |
30 | } |
31 | |
32 | void Compute(OpKernelContext* context) override { |
33 | const Tensor* input_tensor; |
34 | OP_REQUIRES_OK(context, context->input("string_tensor" , &input_tensor)); |
35 | const auto& input_flat = input_tensor->flat<tstring>(); |
36 | |
37 | Tensor* output_tensor = nullptr; |
38 | OP_REQUIRES_OK(context, |
39 | context->allocate_output("output" , input_tensor->shape(), |
40 | &output_tensor)); |
41 | auto output_flat = output_tensor->flat<int64_t>(); |
42 | |
43 | typedef decltype(input_flat.size()) Index; |
44 | for (Index i = 0; i < input_flat.size(); ++i) { |
45 | const uint64 input_hash = Hash64(input_flat(i)); |
46 | const uint64 bucket_id = input_hash % num_buckets_; |
47 | // The number of buckets is always in the positive range of int64 so is |
48 | // the resulting bucket_id. Casting the bucket_id from uint64 to int64 is |
49 | // safe. |
50 | output_flat(i) = static_cast<int64_t>(bucket_id); |
51 | } |
52 | } |
53 | |
54 | private: |
55 | int64_t num_buckets_; |
56 | |
57 | TF_DISALLOW_COPY_AND_ASSIGN(LegacyStringToHashBucketOp); |
58 | }; |
59 | |
// StringToHashBucket is deprecated in favor of StringToHashBucketFast/Strong.
// It is still registered here for DEVICE_CPU, backed by the legacy kernel
// class defined in this file.
REGISTER_KERNEL_BUILDER(Name("StringToHashBucket" ).Device(DEVICE_CPU),
                        LegacyStringToHashBucketOp);

// DEVICE_CPU registration for StringToHashBucketStrong, using the
// StringToKeyedHashBucketOp template instantiated with StrongKeyedHash.
// NOTE(review): neither name is defined in this file; presumably they come
// from the headers included at the top -- confirm.
REGISTER_KERNEL_BUILDER(Name("StringToHashBucketStrong" ).Device(DEVICE_CPU),
                        StringToKeyedHashBucketOp<StrongKeyedHash>);
66 | |
67 | } // namespace tensorflow |
68 | |