1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/audio_ops.cc |
17 | |
18 | #include "tensorflow/core/framework/op_kernel.h" |
19 | #include "tensorflow/core/framework/register_types.h" |
20 | #include "tensorflow/core/framework/tensor.h" |
21 | #include "tensorflow/core/framework/tensor_shape.h" |
22 | #include "tensorflow/core/framework/types.h" |
23 | #include "tensorflow/core/lib/core/status.h" |
24 | #include "tensorflow/core/lib/wav/wav_io.h" |
25 | |
26 | namespace tensorflow { |
27 | |
28 | // Decode the contents of a WAV file |
29 | class DecodeWavOp : public OpKernel { |
30 | public: |
31 | explicit DecodeWavOp(OpKernelConstruction* context) : OpKernel(context) { |
32 | OP_REQUIRES_OK(context, |
33 | context->GetAttr("desired_channels" , &desired_channels_)); |
34 | OP_REQUIRES_OK(context, |
35 | context->GetAttr("desired_samples" , &desired_samples_)); |
36 | } |
37 | |
38 | void Compute(OpKernelContext* context) override { |
39 | const Tensor& contents = context->input(0); |
40 | OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()), |
41 | errors::InvalidArgument("contents must be scalar, got shape " , |
42 | contents.shape().DebugString())); |
43 | const string& wav_string = contents.scalar<tstring>()(); |
44 | OP_REQUIRES(context, wav_string.size() <= std::numeric_limits<int>::max(), |
45 | errors::InvalidArgument("WAV contents are too large for int: " , |
46 | wav_string.size())); |
47 | |
48 | std::vector<float> decoded_samples; |
49 | uint32 decoded_sample_count; |
50 | uint16 decoded_channel_count; |
51 | uint32 decoded_sample_rate; |
52 | OP_REQUIRES_OK(context, |
53 | wav::DecodeLin16WaveAsFloatVector( |
54 | wav_string, &decoded_samples, &decoded_sample_count, |
55 | &decoded_channel_count, &decoded_sample_rate)); |
56 | |
57 | int32_t output_sample_count; |
58 | if (desired_samples_ == -1) { |
59 | output_sample_count = decoded_sample_count; |
60 | } else { |
61 | output_sample_count = desired_samples_; |
62 | } |
63 | int32_t output_channel_count; |
64 | if (desired_channels_ == -1) { |
65 | output_channel_count = decoded_channel_count; |
66 | } else { |
67 | output_channel_count = desired_channels_; |
68 | } |
69 | |
70 | Tensor* output = nullptr; |
71 | OP_REQUIRES_OK( |
72 | context, |
73 | context->allocate_output( |
74 | 0, TensorShape({output_sample_count, output_channel_count}), |
75 | &output)); |
76 | |
77 | auto output_matrix = output->matrix<float>(); |
78 | for (int sample = 0; sample < output_sample_count; ++sample) { |
79 | for (int channel = 0; channel < output_channel_count; ++channel) { |
80 | float output_value; |
81 | if (sample >= decoded_sample_count) { |
82 | output_value = 0.0f; |
83 | } else { |
84 | int source_channel; |
85 | if (channel < decoded_channel_count) { |
86 | source_channel = channel; |
87 | } else { |
88 | source_channel = decoded_channel_count - 1; |
89 | } |
90 | const int decoded_index = |
91 | (sample * decoded_channel_count) + source_channel; |
92 | output_value = decoded_samples[decoded_index]; |
93 | } |
94 | output_matrix(sample, channel) = output_value; |
95 | } |
96 | } |
97 | |
98 | Tensor* sample_rate_output = nullptr; |
99 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape({}), |
100 | &sample_rate_output)); |
101 | sample_rate_output->flat<int32>()(0) = decoded_sample_rate; |
102 | } |
103 | |
104 | private: |
105 | int32 desired_channels_; |
106 | int32 desired_samples_; |
107 | }; |
// Registers DecodeWavOp as the DEVICE_CPU kernel for the op named "DecodeWav".
REGISTER_KERNEL_BUILDER(Name("DecodeWav" ).Device(DEVICE_CPU), DecodeWavOp);
109 | |
110 | } // namespace tensorflow |
111 | |