1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/audio_ops.cc |
17 | |
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/spectrogram.h"
#include "tensorflow/core/lib/core/status.h"
25 | |
26 | namespace tensorflow { |
27 | |
28 | // Create a spectrogram frequency visualization from audio data. |
29 | class AudioSpectrogramOp : public OpKernel { |
30 | public: |
31 | explicit AudioSpectrogramOp(OpKernelConstruction* context) |
32 | : OpKernel(context) { |
33 | OP_REQUIRES_OK(context, context->GetAttr("window_size" , &window_size_)); |
34 | OP_REQUIRES_OK(context, context->GetAttr("stride" , &stride_)); |
35 | OP_REQUIRES_OK(context, |
36 | context->GetAttr("magnitude_squared" , &magnitude_squared_)); |
37 | } |
38 | |
39 | void Compute(OpKernelContext* context) override { |
40 | const Tensor& input = context->input(0); |
41 | OP_REQUIRES(context, input.dims() == 2, |
42 | errors::InvalidArgument("input must be 2-dimensional" , |
43 | input.shape().DebugString())); |
44 | Spectrogram spectrogram; |
45 | OP_REQUIRES(context, spectrogram.Initialize(window_size_, stride_), |
46 | errors::InvalidArgument( |
47 | "Spectrogram initialization failed for window size " , |
48 | window_size_, " and stride " , stride_)); |
49 | |
50 | const auto input_as_matrix = input.matrix<float>(); |
51 | |
52 | const int64_t sample_count = input.dim_size(0); |
53 | const int64_t channel_count = input.dim_size(1); |
54 | |
55 | const int64_t output_width = spectrogram.output_frequency_channels(); |
56 | const int64_t length_minus_window = (sample_count - window_size_); |
57 | int64_t output_height; |
58 | if (length_minus_window < 0) { |
59 | output_height = 0; |
60 | } else { |
61 | output_height = 1 + (length_minus_window / stride_); |
62 | } |
63 | const int64_t output_slices = channel_count; |
64 | |
65 | Tensor* output_tensor = nullptr; |
66 | OP_REQUIRES_OK( |
67 | context, |
68 | context->allocate_output( |
69 | 0, TensorShape({output_slices, output_height, output_width}), |
70 | &output_tensor)); |
71 | auto output_flat = output_tensor->flat<float>().data(); |
72 | |
73 | std::vector<float> input_for_channel(sample_count); |
74 | for (int64_t channel = 0; channel < channel_count; ++channel) { |
75 | OP_REQUIRES(context, spectrogram.Reset(), |
76 | errors::InvalidArgument("Failed to Reset()" )); |
77 | |
78 | float* output_slice = |
79 | output_flat + (channel * output_height * output_width); |
80 | for (int i = 0; i < sample_count; ++i) { |
81 | input_for_channel[i] = input_as_matrix(i, channel); |
82 | } |
83 | std::vector<std::vector<float>> spectrogram_output; |
84 | OP_REQUIRES(context, |
85 | spectrogram.ComputeSquaredMagnitudeSpectrogram( |
86 | input_for_channel, &spectrogram_output), |
87 | errors::InvalidArgument("Spectrogram compute failed" )); |
88 | OP_REQUIRES(context, (spectrogram_output.size() == output_height), |
89 | errors::InvalidArgument( |
90 | "Spectrogram size calculation failed: Expected height " , |
91 | output_height, " but got " , spectrogram_output.size())); |
92 | OP_REQUIRES(context, |
93 | spectrogram_output.empty() || |
94 | (spectrogram_output[0].size() == output_width), |
95 | errors::InvalidArgument( |
96 | "Spectrogram size calculation failed: Expected width " , |
97 | output_width, " but got " , spectrogram_output[0].size())); |
98 | for (int row_index = 0; row_index < output_height; ++row_index) { |
99 | const std::vector<float>& spectrogram_row = |
100 | spectrogram_output[row_index]; |
101 | DCHECK_EQ(spectrogram_row.size(), output_width); |
102 | float* output_row = output_slice + (row_index * output_width); |
103 | if (magnitude_squared_) { |
104 | for (int i = 0; i < output_width; ++i) { |
105 | output_row[i] = spectrogram_row[i]; |
106 | } |
107 | } else { |
108 | for (int i = 0; i < output_width; ++i) { |
109 | output_row[i] = sqrtf(spectrogram_row[i]); |
110 | } |
111 | } |
112 | } |
113 | } |
114 | } |
115 | |
116 | private: |
117 | int32 window_size_; |
118 | int32 stride_; |
119 | bool magnitude_squared_; |
120 | }; |
121 | REGISTER_KERNEL_BUILDER(Name("AudioSpectrogram" ).Device(DEVICE_CPU), |
122 | AudioSpectrogramOp); |
123 | |
124 | } // namespace tensorflow |
125 | |