1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // Class for generating spectrogram slices from a waveform. |
17 | // Initialize() should be called before calls to other functions. Once |
18 | // Initialize() has been called and returned true, the Compute*() functions can |
19 | // be called repeatedly with sequential input data (i.e. the first element of the |
20 | // next input vector directly follows the last element of the previous input |
21 | // vector). Whenever enough audio samples are buffered to produce a |
22 | // new frame, it will be placed in output. Output is cleared on each |
23 | // call to Compute*(). This class is thread-unsafe, and should only be |
24 | // called from one thread at a time. |
25 | // With the default parameters, the output of this class should be very |
26 | // close to the results of the following MATLAB code: |
27 | // overlap_samples = window_length_samples - step_samples; |
28 | // window = hann(window_length_samples, 'periodic'); |
29 | // S = abs(spectrogram(audio, window, overlap_samples)).^2; |
30 | |
31 | #ifndef TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_ |
32 | #define TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_ |
33 | |
34 | #include <complex> |
35 | #include <deque> |
36 | #include <vector> |
37 | |
38 | #include "third_party/fft2d/fft.h" |
39 | #include "tensorflow/core/framework/op_kernel.h" |
40 | #include "tensorflow/core/framework/tensor.h" |
41 | |
42 | namespace tensorflow { |
43 | |
class Spectrogram {
 public:
  // Creates an uninitialized instance. Every scalar member has a default
  // member initializer, so accessors such as output_frequency_channels()
  // return a well-defined value (0) even before Initialize() is called.
  Spectrogram() = default;
  ~Spectrogram() = default;

  // Not copyable or assignable.
  Spectrogram(const Spectrogram&) = delete;
  Spectrogram& operator=(const Spectrogram&) = delete;

  // Initializes the class with a given window length and step length
  // (both in samples). Internally a Hann window is used as the window
  // function. Returns true on success, after which calls to
  // ComputeComplexSpectrogram() and ComputeSquaredMagnitudeSpectrogram()
  // are possible. window_length must be greater than 1 and step_length
  // must be greater than 0.
  bool Initialize(int window_length, int step_length);

  // Initializes with an explicit window instead of a window length.
  // step_length is given in samples.
  bool Initialize(const std::vector<double>& window, int step_length);

  // Resets internal variables.
  //
  // Spectrogram keeps internal state: the input data left over from the
  // previous call. As a result, repeated calls to ComputeComplexSpectrogram()
  // can produce different numbers of frames even when every input has the
  // same size; see
  // MultipleCallsToComputeComplexSpectrogramMayYieldDifferentNumbersOfFrames
  // in tensorflow/core/kernels/spectrogram_test.cc.
  //
  // To compute a spectrogram without carrying over leftover input from an
  // earlier call, call Reset() first, as is done for example in
  // tensorflow/core/kernels/spectrogram_op.cc.
  //
  // NOTE(review): the meaning of the bool result is not visible in this
  // header; confirm against the implementation.
  bool Reset();

  // Processes an arbitrary amount of audio data (contained in input)
  // to yield complex spectrogram frames. After a successful call to
  // Initialize(), this function may be called repeatedly with new input
  // data each time. The audio input is buffered internally, and the output
  // vector is populated with as many temporally-ordered spectral slices
  // as it is possible to generate from the input. The output is cleared
  // on each call before the new frames (if any) are added.
  //
  // The template parameters can be float or double.
  template <class InputSample, class OutputSample>
  bool ComputeComplexSpectrogram(
      const std::vector<InputSample>& input,
      std::vector<std::vector<std::complex<OutputSample>>>* output);

  // Works like ComputeComplexSpectrogram(), but yields the power (the
  // squared magnitude) of each complex value instead of the complex value
  // itself.
  template <class InputSample, class OutputSample>
  bool ComputeSquaredMagnitudeSpectrogram(
      const std::vector<InputSample>& input,
      std::vector<std::vector<OutputSample>>* output);

  // Returns a reference to the window function used internally. The
  // reference is to a member of this object, so it must not be used after
  // the object is destroyed.
  const std::vector<double>& GetWindow() const { return window_; }

  // Returns the number of frequency channels in the spectrogram. This is 0
  // on a freshly constructed object.
  int output_frequency_channels() const { return output_frequency_channels_; }

 private:
  // Helpers used by the Compute*() functions; they are defined outside this
  // header.
  template <class InputSample>
  bool GetNextWindowOfSamples(const std::vector<InputSample>& input,
                              int* input_start);
  void ProcessCoreFFT();

  // Scalar state. All of it is zero/false-initialized here so that no member
  // is ever read while indeterminate.
  int fft_length_ = 0;
  int output_frequency_channels_ = 0;
  int window_length_ = 0;
  int step_length_ = 0;
  bool initialized_ = false;
  int samples_to_next_step_ = 0;

  std::vector<double> window_;
  std::vector<double> fft_input_output_;
  std::deque<double> input_queue_;

  // Working data areas for the FFT routines.
  std::vector<int> fft_integer_working_area_;
  std::vector<double> fft_double_working_area_;
};
122 | |
123 | } // namespace tensorflow |
124 | |
125 | #endif // TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_ |
126 | |