1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// Class for generating spectrogram slices from a waveform.
// Initialize() should be called before calls to other functions. Once
// Initialize() has been called and returned true, the Compute*() functions can
// be called repeatedly with sequential input data (i.e. the first element of
// the next input vector directly follows the last element of the previous
// input vector). Whenever enough audio samples are buffered to produce a
// new frame, it will be placed in output. Output is cleared on each
// call to Compute*(). This class is not thread-safe, and should only be
// called from one thread at a time.
25// With the default parameters, the output of this class should be very
26// close to the results of the following MATLAB code:
27// overlap_samples = window_length_samples - step_samples;
28// window = hann(window_length_samples, 'periodic');
29// S = abs(spectrogram(audio, window, overlap_samples)).^2;
30
31#ifndef TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_
32#define TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_
33
34#include <complex>
35#include <deque>
36#include <vector>
37
38#include "third_party/fft2d/fft.h"
39#include "tensorflow/core/framework/op_kernel.h"
40#include "tensorflow/core/framework/tensor.h"
41
42namespace tensorflow {
43
class Spectrogram {
 public:
  // Constructs an uninitialized instance. Every scalar member has a default
  // member initializer, so accessors such as output_frequency_channels()
  // return a well-defined value (0) even before Initialize() is called.
  Spectrogram() = default;
  ~Spectrogram() = default;

  // Not copyable or assignable.
  Spectrogram(const Spectrogram&) = delete;
  Spectrogram& operator=(const Spectrogram&) = delete;

  // Initializes the class with a given window length and step length
  // (both in samples). Internally a Hann window is used as the window
  // function. Returns true on success, after which calls to the Compute*()
  // functions are possible. window_length must be greater than 1 and
  // step_length must be greater than 0.
  bool Initialize(int window_length, int step_length);

  // Initializes with an explicit window function instead of a window length.
  // step_length is in samples.
  bool Initialize(const std::vector<double>& window, int step_length);

  // Resets internal variables.
  //
  // Spectrogram keeps internal state: the input samples left over from the
  // previous call. As a result, repeated calls to ComputeComplexSpectrogram()
  // with inputs of the same size can produce different numbers of frames;
  // see MultipleCallsToComputeComplexSpectrogramMayYieldDifferentNumbersOfFrames
  // in tensorflow/core/kernels/spectrogram_test.cc.
  //
  // To compute a spectrogram of some input without carrying over that state
  // (i.e. discarding the samples remaining from the previous call), call
  // Reset() before computing. tensorflow/core/kernels/spectrogram_op.cc is an
  // example of such a use.
  bool Reset();

  // Processes an arbitrary amount of audio data (contained in input)
  // to yield complex spectrogram frames. After a successful call to
  // Initialize(), this function may be called repeatedly with new input data
  // each time. The audio input is buffered internally, and the output
  // vector is populated with as many temporally-ordered spectral slices
  // as it is possible to generate from the input. The output is cleared
  // on each call before the new frames (if any) are added.
  //
  // The template parameters can be float or double.
  //
  // NOTE(review): the meaning of a false return value is defined in the
  // implementation file; presumably it signals a missing or failed
  // Initialize() -- confirm there.
  template <class InputSample, class OutputSample>
  bool ComputeComplexSpectrogram(
      const std::vector<InputSample>& input,
      std::vector<std::vector<std::complex<OutputSample>>>* output);

  // Works like ComputeComplexSpectrogram(), but yields the power (the
  // squared magnitude) of each complex value instead of the complex value
  // itself.
  template <class InputSample, class OutputSample>
  bool ComputeSquaredMagnitudeSpectrogram(
      const std::vector<InputSample>& input,
      std::vector<std::vector<OutputSample>>* output);

  // Returns a reference to the window function used internally. The
  // reference refers to a member of this object.
  const std::vector<double>& GetWindow() const { return window_; }

  // Returns the number of frequency channels in the spectrogram
  // (0 until the object has been initialized).
  int output_frequency_channels() const { return output_frequency_channels_; }

 private:
  // Helpers implemented in the .cc file; see their definitions for the exact
  // contract.
  template <class InputSample>
  bool GetNextWindowOfSamples(const std::vector<InputSample>& input,
                              int* input_start);
  void ProcessCoreFFT();

  // Scalar configuration/state. All are given defined defaults here so that
  // no indeterminate value can be read before Initialize() has run.
  int fft_length_ = 0;
  int output_frequency_channels_ = 0;
  int window_length_ = 0;
  int step_length_ = 0;
  bool initialized_ = false;
  int samples_to_next_step_ = 0;

  std::vector<double> window_;
  std::vector<double> fft_input_output_;
  std::deque<double> input_queue_;

  // Working data areas for the FFT routines.
  std::vector<int> fft_integer_working_area_;
  std::vector<double> fft_double_working_area_;
};
122
123} // namespace tensorflow
124
125#endif // TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_
126