FFTModel.cpp

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
00002 
00003 /*
00004     Sonic Visualiser
00005     An audio file viewer and annotation editor.
00006     Centre for Digital Music, Queen Mary, University of London.
00007     This file copyright 2006 Chris Cannam.
00008     
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of the
00012     License, or (at your option) any later version.  See the file
00013     COPYING included with this distribution for more information.
00014 */
00015 
00016 #include "FFTModel.h"
00017 #include "DenseTimeValueModel.h"
00018 #include "AggregateWaveModel.h"
00019 
00020 #include "base/Profiler.h"
00021 #include "base/Pitch.h"
00022 
00023 #include <cassert>
00024 
00025 FFTModel::FFTModel(const DenseTimeValueModel *model,
00026                    int channel,
00027                    WindowType windowType,
00028                    size_t windowSize,
00029                    size_t windowIncrement,
00030                    size_t fftSize,
00031                    bool polar,
00032                    StorageAdviser::Criteria criteria,
00033                    size_t fillFromColumn) :
00035     m_server(0),
00036     m_xshift(0),
00037     m_yshift(0)
00038 {
00039     setSourceModel(const_cast<DenseTimeValueModel *>(model)); 
00040 
00041     m_server = getServer(model,
00042                          channel,
00043                          windowType,
00044                          windowSize,
00045                          windowIncrement,
00046                          fftSize,
00047                          polar,
00048                          criteria,
00049                          fillFromColumn);
00050 
00051     if (!m_server) return; // caller should check isOK()
00052 
00053     size_t xratio = windowIncrement / m_server->getWindowIncrement();
00054     size_t yratio = m_server->getFFTSize() / fftSize;
00055 
00056     while (xratio > 1) {
00057         if (xratio & 0x1) {
00058             std::cerr << "ERROR: FFTModel: Window increment ratio "
00059                       << windowIncrement << " / "
00060                       << m_server->getWindowIncrement()
00061                       << " must be a power of two" << std::endl;
00062             assert(!(xratio & 0x1));
00063         }
00064         ++m_xshift;
00065         xratio >>= 1;
00066     }
00067 
00068     while (yratio > 1) {
00069         if (yratio & 0x1) {
00070             std::cerr << "ERROR: FFTModel: FFT size ratio "
00071                       << m_server->getFFTSize() << " / " << fftSize
00072                       << " must be a power of two" << std::endl;
00073             assert(!(yratio & 0x1));
00074         }
00075         ++m_yshift;
00076         yratio >>= 1;
00077     }
00078 }
00079 
00080 FFTModel::~FFTModel()
00081 {
00082     if (m_server) FFTDataServer::releaseInstance(m_server);
00083 }
00084 
00085 void
00086 FFTModel::sourceModelAboutToBeDeleted()
00087 {
00088     if (m_sourceModel) {
00089         std::cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << std::endl;
00090         if (m_server) {
00091             FFTDataServer::releaseInstance(m_server);
00092             m_server = 0;
00093         }
00094         FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
00095     }
00096 }
00097 
00098 FFTDataServer *
00099 FFTModel::getServer(const DenseTimeValueModel *model,
00100                     int channel,
00101                     WindowType windowType,
00102                     size_t windowSize,
00103                     size_t windowIncrement,
00104                     size_t fftSize,
00105                     bool polar,
00106                     StorageAdviser::Criteria criteria,
00107                     size_t fillFromColumn)
00108 {
00109     // Obviously, an FFT model of channel C (where C != -1) of an
00110     // aggregate model is the same as the FFT model of the appropriate
00111     // channel of whichever model that aggregate channel is drawn
00112     // from.  We should use that model here, in case we already have
00113     // the data for it or will be wanting the same data again later.
00114 
00115     // If the channel is -1 (i.e. mixture of all channels), then we
00116     // can't do this shortcut unless the aggregate model only has one
00117     // channel or contains exactly all of the channels of a single
00118     // other model.  That isn't very likely -- if it were the case,
00119     // why would we be using an aggregate model?
00120 
00121     if (channel >= 0) {
00122 
00123         const AggregateWaveModel *aggregate =
00124             dynamic_cast<const AggregateWaveModel *>(model);
00125 
00126         if (aggregate && channel < aggregate->getComponentCount()) {
00127 
00128             AggregateWaveModel::ModelChannelSpec spec =
00129                 aggregate->getComponent(channel);
00130 
00131             return getServer(spec.model,
00132                              spec.channel,
00133                              windowType,
00134                              windowSize,
00135                              windowIncrement,
00136                              fftSize,
00137                              polar,
00138                              criteria,
00139                              fillFromColumn);
00140         }
00141     }
00142 
00143     // The normal case
00144 
00145     return FFTDataServer::getFuzzyInstance(model,
00146                                            channel,
00147                                            windowType,
00148                                            windowSize,
00149                                            windowIncrement,
00150                                            fftSize,
00151                                            polar,
00152                                            criteria,
00153                                            fillFromColumn);
00154 }
00155 
00156 size_t
00157 FFTModel::getSampleRate() const
00158 {
00159     return isOK() ? m_server->getModel()->getSampleRate() : 0;
00160 }
00161 
00162 void
00163 FFTModel::getColumn(size_t x, Column &result) const
00164 {
00165     Profiler profiler("FFTModel::getColumn", false);
00166 
00167     result.clear();
00168     size_t height(getHeight());
00169     for (size_t y = 0; y < height; ++y) {
00170         result.push_back(const_cast<FFTModel *>(this)->getMagnitudeAt(x, y));
00171     }
00172 }
00173 
00174 QString
00175 FFTModel::getBinName(size_t n) const
00176 {
00177     size_t sr = getSampleRate();
00178     if (!sr) return "";
00179     QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
00180     return name;
00181 }
00182 
00183 bool
00184 FFTModel::estimateStableFrequency(size_t x, size_t y, float &frequency)
00185 {
00186     if (!isOK()) return false;
00187 
00188     size_t sampleRate = m_server->getModel()->getSampleRate();
00189 
00190     size_t fftSize = m_server->getFFTSize() >> m_yshift;
00191     frequency = (float(y) * sampleRate) / fftSize;
00192 
00193     if (x+1 >= getWidth()) return false;
00194 
00195     // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
00196     // At hopsize h and sample rate sr, one hop happens in h/sr sec.
00197     // At window size w, for bin b, f is b*sr/w.
00198     // thus 2pi phase shift happens in w/(b*sr) sec.
00199     // We need to know what phase shift we expect from h/sr sec.
00200     // -> 2pi * ((h/sr) / (w/(b*sr)))
00201     //  = 2pi * ((h * b * sr) / (w * sr))
00202     //  = 2pi * (h * b) / w.
00203 
00204     float oldPhase = getPhaseAt(x, y);
00205     float newPhase = getPhaseAt(x+1, y);
00206 
00207     size_t incr = getResolution();
00208 
00209     float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
00210 
00211     float phaseError = princargf(newPhase - expectedPhase);
00212 
00213 //    bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
00214 
00215     // The new frequency estimate based on the phase error resulting
00216     // from assuming the "native" frequency of this bin
00217 
00218     frequency =
00219         (sampleRate * (expectedPhase + phaseError - oldPhase)) /
00220         (2 * M_PI * incr);
00221 
00222     return true;
00223 }
00224 
00225 FFTModel::PeakLocationSet
00226 FFTModel::getPeaks(PeakPickType type, size_t x, size_t ymin, size_t ymax)
00227 {
00228     FFTModel::PeakLocationSet peaks;
00229     if (!isOK()) return peaks;
00230 
00231     if (ymax == 0 || ymax > getHeight() - 1) {
00232         ymax = getHeight() - 1;
00233     }
00234 
00235     Column values;
00236 
00237     if (type == AllPeaks) {
00238         for (size_t y = ymin; y <= ymax; ++y) {
00239             values.push_back(getMagnitudeAt(x, y));
00240         }
00241         size_t i = 0;
00242         for (size_t bin = ymin; bin <= ymax; ++bin) {
00243             if ((i == 0 || values[i] > values[i-1]) &&
00244                 (i == values.size()-1 || values[i] >= values[i+1])) {
00245                 peaks.insert(bin);
00246             }
00247             ++i;
00248         }
00249         return peaks;
00250     }
00251 
00252     getColumn(x, values);
00253 
00254     // For peak picking we use a moving median window, picking the
00255     // highest value within each continuous region of values that
00256     // exceed the median.  For pitch adaptivity, we adjust the window
00257     // size to a roughly constant pitch range (about four tones).
00258 
00259     size_t sampleRate = getSampleRate();
00260 
00261     std::deque<float> window;
00262     std::vector<size_t> inrange;
00263     float dist = 0.5;
00264     size_t medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
00265     size_t halfWin = medianWinSize/2;
00266 
00267     size_t binmin;
00268     if (ymin > halfWin) binmin = ymin - halfWin;
00269     else binmin = 0;
00270 
00271     size_t binmax;
00272     if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
00273     else binmax = values.size()-1;
00274 
00275     for (size_t bin = binmin; bin <= binmax; ++bin) {
00276 
00277         float value = values[bin];
00278 
00279         window.push_back(value);
00280 
00281         // so-called median will actually be the dist*100'th percentile
00282         medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
00283         halfWin = medianWinSize/2;
00284 
00285         while (window.size() > medianWinSize) window.pop_front();
00286 
00287         if (type == MajorPitchAdaptivePeaks) {
00288             if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
00289             else binmax = values.size()-1;
00290         }
00291 
00292         std::deque<float> sorted(window);
00293         std::sort(sorted.begin(), sorted.end());
00294         float median = sorted[int(sorted.size() * dist)];
00295 
00296         if (value > median) {
00297             inrange.push_back(bin);
00298         }
00299 
00300         if (value <= median || bin+1 == values.size()) {
00301             size_t peakbin = 0;
00302             float peakval = 0.f;
00303             if (!inrange.empty()) {
00304                 for (size_t i = 0; i < inrange.size(); ++i) {
00305                     if (i == 0 || values[inrange[i]] > peakval) {
00306                         peakval = values[inrange[i]];
00307                         peakbin = inrange[i];
00308                     }
00309                 }
00310                 inrange.clear();
00311                 if (peakbin >= ymin && peakbin <= ymax) {
00312                     peaks.insert(peakbin);
00313                 }
00314             }
00315         }
00316     }
00317 
00318     return peaks;
00319 }
00320 
00321 size_t
00322 FFTModel::getPeakPickWindowSize(PeakPickType type, size_t sampleRate,
00323                                 size_t bin, float &percentile) const
00324 {
00325     percentile = 0.5;
00326     if (type == MajorPeaks) return 10;
00327     if (bin == 0) return 3;
00328 
00329     size_t fftSize = m_server->getFFTSize() >> m_yshift;
00330     float binfreq = (sampleRate * bin) / fftSize;
00331     float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
00332 
00333     int hibin = lrintf((hifreq * fftSize) / sampleRate);
00334     int medianWinSize = hibin - bin;
00335     if (medianWinSize < 3) medianWinSize = 3;
00336 
00337     percentile = 0.5 + (binfreq / sampleRate);
00338 
00339     return medianWinSize;
00340 }
00341 
00342 FFTModel::PeakSet
00343 FFTModel::getPeakFrequencies(PeakPickType type, size_t x,
00344                              size_t ymin, size_t ymax)
00345 {
00346     PeakSet peaks;
00347     if (!isOK()) return peaks;
00348     PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
00349 
00350     size_t sampleRate = getSampleRate();
00351     size_t fftSize = m_server->getFFTSize() >> m_yshift;
00352     size_t incr = getResolution();
00353 
00354     // This duplicates some of the work of estimateStableFrequency to
00355     // allow us to retrieve the phases in two separate vertical
00356     // columns, instead of jumping back and forth between columns x and
00357     // x+1, which may be significantly slower if re-seeking is needed
00358 
00359     std::vector<float> phases;
00360     for (PeakLocationSet::iterator i = locations.begin();
00361          i != locations.end(); ++i) {
00362         phases.push_back(getPhaseAt(x, *i));
00363     }
00364 
00365     size_t phaseIndex = 0;
00366     for (PeakLocationSet::iterator i = locations.begin();
00367          i != locations.end(); ++i) {
00368         float oldPhase = phases[phaseIndex];
00369         float newPhase = getPhaseAt(x+1, *i);
00370         float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
00371         float phaseError = princargf(newPhase - expectedPhase);
00372         float frequency =
00373             (sampleRate * (expectedPhase + phaseError - oldPhase))
00374             / (2 * M_PI * incr);
00375 //        bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
00376 //        if (stable)
00377         peaks[*i] = frequency;
00378         ++phaseIndex;
00379     }
00380 
00381     return peaks;
00382 }
00383 
00384 Model *
00385 FFTModel::clone() const
00386 {
00387     return new FFTModel(*this);
00388 }
00389 
00390 FFTModel::FFTModel(const FFTModel &model) :
00391     DenseThreeDimensionalModel(),
00392     m_server(model.m_server),
00393     m_xshift(model.m_xshift),
00394     m_yshift(model.m_yshift)
00395 {
00396     FFTDataServer::claimInstance(m_server);
00397 }
00398 

Generated on Wed Feb 20 15:45:25 2008 for SonicVisualiser by  doxygen 1.5.1