00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "FeatureExtractionModelTransformer.h"
00017
00018 #include "plugin/FeatureExtractionPluginFactory.h"
00019 #include "plugin/PluginXml.h"
00020 #include "vamp-sdk/Plugin.h"
00021
00022 #include "data/model/Model.h"
00023 #include "base/Window.h"
00024 #include "data/model/SparseOneDimensionalModel.h"
00025 #include "data/model/SparseTimeValueModel.h"
00026 #include "data/model/EditableDenseThreeDimensionalModel.h"
00027 #include "data/model/DenseTimeValueModel.h"
00028 #include "data/model/NoteModel.h"
00029 #include "data/model/FFTModel.h"
00030 #include "data/model/WaveFileModel.h"
00031
00032 #include "TransformFactory.h"
00033
00034 #include <QMessageBox>
00035
00036 #include <iostream>
00037
00038 FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
00039 const Transform &transform) :
00040 ModelTransformer(in, transform),
00041 m_plugin(0),
00042 m_descriptor(0),
00043 m_outputFeatureNo(0)
00044 {
00045
00046
00047 QString pluginId = transform.getPluginIdentifier();
00048
00049 FeatureExtractionPluginFactory *factory =
00050 FeatureExtractionPluginFactory::instanceFor(pluginId);
00051
00052 if (!factory) {
00053 m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
00054 return;
00055 }
00056
00057 DenseTimeValueModel *input = getConformingInput();
00058 if (!input) {
00059 m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
00060 return;
00061 }
00062
00063 m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
00064 if (!m_plugin) {
00065 m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
00066 return;
00067 }
00068
00069 TransformFactory::getInstance()->makeContextConsistentWithPlugin
00070 (m_transform, m_plugin);
00071
00072 TransformFactory::getInstance()->setPluginParameters
00073 (m_transform, m_plugin);
00074
00075 size_t channelCount = input->getChannelCount();
00076 if (m_plugin->getMaxChannelCount() < channelCount) {
00077 channelCount = 1;
00078 }
00079 if (m_plugin->getMinChannelCount() > channelCount) {
00080 m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
00081 .arg(pluginId)
00082 .arg(m_plugin->getMinChannelCount())
00083 .arg(m_plugin->getMaxChannelCount())
00084 .arg(input->getChannelCount());
00085 return;
00086 }
00087
00088 std::cerr << "Initialising feature extraction plugin with channels = "
00089 << channelCount << ", step = " << m_transform.getStepSize()
00090 << ", block = " << m_transform.getBlockSize() << std::endl;
00091
00092 if (!m_plugin->initialise(channelCount,
00093 m_transform.getStepSize(),
00094 m_transform.getBlockSize())) {
00095
00096 size_t pstep = m_transform.getStepSize();
00097 size_t pblock = m_transform.getBlockSize();
00098
00099 m_transform.setStepSize(0);
00100 m_transform.setBlockSize(0);
00101 TransformFactory::getInstance()->makeContextConsistentWithPlugin
00102 (m_transform, m_plugin);
00103
00104 if (m_transform.getStepSize() != pstep ||
00105 m_transform.getBlockSize() != pblock) {
00106
00107 if (!m_plugin->initialise(channelCount,
00108 m_transform.getStepSize(),
00109 m_transform.getBlockSize())) {
00110
00111 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
00112 return;
00113
00114 } else {
00115
00116 m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
00117 .arg(pluginId)
00118 .arg(pstep)
00119 .arg(pblock)
00120 .arg(m_transform.getStepSize())
00121 .arg(m_transform.getBlockSize());
00122 }
00123
00124 } else {
00125
00126 m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
00127 return;
00128 }
00129 }
00130
00131 if (m_transform.getPluginVersion() != "") {
00132 QString pv = QString("%1").arg(m_plugin->getPluginVersion());
00133 if (pv != m_transform.getPluginVersion()) {
00134 QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
00135 .arg(m_transform.getPluginVersion())
00136 .arg(pluginId)
00137 .arg(pv);
00138 if (m_message != "") {
00139 m_message = QString("%1; %2").arg(vm).arg(m_message);
00140 } else {
00141 m_message = vm;
00142 }
00143 }
00144 }
00145
00146 Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
00147
00148 if (outputs.empty()) {
00149 m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
00150 return;
00151 }
00152
00153 for (size_t i = 0; i < outputs.size(); ++i) {
00154 if (m_transform.getOutput() == "" ||
00155 outputs[i].identifier == m_transform.getOutput().toStdString()) {
00156 m_outputFeatureNo = i;
00157 m_descriptor = new Vamp::Plugin::OutputDescriptor
00158 (outputs[i]);
00159 break;
00160 }
00161 }
00162
00163 if (!m_descriptor) {
00164 m_message = tr("Plugin \"%1\" has no output named \"%2\"")
00165 .arg(pluginId)
00166 .arg(m_transform.getOutput());
00167 return;
00168 }
00169
00170
00171
00172
00173 int binCount = 1;
00174 float minValue = 0.0, maxValue = 0.0;
00175 bool haveExtents = false;
00176
00177 if (m_descriptor->hasFixedBinCount) {
00178 binCount = m_descriptor->binCount;
00179 }
00180
00181
00182
00183
00184 if (binCount > 0 && m_descriptor->hasKnownExtents) {
00185 minValue = m_descriptor->minValue;
00186 maxValue = m_descriptor->maxValue;
00187 haveExtents = true;
00188 }
00189
00190 size_t modelRate = input->getSampleRate();
00191 size_t modelResolution = 1;
00192
00193 switch (m_descriptor->sampleType) {
00194
00195 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
00196 if (m_descriptor->sampleRate != 0.0) {
00197 modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
00198 }
00199 break;
00200
00201 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
00202 modelResolution = m_transform.getStepSize();
00203 break;
00204
00205 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
00206 modelRate = size_t(m_descriptor->sampleRate + 0.001);
00207 break;
00208 }
00209
00210 if (binCount == 0) {
00211
00212 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
00213 false);
00214
00215 } else if (binCount == 1) {
00216
00217 SparseTimeValueModel *model;
00218 if (haveExtents) {
00219 model = new SparseTimeValueModel
00220 (modelRate, modelResolution, minValue, maxValue, false);
00221 } else {
00222 model = new SparseTimeValueModel
00223 (modelRate, modelResolution, false);
00224 }
00225 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
00226
00227 m_output = model;
00228
00229 } else if (m_descriptor->sampleType ==
00230 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
00231
00232
00233
00234
00235
00236
00237
00239
00240 NoteModel *model;
00241 if (haveExtents) {
00242 model = new NoteModel
00243 (modelRate, modelResolution, minValue, maxValue, false);
00244 } else {
00245 model = new NoteModel
00246 (modelRate, modelResolution, false);
00247 }
00248 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
00249
00250 m_output = model;
00251
00252 } else {
00253
00254 EditableDenseThreeDimensionalModel *model =
00255 new EditableDenseThreeDimensionalModel
00256 (modelRate, modelResolution, binCount, false);
00257
00258 if (!m_descriptor->binNames.empty()) {
00259 std::vector<QString> names;
00260 for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
00261 names.push_back(m_descriptor->binNames[i].c_str());
00262 }
00263 model->setBinNames(names);
00264 }
00265
00266 m_output = model;
00267 }
00268
00269 if (m_output) m_output->setSourceModel(input);
00270 }
00271
00272 FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
00273 {
00274 std::cerr << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << std::endl;
00275 delete m_plugin;
00276 delete m_descriptor;
00277 }
00278
00279 DenseTimeValueModel *
00280 FeatureExtractionModelTransformer::getConformingInput()
00281 {
00282 DenseTimeValueModel *dtvm =
00283 dynamic_cast<DenseTimeValueModel *>(getInputModel());
00284 if (!dtvm) {
00285 std::cerr << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
00286 }
00287 return dtvm;
00288 }
00289
00290 void
00291 FeatureExtractionModelTransformer::run()
00292 {
00293 DenseTimeValueModel *input = getConformingInput();
00294 if (!input) return;
00295
00296 if (!m_output) return;
00297
00298 while (!input->isReady()) {
00299
00300
00301
00302
00303
00304
00305
00306 std::cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << std::endl;
00307 sleep(1);
00308 }
00309
00310 size_t sampleRate = input->getSampleRate();
00311
00312 size_t channelCount = input->getChannelCount();
00313 if (m_plugin->getMaxChannelCount() < channelCount) {
00314 channelCount = 1;
00315 }
00316
00317 float **buffers = new float*[channelCount];
00318 for (size_t ch = 0; ch < channelCount; ++ch) {
00319 buffers[ch] = new float[m_transform.getBlockSize() + 2];
00320 }
00321
00322 size_t stepSize = m_transform.getStepSize();
00323 size_t blockSize = m_transform.getBlockSize();
00324
00325 bool frequencyDomain = (m_plugin->getInputDomain() ==
00326 Vamp::Plugin::FrequencyDomain);
00327 std::vector<FFTModel *> fftModels;
00328
00329 if (frequencyDomain) {
00330 for (size_t ch = 0; ch < channelCount; ++ch) {
00331 FFTModel *model = new FFTModel
00332 (getConformingInput(),
00333 channelCount == 1 ? m_input.getChannel() : ch,
00334 m_transform.getWindowType(),
00335 blockSize,
00336 stepSize,
00337 blockSize,
00338 false,
00339 StorageAdviser::PrecisionCritical);
00340 if (!model->isOK()) {
00341 QMessageBox::critical
00342 (0, tr("FFT cache failed"),
00343 tr("Failed to create the FFT model for this transform.\n"
00344 "There may be insufficient memory or disc space to continue."));
00345 delete model;
00346 setCompletion(100);
00347 return;
00348 }
00349 model->resume();
00350 fftModels.push_back(model);
00351 }
00352 }
00353
00354 long startFrame = m_input.getModel()->getStartFrame();
00355 long endFrame = m_input.getModel()->getEndFrame();
00356
00357 RealTime contextStartRT = m_transform.getStartTime();
00358 RealTime contextDurationRT = m_transform.getDuration();
00359
00360 long contextStart =
00361 RealTime::realTime2Frame(contextStartRT, sampleRate);
00362
00363 long contextDuration =
00364 RealTime::realTime2Frame(contextDurationRT, sampleRate);
00365
00366 if (contextStart == 0 || contextStart < startFrame) {
00367 contextStart = startFrame;
00368 }
00369
00370 if (contextDuration == 0) {
00371 contextDuration = endFrame - contextStart;
00372 }
00373 if (contextStart + contextDuration > endFrame) {
00374 contextDuration = endFrame - contextStart;
00375 }
00376
00377 long blockFrame = contextStart;
00378
00379 long prevCompletion = 0;
00380
00381 setCompletion(0);
00382
00383 while (!m_abandoned) {
00384
00385 if (frequencyDomain) {
00386 if (blockFrame - int(blockSize)/2 >
00387 contextStart + contextDuration) break;
00388 } else {
00389 if (blockFrame >=
00390 contextStart + contextDuration) break;
00391 }
00392
00393
00394
00395
00396
00397 long completion =
00398 (((blockFrame - contextStart) / stepSize) * 99) /
00399 (contextDuration / stepSize);
00400
00401
00402
00403 if (frequencyDomain) {
00404 for (size_t ch = 0; ch < channelCount; ++ch) {
00405 int column = (blockFrame - startFrame) / stepSize;
00406 for (size_t i = 0; i <= blockSize/2; ++i) {
00407 fftModels[ch]->getValuesAt
00408 (column, i, buffers[ch][i*2], buffers[ch][i*2+1]);
00409 }
00410 }
00411 } else {
00412 getFrames(channelCount, blockFrame, blockSize, buffers);
00413 }
00414
00415 Vamp::Plugin::FeatureSet features = m_plugin->process
00416 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
00417
00418 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
00419 Vamp::Plugin::Feature feature =
00420 features[m_outputFeatureNo][fi];
00421 addFeature(blockFrame, feature);
00422 }
00423
00424 if (blockFrame == contextStart || completion > prevCompletion) {
00425 setCompletion(completion);
00426 prevCompletion = completion;
00427 }
00428
00429 blockFrame += stepSize;
00430 }
00431
00432 if (m_abandoned) return;
00433
00434 Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
00435
00436 for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
00437 Vamp::Plugin::Feature feature =
00438 features[m_outputFeatureNo][fi];
00439 addFeature(blockFrame, feature);
00440 }
00441
00442 if (frequencyDomain) {
00443 for (size_t ch = 0; ch < channelCount; ++ch) {
00444 delete fftModels[ch];
00445 }
00446 }
00447
00448 setCompletion(100);
00449 }
00450
00451 void
00452 FeatureExtractionModelTransformer::getFrames(int channelCount,
00453 long startFrame, long size,
00454 float **buffers)
00455 {
00456 long offset = 0;
00457
00458 if (startFrame < 0) {
00459 for (int c = 0; c < channelCount; ++c) {
00460 for (int i = 0; i < size && startFrame + i < 0; ++i) {
00461 buffers[c][i] = 0.0f;
00462 }
00463 }
00464 offset = -startFrame;
00465 size -= offset;
00466 if (size <= 0) return;
00467 startFrame = 0;
00468 }
00469
00470 DenseTimeValueModel *input = getConformingInput();
00471 if (!input) return;
00472
00473 long got = 0;
00474
00475 if (channelCount == 1) {
00476
00477 got = input->getData(m_input.getChannel(), startFrame, size,
00478 buffers[0] + offset);
00479
00480 if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
00481
00482 float cc = float(input->getChannelCount());
00483 for (long i = 0; i < size; ++i) {
00484 buffers[0][i + offset] /= cc;
00485 }
00486 }
00487
00488 } else {
00489
00490 float **writebuf = buffers;
00491 if (offset > 0) {
00492 writebuf = new float *[channelCount];
00493 for (int i = 0; i < channelCount; ++i) {
00494 writebuf[i] = buffers[i] + offset;
00495 }
00496 }
00497
00498 got = input->getData(0, channelCount-1, startFrame, size, writebuf);
00499
00500 if (writebuf != buffers) delete[] writebuf;
00501 }
00502
00503 while (got < size) {
00504 for (int c = 0; c < channelCount; ++c) {
00505 buffers[c][got + offset] = 0.0;
00506 }
00507 ++got;
00508 }
00509 }
00510
00511 void
00512 FeatureExtractionModelTransformer::addFeature(size_t blockFrame,
00513 const Vamp::Plugin::Feature &feature)
00514 {
00515 size_t inputRate = m_input.getModel()->getSampleRate();
00516
00517
00518
00519
00520 int binCount = 1;
00521 if (m_descriptor->hasFixedBinCount) {
00522 binCount = m_descriptor->binCount;
00523 }
00524
00525 size_t frame = blockFrame;
00526
00527 if (m_descriptor->sampleType ==
00528 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
00529
00530 if (!feature.hasTimestamp) {
00531 std::cerr
00532 << "WARNING: FeatureExtractionModelTransformer::addFeature: "
00533 << "Feature has variable sample rate but no timestamp!"
00534 << std::endl;
00535 return;
00536 } else {
00537 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
00538 }
00539
00540 } else if (m_descriptor->sampleType ==
00541 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
00542
00543 if (feature.hasTimestamp) {
00545 frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
00546 lrintf(m_descriptor->sampleRate));
00547 } else {
00548 frame = m_output->getEndFrame();
00549 }
00550 }
00551
00552 if (binCount == 0) {
00553
00554 SparseOneDimensionalModel *model =
00555 getConformingOutput<SparseOneDimensionalModel>();
00556 if (!model) return;
00557
00558 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
00559
00560 } else if (binCount == 1) {
00561
00562 float value = 0.0;
00563 if (feature.values.size() > 0) value = feature.values[0];
00564
00565 SparseTimeValueModel *model =
00566 getConformingOutput<SparseTimeValueModel>();
00567 if (!model) return;
00568
00569 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
00570
00571
00572 } else if (m_descriptor->sampleType ==
00573 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
00574
00575 float pitch = 0.0;
00576 if (feature.values.size() > 0) pitch = feature.values[0];
00577
00578 float duration = 1;
00579 if (feature.values.size() > 1) duration = feature.values[1];
00580
00581 float velocity = 100;
00582 if (feature.values.size() > 2) velocity = feature.values[2];
00583 if (velocity < 0) velocity = 127;
00584 if (velocity > 127) velocity = 127;
00585
00586 NoteModel *model = getConformingOutput<NoteModel>();
00587 if (!model) return;
00588
00589 model->addPoint(NoteModel::Point(frame, pitch,
00590 lrintf(duration),
00591 velocity / 127.f,
00592 feature.label.c_str()));
00593
00594 } else {
00595
00596 DenseThreeDimensionalModel::Column values = feature.values;
00597
00598 EditableDenseThreeDimensionalModel *model =
00599 getConformingOutput<EditableDenseThreeDimensionalModel>();
00600 if (!model) return;
00601
00602 model->setColumn(frame / model->getResolution(), values);
00603 }
00604 }
00605
00606 void
00607 FeatureExtractionModelTransformer::setCompletion(int completion)
00608 {
00609 int binCount = 1;
00610 if (m_descriptor->hasFixedBinCount) {
00611 binCount = m_descriptor->binCount;
00612 }
00613
00614
00615
00616
00617 if (binCount == 0) {
00618
00619 SparseOneDimensionalModel *model =
00620 getConformingOutput<SparseOneDimensionalModel>();
00621 if (!model) return;
00622 model->setCompletion(completion, true);
00623
00624 } else if (binCount == 1) {
00625
00626 SparseTimeValueModel *model =
00627 getConformingOutput<SparseTimeValueModel>();
00628 if (!model) return;
00629 model->setCompletion(completion, true);
00630
00631 } else if (m_descriptor->sampleType ==
00632 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
00633
00634 NoteModel *model =
00635 getConformingOutput<NoteModel>();
00636 if (!model) return;
00637 model->setCompletion(completion, true);
00638
00639 } else {
00640
00641 EditableDenseThreeDimensionalModel *model =
00642 getConformingOutput<EditableDenseThreeDimensionalModel>();
00643 if (!model) return;
00644 model->setCompletion(completion, true);
00645 }
00646 }
00647