00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "CSVFileReader.h"
00017
00018 #include "model/Model.h"
00019 #include "base/RealTime.h"
00020 #include "model/SparseOneDimensionalModel.h"
00021 #include "model/SparseTimeValueModel.h"
00022 #include "model/EditableDenseThreeDimensionalModel.h"
00023 #include "DataFileReaderFactory.h"
00024
00025 #include <QFile>
00026 #include <QString>
00027 #include <QRegExp>
00028 #include <QStringList>
00029 #include <QTextStream>
00030 #include <QFrame>
00031 #include <QGridLayout>
00032 #include <QPushButton>
00033 #include <QHBoxLayout>
00034 #include <QVBoxLayout>
00035 #include <QTableWidget>
00036 #include <QComboBox>
00037 #include <QLabel>
00038
00039 #include <iostream>
00040
00041 CSVFileReader::CSVFileReader(QString path, size_t mainModelSampleRate) :
00042 m_file(0),
00043 m_mainModelSampleRate(mainModelSampleRate)
00044 {
00045 m_file = new QFile(path);
00046 bool good = false;
00047
00048 if (!m_file->exists()) {
00049 m_error = QFile::tr("File \"%1\" does not exist").arg(path);
00050 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
00051 m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
00052 } else {
00053 good = true;
00054 }
00055
00056 if (!good) {
00057 delete m_file;
00058 m_file = 0;
00059 }
00060 }
00061
00062 CSVFileReader::~CSVFileReader()
00063 {
00064 std::cerr << "CSVFileReader::~CSVFileReader: file is " << m_file << std::endl;
00065
00066 if (m_file) {
00067 std::cerr << "CSVFileReader::CSVFileReader: Closing file" << std::endl;
00068 m_file->close();
00069 }
00070 delete m_file;
00071 }
00072
00073 bool
00074 CSVFileReader::isOK() const
00075 {
00076 return (m_file != 0);
00077 }
00078
00079 QString
00080 CSVFileReader::getError() const
00081 {
00082 return m_error;
00083 }
00084
00085 Model *
00086 CSVFileReader::load() const
00087 {
00088 if (!m_file) return 0;
00089
00090 CSVFormatDialog *dialog = new CSVFormatDialog
00091 (0, m_file, m_mainModelSampleRate);
00092
00093 if (dialog->exec() == QDialog::Rejected) {
00094 delete dialog;
00095 throw DataFileReaderFactory::ImportCancelled;
00096 }
00097
00098 CSVFormatDialog::ModelType modelType = dialog->getModelType();
00099 CSVFormatDialog::TimingType timingType = dialog->getTimingType();
00100 CSVFormatDialog::TimeUnits timeUnits = dialog->getTimeUnits();
00101 QString separator = dialog->getSeparator();
00102 size_t sampleRate = dialog->getSampleRate();
00103 size_t windowSize = dialog->getWindowSize();
00104
00105 delete dialog;
00106
00107 if (timingType == CSVFormatDialog::ExplicitTiming) {
00108 windowSize = 1;
00109 if (timeUnits == CSVFormatDialog::TimeSeconds) {
00110 sampleRate = m_mainModelSampleRate;
00111 }
00112 }
00113
00114 SparseOneDimensionalModel *model1 = 0;
00115 SparseTimeValueModel *model2 = 0;
00116 EditableDenseThreeDimensionalModel *model3 = 0;
00117 Model *model = 0;
00118
00119 QTextStream in(m_file);
00120 in.seek(0);
00121
00122 unsigned int warnings = 0, warnLimit = 10;
00123 unsigned int lineno = 0;
00124
00125 float min = 0.0, max = 0.0;
00126
00127 size_t frameNo = 0;
00128
00129 while (!in.atEnd()) {
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142 QString chunk = in.readLine();
00143 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
00144
00145 for (size_t li = 0; li < lines.size(); ++li) {
00146
00147 QString line = lines[li];
00148
00149 if (line.startsWith("#")) continue;
00150
00151 QStringList list = line.split(separator, QString::KeepEmptyParts);
00152
00153 if (!model) {
00154
00155 switch (modelType) {
00156
00157 case CSVFormatDialog::OneDimensionalModel:
00158 model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
00159 model = model1;
00160 break;
00161
00162 case CSVFormatDialog::TwoDimensionalModel:
00163 model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
00164 model = model2;
00165 break;
00166
00167 case CSVFormatDialog::ThreeDimensionalModel:
00168 model3 = new EditableDenseThreeDimensionalModel(sampleRate,
00169 windowSize,
00170 list.size());
00171 model = model3;
00172 break;
00173 }
00174 }
00175
00176 QStringList tidyList;
00177 QRegExp nonNumericRx("[^0-9.,+-]");
00178
00179 for (int i = 0; i < list.size(); ++i) {
00180
00181 QString s(list[i].trimmed());
00182
00183 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
00184 s = s.mid(1, s.length() - 2);
00185 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
00186 s = s.mid(1, s.length() - 2);
00187 }
00188
00189 if (i == 0 && timingType == CSVFormatDialog::ExplicitTiming) {
00190
00191 bool ok = false;
00192 QString numeric = s;
00193 numeric.remove(nonNumericRx);
00194
00195 if (timeUnits == CSVFormatDialog::TimeSeconds) {
00196
00197 double time = numeric.toDouble(&ok);
00198 frameNo = int(time * sampleRate + 0.00001);
00199
00200 } else {
00201
00202 frameNo = numeric.toInt(&ok);
00203
00204 if (timeUnits == CSVFormatDialog::TimeWindows) {
00205 frameNo *= windowSize;
00206 }
00207 }
00208
00209 if (!ok) {
00210 if (warnings < warnLimit) {
00211 std::cerr << "WARNING: CSVFileReader::load: "
00212 << "Bad time format (\"" << s.toStdString()
00213 << "\") in data line "
00214 << lineno << ":" << std::endl;
00215 std::cerr << line.toStdString() << std::endl;
00216 } else if (warnings == warnLimit) {
00217 std::cerr << "WARNING: Too many warnings" << std::endl;
00218 }
00219 ++warnings;
00220 }
00221 } else {
00222 tidyList.push_back(s);
00223 }
00224 }
00225
00226 if (modelType == CSVFormatDialog::OneDimensionalModel) {
00227
00228 SparseOneDimensionalModel::Point point
00229 (frameNo,
00230 tidyList.size() > 0 ? tidyList[tidyList.size()-1] :
00231 QString("%1").arg(lineno));
00232
00233 model1->addPoint(point);
00234
00235 } else if (modelType == CSVFormatDialog::TwoDimensionalModel) {
00236
00237 SparseTimeValueModel::Point point
00238 (frameNo,
00239 tidyList.size() > 0 ? tidyList[0].toFloat() : 0.0,
00240 tidyList.size() > 1 ? tidyList[1] : QString("%1").arg(lineno));
00241
00242 model2->addPoint(point);
00243
00244 } else if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
00245
00246 DenseThreeDimensionalModel::Column values;
00247
00248 for (int i = 0; i < tidyList.size(); ++i) {
00249
00250 bool ok = false;
00251 float value = list[i].toFloat(&ok);
00252 values.push_back(value);
00253
00254 if ((lineno == 0 && i == 0) || value < min) min = value;
00255 if ((lineno == 0 && i == 0) || value > max) max = value;
00256
00257 if (!ok) {
00258 if (warnings < warnLimit) {
00259 std::cerr << "WARNING: CSVFileReader::load: "
00260 << "Non-numeric value in data line " << lineno
00261 << ":" << std::endl;
00262 std::cerr << line.toStdString() << std::endl;
00263 ++warnings;
00264 } else if (warnings == warnLimit) {
00265 std::cerr << "WARNING: Too many warnings" << std::endl;
00266 }
00267 }
00268 }
00269
00270 std::cerr << "Setting bin values for count " << lineno << ", frame "
00271 << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << std::endl;
00272
00273 model3->setColumn(frameNo / model3->getResolution(), values);
00274 }
00275
00276 ++lineno;
00277 if (timingType == CSVFormatDialog::ImplicitTiming ||
00278 list.size() == 0) {
00279 frameNo += windowSize;
00280 }
00281 }
00282 }
00283
00284 if (modelType == CSVFormatDialog::ThreeDimensionalModel) {
00285 model3->setMinimumLevel(min);
00286 model3->setMaximumLevel(max);
00287 }
00288
00289 return model;
00290 }
00291
00292
00293 CSVFormatDialog::CSVFormatDialog(QWidget *parent, QFile *file,
00294 size_t defaultSampleRate) :
00295 QDialog(parent),
00296 m_modelType(OneDimensionalModel),
00297 m_timingType(ExplicitTiming),
00298 m_timeUnits(TimeAudioFrames),
00299 m_separator("")
00300 {
00301 setModal(true);
00302 setWindowTitle(tr("Select Data Format"));
00303
00304 (void)guessFormat(file);
00305
00306 QGridLayout *layout = new QGridLayout;
00307
00308 layout->addWidget(new QLabel(tr("<b>Select Data Format</b><p>Please select the correct data format for this file.")),
00309 0, 0, 1, 4);
00310
00311 layout->addWidget(new QLabel(tr("Each row specifies:")), 1, 0);
00312
00313 m_modelTypeCombo = new QComboBox;
00314 m_modelTypeCombo->addItem(tr("A point in time"));
00315 m_modelTypeCombo->addItem(tr("A value at a time"));
00316 m_modelTypeCombo->addItem(tr("A set of values"));
00317 layout->addWidget(m_modelTypeCombo, 1, 1, 1, 2);
00318 connect(m_modelTypeCombo, SIGNAL(activated(int)),
00319 this, SLOT(modelTypeChanged(int)));
00320 m_modelTypeCombo->setCurrentIndex(int(m_modelType));
00321
00322 layout->addWidget(new QLabel(tr("The first column contains:")), 2, 0);
00323
00324 m_timingTypeCombo = new QComboBox;
00325 m_timingTypeCombo->addItem(tr("Time, in seconds"));
00326 m_timingTypeCombo->addItem(tr("Time, in audio sample frames"));
00327 m_timingTypeCombo->addItem(tr("Data (rows are consecutive in time)"));
00328 layout->addWidget(m_timingTypeCombo, 2, 1, 1, 2);
00329 connect(m_timingTypeCombo, SIGNAL(activated(int)),
00330 this, SLOT(timingTypeChanged(int)));
00331 m_timingTypeCombo->setCurrentIndex(m_timingType == ExplicitTiming ?
00332 m_timeUnits == TimeSeconds ? 0 : 1 : 2);
00333
00334 m_sampleRateLabel = new QLabel(tr("Audio sample rate (Hz):"));
00335 layout->addWidget(m_sampleRateLabel, 3, 0);
00336
00337 size_t sampleRates[] = {
00338 8000, 11025, 12000, 22050, 24000, 32000,
00339 44100, 48000, 88200, 96000, 176400, 192000
00340 };
00341
00342 m_sampleRateCombo = new QComboBox;
00343 m_sampleRate = defaultSampleRate;
00344 for (size_t i = 0; i < sizeof(sampleRates) / sizeof(sampleRates[0]); ++i) {
00345 m_sampleRateCombo->addItem(QString("%1").arg(sampleRates[i]));
00346 if (sampleRates[i] == m_sampleRate) m_sampleRateCombo->setCurrentIndex(i);
00347 }
00348 m_sampleRateCombo->setEditable(true);
00349
00350 layout->addWidget(m_sampleRateCombo, 3, 1);
00351 connect(m_sampleRateCombo, SIGNAL(activated(QString)),
00352 this, SLOT(sampleRateChanged(QString)));
00353 connect(m_sampleRateCombo, SIGNAL(editTextChanged(QString)),
00354 this, SLOT(sampleRateChanged(QString)));
00355
00356 m_windowSizeLabel = new QLabel(tr("Frame increment between rows:"));
00357 layout->addWidget(m_windowSizeLabel, 4, 0);
00358
00359 m_windowSizeCombo = new QComboBox;
00360 m_windowSize = 1024;
00361 for (int i = 0; i <= 16; ++i) {
00362 int value = 1 << i;
00363 m_windowSizeCombo->addItem(QString("%1").arg(value));
00364 if (value == int(m_windowSize)) m_windowSizeCombo->setCurrentIndex(i);
00365 }
00366 m_windowSizeCombo->setEditable(true);
00367
00368 layout->addWidget(m_windowSizeCombo, 4, 1);
00369 connect(m_windowSizeCombo, SIGNAL(activated(QString)),
00370 this, SLOT(windowSizeChanged(QString)));
00371 connect(m_windowSizeCombo, SIGNAL(editTextChanged(QString)),
00372 this, SLOT(windowSizeChanged(QString)));
00373
00374 layout->addWidget(new QLabel(tr("\nExample data from file:")), 5, 0, 1, 4);
00375
00376 m_exampleWidget = new QTableWidget
00377 (std::min(10, m_example.size()), m_maxExampleCols);
00378
00379 layout->addWidget(m_exampleWidget, 6, 0, 1, 4);
00380 layout->setColumnStretch(3, 10);
00381 layout->setRowStretch(4, 10);
00382
00383 QPushButton *ok = new QPushButton(tr("OK"));
00384 connect(ok, SIGNAL(clicked()), this, SLOT(accept()));
00385 ok->setDefault(true);
00386
00387 QPushButton *cancel = new QPushButton(tr("Cancel"));
00388 connect(cancel, SIGNAL(clicked()), this, SLOT(reject()));
00389
00390 QHBoxLayout *buttonLayout = new QHBoxLayout;
00391 buttonLayout->addStretch(1);
00392 buttonLayout->addWidget(ok);
00393 buttonLayout->addWidget(cancel);
00394
00395 QVBoxLayout *mainLayout = new QVBoxLayout;
00396 mainLayout->addLayout(layout);
00397 mainLayout->addLayout(buttonLayout);
00398
00399 setLayout(mainLayout);
00400
00401 timingTypeChanged(m_timingTypeCombo->currentIndex());
00402 }
00403
00404 CSVFormatDialog::~CSVFormatDialog()
00405 {
00406 }
00407
00408 void
00409 CSVFormatDialog::populateExample()
00410 {
00411 m_exampleWidget->setColumnCount
00412 (m_timingType == ExplicitTiming ?
00413 m_maxExampleCols - 1 : m_maxExampleCols);
00414
00415 m_exampleWidget->setHorizontalHeaderLabels(QStringList());
00416
00417 for (int i = 0; i < m_example.size(); ++i) {
00418 for (int j = 0; j < m_example[i].size(); ++j) {
00419
00420 QTableWidgetItem *item = new QTableWidgetItem(m_example[i][j]);
00421
00422 if (j == 0) {
00423 if (m_timingType == ExplicitTiming) {
00424 m_exampleWidget->setVerticalHeaderItem(i, item);
00425 continue;
00426 } else {
00427 QTableWidgetItem *header =
00428 new QTableWidgetItem(QString("%1").arg(i));
00429 header->setFlags(Qt::ItemIsEnabled);
00430 m_exampleWidget->setVerticalHeaderItem(i, header);
00431 }
00432 }
00433 int index = j;
00434 if (m_timingType == ExplicitTiming) --index;
00435 item->setFlags(Qt::ItemIsEnabled);
00436 m_exampleWidget->setItem(i, index, item);
00437 }
00438 }
00439 }
00440
00441 void
00442 CSVFormatDialog::modelTypeChanged(int type)
00443 {
00444 m_modelType = (ModelType)type;
00445
00446 if (m_modelType == ThreeDimensionalModel) {
00447
00448
00449 m_timingTypeCombo->setCurrentIndex(2);
00450 timingTypeChanged(2);
00451 }
00452 }
00453
00454 void
00455 CSVFormatDialog::timingTypeChanged(int type)
00456 {
00457 switch (type) {
00458
00459 case 0:
00460 m_timingType = ExplicitTiming;
00461 m_timeUnits = TimeSeconds;
00462 m_sampleRateCombo->setEnabled(false);
00463 m_sampleRateLabel->setEnabled(false);
00464 m_windowSizeCombo->setEnabled(false);
00465 m_windowSizeLabel->setEnabled(false);
00466 if (m_modelType == ThreeDimensionalModel) {
00467 m_modelTypeCombo->setCurrentIndex(1);
00468 modelTypeChanged(1);
00469 }
00470 break;
00471
00472 case 1:
00473 m_timingType = ExplicitTiming;
00474 m_timeUnits = TimeAudioFrames;
00475 m_sampleRateCombo->setEnabled(true);
00476 m_sampleRateLabel->setEnabled(true);
00477 m_windowSizeCombo->setEnabled(false);
00478 m_windowSizeLabel->setEnabled(false);
00479 if (m_modelType == ThreeDimensionalModel) {
00480 m_modelTypeCombo->setCurrentIndex(1);
00481 modelTypeChanged(1);
00482 }
00483 break;
00484
00485 case 2:
00486 m_timingType = ImplicitTiming;
00487 m_timeUnits = TimeWindows;
00488 m_sampleRateCombo->setEnabled(true);
00489 m_sampleRateLabel->setEnabled(true);
00490 m_windowSizeCombo->setEnabled(true);
00491 m_windowSizeLabel->setEnabled(true);
00492 break;
00493 }
00494
00495 populateExample();
00496 }
00497
00498 void
00499 CSVFormatDialog::sampleRateChanged(QString rateString)
00500 {
00501 bool ok = false;
00502 int sampleRate = rateString.toInt(&ok);
00503 if (ok) m_sampleRate = sampleRate;
00504 }
00505
00506 void
00507 CSVFormatDialog::windowSizeChanged(QString sizeString)
00508 {
00509 bool ok = false;
00510 int size = sizeString.toInt(&ok);
00511 if (ok) m_windowSize = size;
00512 }
00513
00514 bool
00515 CSVFormatDialog::guessFormat(QFile *file)
00516 {
00517 QTextStream in(file);
00518 in.seek(0);
00519
00520 unsigned int lineno = 0;
00521
00522 bool nonIncreasingPrimaries = false;
00523 bool nonNumericPrimaries = false;
00524 bool floatPrimaries = false;
00525 bool variableItemCount = false;
00526 int itemCount = 1;
00527 int earliestNonNumericItem = -1;
00528
00529 float prevPrimary = 0.0;
00530
00531 m_maxExampleCols = 0;
00532
00533 while (!in.atEnd()) {
00534
00535
00536
00537 QString chunk = in.readLine();
00538 QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
00539
00540 for (size_t li = 0; li < lines.size(); ++li) {
00541
00542 QString line = lines[li];
00543
00544 if (line.startsWith("#")) continue;
00545
00546 if (m_separator == "") {
00548 if (line.split(",").size() >= 2) m_separator = ",";
00549 else if (line.split("\t").size() >= 2) m_separator = "\t";
00550 else if (line.split("|").size() >= 2) m_separator = "|";
00551 else if (line.split("/").size() >= 2) m_separator = "/";
00552 else if (line.split(":").size() >= 2) m_separator = ":";
00553 else m_separator = " ";
00554 }
00555
00556 QStringList list = line.split(m_separator);
00557 QStringList tidyList;
00558
00559 for (int i = 0; i < list.size(); ++i) {
00560
00561 QString s(list[i]);
00562 bool numeric = false;
00563
00564 if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) {
00565 s = s.mid(1, s.length() - 2);
00566 } else if (s.length() >= 2 && s.startsWith("'") && s.endsWith("'")) {
00567 s = s.mid(1, s.length() - 2);
00568 } else {
00569 (void)s.toFloat(&numeric);
00570 }
00571
00572 tidyList.push_back(s);
00573
00574 if (lineno == 0 || (list.size() < itemCount)) {
00575 itemCount = list.size();
00576 } else {
00577 if (itemCount != list.size()) {
00578 variableItemCount = true;
00579 }
00580 }
00581
00582 if (i == 0) {
00583
00584 if (numeric) {
00585
00586 float primary = s.toFloat();
00587
00588 if (lineno > 0 && primary <= prevPrimary) {
00589 nonIncreasingPrimaries = true;
00590 }
00591
00592 if (s.contains(".") || s.contains(",")) {
00593 floatPrimaries = true;
00594 }
00595
00596 prevPrimary = primary;
00597
00598 } else {
00599 nonNumericPrimaries = true;
00600 }
00601 } else {
00602
00603 if (!numeric) {
00604 if (earliestNonNumericItem < 0 ||
00605 i < earliestNonNumericItem) {
00606 earliestNonNumericItem = i;
00607 }
00608 }
00609 }
00610 }
00611
00612 if (lineno < 10) {
00613 m_example.push_back(tidyList);
00614 if (lineno == 0 || tidyList.size() > m_maxExampleCols) {
00615 m_maxExampleCols = tidyList.size();
00616 }
00617 }
00618
00619 ++lineno;
00620
00621 if (lineno == 50) break;
00622 }
00623 }
00624
00625 if (nonNumericPrimaries || nonIncreasingPrimaries) {
00626
00627
00628
00629 m_timingType = ImplicitTiming;
00630 m_timeUnits = TimeWindows;
00631
00632 if (nonNumericPrimaries) {
00633 m_modelType = OneDimensionalModel;
00634 } else if (itemCount == 1 || variableItemCount ||
00635 (earliestNonNumericItem != -1)) {
00636 m_modelType = TwoDimensionalModel;
00637 } else {
00638 m_modelType = ThreeDimensionalModel;
00639 }
00640
00641 } else {
00642
00643
00644
00645 m_timingType = ExplicitTiming;
00646
00647 if (floatPrimaries) {
00648 m_timeUnits = TimeSeconds;
00649 } else {
00650 m_timeUnits = TimeAudioFrames;
00651 }
00652
00653 if (itemCount == 1) {
00654 m_modelType = OneDimensionalModel;
00655 } else if (variableItemCount || (earliestNonNumericItem != -1)) {
00656 if (earliestNonNumericItem != -1 && earliestNonNumericItem < 2) {
00657 m_modelType = OneDimensionalModel;
00658 } else {
00659 m_modelType = TwoDimensionalModel;
00660 }
00661 } else {
00662 m_modelType = ThreeDimensionalModel;
00663 }
00664 }
00665
00666 std::cerr << "Estimated model type: " << m_modelType << std::endl;
00667 std::cerr << "Estimated timing type: " << m_timingType << std::endl;
00668 std::cerr << "Estimated units: " << m_timeUnits << std::endl;
00669
00670 in.seek(0);
00671 return true;
00672 }