Skip to content

Commit

Permalink
ENH: experimental QIIME 2 mapping file support (#80)
Browse files Browse the repository at this point in the history
Fixes #79.
  • Loading branch information
jairideout authored Jun 30, 2017
1 parent 1e04a69 commit e26d8f2
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 14 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

**Note on versioning:** the version numbers used here match the version numbers displayed to users in the Chrome Web Store. Sometimes there are gaps between release versions (e.g., version 2 jumps to version 5). This happens because each separate upload of Keemei to the web store increments the version number, and sometimes multiple uploads are necessary before a release is finalized (e.g., if the release is reviewed by an add-ons advisor and updates are required before it can go public). Therefore, the version numbering used here in the changelog and tagged GitHub releases will match the public release version displayed in the web store.

## Version 13 (2017-06-28)

This release adds **experimental** support for validating [QIIME 2](https://qiime2.org) mapping files.

### Features
* Added **experimental** support for validating [QIIME 2 mapping files](https://docs.qiime2.org/2017.6/tutorials/metadata/#metadata-from-a-text-file) ([#79](https://github.com/biocore/Keemei/issues/79))

## Version 12 (2016-06-17)

Minor beta release with [Keemei paper](http://dx.doi.org/10.1186/s13742-016-0133-6) citation information.
Expand Down
13 changes: 9 additions & 4 deletions src/App.gs
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,27 @@ function onInstall(e) {

function onOpen(e) {
SpreadsheetApp.getUi().createAddonMenu()
.addItem("Validate QIIME mapping file", "validateQiime")
.addItem("Validate QIIME 1 mapping file", "validateQiime1")
.addItem("Validate QIIME 2 mapping file", "validateQiime2")
.addItem("Validate SRGD file", "validateSrgd")
.addItem("Validate Qiita sample template (experimental)", "validateQiitaSampleTemplate")
.addSeparator()
.addItem("Clear validation status", "clear")
.addItem("About", "about")
.addSeparator()
.addSubMenu(SpreadsheetApp.getUi().createMenu("Developer tools")
.addItem("Create simulated QIIME mapping file dataset", "createSimulatedData")
.addItem("Create simulated QIIME 1 mapping file dataset", "createSimulatedData")
.addItem("Run benchmarks: dataset size and error rate", "runDatasetSizeBenchmarks")
.addItem("Run benchmarks: rule size", "runRuleSizeBenchmarks"))
.addToUi();
};

function validateQiime() {
validate_(getQiimeFormatSpec_);
function validateQiime1() {
validate_(getQiime1FormatSpec_);
};

/**
 * Handler registered for the "Validate QIIME 2 mapping file" add-on menu item.
 *
 * Delegates to validate_, handing it the QIIME 2 format spec builder. No sheet
 * argument is supplied, so validate_ decides which sheet to use
 * (NOTE(review): presumably the active sheet -- confirm in Validate.gs).
 */
function validateQiime2() {
validate_(getQiime2FormatSpec_);
};

function validateSrgd() {
Expand Down
2 changes: 1 addition & 1 deletion src/Benchmark.gs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ function runDatasetSizeBenchmarks() {
var runtime = row[j + 1];
if (runtime === "") {
// Depends on core Keemei API.
var report = validate_(getQiimeFormatSpec_, sheet);
var report = validate_(getQiime1FormatSpec_, sheet);

var numInvalidCells = Object.keys(report.validationResults).length;
var expectedNumInvalidCells = (parseInt(rowCount, 10) * numColumns) * errorProportion;
Expand Down
16 changes: 8 additions & 8 deletions src/QiimeFormat.gs → src/Qiime1Format.gs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function getQiimeFormatSpec_(sheetData) {
function getQiime1FormatSpec_(sheetData) {
var requiredHeaders = {
"#SampleID": [0, "first"],
"BarcodeSequence": [1, "second"],
Expand All @@ -7,9 +7,9 @@ function getQiimeFormatSpec_(sheetData) {
};

return {
format: "QIIME mapping file",
format: "QIIME 1 mapping file",
headerRowIdx: 0,
dataStartRowIdx: getQiimeDataStartRowIdx_(sheetData),
dataStartRowIdx: getQiime1DataStartRowIdx_(sheetData),
headerValidation: [
{
validator: findMissingValues_,
Expand All @@ -22,11 +22,11 @@ function getQiimeFormatSpec_(sheetData) {
{
// #SampleID is an invalid column header name, so we'll only check header names
// if they aren't required headers. Assume the required header names are valid.
validator: findInvalidQiimeColumns_,
validator: findInvalidQiime1Columns_,
args: [requiredHeaders]
},
{
validator: findMisplacedQiimeColumns_,
validator: findMisplacedQiime1Columns_,
args: [requiredHeaders]
},
{
Expand Down Expand Up @@ -121,7 +121,7 @@ function getPrimerValidators_() {
];
};

function getQiimeDataStartRowIdx_(sheetData) {
function getQiime1DataStartRowIdx_(sheetData) {
for (var i = 1; i < sheetData.length; i++) {
if (!startsWith_(sheetData[i][0], "#")) {
break;
Expand All @@ -130,7 +130,7 @@ function getQiimeDataStartRowIdx_(sheetData) {
return i;
};

function findInvalidQiimeColumns_(valueToPositions, ignoredValues) {
function findInvalidQiime1Columns_(valueToPositions, ignoredValues) {
var invalidCells = {};
var message = [
Utilities.formatString("Invalid column header name. Only alphanumeric and underscore characters are allowed. The first character must be a letter.")
Expand All @@ -153,7 +153,7 @@ function findInvalidQiimeColumns_(valueToPositions, ignoredValues) {
return invalidCells;
};

function findMisplacedQiimeColumns_(valueToPositions, requiredHeaders) {
function findMisplacedQiime1Columns_(valueToPositions, requiredHeaders) {
var invalidCells = {};
for (var value in valueToPositions) {
if (valueToPositions.hasOwnProperty(value)) {
Expand Down
75 changes: 75 additions & 0 deletions src/Qiime2Format.gs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
 * Build the validation format spec for a QIIME 2 mapping file (experimental).
 *
 * The first row is treated as the header and every following row as data.
 * The first column is treated as the identifier column, whatever its label is.
 *
 * @param {Array<Array>} sheetData - Sheet contents as rows of cell values;
 *     row 0 is the header row.
 * @return {Object|null} The format spec (format, headerRowIdx,
 *     dataStartRowIdx, headerValidation, columnValidation), or null after
 *     alerting the user when the sheet has fewer than two rows.
 */
function getQiime2FormatSpec_(sheetData) {
  // TODO: this isn't the best place to put this type of validation.
  // There isn't a hook yet to error if there are missing data rows
  // for a file format, so use an ad-hoc check for now.
  if (sheetData.length < 2) {
    var ui = SpreadsheetApp.getUi();
    var prompt = [
      "This sheet must have at least two rows in order to be validated. ",
      "The first row contains the header and subsequent rows contain data."
    ].join("");
    ui.alert("Missing data", prompt, ui.ButtonSet.OK);
    return null;
  }

  // Matches every character that is allowed in a label/identifier. A single
  // regex instance is deliberately shared by the header and ID column rules.
  var axisLabelRegex = /[^\/\\*<>?|$]/ig;

  // Whitespace rule; a fresh object is created for every place it is used.
  function whitespaceRule() {
    return {
      validator: findLeadingTrailingWhitespace_,
      args: []
    };
  }

  // Column labels and identifiers share the same four checks; only the
  // duplicate message and the label used in character errors differ.
  function axisRules(duplicateMessage, labelKind) {
    return [
      {
        validator: findDuplicates_,
        args: [duplicateMessage]
      },
      {
        validator: findInvalidCharacters_,
        args: [axisLabelRegex, "errors", labelKind]
      },
      {
        validator: findEmpty_,
        args: ["errors"]
      },
      whitespaceRule()
    ];
  }

  var spec = {
    format: "QIIME 2 mapping file",

    // TODO: update when blank lines and comments are supported
    headerRowIdx: 0,
    dataStartRowIdx: 1,

    headerValidation: axisRules("Duplicate column label", "column label"),
    columnValidation: {
      "default": [whitespaceRule()],
      columns: {}
    }
  };

  // TODO: update when blank lines and comments are supported
  var idLabel = sheetData[0][0];
  spec.columnValidation.columns[idLabel] =
      axisRules("Duplicate identifier", "identifier");

  return spec;
};
2 changes: 1 addition & 1 deletion src/SimulatedData.gs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ var ERROR_TRANSFORMS = [

/*
* Substitute first character with invalid character.
* $ is invalid across all QIIME mapping file columns.
* $ is invalid across all QIIME 1 mapping file columns.
* We replace a character instead of appending to keep
* barcodes the same length, otherwise additional cells
* could be marked invalid in rare cases. Replacing a
Expand Down
4 changes: 4 additions & 0 deletions src/Validate.gs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ function validate_(formatSpecFunction, sheet) {

var formatSpec = formatSpecFunction(sheetData);

if (!formatSpec) {
return {};
}

var report = {
format: formatSpec.format,
validationResults: mergeValidationResults_([
Expand Down

0 comments on commit e26d8f2

Please sign in to comment.