From bc04bc7d94427029e85002ce9b9eb1d4551a2505 Mon Sep 17 00:00:00 2001 From: christiano Date: Sun, 13 Oct 2024 01:47:49 -0400 Subject: [PATCH 01/20] adding quality control tool --- js/source/entries/qualitycontrol.js | 493 ++++++++++++++++ lib/SGN/Controller/AJAX/QualityControl.pm | 536 +++++------------- lib/SGN/Controller/QualityControl.pm | 22 + .../dataset_quality_control.mas | 188 ++++++ 4 files changed, 837 insertions(+), 402 deletions(-) create mode 100755 js/source/entries/qualitycontrol.js create mode 100755 lib/SGN/Controller/QualityControl.pm create mode 100755 mason/tools/qualityControl/dataset_quality_control.mas diff --git a/js/source/entries/qualitycontrol.js b/js/source/entries/qualitycontrol.js new file mode 100755 index 0000000000..269879ab32 --- /dev/null +++ b/js/source/entries/qualitycontrol.js @@ -0,0 +1,493 @@ +import '../legacy/jquery.js'; +import '../legacy/d3/d3v4Min.js'; + +var version = '0.01'; + +export function init(main_div) { + if (!(main_div instanceof HTMLElement)) { + main_div = document.getElementById(main_div.startsWith("#") ? 
main_div.slice(1) : main_div); + } + + var dataset_id; + get_select_box("datasets", "qc_dataset_select", { "checkbox_name": "qc_dataset_select_checkbox" }); + + jQuery('#qc_analysis_prepare_button').removeClass('active').addClass('inactive'); + + $(document).on('click', 'input[name=select_engine]', function (e) { + get_model_string(); + }); + + $('#qc_analysis_prepare_button').click(function () { + dataset_id = get_dataset_id(); + if (dataset_id != false) { + $.ajax({ + url: '/ajax/qualitycontrol/prepare', + data: { 'dataset_id': get_dataset_id() }, + success: function (r) { + if (r.error) { + alert(r.error); + } else { + + if (r.tempfile) { + $('#tempfile').html(r.tempfile); + } + updateBoxplot(); + } + }, + error: function (jqXHR, textStatus, errorThrown) { + console.error('AJAX request failed: ', textStatus, errorThrown); + alert('Error in AJAX request: ' + errorThrown); + } + }); + } + }); + $(function() { + var handle = $("#custom-handle"); + $("#outliers_range").slider({ + orientation: "horizontal", + range: "min", + max: 10, + min: 0, + value: 1.5, + step: 0.1, + create: function() { + handle.text($(this).slider("value")); + }, + slide: function(event, ui) { + handle.text(ui.value); + updateBoxplot(); // Update the boxplot when the slider value changes + } + }); + }); + + $('#qc_analysis_prepare_button').click(function () { + dataset_id = get_dataset_id(); + if (dataset_id != false) { + $.ajax({ + url: '/ajax/qualitycontrol/prepare', + data: { 'dataset_id': dataset_id }, // No need to call get_dataset_id() again + success: function (r) { + if (r.error) { + alert(r.error); + } else { + console.log("AJAX response:", r); // Log full response for debugging + + if (r.selected_variable) { + console.log("Selected Variable HTML:", r.selected_variable); // Debugging log + populateTraitDropdown(r.selected_variable); // Populate dropdown with traits + } + if (r.tempfile) { + $('#tempfile').html(r.tempfile); + } + } + }, + error: function (jqXHR, textStatus, 
errorThrown) { + alert('Error in AJAX request: ' + errorThrown); + } + }); + } + }); + + let outliers = []; + $('#selected_variable').on('change', function () { + var trait_selected = $('#trait_select').val(); // Get the selected trait from the dropdown + + if (!trait_selected) { + $('#trait_histogram').html('Please select a trait to see the boxplot!'); + return; + } + + // Fetch tempfile value + var tempfile = $('#tempfile').html(); + + // Check if tempfile is not empty + if (!tempfile || tempfile.trim() === '') { + return; // Exit if tempfile is empty + } + + var outlierMultiplier = $('#outliers_range').slider("value"); + if (!outlierMultiplier || isNaN(outlierMultiplier)) { + outlierMultiplier = 1.5; // Set default value + } + + // Proceed with the AJAX call for grabbing data + $.ajax({ + url: '/ajax/qualitycontrol/grabdata', + data: { 'file': tempfile }, + success: function (r) { + $('#working_modal').modal("hide"); + const result = drawBoxplot(r.data, trait_selected, outlierMultiplier ); + outliers = result.outliers; // Extract the outliers + console.log(outliers); + populateOutlierTable(outliers); + }, + error: function (e) { + alert('Error during AJAX request!'); + } + }); + + }); + + $('#store_outliers_button').click(function () { + $.ajax({ + url: '/ajax/qualitycontrol/storeoutliers', + method: "POST", + data: {"outliers": JSON.stringify(uniqueOutliers), + }, + success: function(response) { + alert('Outliers saved successfully!'); // Success message + console.log(response); // Log server response + }, + error: function(xhr, status, error) { + alert('Error saving outliers: ' + error); // Error message + console.log(xhr, status); // Log error details + } + }); + }); + + +} + +let currentPage = 1; +let uniqueOutliers; +const rowsPerPage = 10; + + +function populateOutlierTable(outliers) { + const tableBody = document.querySelector("#outlier_table tbody"); + + // Clear the current table body + tableBody.innerHTML = ''; + + // Ensure outliers is an array to 
avoid errors + if (!Array.isArray(outliers)) { + console.warn("Expected an array of outliers, but received:", outliers); + return; + } + + // Create a Set to track unique identifiers for filtering duplicates + const uniqueIdentifiers = new Set(); + uniqueOutliers = []; // Store unique outliers for pagination + + // Insert rows with outlier data + outliers.forEach(outlier => { + // Create a unique identifier for the outlier + const identifier = `${outlier.locationDbId}-${outlier.plotName}`; + + // Check if this identifier has already been added to the Set + if (!uniqueIdentifiers.has(identifier)) { + uniqueIdentifiers.add(identifier); // Mark this identifier as seen + uniqueOutliers.push(outlier); // Store unique outliers + + const row = tableBody.insertRow(); + const cell1 = row.insertCell(0); + const cell2 = row.insertCell(1); + const cell3 = row.insertCell(2); + const cell4 = row.insertCell(3); + const cell5 = row.insertCell(4); + + cell1.innerHTML = outlier.locationDbId || 'N/A'; + cell2.innerHTML = outlier.locationName || 'N/A'; + cell3.innerHTML = outlier.plotName || 'N/A'; + cell4.innerHTML = outlier.trait || 'N/A'; + cell5.innerHTML = outlier.value || 'N/A'; + } + }); + + // Update pagination controls with the count of unique outliers + updatePaginationControls(uniqueOutliers.length); +} + + +function updatePaginationControls(totalItems) { + const paginationControls = document.getElementById("pagination_controls"); + paginationControls.innerHTML = ''; // Clear previous controls + + const totalPages = Math.ceil(totalItems / rowsPerPage); + + // Create Previous button + const prevButton = document.createElement('button'); + prevButton.innerHTML = 'Previous'; + prevButton.disabled = currentPage === 1; // Disable if on the first page + prevButton.addEventListener('click', () => { + if (currentPage > 1) { + currentPage--; + populateOutlierTable(outliersGlobal); // Use the global outliers array + } + }); + paginationControls.appendChild(prevButton); + + // Display 
page numbers + const pageButtons = []; + if (totalPages > 1) { + for (let i = 1; i <= Math.min(totalPages, 3); i++) { + const pageButton = document.createElement('button'); + pageButton.innerHTML = i; + pageButton.classList.add('page-button'); + if (i === currentPage) { + pageButton.disabled = true; // Disable the current page button + } + pageButton.addEventListener('click', () => { + currentPage = i; + populateOutlierTable(outliersGlobal); // Use the global outliers array + }); + paginationControls.appendChild(pageButton); + pageButtons.push(pageButton); + } + + // Check if there are more pages and add ellipsis if needed + if (totalPages > 3) { + const ellipsis = document.createElement('span'); + ellipsis.innerHTML = '...'; + paginationControls.appendChild(ellipsis); + + // Last page button + const lastPageButton = document.createElement('button'); + lastPageButton.innerHTML = totalPages; + lastPageButton.addEventListener('click', () => { + currentPage = totalPages; + populateOutlierTable(outliersGlobal); // Use the global outliers array + }); + paginationControls.appendChild(lastPageButton); + } + } +} + + +function updateBoxplot() { + // Fetch the selected trait and tempfile + var trait_selected = $('#trait_select').val(); + var tempfile = $('#tempfile').html(); + + if (!trait_selected || !tempfile || tempfile.trim() === '') { + console.log("Either trait or tempfile is missing!"); + return; + } + + const outlierMultiplier = $("#outliers_range").slider("value") || 1.5; + + // Perform an AJAX call to fetch the actual data + $.ajax({ + url: '/ajax/qualitycontrol/grabdata', // Adjust this URL if needed + data: { 'file': tempfile, 'trait': trait_selected }, // Send both tempfile and trait + success: function (response) { + + const boxplotData = response.data || []; // Adjust based on the actual response structure + drawBoxplot(boxplotData, trait_selected, outlierMultiplier); + const result = drawBoxplot(boxplotData, trait_selected, outlierMultiplier); + + const 
outliers = result.outliers || []; + console.log("Outliers identified:", outliers); + + populateOutlierTable(outliers); + + }, + + error: function (jqXHR, textStatus, errorThrown) { + console.error('AJAX request failed: ', textStatus, errorThrown); + alert('Error fetching data for boxplot: ' + errorThrown); + } + }); +} + + + + +function drawBoxplot(data, selected_trait, outlierMultiplier) { + const groupedData = d3.nest() + .key(d => d.locationDbId) + .entries(data); + if (outlierMultiplier === null){outlierMultiplier = 1.5} + let allOutliers = []; // Collect all outliers here + + const boxplotData = groupedData.map(group => { + const values = group.values.map(d => d[selected_trait]).filter(d => d != null); + + if (values.length < 4) { + console.warn(`Not enough valid values to calculate quartiles for group ${group.key}. Values:`, values); + return { + locationDbId: group.key, + values: [], + outliers: [], + q1: null, + q3: null, + iqr: null, + lowerBound: null, + upperBound: null + }; + } + + values.sort(d3.ascending); + const q1 = d3.quantile(values, 0.25); + const q3 = d3.quantile(values, 0.75); + const iqr = Math.max(0, q3 - q1); + const lowerBound = q1 - outlierMultiplier * iqr; + const upperBound = q3 + outlierMultiplier * iqr; + + const outliers = values.filter(v => v < lowerBound || v > upperBound); + + // Collect outlier data with relevant information + if (outliers.length > 0) { + const groupOutliers = outliers.map(value => ({ + locationDbId: group.key, + locationName: group.values.find(v => v[selected_trait] === value).locationName, + plotName: group.values.find(v => v[selected_trait] === value).observationUnitName, + trait: selected_trait, + value: value + })); + allOutliers = allOutliers.concat(groupOutliers); + } + + return { + locationDbId: group.key, + values: values, + min: d3.min(values), + q1: q1, + median: d3.median(values), + q3: q3, + max: d3.max(values), + lowerBound: lowerBound, + upperBound: upperBound, + outliers: outliers + }; + }); + + 
// Drawing the boxplot + const margin = {top: 10, right: 30, bottom: 50, left: 40}, + width = 800 - margin.left - margin.right, + height = 400 - margin.top - margin.bottom; + + d3.select("#trait_histogram").select("svg").remove(); + + const svg = d3.select("#trait_histogram") + .append("svg") + .attr("width", width + margin.left + margin.right) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + const x = d3.scaleLinear() + .domain([d3.min(boxplotData, d => d.min), d3.max(boxplotData, d => d.max)]) + .range([0, width]); + + const y = d3.scaleBand() + .domain(boxplotData.map(d => d.locationDbId)) + .range([0, height]) + .padding(0.1); + + svg.append("g") + .attr("transform", "translate(0," + height + ")") + .call(d3.axisBottom(x)); + + svg.append("g") + .call(d3.axisLeft(y)); + + const boxWidth = y.bandwidth() / 2; // Width of the box + + const boxplotGroup = svg.selectAll(".boxplot") + .data(boxplotData) + .enter().append("g") + .attr("class", "boxplot") + .attr("transform", d => "translate(0," + y(d.locationDbId) + ")"); + + // Draw boxes + boxplotGroup.append("rect") + .attr("x", d => { + if (d.q1 !== null && d.q3 !== null) { + return x(d.q1); + } + return 0; // Default to 0 if quartiles are not valid + }) + .attr("y", boxWidth / 2) + .attr("height", boxWidth) + .attr("width", d => { + if (d.q1 !== null && d.q3 !== null) { + const width = x(d.q3) - x(d.q1); + return Math.max(0, width); // Ensure no negative width + } + return 0; // Default to 0 if quartiles are not valid + }) + .attr("fill", "lightgray"); + + // Draw median line + boxplotGroup.append("line") + .attr("x1", d => x(d.median)) + .attr("x2", d => x(d.median)) + .attr("y1", 0) + .attr("y2", boxWidth) + .attr("stroke", "black"); + + // Draw whiskers + boxplotGroup.append("line") + .attr("x1", d => x(d.min)) + .attr("x2", d => x(d.max)) + .attr("y1", boxWidth / 2) + .attr("y2", boxWidth / 2) + .attr("stroke", 
"black"); + + // Draw individual points with jitter + const pointGroup = svg.selectAll(".point") + .data(boxplotData) + .enter().append("g") + .attr("class", "point") + .attr("transform", d => "translate(0," + y(d.locationDbId) + ")"); + + // Draw circles for all points (including outliers) + pointGroup.selectAll("circle") + .data(d => d.values.map(value => ({ + value: value, + isOutlier: value < d.lowerBound || value > d.upperBound // Check for outliers + }))) + .enter().append("circle") + .attr("cx", d => x(d.value)) + .attr("cy", d => boxWidth / 2 + (Math.random() - 0.5) * 10) // Add jitter + .attr("r", 3) // Set a fixed radius for points + .attr("fill", d => d.isOutlier ? "#d9534f" : "#5cb85c" ); // Outliers are red, non-outliers are blue + + // Return both the boxplot data and the outliers + return { + boxplotData: boxplotData, + outliers: allOutliers + }; + +} + + + +function get_dataset_id() { + var selected_datasets = []; + jQuery('input[name="qc_dataset_select_checkbox"]:checked').each(function () { + selected_datasets.push(jQuery(this).val()); + }); + if (selected_datasets.length < 1) { + alert('Please select at least one dataset!'); + return false; + } else if (selected_datasets.length > 1) { + alert('Please select only one dataset!'); + return false; + } else { + var dataset_id = selected_datasets[0]; + return dataset_id; + } +} + +function populateTraitDropdown(selectedVariableHTML) { + var traitSelect = $('#trait_select'); // Dropdown element + traitSelect.empty(); // Clear previous options + + // Add default "Select a trait" option + traitSelect.append(''); + + // Create a temporary div to hold the HTML string + var tempDiv = $('
').html(selectedVariableHTML); + + // Extract values from the checkboxes in the HTML + tempDiv.find('input.trait_box').each(function () { + var traitValue = $(this).val(); + traitSelect.append($('
+ +
+
+
+

Selecting this box sets a fixed minimum and maximum for all locations.

+
+
+ + + + + +
+
+
+
+ +
+
+
+
+
+
+
-

Raw Data Available

- - - - - - - - - - - - - - -
Location IDLocation NameTrial NameMinMaxMeanSDCV
-
-

Data After Filtering

- - - - - - - - - - - - - - -
Location IDLocation NameTrial NameMinMaxMeanSDCV
-
+

Raw Data Available

+ + + + + + + + + + + + + + +
Location IDLocation NameTrial NameMinMaxMeanSDCV
-
- + +
+

Data After Filtering

+ + + + + + + + + + + + + + +
Location IDLocation NameTrial NameMinMaxMeanSDCV
+
+
+
+ +
+ + + <&| /util/workflow.mas:step, title=> "Other Traits as Outlier" &> + <& /page/page_title.mas, title=>"Set Plot as Outlier for Other Traits" &> + +
+

This is an option to set selected plots as outliers for other traits as well.

+

Click 'Select Other Traits' to see the traits available in your dataset.

+

If you want to skip this step, please click the 'Next step' button.

+ +
+
+
+
+
+ +
+ <&| /util/workflow.mas:step, title=> "Save Outliers" &> @@ -202,3 +232,4 @@ var qualitycontrol = window.jsMod['qualitycontrol'].init("qualitycontrol"); }); + diff --git a/mason/tools/qualityControl/validated_trials.mas b/mason/tools/qualityControl/validated_trials.mas index a6b286cd6d..94deece302 100644 --- a/mason/tools/qualityControl/validated_trials.mas +++ b/mason/tools/qualityControl/validated_trials.mas @@ -2,21 +2,24 @@ -<& /util/import_javascript.mas, classes => ['jquery', 'CXGN.BreedersToolbox.HTMLSelect'], entries => ['validatedtrials' ] &> +<& /util/import_javascript.mas, classes => ['jquery', 'CXGN.Login','CXGN.BreedersToolbox.HTMLSelect'], entries => ['validatedtrials' ] &> - + + + + <& /page/page_title.mas, title=>"Validated Trials" &> -

This table displays all trials that have been filtered and validated using the Quality Control Tool.

+

This table presents all trials that have been thoroughly filtered and validated through the Quality Control Tool.


@@ -28,6 +31,7 @@ + @@ -57,13 +61,486 @@ +
+
+
+ +<&| /page/info_section.mas, title=>"Statistcs", collapsible=>1, collapsed=>1 &> + + +
+
+
+
Select Project Name Project Location Trait Name
+ + + + + + + + + + + + + + + +
AccessionTrialsObservationsTraitAverageSDMinMax
+
+ + + + + +<&| /page/info_section.mas, title=>"Bar Plot", collapsible=>1, collapsed=>1 &> + + + + +
+ + + + + +
+ + + + + + +
+ +
+ +
+ + +
+ + + + + + From 803b314903fb90b74034b54ba0039f95f24e70fe Mon Sep 17 00:00:00 2001 From: Chris Simoes Date: Thu, 9 Jan 2025 01:51:06 -0500 Subject: [PATCH 16/20] making query more efficient --- lib/SGN/Controller/AJAX/QualityControl.pm | 5 ++- lib/SGN/Controller/AJAX/ValidatedTrials.pm | 46 +++------------------- 2 files changed, 9 insertions(+), 42 deletions(-) diff --git a/lib/SGN/Controller/AJAX/QualityControl.pm b/lib/SGN/Controller/AJAX/QualityControl.pm index a65ec3e68c..f3c0b95c21 100644 --- a/lib/SGN/Controller/AJAX/QualityControl.pm +++ b/lib/SGN/Controller/AJAX/QualityControl.pm @@ -118,8 +118,9 @@ sub extract_trait_data :Path('/ajax/qualitycontrol/grabdata') Args(0) { # Format the unique project names for the SQL query + my $trait_raw = $trait; $trait =~ s/\|.*//; - my $trait_ilike = $trait . '%'; + my $trait_ilike = $trait; my $project_names = join(", ", map { "'$_'" } keys %unique_names); @@ -128,7 +129,7 @@ sub extract_trait_data :Path('/ajax/qualitycontrol/grabdata') Args(0) { join project on project.project_id = projectprop.project_id where projectprop.type_id = (select cvterm_id from cvterm where cvterm."name" = 'validated_phenotype') and project.name in ($project_names) - and projectprop.value ilike '$trait_ilike' + and projectprop.value = '$trait_raw' group by project."name"; }; diff --git a/lib/SGN/Controller/AJAX/ValidatedTrials.pm b/lib/SGN/Controller/AJAX/ValidatedTrials.pm index 74c1558920..dfb52cba61 100644 --- a/lib/SGN/Controller/AJAX/ValidatedTrials.pm +++ b/lib/SGN/Controller/AJAX/ValidatedTrials.pm @@ -153,49 +153,15 @@ sub get_phenotype :Path('/ajax/validatedtrials/getphenotype') Args(0){ my @data; foreach my $project (@$projects_names) { - my $trial_name = $project->{name}; # Extract 'name' (trial) - my $trait_name = $project->{validated_trait}; # Extract 'validated_trait' (trait) + my $trial_name = $project->{name}; + my $trait_name = $project->{validated_trait}; # Skip if either trial or trait is missing next unless $trial_name && 
$trait_name; - # Step 1: Get outlier names for this trial–trait pair - my $sql_outliers = " - select s.uniquename from phenotypeprop p - join nd_experiment_phenotype nep on nep.phenotype_id = p.phenotype_id - join phenotype p2 on p2.phenotype_id = nep.phenotype_id - join cvterm on p2.cvalue_id = cvterm.cvterm_id - join nd_experiment_project nep2 on nep2.nd_experiment_id = nep.nd_experiment_id - join nd_experiment_stock nes on nes.nd_experiment_id = nep.nd_experiment_id - join stock s on s.stock_id = nes.stock_id - join project p3 on p3.project_id = nep2.project_id - where p3.name = ? - and cvterm.name = ? - group by s.uniquename; - "; - - my @outlier_names; - eval { - my $sth = $dbh->prepare($sql_outliers); - $sth->execute($trial_name, $trait_name); - while (my ($name) = $sth->fetchrow_array) { - push @outlier_names, $name; - } - }; - - # Handle errors - if ($@) { - warn "Error fetching outliers for trial '$trial_name' and trait '$trait_name': $@"; - next; - } - - # Flatten outlier names - my $outliers_sql = @outlier_names ? join(", ", ("?") x @outlier_names) : 'NULL'; - - # Step 2: Fetch phenotype data for this trial–trait pair, excluding outliers + # Fetch phenotype data for this trial–trait pair, excluding outliers my $sql_phenotypes = " - select p.name as location_name, s.uniquename as plot_name, s2.uniquename as accession, - cvterm.name as trait, phenotype.value + select p.name as location_name, s.uniquename as plot_name, s2.uniquename as accession, cvterm.name as trait, phenotype.value from phenotype join nd_experiment_phenotype nep ON nep.phenotype_id = phenotype.phenotype_id join nd_experiment_project nep2 on nep2.nd_experiment_id = nep.nd_experiment_id @@ -208,12 +174,12 @@ sub get_phenotype :Path('/ajax/validatedtrials/getphenotype') Args(0){ join stock s2 on s2.stock_id = sr.object_id where p.name = ? and cvterm.name = ? 
- and s.uniquename not in ($outliers_sql); + and phenotype.phenotype_id not in ( select phenotype_id from phenotypeprop p3 WHERE p3.type_id = ( select cvterm_id from cvterm where name = 'phenotype_outlier')); "; eval { my $sth = $dbh->prepare($sql_phenotypes); - $sth->execute($trial_name, $trait_name, @outlier_names); + $sth->execute($trial_name, $trait_name); while (my ($location, $plot, $accession, $trait, $value) = $sth->fetchrow_array) { push @data, { From 95313432a7501a306e8eb1f42ba6aa4ee6714370 Mon Sep 17 00:00:00 2001 From: Chris Simoes Date: Thu, 9 Jan 2025 14:56:06 -0500 Subject: [PATCH 17/20] adding graphics to validatedview --- .../tools/qualityControl/validated_trials.mas | 198 ++++++++++++++++-- 1 file changed, 181 insertions(+), 17 deletions(-) diff --git a/mason/tools/qualityControl/validated_trials.mas b/mason/tools/qualityControl/validated_trials.mas index 94deece302..a305a9289f 100644 --- a/mason/tools/qualityControl/validated_trials.mas +++ b/mason/tools/qualityControl/validated_trials.mas @@ -97,7 +97,7 @@ -<&| /page/info_section.mas, title=>"Bar Plot", collapsible=>1, collapsed=>1 &> +<&| /page/info_section.mas, title=>"Graphics", collapsible=>1, collapsed=>1 &> Show bottom 25% -

- - - +
+
- - -