Skip to content

Commit 4af6882

Browse files
Refactor header renaming logic to address #1052, #1007 (#1058)
And updated test to match the improved renaming logic.
1 parent cf4bded commit 4af6882

File tree

2 files changed

+45
-52
lines changed

2 files changed

+45
-52
lines changed

Diff for: papaparse.js

+43-50
Original file line numberDiff line numberDiff line change
@@ -1480,61 +1480,14 @@ License: MIT
14801480
if (!input)
14811481
return returnable();
14821482

1483-
// Rename headers if there are duplicates
1484-
var firstLine;
1485-
if (config.header && !baseIndex)
1486-
{
1487-
firstLine = input.split(newline)[0];
1488-
var headers = firstLine.split(delim);
1489-
var separator = '_';
1490-
var headerMap = new Set();
1491-
var headerCount = {};
1492-
var duplicateHeaders = false;
1493-
1494-
// Using old-style 'for' loop to avoid prototype pollution that would be picked up with 'var j in headers'
1495-
for (var j = 0; j < headers.length; j++) {
1496-
var header = headers[j];
1497-
if (isFunction(config.transformHeader))
1498-
header = config.transformHeader(header, j);
1499-
var headerName = header;
1500-
1501-
var count = headerCount[header] || 0;
1502-
if (count > 0) {
1503-
duplicateHeaders = true;
1504-
headerName = header + separator + count;
1505-
// Initialise the variable if it hasn't been.
1506-
if (renamedHeaders === null) {
1507-
renamedHeaders = {};
1508-
}
1509-
}
1510-
headerCount[header] = count + 1;
1511-
// In case it already exists, we add more separators
1512-
while (headerMap.has(headerName)) {
1513-
headerName = headerName + separator + count;
1514-
}
1515-
headerMap.add(headerName);
1516-
if (count > 0) {
1517-
renamedHeaders[headerName] = header;
1518-
}
1519-
}
1520-
if (duplicateHeaders) {
1521-
var editedInput = input.split(newline);
1522-
editedInput[0] = Array.from(headerMap).join(delim);
1523-
input = editedInput.join(newline);
1524-
}
1525-
}
15261483
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
15271484
{
15281485
var rows = input.split(newline);
15291486
for (var i = 0; i < rows.length; i++)
15301487
{
15311488
row = rows[i];
1532-
// use firstline as row length may be changed due to duplicated headers
1533-
if (i === 0 && firstLine !== undefined) {
1534-
cursor += firstLine.length;
1535-
}else{
1536-
cursor += row.length;
1537-
}
1489+
cursor += row.length;
1490+
15381491
if (i !== rows.length - 1)
15391492
cursor += newline.length;
15401493
else if (ignoreLastRow)
@@ -1729,7 +1682,6 @@ License: MIT
17291682
break;
17301683
}
17311684

1732-
17331685
return finish();
17341686

17351687

@@ -1789,6 +1741,47 @@ License: MIT
17891741
/** Returns an object with the results, errors, and meta. */
17901742
function returnable(stopped)
17911743
{
1744+
if (config.header && !baseIndex && data.length)
1745+
{
1746+
const result = data[0];
1747+
const headerCount = {}; // To track the count of each base header
1748+
const usedHeaders = new Set(result); // To track used headers and avoid duplicates
1749+
let duplicateHeaders = false;
1750+
1751+
for (let i = 0; i < result.length; i++) {
1752+
let header = result[i];
1753+
if (isFunction(config.transformHeader))
1754+
header = config.transformHeader(header, i);
1755+
1756+
if (!headerCount[header]) {
1757+
headerCount[header] = 1;
1758+
result[i] = header;
1759+
} else {
1760+
let newHeader;
1761+
let suffixCount = headerCount[header];
1762+
1763+
// Find a unique new header
1764+
do {
1765+
newHeader = `${header}_${suffixCount}`;
1766+
suffixCount++;
1767+
} while (usedHeaders.has(newHeader));
1768+
1769+
usedHeaders.add(newHeader); // Mark this new Header as used
1770+
result[i] = newHeader;
1771+
headerCount[header]++;
1772+
duplicateHeaders = true;
1773+
if (renamedHeaders === null) {
1774+
renamedHeaders = {};
1775+
}
1776+
renamedHeaders[newHeader] = header;
1777+
}
1778+
1779+
usedHeaders.add(header); // Ensure the original header is marked as used
1780+
}
1781+
if (duplicateHeaders) {
1782+
console.warn('Duplicate headers found and renamed.');
1783+
}
1784+
}
17921785
return {
17931786
data: data,
17941787
errors: errors,

Diff for: tests/test-cases.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -618,10 +618,10 @@ var CORE_PARSER_TESTS = [
618618
input: 'c,c,c,c_1\n1,2,3,4',
619619
config: { header: true },
620620
expected: {
621-
data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
621+
data: [['c', 'c_2', 'c_3', 'c_1'], ['1', '2', '3', '4']],
622622
errors: [],
623623
meta: {
624-
renamedHeaders: {c_1: 'c', c_2: 'c'},
624+
renamedHeaders: {c_2: 'c', c_3: 'c'},
625625
cursor: 17
626626
}
627627
}

0 commit comments

Comments
 (0)