Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -2317,9 +2317,19 @@ class PartialEvaluator {
return;
}
// Other marked content types aren't supported yet.
let props = null;
if (args[1] instanceof Dict) {
const lang = args[1].get("Lang");
if (typeof lang === "string") {
props = Object.create(null);
props.lang = stringToPDFString(lang);
}
}

args = [
args[0].name,
args[1] instanceof Dict ? args[1].get("MCID") : null,
props,
];

break;
Expand Down Expand Up @@ -3505,15 +3515,21 @@ class PartialEvaluator {
markedContentData.level++;

let mcid = null;
let itemLang = null;
if (args[1] instanceof Dict) {
mcid = args[1].get("MCID");
const langString = args[1].get("Lang");
if (typeof langString === "string") {
itemLang = stringToPDFString(langString);
}
}
textContent.items.push({
type: "beginMarkedContentProps",
id: Number.isInteger(mcid)
? `${self.idFactory.getPageObjId()}_mc${mcid}`
: null,
tag: args[0] instanceof Name ? args[0].name : null,
lang: itemLang,
});
}
break;
Expand Down
2 changes: 2 additions & 0 deletions src/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,8 @@ class PDFDocumentProxy {
* 'beginMarkedContentProps', or 'endMarkedContent'.
* @property {string|null} id - The marked content identifier, or `null` when
* the marked content has no integer MCID. Only used for type
* 'beginMarkedContentProps'.
* @property {string|null} tag - The marked content tag.
* @property {string|null} lang - The lang attribute for the marked content.
*/

/**
Expand Down
3 changes: 3 additions & 0 deletions src/display/text_layer.js
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@ class TextLayer {
if (item.id) {
this.#container.setAttribute("id", `${item.id}`);
}
if (item.lang) {
this.#container.setAttribute("lang", item.lang);
}
parent.append(this.#container);
} else if (item.type === "endMarkedContent") {
this.#container = this.#container.parentNode;
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,7 @@
!bug1937438_af_from_latex.pdf
!bug1937438_from_word.pdf
!bug1937438_mml_from_latex.pdf
!marked_content_lang.pdf
!bug1997343.pdf
!doc_1_3_pages.pdf
!doc_2_3_pages.pdf
Expand Down
Binary file added test/pdfs/marked_content_lang.pdf
Binary file not shown.
17 changes: 17 additions & 0 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -4488,6 +4488,23 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});

// Checks that the `Lang` entry of a marked-content property dictionary is
// forwarded as `props.lang`, the third argument of the
// `beginMarkedContentProps` operator.
it("gets operatorList, with marked content lang", async function () {
  const loadingTask = getDocument(
    buildGetDocumentParams("marked_content_lang.pdf")
  );
  const pdfDoc = await loadingTask.promise;
  const pdfPage = await pdfDoc.getPage(1);
  const opList = await pdfPage.getOperatorList({
    annotationMode: AnnotationMode.DISABLE,
  });

  // First marked-content sequence in the test file.
  expect(opList.fnArray[0]).toEqual(OPS.beginMarkedContentProps);
  expect(opList.argsArray[0][0]).toEqual("P");
  expect(opList.argsArray[0][2]?.lang).toEqual("en-US");

  // A later marked-content sequence in the same file.
  expect(opList.fnArray[10]).toEqual(OPS.beginMarkedContentProps);
  expect(opList.argsArray[10][0]).toEqual("P");
  expect(opList.argsArray[10][2]?.lang).toEqual("es-ES");

  // Release the document, matching the clean-up done by the neighbouring
  // tests in this file.
  await loadingTask.destroy();
});

it("gets operatorList, with page resources containing corrupt /CCITTFaxDecode data", async function () {
const loadingTask = getDocument(
buildGetDocumentParams("poppler-90-0-fuzzed.pdf")
Expand Down
25 changes: 25 additions & 0 deletions test/unit/text_layer_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,29 @@ describe("textLayer", function () {

await loadingTask.destroy();
});

// Checks that the text layer copies the marked-content `lang` value onto the
// matching container element as a `lang` attribute.
it("handles lang attribute for marked content", async function () {
  if (isNodeJS) {
    pending("document.createElement is not supported in Node.js.");
  }
  const loadingTask = getDocument(
    buildGetDocumentParams("marked_content_lang.pdf")
  );
  const pdfDocument = await loadingTask.promise;
  const page = await pdfDocument.getPage(1);

  const container = document.createElement("div");
  const textLayer = new TextLayer({
    textContentSource: page.streamTextContent({
      // Marked-content items are only emitted when explicitly requested.
      includeMarkedContent: true,
    }),
    container,
    viewport: page.getViewport({ scale: 1 }),
  });
  await textLayer.render();

  // The element id is derived from the page object id and the MCID.
  const span = container.querySelector("#p17R_mc1");
  expect(span.getAttribute("lang")).toEqual("es-ES");
  expect(span.textContent).toEqual("Esto es español");

  // Release the document, matching the clean-up done by the preceding test
  // in this file.
  await loadingTask.destroy();
});
});