diff --git a/grobid-core/src/main/java/org/grobid/core/data/Table.java b/grobid-core/src/main/java/org/grobid/core/data/Table.java index e6cb20d351..26885bab8c 100644 --- a/grobid-core/src/main/java/org/grobid/core/data/Table.java +++ b/grobid-core/src/main/java/org/grobid/core/data/Table.java @@ -32,6 +32,7 @@ import nu.xom.Element; import nu.xom.Node; +import static org.grobid.core.document.TEIFormatter.isNewParagraph; import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement; import static org.grobid.core.document.xml.XmlBuilderUtils.addXmlId; import static org.grobid.core.document.xml.XmlBuilderUtils.textNode; @@ -185,6 +186,7 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form } if (StringUtils.isNotBlank(labeledNote)) { + Element p = teiElement("p"); TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FULLTEXT, labeledNote, noteLayoutTokens); List clusters = clusteror.cluster(); for (TaggingTokenCluster cluster : clusters) { @@ -193,7 +195,7 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form } MarkerType citationMarkerType = null; - if (markerTypes != null && markerTypes.size()>0) { + if (CollectionUtils.isNotEmpty(markerTypes)) { citationMarkerType = markerTypes.get(0); } @@ -210,30 +212,27 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form citationMarkerType); if (refNodes != null) { for (Node n : refNodes) { - noteNode.appendChild(n); + p.appendChild(n); } } } catch(Exception e) { LOGGER.warn("Problem when serializing TEI fragment for table note", e); } } else { - noteNode.appendChild(textNode(clusterContent)); + if (p.getChildCount() > 0 && isNewParagraph(clusterLabel, p)) { + noteNode.appendChild(p); + p = teiElement("p"); + } + p.appendChild(textNode(clusterContent)); } - if (noteNode != null && config.isWithSentenceSegmentation()) { + if (config.isWithSentenceSegmentation()) { // we need a sentence segmentation of the 
figure caption formatter.segmentIntoSentences(noteNode, this.noteLayoutTokens, config, doc.getLanguage(), doc.getPDFAnnotations()); } - - // enclose note content in a <p> element - if (noteNode != null) { - noteNode.setLocalName("p"); - - Element tabNote = XmlBuilderUtils.teiElement("note"); - tabNote.appendChild(noteNode); - - noteNode = tabNote; - } + } + if (p.getChildCount() > 0) { + noteNode.appendChild(p); } } else { noteNode = XmlBuilderUtils.teiElement("note", LayoutTokensUtil.normalizeText(note.toString()).trim());