Skip to content

Commit

Permalink
Handle missing prefix on "rdf:about" (#5)
Browse files Browse the repository at this point in the history
* Handle the fact that sometimes "rdf:about" is lacking the prefix.
* Fix for XML parsers that read empty namespace.
  • Loading branch information
StefanOltmann authored Jul 24, 2023
1 parent 937285f commit cca179c
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 77 deletions.
112 changes: 71 additions & 41 deletions src/commonMain/kotlin/com/ashampoo/xmp/impl/Utils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,19 @@ object Utils {
*/
const val UUID_LENGTH = 32 + UUID_SEGMENT_COUNT

const val HEX_RADIX = 16

private const val XML_NAME_LENGTH = 0x0100

/**
* table of XML name start chars (<= 0xFF)
*/
private val xmlNameStartChars = BooleanArray(0x0100)
private val xmlNameStartChars = BooleanArray(XML_NAME_LENGTH)

/**
* table of XML name chars (<= 0xFF)
*/
private val xmlNameChars = BooleanArray(0x0100)
private val xmlNameChars = BooleanArray(XML_NAME_LENGTH)

private val controlCharRegex = Regex("[\\p{Cntrl}]")

Expand All @@ -48,18 +52,16 @@ object Utils {
* normalization rules:
*
* * The primary subtag is lower case, the suggested practice of ISO 639.
* * All 2 letter secondary subtags are upper case, the suggested
* practice of ISO 3166.
* * All 2 letter secondary subtags are upper case, the suggested practice of ISO 3166.
* * All other subtags are lower case.
*
*
* @param value raw value
* @return Returns the normalized value.
*/
@kotlin.jvm.JvmStatic
fun normalizeLangValue(value: String): String {

// don't normalize x-default
/* Don't normalize x-default */
if (XMPConst.X_DEFAULT == value)
return value

Expand All @@ -80,12 +82,14 @@ object Utils {
/* Leave as is. */
}

else ->
else -> {

/* Convert second subtag to uppercase, all other to lowercase */
if (subTag != 2)
buffer.append(value[i].lowercaseChar())
else
buffer.append(value[i].uppercaseChar())
}
}
}

Expand All @@ -98,10 +102,11 @@ object Utils {
* * [qualName="value"] - An element in an array of structs, chosen by a field value.
* * [?qualName="value"] - An element in an array, chosen by a qualifier value.
*
* The value portion is a string quoted by ''' or '"'. The value may contain
* any character including a doubled quoting character. The value may be
* empty. *Note:* It is assumed that the expression is formal
* correct
* The value portion is a string quoted by ''' or '"'.
* The value may contain any character including a doubled quoting character.
* The value may be empty.
*
* *Note:* It is assumed that the expression is formally correct.
*
* @param selector the selector
* @return Returns an array where the first entry contains the name and the second the value.
Expand Down Expand Up @@ -149,12 +154,13 @@ object Utils {
* Check some requirements for an UUID:
*
* * Length of the UUID is 32
* * The Delimiter count is 4 and all the 4 delimiter are on their right position (8,13,18,23)
* The delimiter count is 4 and all 4 delimiters are at their expected positions (8, 13, 18, 23)
*
* @param uuid uuid to test
* @return `true` if this is a well-formed UUID, `false` if the UUID does not have the expected format
*/
@kotlin.jvm.JvmStatic
@Suppress("MagicNumber")
fun checkUUIDFormat(uuid: String?): Boolean {

var result = true
Expand All @@ -167,7 +173,9 @@ object Utils {
while (delimPos < uuid.length) {

if (uuid[delimPos] == '-') {

delimCnt++

result = result && (delimPos == 8 || delimPos == 13 || delimPos == 18 || delimPos == 23)
}

Expand All @@ -178,10 +186,10 @@ object Utils {
}

/**
* Simple check for valid XMLNames. Within ASCII range<br></br>
* ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br></br>
* are accepted, above all characters (which is not entirely
* correct according to the XML Spec.
* Simple check for valid XML names. Within the ASCII range
* ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]
* are accepted; above that range all characters are accepted
* (which is not entirely correct according to the XML Spec).
*
* @param name an XML Name
* @return Return `true` if the name is correct.
Expand Down Expand Up @@ -233,18 +241,21 @@ object Utils {
* @return Returns the value ready for XML output.
*/
@kotlin.jvm.JvmStatic
@Suppress("ComplexCondition", "kotlin:S3776")
fun escapeXML(value: String, forAttribute: Boolean, escapeWhitespaces: Boolean): String {

// quick check whether the value contains characters that need special treatment
var needsEscaping = false

for (index in 0 until value.length) {

val c = value[index]
val char = value[index]

val isControlChar = char == '\t' || char == '\n' || char == '\r'

if (
c == '<' || c == '>' || c == '&' || escapeWhitespaces &&
(c == '\t' || c == '\n' || c == '\r') || forAttribute && c == '"'
char == '<' || char == '>' || char == '&' ||
escapeWhitespaces && isControlChar || forAttribute && char == '"'
) {
needsEscaping = true
break
Expand All @@ -260,7 +271,9 @@ object Utils {
@Suppress("LoopWithTooManyJumpStatements")
for (char in value) {

if (!(escapeWhitespaces && (char == '\t' || char == '\n' || char == '\r'))) {
val isControlChar = char == '\t' || char == '\n' || char == '\r'

if (!(escapeWhitespaces && isControlChar)) {

when (char) {

Expand Down Expand Up @@ -295,7 +308,7 @@ object Utils {
// write control chars escaped,
// if there are others than tab, LF and CR the xml will become invalid.
buffer.append("&#x")
buffer.append(char.code.toString(16).uppercase())
buffer.append(char.code.toString(HEX_RADIX).uppercase())
buffer.append(';')
}
}
Expand All @@ -318,47 +331,64 @@ object Utils {
* All characters according to the XML Spec 1.1 are accepted:
* http://www.w3.org/TR/xml11/#NT-NameStartChar
*
* @param ch a character
* @param char a character
* @return Returns true if the character is a valid first char of an XML name.
*/
private fun isNameStartChar(ch: Char): Boolean =
ch.code <= 0xFF && xmlNameStartChars[ch.code] || ch.code >= 0x100 && ch.code <= 0x2FF ||
ch.code >= 0x370 && ch.code <= 0x37D || ch.code >= 0x37F && ch.code <= 0x1FFF ||
ch.code >= 0x200C && ch.code <= 0x200D || ch.code >= 0x2070 && ch.code <= 0x218F ||
ch.code >= 0x2C00 && ch.code <= 0x2FEF || ch.code >= 0x3001 && ch.code <= 0xD7FF ||
ch.code >= 0xF900 && ch.code <= 0xFDCF || ch.code >= 0xFDF0 && ch.code <= 0xFFFD ||
ch.code >= 0x10000 && ch.code <= 0xEFFFF
@Suppress("MagicNumber", "kotlin:S3776")
private fun isNameStartChar(char: Char): Boolean {

    val code = char.code

    /* Code points up to 0xFF are answered by the precomputed lookup table. */
    if (code <= 0xFF)
        return xmlNameStartChars[code]

    /*
     * Remaining NameStartChar ranges of the XML 1.1 specification that fit into
     * a single UTF-16 code unit.
     *
     * The range [#x10000-#xEFFFF] of the specification is intentionally not listed:
     * Char.code never exceeds 0xFFFF, so such a comparison could never be true.
     * Surrogate code units (0xD800-0xDFFF) fall outside every range below and are
     * therefore rejected, which means supplementary characters are not accepted.
     */
    return code in 0x100..0x2FF ||
        code in 0x370..0x37D ||
        code in 0x37F..0x1FFF ||
        code in 0x200C..0x200D ||
        code in 0x2070..0x218F ||
        code in 0x2C00..0x2FEF ||
        code in 0x3001..0xD7FF ||
        code in 0xF900..0xFDCF ||
        code in 0xFDF0..0xFFFD
}

/**
* Simple check if a character is a valid XML name char
* (every char except the first one), according to the XML Spec 1.1:
* http://www.w3.org/TR/xml11/#NT-NameChar
*
* @param ch a character
* @param char a character
* @return Returns true if the character is a valid char of an XML name.
*/
private fun isNameChar(ch: Char): Boolean =
ch.code <= 0xFF && xmlNameChars[ch.code] || isNameStartChar(ch) ||
ch.code >= 0x300 && ch.code <= 0x36F || ch.code >= 0x203F && ch.code <= 0x2040
@Suppress("MagicNumber")
private fun isNameChar(char: Char): Boolean {

    val code = char.code

    return when {

        /* Code points up to 0xFF that the precomputed name char table accepts. */
        code <= 0xFF && xmlNameChars[code] -> true

        /* Every valid name start char is also a valid name char. */
        isNameStartChar(char) -> true

        /* Additional ranges only allowed after the first character. */
        else -> code in 0x300..0x36F || code in 0x203F..0x2040
    }
}

/**
* Initializes the char tables for the chars 0x00-0xFF for later use,
* according to the XML 1.1 specification at http://www.w3.org/TR/xml11
*
* Every code point from 0 up to (but excluding) the size of the name char table
* is visited once and both lookup tables are filled at that index:
*
* * Name start chars: ':', 'A'-'Z', '_', 'a'-'z', 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF.
* * Name chars: every name start char plus '-', '.', '0'-'9' and 0xB7.
*/
@Suppress("MagicNumber")
private fun initCharTables() {

var ch = 0.toChar()
var char = 0.toChar()

while (ch < xmlNameChars.size.toChar()) {
while (char < xmlNameChars.size.toChar()) {

xmlNameStartChars[ch.code] = ch == ':' || 'A' <= ch && ch <= 'Z' || ch == '_' ||
'a' <= ch && ch <= 'z' || 0xC0 <= ch.code && ch.code <= 0xD6 ||
0xD8 <= ch.code && ch.code <= 0xF6 || 0xF8 <= ch.code && ch.code <= 0xFF
xmlNameStartChars[char.code] = char == ':' ||
'A' <= char && char <= 'Z' ||
char == '_' ||
'a' <= char && char <= 'z' ||
0xC0 <= char.code && char.code <= 0xD6 ||
0xD8 <= char.code && char.code <= 0xF6 ||
0xF8 <= char.code && char.code <= 0xFF

xmlNameChars[ch.code] = xmlNameStartChars[ch.code] || ch == '-' || ch == '.' ||
'0' <= ch && ch <= '9' || ch.code == 0xB7
xmlNameChars[char.code] = xmlNameStartChars[char.code] ||
char == '-' ||
char == '.' ||
'0' <= char && char <= '9' ||
char.code == 0xB7

ch++
char++
}
}
}
26 changes: 16 additions & 10 deletions src/commonMain/kotlin/com/ashampoo/xmp/impl/XMPRDFParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ internal object XMPRDFParser {
XMPError.BADRDF
)

// Fix a legacy DC namespace
/* Fix a legacy DC namespace */
if (XMPConst.NS_DC_DEPRECATED == namespace)
namespace = XMPConst.NS_DC

Expand Down Expand Up @@ -1107,20 +1107,26 @@ internal object XMPRDFParser {
*/
private fun getRDFTermKind(node: Node): Int {

var namespace = when (node) {
val namespace = when (node) {
is Element -> node.namespaceURI
is Attr -> node.namespaceURI
else -> throw XMPException("Unknown Node ${node.nodeType}", XMPError.BADXMP)
}

if (namespace == null &&
/*
* This code handles the fact that sometimes "rdf:about" and "rdf:ID"
* come without the prefix.
*
* Note that the check for the namespace must be for NULL or EMPTY, because depending
* on the used XML parser implementation the resulting namespace may be an empty string.
*/
@Suppress("ComplexCondition")
val mustBeRdfNamespace = namespace.isNullOrEmpty() &&
("about" == node.nodeName || "ID" == node.nodeName) &&
node is Attr && XMPConst.NS_RDF == node.ownerElement?.namespaceURI
) {
namespace = XMPConst.NS_RDF
}
node is Attr &&
XMPConst.NS_RDF == node.ownerElement?.namespaceURI

if (namespace == XMPConst.NS_RDF) {
if (mustBeRdfNamespace || namespace == XMPConst.NS_RDF) {

when (node.nodeName) {

Expand All @@ -1133,7 +1139,7 @@ internal object XMPRDFParser {
"rdf:Description" ->
return RDFTERM_DESCRIPTION

"rdf:about" ->
"rdf:about", "about" ->
return RDFTERM_ABOUT

"resource" ->
Expand All @@ -1142,7 +1148,7 @@ internal object XMPRDFParser {
"rdf:RDF" ->
return RDFTERM_RDF

"ID" ->
"rdf:ID", "ID" ->
return RDFTERM_ID

"nodeID" ->
Expand Down
52 changes: 26 additions & 26 deletions src/commonTest/resources/com/ashampoo/xmp/sample_1.xmp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image::ExifTool 12.27'>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>

<rdf:Description rdf:about=''
<rdf:Description about=''
xmlns:darktable='http://darktable.sf.net/'>
<darktable:auto_presets_applied>1</darktable:auto_presets_applied>
<darktable:history>
Expand Down Expand Up @@ -181,28 +181,28 @@
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end='w'?>
























<?xpacket end='w'?>

0 comments on commit cca179c

Please sign in to comment.