From 534ce3c4d1a2024dda21e3e6411a717896d455fe Mon Sep 17 00:00:00 2001 From: OwenSanzas Date: Tue, 27 Jan 2026 11:40:18 +0000 Subject: [PATCH 1/3] Fix input validation in XMLTokener.unescapeEntity() Fix StringIndexOutOfBoundsException and NumberFormatException in XMLTokener.unescapeEntity() when parsing malformed XML numeric character references. Issues: - &#; (empty numeric reference) caused StringIndexOutOfBoundsException - &#txx; (invalid decimal) caused NumberFormatException - &#xGGG; (invalid hex) caused NumberFormatException Changes: - Add length validation before accessing character positions - Add isValidHex() and isValidDecimal() helper methods - Throw proper JSONException with descriptive messages Fixes #1035, Fixes #1036 --- src/main/java/org/json/XMLTokener.java | 76 +++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/json/XMLTokener.java b/src/main/java/org/json/XMLTokener.java index bc18b31c9..922589dec 100644 --- a/src/main/java/org/json/XMLTokener.java +++ b/src/main/java/org/json/XMLTokener.java @@ -151,9 +151,10 @@ public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSON /** * Unescape an XML entity encoding; * @param e entity (only the actual entity value, not the preceding & or ending ; - * @return + * @return the unescaped entity string + * @throws JSONException if the entity is malformed */ - static String unescapeEntity(String e) { + static String unescapeEntity(String e) throws JSONException { // validate if (e == null || e.isEmpty()) { return ""; @@ -161,23 +162,82 @@ static String unescapeEntity(String e) { // if our entity is an encoded unicode point, parse it. if (e.charAt(0) == '#') { int cp; + // Check minimum length for numeric character reference + if (e.length() < 2) { + throw new JSONException("Invalid numeric character reference: &#;"); + } if (e.charAt(1) == 'x' || e.charAt(1) == 'X') { - // hex encoded unicode - cp = Integer.parseInt(e.substring(2), 16); + // hex encoded unicode - need at least one hex digit after #x + if (e.length() < 3) { + throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";"); + } + String hex = e.substring(2); + if (!isValidHex(hex)) { + throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";"); + } + try { + cp = Integer.parseInt(hex, 16); + } catch (NumberFormatException nfe) { + throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe); + } } else { // decimal encoded unicode - cp = Integer.parseInt(e.substring(1)); + String decimal = e.substring(1); + if (!isValidDecimal(decimal)) { + throw new JSONException("Invalid decimal character reference: &#" + decimal + ";"); + } + try { + cp = Integer.parseInt(decimal); + } catch (NumberFormatException nfe) { + throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe); + } } - return new String(new int[] {cp},0,1); - } + return new String(new int[] {cp}, 0, 1); + } Character knownEntity = entity.get(e); - if(knownEntity==null) { + if (knownEntity == null) { // we don't know the entity so keep it encoded return '&' + e + ';'; } return knownEntity.toString(); } + /** + * Check if a string contains only valid hexadecimal digits. + * @param s the string to check + * @return true if s is non-empty and contains only hex digits (0-9, a-f, A-F) + */ + private static boolean isValidHex(String s) { + if (s == null || s.isEmpty()) { + return false; + } + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) { + return false; + } + } + return true; + } + + /** + * Check if a string contains only valid decimal digits. + * @param s the string to check + * @return true if s is non-empty and contains only digits (0-9) + */ + private static boolean isValidDecimal(String s) { + if (s == null || s.isEmpty()) { + return false; + } + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + if (c < '0' || c > '9') { + return false; + } + } + return true; + } + /** *
{@code 

From 6c1bfbc7a58185ba915f804c0aa6e00ae4fb621b Mon Sep 17 00:00:00 2001
From: OwenSanzas 
Date: Wed, 28 Jan 2026 09:52:25 +0000
Subject: [PATCH 2/3] Refactor XMLTokener.unescapeEntity() to reduce complexity

Extracted hex and decimal parsing logic into separate methods to
address SonarQube complexity warning:
- parseHexEntity(): handles ઼ format
- parseDecimalEntity(): handles { format

This reduces cyclomatic complexity while maintaining identical
functionality and all validation checks.
---
 .gitignore                             |  3 ++
 src/main/java/org/json/XMLTokener.java | 71 ++++++++++++++++----------
 2 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/.gitignore b/.gitignore
index b78af4db7..0e08d645c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@ build
 /gradlew
 /gradlew.bat
 .gitmodules
+
+# ignore compiled class files
+*.class
diff --git a/src/main/java/org/json/XMLTokener.java b/src/main/java/org/json/XMLTokener.java
index 922589dec..dad2e2897 100644
--- a/src/main/java/org/json/XMLTokener.java
+++ b/src/main/java/org/json/XMLTokener.java
@@ -161,37 +161,12 @@ static String unescapeEntity(String e) throws JSONException {
         }
         // if our entity is an encoded unicode point, parse it.
         if (e.charAt(0) == '#') {
-            int cp;
-            // Check minimum length for numeric character reference
             if (e.length() < 2) {
                 throw new JSONException("Invalid numeric character reference: &#;");
             }
-            if (e.charAt(1) == 'x' || e.charAt(1) == 'X') {
-                // hex encoded unicode - need at least one hex digit after #x
-                if (e.length() < 3) {
-                    throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
-                }
-                String hex = e.substring(2);
-                if (!isValidHex(hex)) {
-                    throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
-                }
-                try {
-                    cp = Integer.parseInt(hex, 16);
-                } catch (NumberFormatException nfe) {
-                    throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
-                }
-            } else {
-                // decimal encoded unicode
-                String decimal = e.substring(1);
-                if (!isValidDecimal(decimal)) {
-                    throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
-                }
-                try {
-                    cp = Integer.parseInt(decimal);
-                } catch (NumberFormatException nfe) {
-                    throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
-                }
-            }
+            int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
+                ? parseHexEntity(e)
+                : parseDecimalEntity(e);
             return new String(new int[] {cp}, 0, 1);
         }
         Character knownEntity = entity.get(e);
@@ -202,6 +177,46 @@ static String unescapeEntity(String e) throws JSONException {
         return knownEntity.toString();
     }
 
+    /**
+     * Parse a hexadecimal numeric character reference (e.g., "઼").
+     * @param e entity string starting with '#' (e.g., "#x1F4A9")
+     * @return the Unicode code point
+     * @throws JSONException if the format is invalid
+     */
+    private static int parseHexEntity(String e) throws JSONException {
+        // hex encoded unicode - need at least one hex digit after #x
+        if (e.length() < 3) {
+            throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
+        }
+        String hex = e.substring(2);
+        if (!isValidHex(hex)) {
+            throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
+        }
+        try {
+            return Integer.parseInt(hex, 16);
+        } catch (NumberFormatException nfe) {
+            throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
+        }
+    }
+
+    /**
+     * Parse a decimal numeric character reference (e.g., "{").
+     * @param e entity string starting with '#' (e.g., "#123")
+     * @return the Unicode code point
+     * @throws JSONException if the format is invalid
+     */
+    private static int parseDecimalEntity(String e) throws JSONException {
+        String decimal = e.substring(1);
+        if (!isValidDecimal(decimal)) {
+            throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
+        }
+        try {
+            return Integer.parseInt(decimal);
+        } catch (NumberFormatException nfe) {
+            throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
+        }
+    }
+
     /**
      * Check if a string contains only valid hexadecimal digits.
      * @param s the string to check

From 592e7828d9729c35053a595134e137098a053177 Mon Sep 17 00:00:00 2001
From: OwenSanzas 
Date: Wed, 28 Jan 2026 09:58:35 +0000
Subject: [PATCH 3/3] Add unit tests for XMLTokener.unescapeEntity() input
 validation

Added comprehensive test coverage for numeric character reference parsing:

Exception cases (should throw JSONException):
- Empty numeric entity: &#;
- Invalid decimal entity: &#txx;
- Empty hex entity: &#x;
- Invalid hex characters: &#xGGG;

Valid cases (should parse correctly):
- Decimal entity: A -> 'A'
- Lowercase hex entity: A -> 'A'
- Uppercase hex entity: A -> 'A'

These tests verify the fixes for issues #1035 and #1036.
---
 src/test/java/org/json/junit/XMLTest.java | 75 +++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/src/test/java/org/json/junit/XMLTest.java b/src/test/java/org/json/junit/XMLTest.java
index 2fa5daeea..25b0a0e42 100644
--- a/src/test/java/org/json/junit/XMLTest.java
+++ b/src/test/java/org/json/junit/XMLTest.java
@@ -1426,6 +1426,81 @@ public void clarifyCurrentBehavior() {
         assertEquals(jsonObject3.getJSONObject("color").getString("value"), "008E97");
     }
 
+    /**
+     * Tests that empty numeric character reference &#; throws JSONException.
+     * Previously threw StringIndexOutOfBoundsException.
+     * Related to issue #1035
+     */
+    @Test(expected = JSONException.class)
+    public void testEmptyNumericEntityThrowsJSONException() {
+        String xmlStr = "&#;";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that malformed decimal entity &#txx; throws JSONException.
+     * Previously threw NumberFormatException.
+     * Related to issue #1036
+     */
+    @Test(expected = JSONException.class)
+    public void testInvalidDecimalEntityThrowsJSONException() {
+        String xmlStr = "&#txx;";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that empty hex entity &#x; throws JSONException.
+     * Validates proper input validation for hex entities.
+     */
+    @Test(expected = JSONException.class)
+    public void testEmptyHexEntityThrowsJSONException() {
+        String xmlStr = "&#x;";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that invalid hex entity &#xGGG; throws JSONException.
+     * Validates hex digit validation.
+     */
+    @Test(expected = JSONException.class)
+    public void testInvalidHexEntityThrowsJSONException() {
+        String xmlStr = "&#xGGG;";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that valid decimal numeric entity A works correctly.
+     * Should decode to character 'A'.
+     */
+    @Test
+    public void testValidDecimalEntity() {
+        String xmlStr = "A";
+        JSONObject jsonObject = XML.toJSONObject(xmlStr);
+        assertEquals("A", jsonObject.getString("a"));
+    }
+
+    /**
+     * Tests that valid hex numeric entity A works correctly.
+     * Should decode to character 'A'.
+     */
+    @Test
+    public void testValidHexEntity() {
+        String xmlStr = "A";
+        JSONObject jsonObject = XML.toJSONObject(xmlStr);
+        assertEquals("A", jsonObject.getString("a"));
+    }
+
+    /**
+     * Tests that valid uppercase hex entity A works correctly.
+     * Should decode to character 'A'.
+     */
+    @Test
+    public void testValidUppercaseHexEntity() {
+        String xmlStr = "A";
+        JSONObject jsonObject = XML.toJSONObject(xmlStr);
+        assertEquals("A", jsonObject.getString("a"));
+    }
+
 }