diff --git a/.gitignore b/.gitignore
index b78af4db7..0e08d645c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@ build
 /gradlew
 /gradlew.bat
 .gitmodules
+
+# ignore compiled class files
+*.class
diff --git a/src/main/java/org/json/XMLTokener.java b/src/main/java/org/json/XMLTokener.java
index bc18b31c9..dad2e2897 100644
--- a/src/main/java/org/json/XMLTokener.java
+++ b/src/main/java/org/json/XMLTokener.java
@@ -151,33 +151,113 @@ public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSON
     /**
      * Unescape an XML entity encoding;
      * @param e entity (only the actual entity value, not the preceding & or ending ;
-     * @return
+     * @return the unescaped entity string
+     * @throws JSONException if the entity is malformed or is not a valid Unicode code point
      */
-    static String unescapeEntity(String e) {
+    static String unescapeEntity(String e) throws JSONException {
         // validate
         if (e == null || e.isEmpty()) {
             return "";
         }
         // if our entity is an encoded unicode point, parse it.
         if (e.charAt(0) == '#') {
-            int cp;
-            if (e.charAt(1) == 'x' || e.charAt(1) == 'X') {
-                // hex encoded unicode
-                cp = Integer.parseInt(e.substring(2), 16);
-            } else {
-                // decimal encoded unicode
-                cp = Integer.parseInt(e.substring(1));
+            if (e.length() < 2) {
+                throw new JSONException("Invalid numeric character reference: &#;");
             }
-            return new String(new int[] {cp},0,1);
-        } 
+            int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
+                ? parseHexEntity(e)
+                : parseDecimalEntity(e);
+            // new String(int[], int, int) throws IllegalArgumentException for values
+            // above Character.MAX_CODE_POINT (e.g. "#x110000"); report JSONException instead.
+            if (!Character.isValidCodePoint(cp)) {
+                throw new JSONException("Invalid numeric character reference: &" + e + ";");
+            }
+            return new String(new int[] {cp}, 0, 1);
+        }
         Character knownEntity = entity.get(e);
-        if(knownEntity==null) {
+        if (knownEntity == null) {
             // we don't know the entity so keep it encoded
             return '&' + e + ';';
         }
         return knownEntity.toString();
     }
 
+    /**
+     * Parse a hexadecimal numeric character reference (e.g., "&#xABC;").
+     * @param e entity string starting with '#' (e.g., "#x1F4A9")
+     * @return the Unicode code point
+     * @throws JSONException if the format is invalid
+     */
+    private static int parseHexEntity(String e) throws JSONException {
+        // hex encoded unicode - need at least one hex digit after #x
+        if (e.length() < 3) {
+            throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
+        }
+        String hex = e.substring(2);
+        if (!isValidHex(hex)) {
+            throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
+        }
+        try {
+            return Integer.parseInt(hex, 16);
+        } catch (NumberFormatException nfe) {
+            throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
+        }
+    }
+
+    /**
+     * Parse a decimal numeric character reference (e.g., "&#123;").
+     * @param e entity string starting with '#' (e.g., "#123")
+     * @return the Unicode code point
+     * @throws JSONException if the format is invalid
+     */
+    private static int parseDecimalEntity(String e) throws JSONException {
+        String decimal = e.substring(1);
+        if (!isValidDecimal(decimal)) {
+            throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
+        }
+        try {
+            return Integer.parseInt(decimal);
+        } catch (NumberFormatException nfe) {
+            throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
+        }
+    }
+
+    /**
+     * Check if a string contains only valid hexadecimal digits.
+     * @param s the string to check
+     * @return true if s is non-empty and contains only hex digits (0-9, a-f, A-F)
+     */
+    private static boolean isValidHex(String s) {
+        if (s == null || s.isEmpty()) {
+            return false;
+        }
+        for (int i = 0; i < s.length(); i++) {
+            char c = s.charAt(i);
+            if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Check if a string contains only valid decimal digits.
+     * @param s the string to check
+     * @return true if s is non-empty and contains only digits (0-9)
+     */
+    private static boolean isValidDecimal(String s) {
+        if (s == null || s.isEmpty()) {
+            return false;
+        }
+        for (int i = 0; i < s.length(); i++) {
+            char c = s.charAt(i);
+            if (c < '0' || c > '9') {
+                return false;
+            }
+        }
+        return true;
+    }
+
     /**
      * {@code 
diff --git a/src/test/java/org/json/junit/XMLTest.java b/src/test/java/org/json/junit/XMLTest.java
index 2fa5daeea..25b0a0e42 100644
--- a/src/test/java/org/json/junit/XMLTest.java
+++ b/src/test/java/org/json/junit/XMLTest.java
@@ -1426,6 +1426,81 @@ public void clarifyCurrentBehavior() {
         assertEquals(jsonObject3.getJSONObject("color").getString("value"), "008E97");
     }
 
+    /**
+     * Tests that an empty numeric character reference &#; in element content throws JSONException.
+     * Previously threw StringIndexOutOfBoundsException.
+     * Related to issue #1035
+     */
+    @Test(expected = JSONException.class)
+    public void testEmptyNumericEntityThrowsJSONException() {
+        String xmlStr = "<a>&#;</a>";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that a malformed decimal entity &#txx; in element content throws JSONException.
+     * Previously threw NumberFormatException.
+     * Related to issue #1036
+     */
+    @Test(expected = JSONException.class)
+    public void testInvalidDecimalEntityThrowsJSONException() {
+        String xmlStr = "<a>&#txx;</a>";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that an empty hex entity &#x; in element content throws JSONException.
+     * Validates proper input validation for hex entities.
+     */
+    @Test(expected = JSONException.class)
+    public void testEmptyHexEntityThrowsJSONException() {
+        String xmlStr = "<a>&#x;</a>";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that an invalid hex entity &#xGGG; in element content throws JSONException.
+     * Validates hex digit validation.
+     */
+    @Test(expected = JSONException.class)
+    public void testInvalidHexEntityThrowsJSONException() {
+        String xmlStr = "<a>&#xGGG;</a>";
+        XML.toJSONObject(xmlStr);
+    }
+
+    /**
+     * Tests that the valid decimal numeric entity &#65; works correctly.
+     * Should decode to character 'A' as the value of element "a".
+     */
+    @Test
+    public void testValidDecimalEntity() {
+        String xmlStr = "<a>&#65;</a>";
+        JSONObject jsonObject = XML.toJSONObject(xmlStr);
+        assertEquals("A", jsonObject.getString("a"));
+    }
+
+    /**
+     * Tests that the valid hex numeric entity &#x41; works correctly.
+     * Should decode to character 'A' as the value of element "a".
+     */
+    @Test
+    public void testValidHexEntity() {
+        String xmlStr = "<a>&#x41;</a>";
+        JSONObject jsonObject = XML.toJSONObject(xmlStr);
+        assertEquals("A", jsonObject.getString("a"));
+    }
+
+    /**
+     * Tests that the valid hex entity with an uppercase prefix, &#X41;, works correctly.
+     * Should decode to character 'A' as the value of element "a".
+     */
+    @Test
+    public void testValidUppercaseHexEntity() {
+        String xmlStr = "<a>&#X41;</a>";
+        JSONObject jsonObject = XML.toJSONObject(xmlStr);
+        assertEquals("A", jsonObject.getString("a"));
+    }
+
 }