Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ build
/gradlew
/gradlew.bat
.gitmodules

# ignore compiled class files
*.class
99 changes: 87 additions & 12 deletions src/main/java/org/json/XMLTokener.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,33 +151,108 @@ public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSON
/**
* Unescape an XML entity encoding;
* @param e entity (only the actual entity value, not the preceding & or ending ;
* @return
* @return the unescaped entity string
* @throws JSONException if the entity is malformed
*/
static String unescapeEntity(String e) {
static String unescapeEntity(String e) throws JSONException {
// validate
if (e == null || e.isEmpty()) {
return "";
}
// if our entity is an encoded unicode point, parse it.
if (e.charAt(0) == '#') {
int cp;
if (e.charAt(1) == 'x' || e.charAt(1) == 'X') {
// hex encoded unicode
cp = Integer.parseInt(e.substring(2), 16);
} else {
// decimal encoded unicode
cp = Integer.parseInt(e.substring(1));
if (e.length() < 2) {
throw new JSONException("Invalid numeric character reference: &#;");
}
return new String(new int[] {cp},0,1);
}
int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
? parseHexEntity(e)
: parseDecimalEntity(e);
return new String(new int[] {cp}, 0, 1);
}
Character knownEntity = entity.get(e);
if(knownEntity==null) {
if (knownEntity == null) {
// we don't know the entity so keep it encoded
return '&' + e + ';';
}
return knownEntity.toString();
}

/**
 * Parse a hexadecimal numeric character reference (e.g., "&#xABC;").
 * <p>
 * The digits after the two-character {@code #x} / {@code #X} prefix must be
 * non-empty, consist only of hex digits, fit in an {@code int}, and name a
 * valid Unicode code point.
 *
 * @param e entity string starting with '#' (e.g., "#x1F4A9")
 * @return the Unicode code point
 * @throws JSONException if the format is invalid or the value is not a
 *         valid Unicode code point
 */
private static int parseHexEntity(String e) throws JSONException {
    // hex encoded unicode - need at least one hex digit after #x
    if (e.length() < 3) {
        throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
    }
    String hex = e.substring(2);
    if (!isValidHex(hex)) {
        throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
    }
    int codePoint;
    try {
        // Digits are already validated; parseInt can still fail on overflow.
        codePoint = Integer.parseInt(hex, 16);
    } catch (NumberFormatException nfe) {
        throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
    }
    // A value that fits in an int may still exceed U+10FFFF. Building a String
    // from such a value throws IllegalArgumentException, so reject it here
    // with the documented exception type instead.
    if (!Character.isValidCodePoint(codePoint)) {
        throw new JSONException("Invalid hex character reference: &#" + e.substring(1)
                + "; is not a valid Unicode code point");
    }
    return codePoint;
}

/**
 * Parse a decimal numeric character reference (e.g., "&#123;").
 * <p>
 * The text after the leading {@code #} must be non-empty, consist only of
 * the digits 0-9, fit in an {@code int}, and name a valid Unicode code point.
 *
 * @param e entity string starting with '#' (e.g., "#123")
 * @return the Unicode code point
 * @throws JSONException if the format is invalid or the value is not a
 *         valid Unicode code point
 */
private static int parseDecimalEntity(String e) throws JSONException {
    String decimal = e.substring(1);
    if (!isValidDecimal(decimal)) {
        throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
    }
    int codePoint;
    try {
        // Digits are already validated; parseInt can still fail on overflow.
        codePoint = Integer.parseInt(decimal);
    } catch (NumberFormatException nfe) {
        throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
    }
    // A value that fits in an int may still exceed U+10FFFF. Building a String
    // from such a value throws IllegalArgumentException, so reject it here
    // with the documented exception type instead.
    if (!Character.isValidCodePoint(codePoint)) {
        throw new JSONException("Invalid decimal character reference: &#" + decimal
                + "; is not a valid Unicode code point");
    }
    return codePoint;
}

/**
 * Tell whether a string is made up exclusively of ASCII hexadecimal digits.
 *
 * @param s the candidate string; may be {@code null}
 * @return {@code true} when {@code s} has at least one character and every
 *         character is in 0-9, a-f or A-F; {@code false} otherwise
 *         (including for {@code null} and the empty string)
 */
private static boolean isValidHex(String s) {
    if (s == null || s.length() == 0) {
        return false;
    }
    for (char ch : s.toCharArray()) {
        boolean isDigit = ch >= '0' && ch <= '9';
        boolean isLowerHex = ch >= 'a' && ch <= 'f';
        boolean isUpperHex = ch >= 'A' && ch <= 'F';
        // Only the ASCII ranges count; other Unicode digits are rejected.
        if (!isDigit && !isLowerHex && !isUpperHex) {
            return false;
        }
    }
    return true;
}

/**
 * Tell whether a string is made up exclusively of ASCII decimal digits.
 *
 * @param s the candidate string; may be {@code null}
 * @return {@code true} when {@code s} has at least one character and every
 *         character is in 0-9; {@code false} otherwise (including for
 *         {@code null} and the empty string)
 */
private static boolean isValidDecimal(String s) {
    if (s == null || s.length() == 0) {
        return false;
    }
    for (char ch : s.toCharArray()) {
        boolean isDigit = ch >= '0' && ch <= '9';
        // Signs, whitespace and non-ASCII digits all fail this range test.
        if (!isDigit) {
            return false;
        }
    }
    return true;
}


/**
* <pre>{@code
Expand Down
75 changes: 75 additions & 0 deletions src/test/java/org/json/junit/XMLTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,81 @@ public void clarifyCurrentBehavior() {
assertEquals(jsonObject3.getJSONObject("color").getString("value"), "008E97");
}

/**
 * An empty numeric character reference ({@code &#;}) must surface as a
 * JSONException when the document is converted.
 * Previously threw StringIndexOutOfBoundsException.
 * Related to issue #1035
 */
@Test(expected = JSONException.class)
public void testEmptyNumericEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#;</a>");
}

/**
 * A decimal character reference with non-digit content ({@code &#txx;})
 * must surface as a JSONException when the document is converted.
 * Previously threw NumberFormatException.
 * Related to issue #1036
 */
@Test(expected = JSONException.class)
public void testInvalidDecimalEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#txx;</a>");
}

/**
 * A hex character reference with no digits after the prefix ({@code &#x;})
 * must surface as a JSONException when the document is converted.
 */
@Test(expected = JSONException.class)
public void testEmptyHexEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#x;</a>");
}

/**
 * A hex character reference containing non-hex characters
 * ({@code &#xGGG;}) must surface as a JSONException when the document is
 * converted.
 */
@Test(expected = JSONException.class)
public void testInvalidHexEntityThrowsJSONException() {
    XML.toJSONObject("<a>&#xGGG;</a>");
}

/**
 * A well-formed decimal character reference ({@code &#65;}) is decoded:
 * the element text must come back as "A".
 */
@Test
public void testValidDecimalEntity() {
    JSONObject converted = XML.toJSONObject("<a>&#65;</a>");
    String decoded = converted.getString("a");
    assertEquals("A", decoded);
}

/**
 * A well-formed hex character reference with a lowercase prefix
 * ({@code &#x41;}) is decoded: the element text must come back as "A".
 */
@Test
public void testValidHexEntity() {
    JSONObject converted = XML.toJSONObject("<a>&#x41;</a>");
    String decoded = converted.getString("a");
    assertEquals("A", decoded);
}

/**
 * A well-formed hex character reference with an uppercase prefix
 * ({@code &#X41;}) is decoded: the element text must come back as "A".
 */
@Test
public void testValidUppercaseHexEntity() {
    JSONObject converted = XML.toJSONObject("<a>&#X41;</a>");
    String decoded = converted.getString("a");
    assertEquals("A", decoded);
}

}


Expand Down
Loading