Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 49 additions & 7 deletions src/main/java/org/spdx/utility/DownloadCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import java.io.Reader;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
Expand All @@ -44,6 +46,9 @@
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
Expand Down Expand Up @@ -107,7 +112,7 @@ private DownloadCache() {
try {
final File cacheDirectory = new File(cacheDir);
Files.createDirectories(cacheDirectory.toPath());
} catch (IOException ioe) {
} catch (final IOException ioe) {
logger.warn("Unable to create cache directory '{}'; continuing with cache disabled.", cacheDir, ioe);
tmpCacheEnabled = false;
}
Expand All @@ -116,7 +121,7 @@ private DownloadCache() {
long tmpCacheCheckIntervalSecs = DEFAULT_CACHE_CHECK_INTERVAL_SECS;
try {
tmpCacheCheckIntervalSecs = Long.parseLong(Configuration.getInstance().getProperty(CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS));
} catch(NumberFormatException nfe) {
} catch (final NumberFormatException nfe) {
// Ignore parse failures - in this case we use the default value of 24 hours
}
cacheCheckIntervalSecs = tmpCacheCheckIntervalSecs;
Expand Down Expand Up @@ -162,16 +167,41 @@ public void resetCache() throws IOException {
}

/**
* @param url The URL to get an input stream for. Note that redirects issued by this url are restricted to known
* SPDX hosts. Redirects to other hosts will cause an IOException to be thrown.
* @param url The URL to get an input stream for. Notes: redirects issued by this url are restricted to known
* SPDX hosts; redirects to other hosts will cause an IOException to be thrown.
* @return An InputStream for url, or null if url is null. Note that this InputStream may be of different concrete
* types, depending on whether the content is being served out of cache or not.
* types, depending on whether the content is being served out of cache or not.
* @throws IOException When an IO error of some kind occurs.
*/
public InputStream getUrlInputStream(final URL url) throws IOException {
    // Convenience overload: delegate to the two-argument form with redirect restriction switched on.
    final boolean restrictRedirects = true;
    return getUrlInputStream(url, restrictRedirects);
}

/**
 * Produces a normalized string rendition of a URL, with any fragment removed.
 *
 * The URL is run through {@link URI#normalize()} (which removes "." segments and resolves ".." segments in the
 * path) and the fragment is then cut off textually. The URI is deliberately NOT rebuilt from its decoded
 * components: doing so turns percent-encoded reserved characters back into their literal form (e.g. "%2F" into
 * "/") and drops authorities that java.net.URI cannot parse as a host, which makes distinct URLs collapse into
 * the same string.
 *
 * @param url The URL to normalize. May be null.
 * @return A normalized rendition of the url, as a String, without its fragment; or null if url is null. If the
 *         url is not a syntactically valid URI, its plain string form (minus any fragment) is returned instead.
 */
private static String normalizeURL(final URL url) {
    if (url == null) {
        return null;
    }

    final String external = url.toString();
    String normalized;

    try {
        normalized = new URI(external).normalize().toString();   // JDK normalization, escapes left untouched
    } catch (final URISyntaxException e) {
        normalized = external;   // Fallback on naive stringification if normalization fails
    }

    // A literal '#' can only introduce the fragment, so everything from the first one onwards is dropped
    final int fragmentStart = normalized.indexOf('#');
    return fragmentStart < 0 ? normalized : normalized.substring(0, fragmentStart);
}

// A collection of per-URL locks, keyed by the string that normalizeURL() produces for the requested URL.
// Locks are created on demand; note that this map will grow without bound as the number of distinct URLs
// requested through the cache grows.
private final ConcurrentHashMap<String, ReentrantLock> perUrlLocks = new ConcurrentHashMap<>();

/**
* @param url The URL to get an input stream for.
* @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX
Expand All @@ -182,9 +212,21 @@ public InputStream getUrlInputStream(final URL url) throws IOException {
*/
public InputStream getUrlInputStream(final URL url, final boolean restrictRedirects) throws IOException {
InputStream result = null;

if (url != null) {
if (cacheEnabled) {
result = getUrlInputStreamThroughCache(url, restrictRedirects);
// Per-URL critical section (to prevent cache stampede)
final String normalizedUrl = normalizeURL(url);
perUrlLocks.computeIfAbsent(normalizedUrl, k -> new ReentrantLock());
final Lock lock = perUrlLocks.get(normalizedUrl);
lock.lock();

try {
result = getUrlInputStreamThroughCache(url, restrictRedirects);
} finally {
lock.unlock();
}
// End of per-URL critical section
} else {
result = getUrlInputStreamDirect(url, restrictRedirects);
}
Expand Down Expand Up @@ -224,7 +266,7 @@ private InputStream getUrlInputStreamDirect(URL url, boolean restrictRedirects)
* @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX
* hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF!
* @return An InputStream for url, or null if url is null. Note that this InputStream may be of different concrete
* types, depending on whether the content is being served out of cache or not.
* types, depending on whether the content is being served out of cache or not.
* @throws IOException When an IO error of some kind occurs.
*/
private InputStream getUrlInputStreamThroughCache(final URL url, boolean restrictRedirects) throws IOException {
Expand Down