diff --git a/.gitignore b/.gitignore
index da26607..8717152 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,6 +35,7 @@ Gemfile.lock
 /test/tmp/
 /test/version_tmp/
 /tmp/
+/vendor
 
 # Used by dotenv library to load environment variables.
 # .env
diff --git a/.ruby-version b/.ruby-version
index 0ddaf4d..eb39e53 100644
--- a/.ruby-version
+++ b/.ruby-version
@@ -1 +1 @@
-~> 3.3
+3.3
diff --git a/CHANGES.md b/CHANGES.md
index df4c001..2c1b190 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,11 @@
+# 0.2.1 (2026-02-04)
+
+- Add `BerkeleyLibrary::Util::HeadCheck`, an OkComputer check that performs a HEAD
+  request to verify the availability of a URL, with optional basic authentication.
+- Update `URIs::Requester` to accept a `timeout` value that is passed to RestClient.
+- `Requester.get_response` and `Requester.head_response` now re-raise failures that
+  carry no response (e.g. timeouts) instead of returning `nil`.
+
 # 0.2.0 (2025-07-24)
 
 - Update to support Ruby 3.3+.
diff --git a/berkeley_library-util.gemspec b/berkeley_library-util.gemspec
index 6c592a5..399c353 100644
--- a/berkeley_library-util.gemspec
+++ b/berkeley_library-util.gemspec
@@ -28,6 +28,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'ci_reporter_rspec', '~> 1.0'
   spec.add_development_dependency 'colorize', '~> 1.0'
   spec.add_development_dependency 'dotenv', '~> 2.7'
+  spec.add_development_dependency 'okcomputer', '~> 1.19'
   spec.add_development_dependency 'rake', '~> 13.0'
   spec.add_development_dependency 'rspec', '~> 3.10'
   spec.add_development_dependency 'rubocop', '~> 1.78.0'
diff --git a/lib/berkeley_library/util.rb b/lib/berkeley_library/util.rb
index 6952166..994db74 100644
--- a/lib/berkeley_library/util.rb
+++ b/lib/berkeley_library/util.rb
@@ -1 +1 @@
-Dir.glob(File.expand_path('util/*.rb', __dir__)).sort.each(&method(:require))
+Dir.glob(File.expand_path('util/*.rb', __dir__)).each(&method(:require))
diff --git a/lib/berkeley_library/util/arrays.rb b/lib/berkeley_library/util/arrays.rb
index 11d893e..6a4b24d 100644
--- a/lib/berkeley_library/util/arrays.rb
+++ b/lib/berkeley_library/util/arrays.rb
@@ -63,8 +63,8 @@ def count_while(values:)
         # @yieldparam target [Object] the value to compare against
         # @return [Array, nil] the indices in `in_array` of each value in `for_array`,
         #   or `nil` if not all values could be found
-        def find_indices(for_array:, in_array:, &block)
-          return find_indices_matching(for_array, in_array, &block) if block_given?
+        def find_indices(for_array:, in_array:, &)
+          return find_indices_matching(for_array, in_array, &) if block_given?
 
           find_all_indices(for_array, in_array)
         end
@@ -89,10 +89,10 @@ def find_indices(for_array:, in_array:, &block)
         # @param in_array [Array] the array to search
         # @param start_index [Integer] the index to start with
         # @return [Enumerator] a new enumerator
-        def find_index(*args, in_array:, start_index: 0, &block)
-          raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1" if args.size > 1
+        def find_index(*args, in_array:, start_index: 0, &)
+          raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1)" if args.size > 1
           return Enumerator.new { |y| find_index(in_array: in_array, start_index: start_index, &y) } if args.empty? && !block_given?
-          return unless (relative_index = in_array[start_index..].find_index(*args, &block))
+          return unless (relative_index = in_array[start_index..].find_index(*args, &))
 
           relative_index + start_index
         end
diff --git a/lib/berkeley_library/util/module_info.rb b/lib/berkeley_library/util/module_info.rb
index 3dc1362..1f7f6fb 100644
--- a/lib/berkeley_library/util/module_info.rb
+++ b/lib/berkeley_library/util/module_info.rb
@@ -2,12 +2,12 @@ module BerkeleyLibrary
   module Util
     class ModuleInfo
       NAME = 'berkeley_library-util'.freeze
-      AUTHORS = ['David Moles', 'maría a. matienzo'].freeze
-      AUTHOR_EMAILS = ['dmoles@berkeley.edu', 'matienzo@berkeley.edu'].freeze
+      AUTHORS = ['David Moles', 'maría a. matienzo', 'Jason Raitz'].freeze
+      AUTHOR_EMAILS = ['dmoles@berkeley.edu', 'matienzo@berkeley.edu', 'raitz@berkeley.edu'].freeze
       SUMMARY = 'Miscellaneous Ruby utilities for the UC Berkeley Library'.freeze
       DESCRIPTION = 'A collection of miscellaneous Ruby routines for the UC Berkeley Library.'.freeze
       LICENSE = 'MIT'.freeze
-      VERSION = '0.2.0'.freeze
+      VERSION = '0.2.1'.freeze
       HOMEPAGE = 'https://github.com/BerkeleyLibrary/util'.freeze
     end
   end
diff --git a/lib/berkeley_library/util/uris.rb b/lib/berkeley_library/util/uris.rb
index a06ae4f..73dee1a 100644
--- a/lib/berkeley_library/util/uris.rb
+++ b/lib/berkeley_library/util/uris.rb
@@ -34,8 +34,9 @@ def append(uri, *elements)
         # @param log [Boolean] whether to log each request URL and response code
+        # @param timeout [Integer] the request timeout in seconds (RestClient will use this to set both open and read timeouts)
         # @return [String] the body as a string.
         # @raise [RestClient::Exception] in the event of an unsuccessful request.
-        def get(uri, params: {}, headers: {}, log: true)
-          Requester.get(uri, params: params, headers: headers, log: log)
+        def get(uri, params: {}, headers: {}, log: true, timeout: Requester::DEFAULT_TIMEOUT_SECONDS)
+          Requester.get(uri, params: params, headers: headers, log: log, timeout: timeout)
         end
 
         # Performs a HEAD request and returns the response status as an integer.
@@ -47,8 +48,9 @@ def get(uri, params: {}, headers: {}, log: true)
         # @param headers [Hash] the request headers.
         # @param log [Boolean] whether to log each request URL and response code
+        # @param timeout [Integer] the request timeout in seconds (RestClient will use this to set both open and read timeouts)
         # @return [Integer] the response code as an integer.
-        def head(uri, params: {}, headers: {}, log: true)
-          Requester.head(uri, params: params, headers: headers, log: log)
+        def head(uri, params: {}, headers: {}, log: true, timeout: Requester::DEFAULT_TIMEOUT_SECONDS)
+          Requester.head(uri, params: params, headers: headers, log: log, timeout: timeout)
         end
 
         # Performs a GET request and returns the response, even in the event of
@@ -59,8 +61,9 @@ def head(uri, params: {}, headers: {}, log: true)
         # @param headers [Hash] the request headers.
         # @param log [Boolean] whether to log each request URL and response code
+        # @param timeout [Integer] the request timeout in seconds (RestClient will use this to set both open and read timeouts)
         # @return [RestClient::Response] the response
-        def get_response(uri, params: {}, headers: {}, log: true)
-          Requester.get_response(uri, params: params, headers: headers, log: log)
+        def get_response(uri, params: {}, headers: {}, log: true, timeout: Requester::DEFAULT_TIMEOUT_SECONDS)
+          Requester.get_response(uri, params: params, headers: headers, log: log, timeout: timeout)
         end
 
         # Performs a HEAD request and returns the response, even in the event of
@@ -71,8 +74,9 @@ def get_response(uri, params: {}, headers: {}, log: true)
         # @param headers [Hash] the request headers.
         # @param log [Boolean] whether to log each request URL and response code
+        # @param timeout [Integer] the request timeout in seconds (RestClient will use this to set both open and read timeouts)
         # @return [RestClient::Response] the response
-        def head_response(uri, params: {}, headers: {}, log: true)
-          Requester.head_response(uri, params: params, headers: headers, log: log)
+        def head_response(uri, params: {}, headers: {}, log: true, timeout: Requester::DEFAULT_TIMEOUT_SECONDS)
+          Requester.head_response(uri, params: params, headers: headers, log: log, timeout: timeout)
         end
 
         # Returns the specified URL as a URI, or `nil` if the URL is `nil`.
diff --git a/lib/berkeley_library/util/uris/head_check.rb b/lib/berkeley_library/util/uris/head_check.rb
new file mode 100644
index 0000000..b90dd2a
--- /dev/null
+++ b/lib/berkeley_library/util/uris/head_check.rb
@@ -0,0 +1,36 @@
+require 'berkeley_library/util/uris'
+
+module BerkeleyLibrary
+  module Util
+    # :nocov:
+    if defined?(::OkComputer)
+      # An OkComputer health check that verifies a URL with a HEAD request,
+      # sent through {URIs.head_response}.
+      class HeadCheck < ::OkComputer::HttpCheck
+        # Performs the HEAD request, adding a basic-auth `Authorization` header
+        # when `basic_auth_options` is non-empty.
+        #
+        # @return [RestClient::Response] the response
+        # @raise [OkComputer::HttpCheck::ConnectionFailed] if the request raises any StandardError
+        def perform_request
+          headers = {}
+          if basic_auth_options.any?
+            user, password = basic_auth_options
+            # Array#pack('m0') is strict Base64 (no line feeds). Unlike `Base64`, it needs no
+            # `require`; base64 is not a default gem from Ruby 3.4 on.
+            credentials = ["#{user}:#{password}"].pack('m0')
+            headers['Authorization'] = "Basic #{credentials}"
+          end
+
+          options = { headers: headers, log: false }
+          options[:timeout] = request_timeout.to_i if request_timeout
+
+          URIs.head_response(url, **options)
+        rescue StandardError => e
+          raise OkComputer::HttpCheck::ConnectionFailed, e
+        end
+      end
+    end
+    # :nocov:
+  end
+end
diff --git a/lib/berkeley_library/util/uris/requester.rb b/lib/berkeley_library/util/uris/requester.rb
index 7f7c9ef..28edfe9 100644
--- a/lib/berkeley_library/util/uris/requester.rb
+++ b/lib/berkeley_library/util/uris/requester.rb
@@ -20,11 +20,12 @@ class Requester
         RETRY_STATUSES = [429, 503].freeze
         MAX_RETRY_DELAY_SECONDS = 10
         MAX_RETRIES = 3
+        DEFAULT_TIMEOUT_SECONDS = 10
 
         # ------------------------------------------------------------
         # Attributes
 
-        attr_reader :method, :url_str, :headers, :log, :max_retries, :max_retry_delay
+        attr_reader :method, :url_str, :headers, :log, :max_retries, :max_retry_delay, :timeout
 
         # ------------------------------------------------------------
         # Initializer
@@ -38,9 +39,11 @@ class Requester
         # @param log [Boolean] whether to log each request URL and response code
         # @param max_retries [Integer] the maximum number of times to retry after a 429 or 503 with Retry-After
         # @param max_retry_delay [Integer] the maximum retry delay (in seconds) to accept in a Retry-After header
+        # @param timeout [Integer] the request timeout in seconds (RestClient will use this to set both open and read timeouts)
         # @raise URI::InvalidURIError if the specified URL is invalid
         # rubocop:disable Metrics/ParameterLists
-        def initialize(method, url, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS)
+        def initialize(method, url, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS,
+                       timeout: DEFAULT_TIMEOUT_SECONDS)
           raise ArgumentError, "#{method} not supported" unless SUPPORTED_METHODS.include?(method)
           raise ArgumentError, 'url cannot be nil' unless (uri = Validator.uri_or_nil(url))
 
@@ -50,6 +53,7 @@ def initialize(method, url, params: {}, headers: {}, log: true, max_retries: MAX
           @log = log
           @max_retries = max_retries
           @max_retry_delay = max_retry_delay
+          @timeout = timeout
         end
         # rubocop:enable Metrics/ParameterLists
 
@@ -73,7 +77,7 @@ def make_request
         private
 
         def log_response(response)
-          return unless log
+          return unless log && response&.code
 
           logger.info("#{method.to_s.upcase} #{url_str} returned #{response.code}")
         end
@@ -90,6 +94,8 @@ def url_str_with_params(uri, params)
 
         def execute_request(retries_remaining = max_retries)
           try_execute_request
+        rescue RestClient::Exceptions::Timeout
+          raise
         rescue RestClient::Exception => e
           response = e.response
           raise unless (retry_delay = retry_delay_from(response))
@@ -99,7 +105,7 @@ def execute_request(retries_remaining = max_retries)
         end
 
         def try_execute_request
-          RestClient::Request.execute(method: method, url: url_str, headers: headers).tap do |response|
+          RestClient::Request.execute(method: method, url: url_str, headers: headers, timeout: timeout).tap do |response|
             # Not all failed RestClient requests throw exceptions
             raise(exception_for(response)) unless response.code == 200
           end
diff --git a/lib/berkeley_library/util/uris/requester/class_methods.rb b/lib/berkeley_library/util/uris/requester/class_methods.rb
index 456a559..ed5a155 100644
--- a/lib/berkeley_library/util/uris/requester/class_methods.rb
+++ b/lib/berkeley_library/util/uris/requester/class_methods.rb
@@ -13,9 +13,11 @@ module ClassMethods
           # @param log [Boolean] whether to log each request URL and response code
           # @param max_retries [Integer] the maximum number of times to retry after a 429 or 503 with Retry-After
           # @param max_retry_delay [Integer] the maximum retry delay (in seconds) to accept in a Retry-After header
+          # @param timeout [Integer] the request timeout in seconds (RestClient will use this to set both open and read timeouts)
           # @raise [RestClient::Exception] in the event of an unsuccessful request.
-          def get(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS)
-            resp = make_request(:get, uri, params, headers, log, max_retries, max_retry_delay)
+          def get(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS,
+                  timeout: DEFAULT_TIMEOUT_SECONDS)
+            resp = make_request(:get, uri, params, headers, log, max_retries, max_retry_delay, timeout)
             resp.body
           end
 
@@ -28,8 +30,10 @@ def get(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_r
           # @param headers [Hash] the request headers.
           # @param log [Boolean] whether to log each request URL and response code
           # @return [Integer] the response code as an integer.
-          def head(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS)
-            head_response(uri, params: params, headers: headers, log: log, max_retries: max_retries, max_retry_delay: max_retry_delay).code
+          def head(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS,
+                   timeout: DEFAULT_TIMEOUT_SECONDS)
+            head_response(uri, params: params, headers: headers, log: log, max_retries: max_retries, max_retry_delay: max_retry_delay,
+                          timeout: timeout).code
           end
 
           # Performs a GET request and returns the response, even in the event of
@@ -40,8 +44,12 @@ def head(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_
           # @param headers [Hash] the request headers.
           # @param log [Boolean] whether to log each request URL and response code
           # @return [RestClient::Response] the response
+          # @raise [RestClient::Exception] if the request fails without producing a response (e.g. on timeout)
-          def get_response(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS)
-            make_request(:get, uri, params, headers, log, max_retries, max_retry_delay)
+          def get_response(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS,
+                           timeout: DEFAULT_TIMEOUT_SECONDS)
+            make_request(:get, uri, params, headers, log, max_retries, max_retry_delay, timeout)
           rescue RestClient::Exception => e
-            e.response
+            return e.response if e.response
+
+            raise
           end
@@ -54,15 +62,19 @@ def get_response(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRI
           # @param headers [Hash] the request headers.
           # @param log [Boolean] whether to log each request URL and response code
           # @return [RestClient::Response] the response
+          # @raise [RestClient::Exception] if the request fails without producing a response (e.g. on timeout)
-          def head_response(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS)
-            make_request(:head, uri, params, headers, log, max_retries, max_retry_delay)
+          def head_response(uri, params: {}, headers: {}, log: true, max_retries: MAX_RETRIES, max_retry_delay: MAX_RETRY_DELAY_SECONDS,
+                            timeout: DEFAULT_TIMEOUT_SECONDS)
+            make_request(:head, uri, params, headers, log, max_retries, max_retry_delay, timeout)
           rescue RestClient::Exception => e
-            e.response
+            return e.response if e.response
+
+            raise
           end
 
           private
 
-          def make_request(method, url, params, headers, log, max_retries, max_retry_delay)
+          def make_request(method, url, params, headers, log, max_retries, max_retry_delay, timeout)
             Requester.new(
               method,
               url,
@@ -70,7 +82,8 @@ def make_request(method, url, params, headers, log, max_retries, max_retry_delay
               headers: headers,
               log: log,
               max_retries: max_retries,
-              max_retry_delay: max_retry_delay
+              max_retry_delay: max_retry_delay,
+              timeout: timeout
             ).make_request
           end
 
diff --git a/spec/berkeley_library/util/uris/head_check_spec.rb b/spec/berkeley_library/util/uris/head_check_spec.rb
new file mode 100644
index 0000000..d8c02d5
--- /dev/null
+++ b/spec/berkeley_library/util/uris/head_check_spec.rb
@@ -0,0 +1,63 @@
+require 'spec_helper'
+require 'okcomputer'
+require 'berkeley_library/util/uris/head_check'
+require 'base64'
+
+module BerkeleyLibrary
+  module Util
+    RSpec.describe HeadCheck do
+      let(:url) { 'http://example.com' }
+      let(:check) { described_class.new(url) }
+      let(:mock_response) { instance_double(RestClient::Response) }
+
+      before do
+        allow(BerkeleyLibrary::Util::URIs).to receive(:head_response).and_return(mock_response)
+      end
+
+      describe '#perform_request' do
+        context 'without basic auth' do
+          it 'does not add Authorization header' do
+            check.perform_request
+            # Match on the `headers:` keyword; a matcher with the wrong arity could never fail
+            expect(BerkeleyLibrary::Util::URIs).not_to have_received(:head_response)
+              .with(anything, hash_including(headers: hash_including('Authorization' => anything)))
+          end
+
+          it 'calls URIs.head_response with the correct URL' do
+            check.perform_request
+            expect(BerkeleyLibrary::Util::URIs).to have_received(:head_response).with(URI(url), headers: {}, log: false, timeout: 5)
+          end
+        end
+
+        context 'with basic auth' do
+          let(:user) { 'user' }
+          let(:password) { 'pass' }
+
+          # Stub the configuration on the instance directly
+          before do
+            allow(check).to receive(:basic_auth_options).and_return([user, password])
+          end
+
+          it 'adds the Authorization header' do
+            expected_headers = { 'Authorization' => "Basic #{Base64.strict_encode64("#{user}:#{password}")}" }
+
+            check.perform_request
+            expect(BerkeleyLibrary::Util::URIs).to have_received(:head_response).with(URI(url), headers: expected_headers, log: false, timeout: 5)
+          end
+        end
+
+        context 'when URIs.head_response raises an error' do
+          let(:error_message) { 'Something went wrong' }
+
+          before do
+            allow(BerkeleyLibrary::Util::URIs).to receive(:head_response).and_raise(StandardError, error_message)
+          end
+
+          it 'raises an OkComputer::HttpCheck::ConnectionFailed error' do
+            expect { check.perform_request }.to raise_error(OkComputer::HttpCheck::ConnectionFailed, error_message)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/berkeley_library/util/uris/requester_spec.rb b/spec/berkeley_library/util/uris/requester_spec.rb
index 00aae9a..c75f0cf 100644
--- a/spec/berkeley_library/util/uris/requester_spec.rb
+++ b/spec/berkeley_library/util/uris/requester_spec.rb
@@ -305,6 +305,13 @@ module URIs
               requester = Requester.new(:get, url)
               expect { requester.make_request }.to raise_error(RestClient::ServiceUnavailable)
             end
+
+            it "raises #{RestClient::Exceptions::Timeout} when the request times out" do
+              url = 'http://example.edu/timeout'
+              stub_request(:get, url).to_raise(RestClient::Exceptions::Timeout)
+
+              expect { Requester.get(url, timeout: 10) }.to raise_error(RestClient::Exceptions::Timeout)
+            end
           end
         end
       end
@@ -374,6 +381,13 @@ module URIs
             end
           end
 
+          it "raises #{RestClient::Exceptions::Timeout} when the request times out" do
+            url = 'http://example.edu/timeout'
+            stub_request(:head, url).to_raise(RestClient::Exceptions::Timeout)
+
+            expect { Requester.head(url, timeout: 10) }.to raise_error(RestClient::Exceptions::Timeout)
+          end
+
           it 'handles redirects' do
             url1 = 'https://example.org/'
             url2 = 'https://example.edu/'