diff --git a/.rubocop.yml b/.rubocop.yml index c5b5d140..94ca05c8 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -38,7 +38,7 @@ Metrics/AbcSize: Max: 66 Metrics/ClassLength: - Max: 258 + Max: 300 Exclude: - spec/**/* diff --git a/lib/ferrum/page.rb b/lib/ferrum/page.rb index ac2d9463..844021df 100644 --- a/lib/ferrum/page.rb +++ b/lib/ferrum/page.rb @@ -78,15 +78,13 @@ def initialize(target_id, browser, proxy: nil) @target_id = target_id @timeout = @browser.timeout @event = Event.new.tap(&:set) + self.proxy = proxy @client = Browser::Client.new(ws_url, self, logger: @browser.options.logger, ws_max_receive_size: @browser.options.ws_max_receive_size, id_starts_with: 1000) - @proxy_user = proxy&.[](:user) || @browser.options.proxy&.[](:user) - @proxy_password = proxy&.[](:password) || @browser.options.proxy&.[](:password) - @mouse = Mouse.new(self) @keyboard = Keyboard.new(self) @headers = Headers.new(self) @@ -319,6 +317,14 @@ def subscribed?(event) @client.subscribed?(event) end + def use_proxy? + @proxy_host && @proxy_port + end + + def use_authorized_proxy? + use_proxy? && @proxy_user && @proxy_password + end + private def subscribe @@ -358,7 +364,7 @@ def prepare_page command("Log.enable") command("Network.enable") - if @proxy_user && @proxy_password + if use_authorized_proxy? network.authorize(user: @proxy_user, password: @proxy_password, type: :proxy) do |request, _index, _total| @@ -442,5 +448,12 @@ def document_node_id def ws_url "ws://#{@browser.process.host}:#{@browser.process.port}/devtools/page/#{@target_id}" end + + def proxy=(options) + @proxy_host = options&.[](:host) || @browser.options.proxy&.[](:host) + @proxy_port = options&.[](:port) || @browser.options.proxy&.[](:port) + @proxy_user = options&.[](:user) || @browser.options.proxy&.[](:user) + @proxy_password = options&.[](:password) || @browser.options.proxy&.[](:password) + end end end diff --git a/lib/ferrum/proxy.rb b/lib/ferrum/proxy.rb index af0a36c1..62974db3 100644 --- a/lib/ferrum/proxy.rb +++ b/lib/ferrum/proxy.rb @@ -18,7 +18,6 @@ def initialize(host: "127.0.0.1", port: 0, user: nil, password: nil) @port = port @user = user @password = password - at_exit { stop } end def start @@ -39,8 +38,10 @@ def start options.merge!(ProxyAuthProc: authenticator.method(:authenticate).to_proc) end - @server = WEBrick::HTTPProxyServer.new(**options) + @server = HTTPProxyServer.new(**options) @server.start + at_exit { stop } + @port = @server.config[:Port] end @@ -54,5 +55,93 @@ def stop @file&.unlink @server.shutdown end + + # Fix hanging proxy at exit + class HTTPProxyServer < WEBrick::HTTPProxyServer + # rubocop:disable all + def do_CONNECT(req, res) + # Proxy Authentication + proxy_auth(req, res) + + ua = Thread.current[:WEBrickSocket] # User-Agent + raise WEBrick::HTTPStatus::InternalServerError, + "[BUG] cannot get socket" unless ua + + host, port = req.unparsed_uri.split(":", 2) + # Proxy authentication for upstream proxy server + if proxy = proxy_uri(req, res) + proxy_request_line = "CONNECT #{host}:#{port} HTTP/1.0" + if proxy.userinfo + credentials = "Basic " + [proxy.userinfo].pack("m0") + end + host, port = proxy.host, proxy.port + end + + begin + @logger.debug("CONNECT: upstream proxy is `#{host}:#{port}'.") + os = TCPSocket.new(host, port) # origin server + + if proxy + @logger.debug("CONNECT: sending a Request-Line") + os << proxy_request_line << CRLF + @logger.debug("CONNECT: > #{proxy_request_line}") + if credentials + @logger.debug("CONNECT: sending credentials") + os << "Proxy-Authorization: " << credentials << CRLF + end + os << CRLF + proxy_status_line = os.gets(LF) + @logger.debug("CONNECT: read Status-Line from the upstream server") + @logger.debug("CONNECT: < #{proxy_status_line}") + if %r{^HTTP/\d+\.\d+\s+200\s*} =~ proxy_status_line + while line = os.gets(LF) + break if /\A(#{CRLF}|#{LF})\z/om =~ line + end + else + raise WEBrick::HTTPStatus::BadGateway + end + end + @logger.debug("CONNECT #{host}:#{port}: succeeded") + res.status = WEBrick::HTTPStatus::RC_OK + rescue => ex + @logger.debug("CONNECT #{host}:#{port}: failed `#{ex.message}'") + res.set_error(ex) + raise WEBrick::HTTPStatus::EOFError + ensure + # At exit os variable sometimes can be nil which results in hanging forever + raise WEBrick::HTTPStatus::EOFError unless os + + if handler = @config[:ProxyContentHandler] + handler.call(req, res) + end + res.send_response(ua) + access_log(@config, req, res) + + # Should clear request-line not to send the response twice. + # see: HTTPServer#run + req.parse(NullReader) rescue nil + end + + begin + while fds = IO::select([ua, os]) + if fds[0].member?(ua) + buf = ua.readpartial(1024); + @logger.debug("CONNECT: #{buf.bytesize} byte from User-Agent") + os.write(buf) + elsif fds[0].member?(os) + buf = os.readpartial(1024); + @logger.debug("CONNECT: #{buf.bytesize} byte from #{host}:#{port}") + ua.write(buf) + end + end + rescue + os.close + @logger.debug("CONNECT #{host}:#{port}: closed") + end + + raise WEBrick::HTTPStatus::EOFError + end + # rubocop:enable all + end end end