From 4e10c15368b5cdc6d6131d3376073b151cae09de Mon Sep 17 00:00:00 2001 From: Matthew Ford Date: Wed, 18 Sep 2024 22:58:51 +0100 Subject: [PATCH 1/2] Remove Vigilion code and references Use Carrierwave for file uploads Update to use DBT-Scanner via URL with credentials Add specs for new functionality and update existing specs where appropriate --- .env.example | 15 +- .env.test | 3 - .snyk | 4 - Gemfile | 3 +- Gemfile.lock | 61 +++---- Procfile | 2 +- Procfile.dev | 2 +- README.md | 13 +- app/jobs/file_scan_job.rb | 66 ++++++++ app/models/concerns/infected_file_cleaner.rb | 43 ----- app/models/concerns/scan_files.rb | 157 ++++++++++++++++++ app/models/concerns/shortlisted_document.rb | 6 +- app/models/form_answer_attachment.rb | 5 +- app/models/support_letter_attachment.rb | 6 +- app/uploaders/file_uploader.rb | 72 +++++++- .../figures_and_vat_returns/_file.html.slim | 4 +- .../_form_answer_attachment.html.slim | 2 +- ...tachment_with_virus_check_status.html.slim | 11 +- config/environments/development.rb | 1 + config/initializers/carrierwave.rb | 42 ++++- config/initializers/vigilion.rb | 19 --- config/initializers/virus_scanner.rb | 5 + .../diagrams/application_overview.puml | 8 +- lib/tasks/vigilion_migration.rake | 25 --- lib/virus_scanner.rb | 106 ++++++++++++ .../remove_audit_certificate_spec.rb | 1 + spec/lib/virus_scanner_spec.rb | 107 ++++++++++++ spec/models/concerns/scan_files_spec.rb | 102 ++++++++++++ spec/models/form_answer_attachment_spec.rb | 4 +- 29 files changed, 714 insertions(+), 181 deletions(-) create mode 100644 app/jobs/file_scan_job.rb delete mode 100644 app/models/concerns/infected_file_cleaner.rb create mode 100644 app/models/concerns/scan_files.rb delete mode 100644 config/initializers/vigilion.rb create mode 100644 config/initializers/virus_scanner.rb delete mode 100644 lib/tasks/vigilion_migration.rake create mode 100644 lib/virus_scanner.rb create mode 100644 spec/lib/virus_scanner_spec.rb create mode 100644 spec/models/concerns/scan_files_spec.rb diff --git a/.env.example b/.env.example index 6509a090bc..7ee7b138a5 100644 --- a/.env.example +++ b/.env.example @@ -3,17 +3,22 @@ DEVISE_SECRET_KEY=DEVISE_SECRET_KEY SENTRY_DSN=SENTRY_DSN AUTHY_API_KEY=AUTHY_API_KEY AUTHY_API_URL=https://api.authy.com -VIGILION_ACCESS_KEY_ID=XXXXXXXXXXXXXXXXXX -VIGILION_SECRET_ACCESS_KEY=XXXXXXXXXXXXXXXXXXXXXX DISABLE_VIRUS_SCANNER=false +ENABLE_VIRUS_SCANNER_BUCKETS=false +VIRUS_SCANNER_URL=http://localhost:80 +VIRUS_SCANNER_USERNAME=app1 +VIRUS_SCANNER_PASSWORD=letmein PROFILE_MODE=false LOG_LEVEL=info ASSET_HOST=http://localhost:3000/ MAILER_HOST=localhost -AWS_ACCESS_KEY_ID=xxx -AWS_SECRET_ACCESS_KEY=xxx +AWS_TMP_BUCKET_ACCESS_KEY_ID=xxx +AWS_TMP_BUCKET_SECRET_ACCESS_KEY=xxx +AWS_PERMANENT_BUCKET_ACCESS_KEY_ID=xxx +AWS_PERMANENT_BUCKET_SECRET_ACCESS_KEY=xxx AWS_REGION=xxx -AWS_S3_BUCKET_NAME=xxx +AWS_S3_TMP_BUCKET=xxx +AWS_S3_PERMANENT_BUCKET=xxx DISPLAY_SOCIAL_MOBILITY_AWARD=true PUSHER_SOCKET_HOST=localhost PUSHER_WS_PORT=8080 diff --git a/.env.test b/.env.test index 4e510fa224..61b1dfcd59 100644 --- a/.env.test +++ b/.env.test @@ -3,9 +3,6 @@ DEVISE_SECRET_KEY=DEVISE_SECRET_KEY SENTRY_DSN=SENTRY_DSN AUTHY_API_KEY=AUTHY_API_KEY AUTHY_API_URL=https://api.authy.com -VIGILION_ACCESS_KEY_ID=XXXXXXXXXXXXXXXXXX -VIGILION_SECRET_ACCESS_KEY=XXXXXXXXXXXXXXXXXXXXXX -DISABLE_VIRUS_SCANNER=true PROFILE_MODE=false LOG_LEVEL=info ASSET_HOST= diff --git a/.snyk b/.snyk index 325045673a..d63e090e6c 100644 --- a/.snyk +++ b/.snyk @@ -2,10 +2,6 @@ version: v1.7.0 # ignores vulnerabilities until expiry date; change duration by modifying expiry date ignore: - SNYK-RUBY-FARADAYMIDDLEWARE-20334: - - '* > vigilion@1.0.4': - reason: None given - expires: '2017-06-09T12:30:08.169Z' SNYK-RUBY-NOKOGIRI-20299: - '*': reason: an application using Nokogiri needs to be opt into the DTDLOAD option and opt out of the NONET option in order to be vulnerable diff --git a/Gemfile b/Gemfile index 5fff468a5c..1fcc2ab5e7 100644 --- a/Gemfile +++ b/Gemfile @@ -92,8 +92,7 @@ gem "nokogiri" # Uploads gem "carrierwave", "~> 3.0" gem "fog-aws" -gem "vigilion", "~> 1.0.4" -gem "vigilion-rails", "~> 2.2.0" +gem "aws-sdk-s3", "~> 1" # Background jobs gem "sidekiq", "~> 6.5.10" diff --git a/Gemfile.lock b/Gemfile.lock index 588d1845fb..e9bb831e78 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -83,6 +83,22 @@ GEM httpclient (>= 2.5.3.3) autoprefixer-rails (10.4.19.0) execjs (~> 2) + aws-eventstream (1.3.0) + aws-partitions (1.983.0) + aws-sdk-core (3.209.1) + aws-eventstream (~> 1, >= 1.3.0) + aws-partitions (~> 1, >= 1.651.0) + aws-sigv4 (~> 1.9) + jmespath (~> 1, >= 1.6.1) + aws-sdk-kms (1.94.0) + aws-sdk-core (~> 3, >= 3.207.0) + aws-sigv4 (~> 1.5) + aws-sdk-s3 (1.167.0) + aws-sdk-core (~> 3, >= 3.207.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.5) + aws-sigv4 (1.10.0) + aws-eventstream (~> 1, >= 1.0.2) axiom-types (0.1.1) descendants_tracker (~> 0.0.4) ice_nine (~> 0.11.0) @@ -205,37 +221,7 @@ GEM factory_bot_rails (6.2.0) factory_bot (~> 6.2.0) railties (>= 5.0.0) - faraday (1.10.3) - faraday-em_http (~> 1.0) - faraday-em_synchrony (~> 1.0) - faraday-excon (~> 1.1) - faraday-httpclient (~> 1.0) - faraday-multipart (~> 1.0) - faraday-net_http (~> 1.0) - faraday-net_http_persistent (~> 1.0) - faraday-patron (~> 1.0) - faraday-rack (~> 1.0) - faraday-retry (~> 1.0) - ruby2_keywords (>= 0.0.4) - faraday-detailed_logger (2.5.0) - faraday (>= 0.16, < 3) - faraday-em_http (1.0.0) - faraday-em_synchrony (1.0.0) - faraday-excon (1.1.0) - faraday-httpclient (1.0.1) - faraday-multipart (1.0.4) - multipart-post (~> 2) - faraday-net_http (1.0.1) - faraday-net_http_persistent (1.2.0) - faraday-patron (1.0.0) - faraday-rack (1.0.0) - faraday-retry (1.0.3) - faraday_middleware (1.2.0) - faraday (~> 1.0) - ffi (1.17.0-aarch64-linux-gnu) - ffi (1.17.0-arm64-darwin) - ffi (1.17.0-x86_64-darwin) - ffi (1.17.0-x86_64-linux-gnu) + ffi (1.15.5) fog-aws (3.19.0) fog-core (~> 2.1) fog-json (~> 1.1) @@ -284,6 +270,7 @@ GEM ruby-vips (>= 2.0.17, < 3) jbuilder (2.10.2) activesupport (>= 5.0.0) + jmespath (1.6.2) jquery-rails (4.4.0) rails-dom-testing (>= 1, < 3) railties (>= 4.2.0) @@ -352,7 +339,6 @@ GEM money (6.16.0) i18n (>= 0.6.4, <= 2) multi_json (1.15.0) - multipart-post (2.3.0) mustermann (3.0.0) ruby2_keywords (~> 0.0.1) net-imap (0.4.10) @@ -685,14 +671,6 @@ GEM activesupport (>= 5.2.0, < 8.0) concurrent-ruby (~> 1.0) method_source (~> 1.0) - vigilion (1.0.4) - addressable (~> 2) - faraday - faraday-detailed_logger - faraday_middleware - vigilion-rails (2.2.0) - rails (>= 6.0.3.7) - vigilion (~> 1.0.4) virtus (2.0.0) axiom-types (~> 0.1) coercible (~> 1.0) @@ -734,6 +712,7 @@ PLATFORMS DEPENDENCIES active_hash amoeba (= 3.0.0) + aws-sdk-s3 (~> 1) binding_of_caller bootscale bootstrap-sass (~> 3.4) @@ -831,8 +810,6 @@ DEPENDENCIES timecop turnip (~> 4.2.0) uglifier (>= 2.7.2) - vigilion (~> 1.0.4) - vigilion-rails (~> 2.2.0) virtus webmock (= 3.18.1) webpacker (= 6.0.0.rc.6) diff --git a/Procfile b/Procfile index 91529f8438..b7a49533eb 100644 --- a/Procfile +++ b/Procfile @@ -1,2 +1,2 @@ web: bundle exec rake cf:run_migrations db:migrate && bundle exec puma -C config/puma.rb -worker: bundle exec sidekiq -L ./log/worker.log -C ./config/sidekiq.yml +worker: bundle exec sidekiq -C ./config/sidekiq.yml diff --git a/Procfile.dev b/Procfile.dev index 91529f8438..b7a49533eb 100644 --- a/Procfile.dev +++ b/Procfile.dev @@ -1,2 +1,2 @@ web: bundle exec rake cf:run_migrations db:migrate && bundle exec puma -C config/puma.rb -worker: bundle exec sidekiq -L ./log/worker.log -C ./config/sidekiq.yml +worker: bundle exec sidekiq -C ./config/sidekiq.yml diff --git a/README.md b/README.md index c9a26ee163..a89c489a9a 100644 --- a/README.md +++ b/README.md @@ -62,9 +62,18 @@ If you need to test collaborators editing the application at the same time, inst ### Installing Malware Scanning -Files are uploaded to S3 and then scanned with ClamAV via the Vigilion service. +Files are uploaded to S3 and then scanned with ClamAV via the DBT scanner service. + +If you need to test malware scanning locally, run the [DBT scanner](https://github.com/uktrade/dit-clamav-rest) via docker-compose. + +You will also need to set the following environment variables in the `.env` file: + +``` +VIRUS_SCANNER_URL=http://localhost:80 +VIRUS_SCANNER_USERNAME=app1 +VIRUS_SCANNER_PASSWORD=letmein +``` -If you need to test malware scanning locally, install [Vigilion](https://github.com/bitzesty/vigilion-scanner) and set the `VIGILION_ACCESS_KEY_ID` and `VIGILION_SECRET_ACCESS_KEY` and `DISABLE_VIRUS_SCANNER` to `false` in the `.env` file. ### Running the tests diff --git a/app/jobs/file_scan_job.rb b/app/jobs/file_scan_job.rb new file mode 100644 index 0000000000..4b685a9567 --- /dev/null +++ b/app/jobs/file_scan_job.rb @@ -0,0 +1,66 @@ +class FileScanJob < ApplicationJob + queue_as :default + + def perform(key, class_name, record_id, attribute_name) + record = class_name.constantize.find(record_id) + file = record.send(attribute_name) + + return if file.blank? + + begin + file_to_scan = get_file_to_scan(file) + scan_result = VirusScanner.scan_file(file_to_scan) + status = scan_result[:malware] ? "infected" : "clean" + record.send(:"on_scan_#{attribute_name}", status: status) + rescue VirusScanner::AuthenticationError => e + handle_authentication_error(record, attribute_name, e) + rescue VirusScanner::FileTooLargeError => e + handle_file_too_large_error(record, attribute_name, e) + rescue VirusScanner::ScanError => e + handle_scan_error(record, attribute_name, e) + ensure + file_to_scan.close if file_to_scan.respond_to?(:close) + end + end + + private + + def get_file_to_scan(file) + if file.is_a?(String) + File.open(file, "rb") + elsif file.respond_to?(:read) + file + elsif file.is_a?(CarrierWave::SanitizedFile) + File.open(file.file, "rb") + elsif file.respond_to?(:file) + if file.file.is_a?(CarrierWave::SanitizedFile) + File.open(file.file.file, "rb") + elsif file.file.respond_to?(:path) + File.open(file.file.path, "rb") + elsif file.file.respond_to?(:read) + file.file + else + raise ArgumentError, "Don't know how to handle #{file.file.class}" + end + elsif file.respond_to?(:path) + File.open(file.path, "rb") + else + raise ArgumentError, "Don't know how to handle #{file.class}" + end + end + + def handle_authentication_error(record, attribute_name, error) + Rails.logger.error("VirusScanner Authentication Error: #{error.message}") + record.send(:"on_scan_#{attribute_name}", status: :error) + end + + def handle_file_too_large_error(record, attribute_name, error) + Rails.logger.warn("File too large for virus scanning: #{error.message}") + record.send(:"on_scan_#{attribute_name}", status: :error) + end + + def handle_scan_error(record, attribute_name, error) + Rails.logger.error("VirusScanner Error: #{error.message}") + record.send(:"on_scan_#{attribute_name}", status: :error) + end +end diff --git a/app/models/concerns/infected_file_cleaner.rb b/app/models/concerns/infected_file_cleaner.rb deleted file mode 100644 index 22197aa21c..0000000000 --- a/app/models/concerns/infected_file_cleaner.rb +++ /dev/null @@ -1,43 +0,0 @@ -module InfectedFileCleaner - extend ActiveSupport::Concern - - class_methods do - def clean_after_scan(*file_attr_names) - file_attr_names.each do |attr_name| - override_on_scan_callback(attr_name) - end - end - - def override_on_scan_callback(file_attr_name) - class_eval <<-EVAL, __FILE__, __LINE__ + 1 - def on_scan_#{file_attr_name}_with_cleanup(params) - on_scan_#{file_attr_name}_without_cleanup(params) - - if #{file_attr_name}_scan_results == "infected" - public_send("remove_#{file_attr_name}!") - save! - end - end - - # scan_file! method is called in check_scan_file after commit callback - # if the file is infected it gets deleted, then AR model is saved again - # in order to avoid sending a scanning request without the file - # we're adding a check to see if it's present or not - # scan_file_with_cleanup! is an alias new method that gets called with scan_file! - # scan_file_without_cleanup! is calling to the unmodified gem's code - - def scan_#{file_attr_name}_with_cleanup! - return unless #{file_attr_name}.present? - - scan_#{file_attr_name}_without_cleanup! - end - - alias_method :scan_#{file_attr_name}_without_cleanup!, :scan_#{file_attr_name}! - alias_method :scan_#{file_attr_name}!, :scan_#{file_attr_name}_with_cleanup! - - alias_method :on_scan_#{file_attr_name}_without_cleanup, :on_scan_#{file_attr_name} - alias_method :on_scan_#{file_attr_name}, :on_scan_#{file_attr_name}_with_cleanup - EVAL - end - end -end diff --git a/app/models/concerns/scan_files.rb b/app/models/concerns/scan_files.rb new file mode 100644 index 0000000000..cc719cd682 --- /dev/null +++ b/app/models/concerns/scan_files.rb @@ -0,0 +1,157 @@ +module ScanFiles + extend ActiveSupport::Concern + + included do + before_save :set_scan_results_to_pending + after_commit :perform_virus_scan, on: [:create, :update] + end + + def perform_virus_scan + self.class.file_attributes_to_scan.each do |attr_name| + file = send(attr_name) + next if file.blank? + + key = { model: self.class.name, column: attr_name, id: id }.to_json + FileScanJob.perform_later(key, self.class.name, id, attr_name) + end + end + + private + + def move_to_clean_bucket(attr_name) + file = send(attr_name) + if file.present? + if Rails.env.production? || ENV["ENABLE_VIRUS_SCANNER_BUCKETS"] == "true" + move_to_permanent_s3_bucket(file) + else + move_to_permanent_local_folder(attr_name) + end + Rails.logger.info("File moved to clean storage: #{self.class.name} ID #{id}, #{attr_name}") + end + end + + def move_to_permanent_s3_bucket(file) + tmp_bucket_s3_client = Aws::S3::Client.new({ + region: ENV["AWS_REGION"], + access_key_id: ENV["AWS_TMP_BUCKET_ACCESS_KEY_ID"], + secret_access_key: ENV["AWS_TMP_BUCKET_SECRET_ACCESS_KEY"], + }) + clean_bucket_s3_client = Aws::S3::Client.new({ + region: ENV["AWS_REGION"], + access_key_id: ENV["AWS_PERMANENT_BUCKET_ACCESS_KEY_ID"], + secret_access_key: ENV["AWS_PERMANENT_BUCKET_SECRET_ACCESS_KEY"], + }) + + object_to_copy = tmp_bucket_s3_client.get_object( + bucket: ENV["AWS_S3_TMP_BUCKET"], + key: file.path, + ) + + clean_bucket_s3_client.put_object( + bucket: ENV["AWS_S3_PERMANENT_BUCKET"], + body: object_to_copy.body.read, + key: file.permanent_path, + ) + + tmp_bucket_s3_client.delete_object( + bucket: ENV["AWS_S3_TMP_BUCKET"], + key: file.path, + ) + end + + def move_to_permanent_local_folder(attribute_name) + file = send(attribute_name) + if file.respond_to?(:path) + new_path = file.path.sub("/tmp/", "/permanent/") + Rails.logger.debug "Moving file from #{file.path} to #{new_path}" + FileUtils.mkdir_p(File.dirname(new_path)) + FileUtils.mv(file.path, new_path) unless File.exist?(new_path) + file.instance_variable_set(:@path, new_path) + Rails.logger.debug "File moved successfully. New path: #{file.path}" + else + Rails.logger.debug "File not present for #{attribute_name}" + end + end + + class_methods do + def scan_for_viruses(*file_attr_names) + @file_attributes_to_scan = file_attr_names + file_attr_names.each do |attr_name| + define_scan_method(attr_name) + define_clean_after_scan_method(attr_name) + define_clean_method(attr_name) + define_set_scan_results_to_pending_method(attr_name) + end + end + + def file_attributes_to_scan + @file_attributes_to_scan || [] + end + + private + + def define_scan_method(file_attr_name) + class_eval <<-RUBY, __FILE__, __LINE__ + 1 + def scan_#{file_attr_name}! + return unless #{file_attr_name}.present? && #{file_attr_name}.file.present? + + if ENV['DISABLE_VIRUS_SCANNER'] == 'true' + update_column('#{file_attr_name}_scan_results', 'clean') + move_to_clean_bucket(:#{file_attr_name}) + return true + end + + key = { model: self.class.name, column: '#{file_attr_name}', id: id }.to_json + FileScanJob.perform_later(key, self.class.name, id, '#{file_attr_name}') + + update_column('#{file_attr_name}_scan_results', 'scanning') + true + end + + after_commit :check_scan_#{file_attr_name}, on: [:create, :update] + + def check_scan_#{file_attr_name} + scan_#{file_attr_name}! if #{file_attr_name}_changed? + end + RUBY + end + + def define_clean_after_scan_method(file_attr_name) + class_eval <<-RUBY, __FILE__, __LINE__ + 1 + def on_scan_#{file_attr_name}(params) + if params[:status] == 'clean' + move_to_clean_bucket(:#{file_attr_name}) + update_column('#{file_attr_name}_scan_results', 'clean') + elsif params[:status] == 'infected' + update_column('#{file_attr_name}_scan_results', 'infected') + Rails.logger.warn("Infected file detected: \#{self.class.name} ID \#{id}, #{file_attr_name}") + else + update_column('#{file_attr_name}_scan_results', params[:status]) + end + end + RUBY + end + + def define_clean_method(file_attr_name) + class_eval <<-RUBY, __FILE__, __LINE__ + 1 + def clean? + #{file_attr_name}_scan_results == "clean" + end + def pending_or_scanning? + #{file_attr_name}_scan_results == "pending" || #{file_attr_name}_scan_results == "scanning" + end + def infected? + #{file_attr_name}_scan_results == "infected" + end + RUBY + end + + def define_set_scan_results_to_pending_method(file_attr_name) + class_eval <<-RUBY, __FILE__, __LINE__ + 1 + def set_scan_results_to_pending + self.#{file_attr_name}_scan_results = 'pending' if #{file_attr_name}_changed? + end + RUBY + end + end +end diff --git a/app/models/concerns/shortlisted_document.rb b/app/models/concerns/shortlisted_document.rb index 4c8e479010..ff55b863bc 100644 --- a/app/models/concerns/shortlisted_document.rb +++ b/app/models/concerns/shortlisted_document.rb @@ -2,11 +2,11 @@ module ShortlistedDocument extend ActiveSupport::Concern included do + include ScanFiles + mount_uploader :attachment, AuditCertificateUploader - scan_file :attachment - include ::InfectedFileCleaner - clean_after_scan :attachment + scan_for_viruses :attachment belongs_to :form_answer, optional: true end diff --git a/app/models/form_answer_attachment.rb b/app/models/form_answer_attachment.rb index 9f6d2e07c4..8007c3aadd 100644 --- a/app/models/form_answer_attachment.rb +++ b/app/models/form_answer_attachment.rb @@ -1,12 +1,11 @@ class FormAnswerAttachment < ApplicationRecord + include ScanFiles belongs_to :form_answer, optional: true belongs_to :attachable, polymorphic: true, optional: true mount_uploader :file, FormAnswerAttachmentUploader - scan_file :file - include ::InfectedFileCleaner - clean_after_scan :file + scan_for_viruses :file scope :uploaded_by_user, -> { where attachable_type: "User" } scope :uploaded_not_by_user, -> { where.not(attachable_type: "User") } diff --git a/app/models/support_letter_attachment.rb b/app/models/support_letter_attachment.rb index 3fa95dae42..e728eeeae1 100644 --- a/app/models/support_letter_attachment.rb +++ b/app/models/support_letter_attachment.rb @@ -1,9 +1,9 @@ class SupportLetterAttachment < ApplicationRecord + include ScanFiles + mount_uploader :attachment, FormAnswerAttachmentUploader - scan_file :attachment - include ::InfectedFileCleaner - clean_after_scan :attachment + scan_for_viruses :attachment # associations belongs_to :user, optional: true diff --git a/app/uploaders/file_uploader.rb b/app/uploaders/file_uploader.rb index bf112d380a..0ac94f3fed 100644 --- a/app/uploaders/file_uploader.rb +++ b/app/uploaders/file_uploader.rb @@ -2,15 +2,79 @@ class FileUploader < CarrierWave::Uploader::Base POSSIBLE_IMG_EXTENSIONS = %w[jpg jpeg gif png] POSSIBLE_DOC_EXTENSIONS = %w[chm csv diff doc docx dot dxf eps gml ics kml odp ods odt pdf ppt pptx ps rdf rtf sch txt wsdl xls xlsm xlsx xlt xsd xslt zip msg] - def extension_allowlist - POSSIBLE_IMG_EXTENSIONS + POSSIBLE_DOC_EXTENSIONS - end + storage :custom def store_dir - "uploads/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}" + base_dir = (model.respond_to?(:clean?) && model.clean?) ? "permanent" : "tmp" + "uploads/#{base_dir}/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}" + end + + def read + if model.respond_to?(:clean?) && model.clean? + read_from_permanent_storage + else + super + end + end + + def extension_allowlist + POSSIBLE_IMG_EXTENSIONS + POSSIBLE_DOC_EXTENSIONS end def filename "#{@original_filename.gsub(/\W/, "").gsub(/#{file.extension}\z/, "")}.#{file.extension}" if @original_filename.present? end + + def fog_credentials + if fog_directory.include?("clean") + clean_bucket_credentials + else + tmp_bucket_credentials + end + end + + def fog_directory + if model.respond_to?(:clean?) && model.clean? + ENV["AWS_S3_PERMANENT_BUCKET"] + else + ENV["AWS_S3_TMP_BUCKET"] + end + end + + def permanent_path + path.sub("tmp", "permanent") + end + + private + + def read_from_permanent_storage + if Rails.env.production? || ENV["ENABLE_VIRUS_SCANNER_BUCKETS"] == "true" + permanent_file = CarrierWave::Storage::Fog::File.new(self, permanent_storage, store_path) + permanent_file.read + else + File.read(permanent_path) + end + end + + def permanent_storage + @permanent_storage ||= CarrierWave::Storage::Fog.new(self) + end + + def tmp_bucket_credentials + { + provider: "AWS", + aws_access_key_id: ENV["AWS_TMP_BUCKET_ACCESS_KEY_ID"], + aws_secret_access_key: ENV["AWS_TMP_BUCKET_SECRET_ACCESS_KEY"], + region: ENV["AWS_REGION"], + } + end + + def clean_bucket_credentials + { + provider: "AWS", + aws_access_key_id: ENV["AWS_PERMANENT_BUCKET_ACCESS_KEY_ID"], + aws_secret_access_key: ENV["AWS_PERMANENT_BUCKET_SECRET_ACCESS_KEY"], + region: ENV["AWS_REGION"], + } + end end diff --git a/app/views/admin/figures_and_vat_returns/_file.html.slim b/app/views/admin/figures_and_vat_returns/_file.html.slim index da968f22c3..4461a116d3 100644 --- a/app/views/admin/figures_and_vat_returns/_file.html.slim +++ b/app/views/admin/figures_and_vat_returns/_file.html.slim @@ -12,12 +12,12 @@ span.visible-lg.visible-md ' Remove - - if Rails.env.test? || attachment.clean? + - if attachment.clean? = link_to [namespace_name, attachment.form_answer, attachment], target: "_blank", class: "action-title" span.glyphicon.glyphicon-file = attachment.original_filename - - elsif attachment.attachment_scan_results.to_s == "infected" + - elsif attachment.infected? span.glyphicon.glyphicon-file = "Failed virus scanner check (#{attachment.original_filename})" diff --git a/app/views/admin/form_answer_attachments/_form_answer_attachment.html.slim b/app/views/admin/form_answer_attachments/_form_answer_attachment.html.slim index 9bbe3484dc..7451e47777 100644 --- a/app/views/admin/form_answer_attachments/_form_answer_attachment.html.slim +++ b/app/views/admin/form_answer_attachments/_form_answer_attachment.html.slim @@ -18,7 +18,7 @@ span.glyphicon.glyphicon-file = form_answer_attachment.decorate.display_name - - elsif form_answer_attachment.file_scan_results.to_s == "infected" + - elsif form_answer_attachment.infected? span.glyphicon.glyphicon-file = "Failed virus scanner check (#{form_answer_attachment.decorate.display_name})" diff --git a/app/views/shared/_attachment_with_virus_check_status.html.slim b/app/views/shared/_attachment_with_virus_check_status.html.slim index d66a8fdf60..12176450f3 100644 --- a/app/views/shared/_attachment_with_virus_check_status.html.slim +++ b/app/views/shared/_attachment_with_virus_check_status.html.slim @@ -1,19 +1,18 @@ p.govuk-body.display-inline - if item && item.send(mount_name) - - scan_results = item.send("#{mount_name}_scan_results") - if item.clean? = link_to item.try(:original_filename), item.send(mount_name).url, target: "_blank" - - elsif scan_results == "scanning" || scan_results == "pending" + - elsif item.pending_or_scanning? = item.try(:original_filename) br - | (scanning on viruses) - - elsif scan_results == "infected" + | Scanning file + - elsif item.infected? = item.try(:original_filename) br - | has been blocked (virus detected), please remove. + | Uploaded file has been blocked (virus detected), please remove - else = item.try(:original_filename) br - | didn't pass virus scanner check, please remove + | File didn't pass virus scanner check, please remove diff --git a/config/environments/development.rb b/config/environments/development.rb index e3ff2e6029..a59f62d5ba 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -84,6 +84,7 @@ :user_agent, ) else + config.log_level = ENV.fetch("LOG_LEVEL") { "debug" } # normal development logging configuration config.logger = ActiveSupport::TaggedLogging.new(ActiveSupport::Logger.new($stdout)) end diff --git a/config/initializers/carrierwave.rb b/config/initializers/carrierwave.rb index eae618ea80..c677059a3a 100644 --- a/config/initializers/carrierwave.rb +++ b/config/initializers/carrierwave.rb @@ -2,21 +2,51 @@ require "carrierwave/storage/fog" require "fog/aws" +class CustomStorage + def self.new(uploader) + if Rails.env.production? || ENV["ENABLE_VIRUS_SCANNER_BUCKETS"] == "true" + CustomFogStorage.new(uploader) + else + CustomFileStorage.new(uploader) + end + end +end + +class CustomFogStorage < CarrierWave::Storage::Fog +end + +class CustomFileStorage < CarrierWave::Storage::File + def retrieve!(identifier) + file = super + if file.respond_to?(:uploader) && file.uploader.model.respond_to?(:clean?) && file.uploader.model.clean? + new_path = file.path.sub("/tmp/", "/permanent/") + FileUtils.mkdir_p(File.dirname(new_path)) + FileUtils.mv(file.path, new_path) unless File.exist?(new_path) + file.instance_variable_set(:@path, new_path) + end + file + end +end + CarrierWave.configure do |config| - if Rails.env.production? + if Rails.env.production? || ENV["ENABLE_VIRUS_SCANNER_BUCKETS"] == "true" config.fog_credentials = { provider: "AWS", - aws_access_key_id: ENV["AWS_ACCESS_KEY_ID"], - aws_secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"], + aws_access_key_id: ENV["AWS_TMP_BUCKET_ACCESS_KEY_ID"], + aws_secret_access_key: ENV["AWS_TMP_BUCKET_SECRET_ACCESS_KEY"], region: ENV["AWS_REGION"], } - config.fog_directory = ENV["AWS_S3_BUCKET_NAME"] + config.fog_directory = ENV["AWS_S3_TMP_BUCKET"] config.storage = :fog config.fog_public = false config.cache_dir = "/tmp/carrierwave" + config.cache_storage = :fog else config.storage = :file - config.cache_dir = Rails.root.join("tmp/uploads") + config.enable_processing = false if Rails.env.test? + config.root = Rails.root.join("public") + config.cache_dir = "uploads/tmp" + config.cache_storage = :file end - config.cache_storage = :file + config.storage_engines = { custom: "CustomStorage" } end diff --git a/config/initializers/vigilion.rb b/config/initializers/vigilion.rb deleted file mode 100644 index 4944d631a1..0000000000 --- a/config/initializers/vigilion.rb +++ /dev/null @@ -1,19 +0,0 @@ -Vigilion.configure do |config| - config.access_key_id = ENV["VIGILION_ACCESS_KEY_ID"] || "Replace me" - config.secret_access_key = ENV["VIGILION_SECRET_ACCESS_KEY"] || "Replace me" - - config.server_url = ENV["VIGILION_SERVER_URL"] if ENV["VIGILION_SERVER_URL"].present? - # Integration strategy (default is :url) - # config.integration = :local - - # By default vigilion will be bypassed in development and test environments. - # Disable vigilion scanning entirely even in production environments: - # config.loopback = true - # Enable vigilion scanning even in development and test environments: - # (Note that the callback URL probably won't be reached) - config.loopback = ENV["DISABLE_VIRUS_SCANNER"] == "true" - # Specify different loopback_response (default is 'clean') - # config.loopback_response = 'infected' - config.debug = ENV["DEBUG_VIRUS_SCANNER"] == "true" - config.active_job = ENV["VIRUS_SCANNER_ACTIVE_JOB"] == "true" -end diff --git a/config/initializers/virus_scanner.rb b/config/initializers/virus_scanner.rb new file mode 100644 index 0000000000..65062c5462 --- /dev/null +++ b/config/initializers/virus_scanner.rb @@ -0,0 +1,5 @@ +VirusScanner.configure( + base_url: ENV["VIRUS_SCANNER_URL"], # DBT-scanner URL + username: ENV["VIRUS_SCANNER_USERNAME"], + password: ENV["VIRUS_SCANNER_PASSWORD"], +) diff --git a/doc/architecture/diagrams/application_overview.puml b/doc/architecture/diagrams/application_overview.puml index dc3effb56f..1655ba56e0 100644 --- a/doc/architecture/diagrams/application_overview.puml +++ b/doc/architecture/diagrams/application_overview.puml @@ -12,7 +12,7 @@ System_Boundary(kae, "Kings Awards For Enterprise") { ContainerDb(message_broker, "Message Broker", "Container: Redis", "Provides message broker capability, caching, and background job queue") ContainerDb(application_db, "Application Database", "Container: PostgreSQL", "Provides storage for structured data") ContainerDb(document_storage, "Document Storage", "Container: AWS S3", "Provides document storage capability") - Container(vigilion, "Vigilion", "Container: Ruby", "Provides antivirus and vulnerability scanning") + Container(dbt_scanner, "DBT Scanner", "Container: Ruby", "Provides antivirus and vulnerability scanning") } System_Ext(cloudfront, "AWS CloudFront", "WAF and CDN") @@ -29,9 +29,9 @@ Rel(kae_service, message_broker, "Writes to") Rel(kae_worker, message_broker, "Reads from") Rel(kae_service, application_db, "Reads from and writes to") Rel(kae_service, document_storage, "Reads from and writes to") -Rel(vigilion, document_storage, "Scans documents in") -Rel(kae_service, vigilion, "Sends scan requests to") -Rel(vigilion, kae_service, "Sends scan results to") +Rel(dbt_scanner, document_storage, "Scans documents in") +Rel(kae_service, dbt_scanner, "Sends scan requests to") +Rel(dbt_scanner, kae_service, "Sends scan results to") Rel(kae_service, poxa, "Manages user access to") Rel(kae_service, sentry, "Sends logs, performance metrics and errors to") Rel(sentry, slack, "Sends alerts to") diff --git a/lib/tasks/vigilion_migration.rake b/lib/tasks/vigilion_migration.rake deleted file mode 100644 index d33ae5463e..0000000000 --- a/lib/tasks/vigilion_migration.rake +++ /dev/null @@ -1,25 +0,0 @@ -namespace :vigilion_migration do - desc "copies old VirusScans information into Vigilion columns" - task convert_to_vigilion: :environment do - ActiveRecord::Base.connection.execute <<-EOS - UPDATE form_answer_attachments - SET file_scan_results=scans.status - FROM (select * from scans) as scans - WHERE form_answer_attachments.id = scans.form_answer_attachment_id - EOS - - ActiveRecord::Base.connection.execute <<-EOS - UPDATE audit_certificates - SET attachment_scan_results=scans.status - FROM (select * from scans) as scans - WHERE audit_certificates.id = scans.audit_certificate_id - EOS - - ActiveRecord::Base.connection.execute <<-EOS - UPDATE support_letter_attachments - SET attachment_scan_results=scans.status - FROM (select * from scans) as scans - WHERE support_letter_attachments.id = scans.support_letter_attachment_id - EOS - end -end diff --git a/lib/virus_scanner.rb b/lib/virus_scanner.rb new file mode 100644 index 0000000000..833494290a --- /dev/null +++ b/lib/virus_scanner.rb @@ -0,0 +1,106 @@ +require "net/http" +require "uri" +require "json" +require "tempfile" + +class VirusScanner + class << self + attr_accessor :base_url, :username, :password + + def configure(base_url:, username:, password:) + @base_url = base_url + @username = username + @password = password + end + + def scan_file(file) + return { malware: false, reason: nil, scan_time: 0 } if ENV["DISABLE_VIRUS_SCANNER"] == "true" + + uri = URI.join(@base_url, "/v2/scan-chunked") + request = Net::HTTP::Post.new(uri) + request.basic_auth(@username, @password) + request["Content-Type"] = "application/octet-stream" + request["Transfer-Encoding"] = "chunked" + + temp_file = download_to_tempfile(file) + begin + request.body_stream = temp_file + response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http| + http.request(request) + end + + handle_response(response) + ensure + temp_file.close + temp_file.unlink + end + end + + private + + def download_to_tempfile(file) + temp_file = Tempfile.new("virus_scan", encoding: "UTF-8") + if file.is_a?(String) + File.open(file, "rb") do |f| + IO.copy_stream(f, temp_file) + end + elsif file.class.to_s.include?("Uploader") + IO.copy_stream(StringIO.new(file.read), temp_file) + elsif file.respond_to?(:read) + IO.copy_stream(file, temp_file) + elsif file.respond_to?(:file) + if file.file.respond_to?(:path) + File.open(file.file.path, "rb") do |f| + IO.copy_stream(f, temp_file) + end + elsif file.file.respond_to?(:read) + temp_file.write(file.file.read) + else + raise ArgumentError, "Don't know how to handle #{file.file.class}" + end + elsif file.respond_to?(:path) + File.open(file.path, "rb") do |f| + IO.copy_stream(f, temp_file) + end + else + raise ArgumentError, "Unsupported file type: #{file.class}" + end + temp_file.rewind + temp_file + end + + def handle_response(response) + case response.code.to_i + when 200 + parse_scan_result(response.body) + when 400 + raise BadRequestError, "Invalid request: None or more than one file specified" + when 401 + raise AuthenticationError, "Invalid credentials" + when 413 + raise FileTooLargeError, "File is too large (max 1GB)" + when 500 + raise ScanError, "Unexpected server error" + else + raise ScanError, "Unexpected error: #{response.code} #{response.message}" + end + end + + def parse_scan_result(body) + result = JSON.parse(body) + { + malware: result["malware"], + reason: result["reason"], + scan_time: result["time"], + } + end + end + + class BadRequestError < StandardError; end + + class AuthenticationError < StandardError; end + + class FileTooLargeError < StandardError; end + + class ScanError < StandardError; end +end diff --git a/spec/features/admin/form_answers/remove_audit_certificate_spec.rb b/spec/features/admin/form_answers/remove_audit_certificate_spec.rb index 4d252a4736..8d4fed16ed 100644 --- a/spec/features/admin/form_answers/remove_audit_certificate_spec.rb +++ b/spec/features/admin/form_answers/remove_audit_certificate_spec.rb @@ -49,6 +49,7 @@ let!(:admin) { create(:admin) } before do + allow_any_instance_of(AuditCertificate).to receive(:clean?).and_return(true) login_admin(admin) visit admin_form_answer_path(form_answer) diff --git a/spec/lib/virus_scanner_spec.rb b/spec/lib/virus_scanner_spec.rb new file mode 100644 index 0000000000..063b0c1245 --- /dev/null +++ b/spec/lib/virus_scanner_spec.rb @@ -0,0 +1,107 @@ +require "rails_helper" + +describe VirusScanner do + let(:base_url) { "http://example.com" } + let(:username) { "testuser" } + let(:password) { "testpass" } + let(:file_path) { "/path/to/test/file.txt" } + let(:file_content) { "file content" } + + before do + VirusScanner.configure(base_url: base_url, username: username, password: password) + end + + describe ".configure" do + it "sets the base_url, username, and password" do + expect(VirusScanner.base_url).to eq(base_url) + expect(VirusScanner.username).to eq(username) + expect(VirusScanner.password).to eq(password) + end + end + + describe ".scan_file" do + let(:uri) { URI.join(base_url, "/v2/scan-chunked") } + let(:http_response) { instance_double(Net::HTTPResponse) } + let(:http) { instance_double(Net::HTTP) } + let(:temp_file) { instance_double(Tempfile, close: nil, unlink: nil, path: "/tmp/virus_scan") } + + before do + allow(Tempfile).to receive(:new).and_return(temp_file) + allow(temp_file).to receive(:write) + allow(temp_file).to receive(:rewind) + allow(File).to receive(:open).with(file_path, "rb").and_yield(StringIO.new(file_content)) + allow(IO).to receive(:copy_stream) + allow(Net::HTTP).to receive(:start).and_yield(http) + allow(http).to receive(:request).and_return(http_response) + allow_any_instance_of(Net::HTTP::Post).to receive(:body_stream=) + end + + context "when the scan is successful and no malware is found" do + let(:response_body) { '{"malware": false, "reason": null, "time": 0.16444095800397918}' } + + before do + allow(http_response).to receive(:code).and_return("200") + allow(http_response).to receive(:body).and_return(response_body) + end + + it "returns the parsed scan result" do + result = VirusScanner.scan_file(file_path) + expect(result).to eq({ malware: false, reason: nil, scan_time: 0.16444095800397918 }) + end + end + + context "when the scan is successful and malware is found" do + let(:response_body) { '{"malware": true, "reason": "Win.Test.EICAR_HDB-1", "time": 0.24377495900262147}' } + + before do + allow(http_response).to receive(:code).and_return("200") + allow(http_response).to receive(:body).and_return(response_body) + end + + it "returns the parsed scan result with malware details" do + result = VirusScanner.scan_file(file_path) + expect(result).to eq({ malware: true, reason: "Win.Test.EICAR_HDB-1", scan_time: 0.24377495900262147 }) + end + end + + context "when the request is invalid" do + before do + allow(http_response).to receive(:code).and_return("400") + end + + it "raises a BadRequestError" do + expect { VirusScanner.scan_file(file_path) }.to raise_error(VirusScanner::BadRequestError, "Invalid request: None or more than one file specified") + end + end + + context "when authentication fails" do + before do + allow(http_response).to receive(:code).and_return("401") + end + + it "raises an AuthenticationError" do + expect { VirusScanner.scan_file(file_path) }.to raise_error(VirusScanner::AuthenticationError, "Invalid credentials") + end + end + + context "when the file is too large" do + before do + allow(http_response).to receive(:code).and_return("413") + end + + it "raises a FileTooLargeError" do + expect { VirusScanner.scan_file(file_path) }.to raise_error(VirusScanner::FileTooLargeError, "File is too large (max 1GB)") + end + end + + context "when an unexpected server error occurs" do + before do + allow(http_response).to receive(:code).and_return("500") + end + + it "raises a ScanError" do + expect { VirusScanner.scan_file(file_path) }.to raise_error(VirusScanner::ScanError, "Unexpected server error") + end + end + end +end diff --git a/spec/models/concerns/scan_files_spec.rb b/spec/models/concerns/scan_files_spec.rb new file mode 100644 index 0000000000..42efda88b1 --- /dev/null +++ b/spec/models/concerns/scan_files_spec.rb @@ -0,0 +1,102 @@ +require "rails_helper" + +RSpec.describe ScanFiles do + let(:record) { create(:form_answer_attachment) } + + describe "callbacks" do + describe "#set_scan_results_to_pending" do + it "defines the method" do + record.save + + expect(record.respond_to?(:set_scan_results_to_pending)).to be_truthy + end + + it "calls the method and sets the scan results to pending" do + expect(record).to receive(:set_scan_results_to_pending) + + record.save + + expect(record.reload.file_scan_results).to eq "pending" + end + end + + describe "#perform_virus_scan" do + it "calls the method on create/update" do + expect(record).to receive(:perform_virus_scan) + + record.update(attachable: create(:user)) + end + + it "enqueues a FileScanJob for each file attribute to scan" do + expect(FileScanJob).to receive(:perform_later).with( + { + model: "FormAnswerAttachment", + column: :file, + id: record.id, + }.to_json, + "FormAnswerAttachment", + record.id, + :file, + ) + + record.update(attachable: create(:user)) + end + end + end + + describe ".scan_for_viruses" do + it "defines methods for scanning and cleaning per given attribute" do + # assumes [scan_for_viruses :file] stated in model + expect(record.respond_to?(:scan_file!)).to be_truthy + expect(record.respond_to?(:on_scan_file)).to be_truthy + expect(record.respond_to?(:clean?)).to be_truthy + expect(record.respond_to?(:pending_or_scanning?)).to be_truthy + expect(record.respond_to?(:infected?)).to be_truthy + expect(record.respond_to?(:set_scan_results_to_pending)).to be_truthy + end + end + + describe ".scan_[attr_name]" do + context "when ENV['DISABLE_VIRUS_SCANNER'] == 'true'" do + before do + ENV["DISABLE_VIRUS_SCANNER"] = "true" + allow(record).to receive(:move_to_clean_bucket) + end + + after do + ENV["DISABLE_VIRUS_SCANNER"] = "false" + end + + it "updates [attr_name]_scan_results to clean" do + record.scan_file! + + expect(record.reload.file_scan_results).to eq "clean" + end + + it "calls the move_to_clean_bucket method" do + expect(record).to receive(:move_to_clean_bucket).with(:file) + + record.scan_file! + end + end + + context "when ENV['DISABLE_VIRUS_SCANNER'] == 'false'" do + it "enqueues a FileScanJob and updates [attr_name]_scan_results to scanning" do + expect(FileScanJob).to receive(:perform_later).with( + { + model: "FormAnswerAttachment", + column: :file, + id: record.id, + }.to_json, + "FormAnswerAttachment", + record.id, + "file", + ) + + record.scan_file! + + expect(record.reload.file_scan_results).to eq "scanning" + end + end + end +end diff --git a/spec/models/form_answer_attachment_spec.rb b/spec/models/form_answer_attachment_spec.rb index 3299c5c31f..d2f1b41e7d 100644 --- a/spec/models/form_answer_attachment_spec.rb +++ b/spec/models/form_answer_attachment_spec.rb @@ -12,13 +12,13 @@ context "scan" do it "should scan new file" do - expect_any_instance_of(FormAnswerAttachment).to receive(:scan_file!) + expect_any_instance_of(FormAnswerAttachment).to receive(:perform_virus_scan) create(:form_answer_attachment, question_key: "org_chart") end it "should not scan if the file is infected and removed" do attachment = create(:form_answer_attachment, question_key: "org_chart") - expect_any_instance_of(FormAnswerAttachment).not_to receive(:scan_file_without_cleanup!) + expect_any_instance_of(FormAnswerAttachment).not_to receive(:scan_file!) attachment.remove_file! attachment.save! end From a1c590c7f1c3951739709bd939d5ea6a0241f41d Mon Sep 17 00:00:00 2001 From: phil-l-brockwell Date: Wed, 20 Nov 2024 10:56:18 +0000 Subject: [PATCH 2/2] Remove clean? ambiguity from FileUploader --- app/uploaders/file_uploader.rb | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/app/uploaders/file_uploader.rb b/app/uploaders/file_uploader.rb index 0ac94f3fed..f40169df2b 100644 --- a/app/uploaders/file_uploader.rb +++ b/app/uploaders/file_uploader.rb @@ -5,16 +5,11 @@ class FileUploader < CarrierWave::Uploader::Base storage :custom def store_dir - base_dir = (model.respond_to?(:clean?) && model.clean?) ? "permanent" : "tmp" "uploads/#{base_dir}/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}" end def read - if model.respond_to?(:clean?) && model.clean? - read_from_permanent_storage - else - super - end + clean? ? read_from_permanent_storage : super end def extension_allowlist @@ -26,19 +21,11 @@ def filename end def fog_credentials - if fog_directory.include?("clean") - clean_bucket_credentials - else - tmp_bucket_credentials - end + clean? ? clean_bucket_credentials : tmp_bucket_credentials end def fog_directory - if model.respond_to?(:clean?) && model.clean? - ENV["AWS_S3_PERMANENT_BUCKET"] - else - ENV["AWS_S3_TMP_BUCKET"] - end + clean? ? ENV["AWS_S3_PERMANENT_BUCKET"] : ENV["AWS_S3_TMP_BUCKET"] end def permanent_path @@ -47,6 +34,14 @@ def permanent_path private + def base_dir + clean? ? "permanent" : "tmp" + end + + def clean? + model.respond_to?(:clean?) && model.clean? + end + def read_from_permanent_storage if Rails.env.production? || ENV["ENABLE_VIRUS_SCANNER_BUCKETS"] == "true" permanent_file = CarrierWave::Storage::Fog::File.new(self, permanent_storage, store_path)