Skip to content

Commit

Permalink
Merge pull request #5 from lyrasis/csv-testing
Browse files Browse the repository at this point in the history
Add automated CSV EOL test utility
  • Loading branch information
kspurgin authored Dec 11, 2024
2 parents 71f1cdf + 0925819 commit 6eeba10
Show file tree
Hide file tree
Showing 6 changed files with 193 additions and 48 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ jobs:
- uses: ruby/setup-ruby@v1
with:
bundler-cache: true
ruby-version: "3.3"
# Setting this later than 3.1 requires the use of anonymous positional
# argument forwarding, which breaks all versions earlier than 3.2
# in our test matrix
ruby-version: "3.1"
- name: Install dependencies
run: bundle install
- name: Run the tests
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ csvlint-earl.ttl
.byebug_history

gemfiles/*.lock
/util/csv_testing.csv
1 change: 1 addition & 0 deletions .standard.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruby_version: 3.1
42 changes: 21 additions & 21 deletions csvlint.gemspec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
lib = File.expand_path("../lib", __FILE__)
lib = File.expand_path("lib", __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "csvlint/version"

Expand All @@ -18,35 +18,35 @@ Gem::Specification.new do |spec|

spec.required_ruby_version = [">= 2.5", "< 3.4"]

spec.add_dependency "csv"
spec.add_dependency "rainbow"
spec.add_dependency "open_uri_redirections"
spec.add_dependency "activesupport"
spec.add_dependency "addressable"
spec.add_dependency "typhoeus"
spec.add_dependency "csv"
spec.add_dependency "escape_utils"
spec.add_dependency "uri_template"
spec.add_dependency "thor"
spec.add_dependency "rack"
spec.add_dependency "net-http-persistent"
spec.add_dependency "open_uri_redirections"
spec.add_dependency "rack"
spec.add_dependency "rainbow"
spec.add_dependency "thor"
spec.add_dependency "typhoeus"
spec.add_dependency "uri_template"

spec.add_development_dependency "appraisal"
spec.add_development_dependency "aruba"
spec.add_development_dependency "bundler", ">= 1.3"
spec.add_development_dependency "rake"
spec.add_development_dependency "cucumber"
spec.add_development_dependency "simplecov"
spec.add_development_dependency "simplecov-rcov"
spec.add_development_dependency "spork"
spec.add_development_dependency "webmock"
spec.add_development_dependency "rspec"
spec.add_development_dependency "rspec-pride"
spec.add_development_dependency "rspec-expectations"
spec.add_development_dependency "coveralls"
spec.add_development_dependency "byebug"
spec.add_development_dependency "coveralls"
spec.add_development_dependency "cucumber"
spec.add_development_dependency "github_changelog_generator"
spec.add_development_dependency "aruba"
spec.add_development_dependency "henry"
spec.add_development_dependency "rake"
spec.add_development_dependency "rdf", "< 4.0"
spec.add_development_dependency "rdf-turtle"
spec.add_development_dependency "henry"
spec.add_development_dependency "rspec"
spec.add_development_dependency "rspec-expectations"
spec.add_development_dependency "rspec-pride"
spec.add_development_dependency "simplecov"
spec.add_development_dependency "simplecov-rcov"
spec.add_development_dependency "spork"
spec.add_development_dependency "standardrb"
spec.add_development_dependency "appraisal"
spec.add_development_dependency "webmock"
end
72 changes: 46 additions & 26 deletions spec/validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
end

it "should validate from a URL" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")

expect(validator.valid?).to eql(true)
Expand All @@ -19,7 +20,8 @@
end

it "should validate from a file path" do
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures",
"valid.csv")))

expect(validator.valid?).to eql(true)
expect(validator.instance_variable_get(:@expected_columns)).to eql(3)
Expand All @@ -28,7 +30,8 @@
end

it "should validate from a file path including whitespace" do
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "white space in filename.csv")))
validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__), "..", "features", "fixtures",
"white space in filename.csv")))

expect(validator.valid?).to eql(true)
end
Expand All @@ -40,7 +43,7 @@
end

context "validation with multiple lines: " do
# TODO multiple lines permits testing of warnings
# TODO: multiple lines permits testing of warnings
# TODO need more assertions in each test, i.e. @formats
# TODO the phrasing of col_counts if only consulting specs might be confusing
# TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
Expand All @@ -53,7 +56,7 @@
validator = Csvlint::Validator.new(data)

expect(validator.valid?).to eql(true)
# TODO would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total
# TODO: would be beneficial to know how formats functions WRT headers - check_format.feature:17 returns 3 rows total
# TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
expect(validator.instance_variable_get(:@expected_columns)).to eql(3)
expect(validator.instance_variable_get(:@col_counts).count).to eql(4)
Expand Down Expand Up @@ -221,7 +224,7 @@
expect(validator.errors.first.type).to eql(:unclosed_quote)
end

# TODO stray quotes is not covered in any spec in this library
# TODO: stray quotes is not covered in any spec in this library
# it "checks for stray quotes" do
# stream = "\"a\",“b“,\"c\"" "\r\n"
# validator = Csvlint::Validator.new(stream)
Expand All @@ -241,7 +244,7 @@
end

it "returns line break errors if incorrectly specified" do
# TODO the logic for catching this error message is very esoteric
# TODO: the logic for catching this error message is very esoteric
stream = "\"a\",\"b\",\"c\"\n"
validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
expect(validator.valid?).to eql(false)
Expand All @@ -255,7 +258,7 @@
data = StringIO.new("minimum, minimum")
validator = Csvlint::Validator.new(data)
validator.reset
expect(validator.validate_header(["minimum", "minimum"])).to eql(true)
expect(validator.validate_header(%w[minimum minimum])).to eql(true)
expect(validator.warnings.size).to eql(1)
expect(validator.warnings.first.type).to eql(:duplicate_column_name)
expect(validator.warnings.first.category).to eql(:schema)
Expand Down Expand Up @@ -338,7 +341,7 @@

validator = Csvlint::Validator.new("http://example.com/example.csv")

rows.each_with_index do |row, i|
rows.each_with_index do |row, _i|
validator.build_formats(row)
end

Expand All @@ -354,7 +357,7 @@

validator = Csvlint::Validator.new("http://example.com/example.csv")

rows.each_with_index do |row, i|
rows.each_with_index do |row, _i|
validator.build_formats(row)
end

Expand Down Expand Up @@ -415,7 +418,7 @@
end
end

# TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
# TODO: the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
context "when detecting headers" do
it "should default to expecting a header" do
validator = Csvlint::Validator.new("http://example.com/example.csv")
Expand All @@ -436,21 +439,24 @@
end

it "should look in content-type for header=absent" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=absent"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=absent"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(false)
expect(validator.errors.size).to eql(0)
end

it "should look in content-type for header=present" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv; header=present"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(true)
expect(validator.errors.size).to eql(0)
end

it "assume header present if not specified in content type" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(true)
expect(validator.errors.size).to eql(0)
Expand All @@ -459,7 +465,8 @@
end

it "give wrong content type error if content type is wrong" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/html"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/html"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.header?).to eql(true)
expect(validator.errors.size).to eql(1)
Expand Down Expand Up @@ -504,30 +511,37 @@
end

it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.valid?).to eql(true)
end

it "should be valid if we have a dialect and the data is from the web" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
# header defaults to true in csv dialect, so this is valid
validator = Csvlint::Validator.new("http://example.com/example.csv", {})
expect(validator.valid?).to eql(true)

stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header" => true})
expect(validator.valid?).to eql(true)

stub_request(:get, "http://example.com/example.csv").to_return(status: 200, headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv"}, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
validator = Csvlint::Validator.new("http://example.com/example.csv", {"header" => false})
expect(validator.valid?).to eql(true)
end
end

context "accessing metadata" do
before :all do
stub_request(:get, "http://example.com/crlf.csv").to_return(status: 200, body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "windows-line-endings.csv")))
stub_request(:get, "http://example.com/crlf.csv").to_return(status: 200,
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "windows-line-endings.csv"
)))
stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(status: 404)
end

Expand All @@ -540,33 +554,39 @@
it "should give access to the complete CSV data file" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"},
body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
)))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.valid?).to eql(true)
data = validator.data

expect(data.count).to eql 3
expect(data[0]).to eql ["Foo", "Bar", "Baz"]
expect(data[2]).to eql ["3", "2", "1"]
expect(data[0]).to eql %w[Foo Bar Baz]
expect(data[2]).to eql %w[3 2 1]
end

it "should count the total number of rows read" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"},
body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
)))
validator = Csvlint::Validator.new("http://example.com/example.csv")
expect(validator.row_count).to eq(3)
end

it "should limit number of lines read" do
stub_request(:get, "http://example.com/example.csv").to_return(status: 200,
headers: {"Content-Type" => "text/csv; header=present"},
body: File.read(File.join(File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv")))
body: File.read(File.join(
File.dirname(__FILE__), "..", "features", "fixtures", "valid.csv"
)))
validator = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
expect(validator.valid?).to eql(true)
data = validator.data
expect(data.count).to eql 2
expect(data[0]).to eql ["Foo", "Bar", "Baz"]
expect(data[0]).to eql %w[Foo Bar Baz]
end

context "with a lambda" do
Expand Down
Loading

0 comments on commit 6eeba10

Please sign in to comment.