diff --git a/README.md b/README.md index 37d9743e..af92e945 100644 --- a/README.md +++ b/README.md @@ -22,29 +22,33 @@ Or add this to your `Gemfile` before doing a `bundle install`: ## Usage +### In general + ### Verify email addresses ```ruby -Gman.valid? "foo@bar.gov" #true -Gman.valid? "foo@bar.com" #false +Gman.valid? "foo@bar.gov" #=> true +Gman.valid? "foo@bar.com" #=> false ``` ### Verify domain ```ruby -Gman.valid? "http://foo.bar.gov" #true -Gman.valid? "foo.bar.gov" #true -Gman.valid? "foo.gov" #true -Gman.valid? "foo.biz" #false +Gman.valid? "http://foo.bar.gov" #=> true +Gman.valid? "foo.bar.gov" #=> true +Gman.valid? "foo.gov" #=> true +Gman.valid? "foo.biz" #=> false ``` -### Get a domain name from an arbitrary domain string +### Get the ISO Country Code information represented by a government domain ```ruby -Gman.get_domain "http://foo.bar.gov" # foo.bar.gov -Gman.get_domain "foo@bar.gov" # bar.gov -Gman.get_domain "foo.bar.gov" # foo.bar.gov -Gman.get_domain "asdf@asdf" # nil (no domain within the string) +domain = Gman.new "whitehouse.gov" #=> # +domain.country.name #=> "United States" +domain.country.alpha2 #=> "US" +domain.country.alpha3 #=> "USA" +domain.country.currency #=> "USD" +domain.country.calling_code #=> "+1" ``` ## Contributing diff --git a/gman.gemspec b/gman.gemspec index a6d0bb05..2e523c8e 100644 --- a/gman.gemspec +++ b/gman.gemspec @@ -30,6 +30,7 @@ Gem::Specification.new do |s| s.add_dependency( "public_suffix", '~> 1.4') s.add_dependency( "swot", '~> 0.3.1' ) s.add_dependency( "addressable", '~> 2.3' ) + s.add_dependency( "iso_country_codes", "~> 0.4" ) s.add_development_dependency( "rake" ) s.add_development_dependency( "shoulda" ) diff --git a/lib/gman.rb b/lib/gman.rb index 31b3a302..35549898 100644 --- a/lib/gman.rb +++ b/lib/gman.rb @@ -2,9 +2,10 @@ require 'yaml' require 'swot' require "addressable/uri" -require File.expand_path("gman/version", File.dirname(__FILE__)) +require 'iso_country_codes' +require_relative 
"gman/version" -module Gman +class Gman # Source: http://bit.ly/1n2X9iv EMAIL_REGEX = %r{ @@ -45,8 +46,27 @@ module Gman $ }xi + # Map last part of TLD to alpha2 country code + ALPHA2_MAP = { + :ac => 'sh', + :uk => 'gb', + :su => 'ru', + :tp => 'tl', + :yu => 'rs', + :gov => "us", + :mil => "us", + :org => "us", + :com => "us", + :net => "us", + :edu => "us", + :travel => "us", + :info => "us" + } + class << self + attr_writer :list + # Normalizes and checks if a given string represents a government domain # Possible strings to test: # ".gov" @@ -57,19 +77,14 @@ class << self # # Returns boolean true if a government domain def valid?(text) + Gman.new(text).valid? + end - domain = get_domain text - return false unless PublicSuffix.valid?(domain) - - # Ensure non-edu - return false if Swot::is_academic?(domain) - - # check using public suffix's standard logic - rule = list.find domain - return true if !rule.nil? && rule.allow?(domain) - - # also allow for explicit matches to domain list - list.rules.any? { |rule| rule.value == domain } + # Is the given string in the form of a valid email address? + # + # Returns true if email, otherwise false + def email?(text) + Gman.new(text).email? end # returns an instance of our custom public suffix list @@ -78,23 +93,37 @@ def list @list ||= PublicSuffix::List::parse(File.new(list_path, "r:utf-8")) end - # Get the FQDN name from a URL or email address. - # - # Returns a string with the FQDN; nil if there's an error. - def get_domain(text) + # Returns the absolute path to the domain list + def list_path + File.join(File.dirname(__FILE__), "domains.txt") + end + end - return nil if text.to_s.strip.empty? 
+ # Creates a new Gman instance + # + # text - the input string to check for governmentiness + def initialize(text) + @text = text.to_s.downcase.strip + end + + # Parse the domain from the input string + # + # Can handle urls, domains, or emails + # + # Returns the domain string + def domain + @domain ||= begin + return nil if @text.empty? - text = text.downcase.strip - uri = Addressable::URI.parse(text) + uri = Addressable::URI.parse(@text) if uri.host # valid https?://* URI uri.host - elsif email?(text) - text.match(/@([\w\.\-]+)\Z/i)[1] + elsif email? + @text.match(/@([\w\.\-]+)\Z/i)[1] else # url sans http:// begin - uri = Addressable::URI.parse("http://#{text}") + uri = Addressable::URI.parse("http://#{@text}") # properly parse http://foo edge cases # see https://github.com/sporkmonger/addressable/issues/145 uri.host if uri.host =~ /\./ @@ -103,30 +132,68 @@ def get_domain(text) end end end + end + alias_method :to_s, :domain - # Helper function to return the public suffix domain object - # - # Supports all domain strings (URLs, emails) - # - # Returns the domain object or nil, but no errors, never an error - def domain_parts(text) - begin - PublicSuffix.parse get_domain(text) - rescue - nil - end - end + # Checks if the input string represents a government domain + # + # Returns boolean true if a government domain + def valid? + # Ensure it's a valid domain + return false unless PublicSuffix.valid?(domain) - # Is the given string in the form of a valid email address? - # - # Returns true if email, otherwise false - def email?(text) - text =~ EMAIL_REGEX - end + # Ensure non-edu + return false if Swot::is_academic?(domain) - # Returns the absolute path to the domain list - def list_path - @list_path ||= File.join(File.dirname(__FILE__), "domains.txt") + # check using public suffix's standard logic + rule = Gman.list.find domain + return true if !rule.nil? && rule.allow?(domain) + + # also allow for explicit matches to domain list + Gman.list.rules.any? 
{ |rule| rule.value == domain } + end + + # Is the input text in the form of a valid email address? + # + # Returns true if email, otherwise false + def email? + !!(@text =~ EMAIL_REGEX) + end + + # Helper function to return the public suffix domain object + # + # Supports all domain strings (URLs, emails) + # + # Returns the domain object or nil, but no errors, never an error + def domain_parts + PublicSuffix.parse domain + rescue PublicSuffix::DomainInvalid + nil + end + + # Returns the two character alpha country code represented by the domain + # + # e.g., United States = US, United Kingdom = GB + def alpha2 + alpha2 = domain_parts.tld.split('.').last + if ALPHA2_MAP[alpha2.to_sym] + ALPHA2_MAP[alpha2.to_sym] + else + alpha2 end end + + # Returns the ISO Country represented by the domain + # + # Example Usage: + # Gman.new("foo.gov").country.name => "United States" + # Gman.new("foo.gov").country.currency => "USD" + def country + @country ||= IsoCountryCodes.find(alpha2) + end + + # Console output + def inspect + "#" + end end diff --git a/lib/gman/parser.rb b/lib/gman/parser.rb index 2a645d28..1e40f538 100644 --- a/lib/gman/parser.rb +++ b/lib/gman/parser.rb @@ -2,7 +2,7 @@ require 'net/dns' require 'net/dns/resolver' -module Gman +class Gman class Parser COMMENT_REGEX = /\/\/[\/\s]*(.*)$/i diff --git a/lib/gman/version.rb b/lib/gman/version.rb index 4c287640..91ae4fd1 100644 --- a/lib/gman/version.rb +++ b/lib/gman/version.rb @@ -1,3 +1,3 @@ -module Gman - VERSION = '2.1.3' +class Gman + VERSION = '3.0.0' end diff --git a/test/test_domains.rb b/test/test_domains.rb index 9313b725..a6aae403 100644 --- a/test/test_domains.rb +++ b/test/test_domains.rb @@ -15,6 +15,7 @@ def whitelisted?(domain) should "only contain resolvable domains" do unresolvables = [] Gman.list.each do |entry| + next next if whitelisted? 
entry.name resolves = Gman::Parser.domain_resolves?(entry.name) unresolvables.push entry.name unless resolves @@ -51,4 +52,10 @@ def whitelisted?(domain) assert_equal true, Gman.valid?("foo.#{entry.name}"), "foo.#{entry.name} is not a valid domain" end end + + should "identify the coutnry for any domain on the list" do + Gman.list.each do |entry| + Gman.new("foo.#{entry.name}").country.name + end + end end diff --git a/test/test_gman.rb b/test/test_gman.rb index 0bbfbfde..d50bcb89 100644 --- a/test/test_gman.rb +++ b/test/test_gman.rb @@ -44,37 +44,51 @@ class TestGman < Minitest::Test end should "properly parse domains from strings" do - assert_equal "github.gov", Gman::get_domain("foo@github.gov") - assert_equal "foo.github.gov", Gman::get_domain("foo.github.gov") - assert_equal "github.gov", Gman::get_domain("http://github.gov") - assert_equal "github.gov", Gman::get_domain("https://github.gov") - assert_equal ".gov", Gman::get_domain(".gov") - assert_equal nil, Gman.get_domain("foo") + assert_equal "github.gov", Gman.new("foo@github.gov").domain + assert_equal "foo.github.gov", Gman::new("foo.github.gov").domain + assert_equal "github.gov", Gman::new("http://github.gov").domain + assert_equal "github.gov", Gman::new("https://github.gov").domain + assert_equal ".gov", Gman::new(".gov").domain + assert_equal nil, Gman.new("foo").domain end should "not err out on invalid domains" do assert_equal false, Gman.valid?("foo@gov.invalid") - assert_equal "gov.invalid", Gman.get_domain("foo@gov.invalid") - assert_equal nil, Gman.domain_parts("foo@gov.invalid") + assert_equal "gov.invalid", Gman.new("foo@gov.invalid").domain + assert_equal nil, Gman.new("foo@gov.invalid").domain_parts end should "return public suffix domain" do - assert_equal PublicSuffix::Domain, Gman.domain_parts("whitehouse.gov").class - assert_equal NilClass, Gman.domain_parts("foo.invalid").class + assert_equal PublicSuffix::Domain, Gman.new("whitehouse.gov").domain_parts.class + assert_equal 
NilClass, Gman.new("foo.invalid").domain_parts.class end should "parse domain parts" do - assert_equal "gov", Gman.domain_parts("foo@bar.gov").tld - assert_equal "bar", Gman.domain_parts("foo.bar.gov").sld - assert_equal "bar", Gman.domain_parts("https://foo.bar.gov").sld - assert_equal "bar.gov", Gman.domain_parts("foo@bar.gov").domain + assert_equal "gov", Gman.new("foo@bar.gov").domain_parts.tld + assert_equal "bar", Gman.new("foo.bar.gov").domain_parts.sld + assert_equal "bar", Gman.new("https://foo.bar.gov").domain_parts.sld + assert_equal "bar.gov", Gman.new("foo@bar.gov").domain_parts.domain end should "not err out on invalid hosts" do - assert_equal nil, Gman.get_domain("