Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ GIT

GIT
remote: https://github.com/MothOnMars/robotex
revision: e51e6085d21ba532aaa3d54e979c92090c842af4
revision: 7e91000a3592e96ead47591e6c5c15233c19ab43
specs:
robotex (1.0.0)
open_uri_redirections (~> 0.2.1)

GIT
remote: https://github.com/MothOnMars/sitemaps
Expand Down Expand Up @@ -433,6 +434,7 @@ GEM
nokogumbo (1.5.0)
nokogiri
oj (3.3.10)
open_uri_redirections (0.2.1)
os (0.9.6)
paperclip (5.2.1)
activemodel (>= 4.2.0)
Expand Down
7 changes: 7 additions & 0 deletions app/models/searchgov_domain.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ class SearchgovDomain < ActiveRecord::Base

attr_readonly :domain

# Crawl delay for this domain, computed once per instance and cached in @delay.
#
# Asks Robotex (as the 'usasearch' user agent) for the delay that applies to
# the domain's root URL over plain http, and falls back to 1 when Robotex
# reports none.
# NOTE(review): the unit is presumably seconds, as with robots.txt
# Crawl-delay -- confirm against the caller.
def delay
  return @delay if @delay

  crawl_delay = Robotex.new('usasearch').delay("http://#{domain}/")
  @delay = crawl_delay || 1
end

private

def valid_domain?
Expand Down
43 changes: 43 additions & 0 deletions spec/models/searchgov_domain_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,47 @@
end
end
end

# Exercises SearchgovDomain#delay against stubbed robots.txt responses.
# `searchgov_domain` and `domain` are expected to be defined by the enclosing
# example group; each context supplies its own `robots` body.
describe '#delay' do
  subject(:delay) { searchgov_domain.delay }

  # Successful plain-text robots.txt response reused by the stubs below.
  let(:robots_response) do
    { status: [200, "OK"], headers: { content_type: 'text/plain' }, body: robots }
  end

  before do
    stub_request(:get, "http://#{domain}/robots.txt").to_return(robots_response)
  end

  context 'when a delay is specified in robots.txt' do
    let(:robots) { "User-agent: *\nCrawl-delay: 10" }

    it { is_expected.to eq 10 }

    context 'when the domain is redirected' do
      before do
        # The http URL answers with a 301 pointing at the https URL,
        # which then serves the actual robots.txt body.
        stub_request(:get, "http://#{domain}/robots.txt")
          .to_return(status: 301, headers: { location: "https://#{domain}/robots.txt" }, body: "")
        stub_request(:get, "https://#{domain}/robots.txt").to_return(robots_response)
      end

      it { is_expected.to eq 10 }
    end
  end

  context 'when no delay is specified' do
    let(:robots) { "User-agent: *\nDisallow: /somedir/" }

    it('defaults to 1') { expect(delay).to eq 1 }
  end

  context 'when a delay is specified for the "usasearch" user agent' do
    let(:robots) { "User-agent: *\nCrawl-delay: 10\nUser-agent: usasearch\nCrawl-delay: 2" }

    # Left pending until the robotex gem is fixed:
    # https://github.com/chriskite/robotex/issues/9
    # https://www.pivotaltracker.com/story/show/157329443
    xit { is_expected.to eq 2 }
  end
end
end