Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
[//]: # (comment: Don't forget to update lib/datadog/statsd/version.rb:DogStatsd::Statsd::VERSION when releasing a new version)


## Unreleased

* [BUGFIX] Recover from a wedged UDS socket on Datadog Agent restart.
`BadSocketError` now inherits from a new `Connection::RetryableError`, so
`Connection#write` closes the socket and reconnects instead of dropping
every subsequent metric. [#330][] by [@joshuay03][]

## 5.7.1 / 2025.08.20

* [IMPROVEMENT] Suppress external env if origin detection is configured off. [#316][] by [@StephenWakely][]
Expand Down Expand Up @@ -505,6 +512,7 @@ Future versions are likely to introduce backward incompatibilities with < Ruby 1
[#306]: https://github.com/DataDog/dogstatsd-ruby/issues/306
[#310]: https://github.com/DataDog/dogstatsd-ruby/issues/310
[#316]: https://github.com/DataDog/dogstatsd-ruby/pull/316
[#330]: https://github.com/DataDog/dogstatsd-ruby/pull/330
[@AMekss]: https://github.com/AMekss
[@abicky]: https://github.com/abicky
[@adimitrov]: https://github.com/adimitrov
Expand All @@ -527,6 +535,7 @@ Future versions are likely to introduce backward incompatibilities with < Ruby 1
[@janester]: https://github.com/janester
[@jhawthorn]: https://github.com/jhawthorn
[@jordan-brough]: https://github.com/jordan-brough
[@joshuay03]: https://github.com/joshuay03
[@jtzemp]: https://github.com/jtzemp
[@kazu9su]: https://github.com/kazu9su
[@kazwolfe]: https://github.com/kazwolfe
Expand Down
5 changes: 4 additions & 1 deletion lib/datadog/statsd/connection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
module Datadog
class Statsd
class Connection
# Marker base class for connection errors that are worth one reconnect
# attempt. Subclass it for transport-specific failures that should be
# retried rather than dropped.
class RetryableError < StandardError
end

def initialize(telemetry: nil, logger: nil)
@telemetry = telemetry
@logger = logger
Expand All @@ -25,7 +27,8 @@ def write(payload)
# Try once to reconnect if the socket has been closed
retries ||= 1
if retries <= 1 &&
(boom.is_a?(Errno::ENOTCONN) or
(boom.is_a?(RetryableError) or
boom.is_a?(Errno::ENOTCONN) or
boom.is_a?(Errno::ECONNREFUSED) or
boom.is_a?(IOError) && boom.message =~ /closed stream/i)
retries += 1
Expand Down
5 changes: 1 addition & 4 deletions lib/datadog/statsd/uds_connection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
module Datadog
class Statsd
class UDSConnection < Connection
class BadSocketError < StandardError; end
class BadSocketError < RetryableError; end

# DogStatsd unix socket path
attr_reader :socket_path
Expand Down Expand Up @@ -39,9 +39,6 @@ def send_message(message)
connect unless @socket
@socket.sendmsg_nonblock(message)
rescue Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ENOENT => e
# TODO: FIXME: This error should be considered as a retryable error in the
# Connection class. An even better solution would be to make BadSocketError inherit
# from a specific retryable error class in the Connection class.
raise BadSocketError, "#{e.class}: #{e}"
end
end
Expand Down
115 changes: 57 additions & 58 deletions spec/integrations/connection_edge_case_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,16 @@
let(:fake_socket) do
instance_double(Socket,
connect: true,
sendmsg_nonblock: true
sendmsg_nonblock: true,
close: true
)
end

let(:fake_socket_retry) do
instance_double(Socket,
connect: true,
sendmsg_nonblock: true
sendmsg_nonblock: true,
close: true
)
end

Expand All @@ -262,26 +264,18 @@
subject.write('foobar')
end

it 'retries on the second opened socket' # do
# expect(fake_socket_retry)
# .to receive(:sendmsg_nonblock)
# .with('foobar')

# subject.write('foobar')
# end

# FIXME: BadSocketError is not correctly caught by Connection class to retry
it 'does not correctly retry (1)' do
it 'retries on the second opened socket' do
expect(fake_socket_retry)
.not_to receive(:sendmsg_nonblock)
.to receive(:sendmsg_nonblock)
.with('foobar')

subject.write('foobar')
end

it 'does not correctly retry (2)' do
subject.write('foobar')
it 'closes the original socket before reconnecting' do
expect(fake_socket).to receive(:close)

expect(log.string).to match 'Statsd: Datadog::Statsd::UDSConnection::BadSocketError Errno::ECONNRESET: Connection reset by peer'
subject.write('foobar')
end
end

Expand All @@ -299,8 +293,7 @@
end.not_to raise_error
end

# the retry mechanism is broken; once it's fixed, this test should pass
it 'logs the error message', pending: true do
it 'logs the error message' do
subject.write('foobar')
expect(log.string).to match 'Statsd: RuntimeError yolo'
end
Expand All @@ -319,8 +312,7 @@
end.not_to raise_error
end

# the retry mechanism is broken; once it's fixed, this test should pass
it 'logs the error message', pending: true do
it 'logs the error message' do
subject.write('foobar')
expect(log.string).to match 'Statsd: SocketError yolo'
end
Expand All @@ -344,26 +336,18 @@
subject.write('foobar')
end

it 'retries on the second opened socket' # do
# expect(fake_socket_retry)
# .to receive(:sendmsg_nonblock)
# .with('foobar')

# subject.write('foobar')
# end

# FIXME: BadSocketError is not correctly caught by Connection class to retry
it 'does not correctly retry (1)' do
it 'retries on the second opened socket' do
expect(fake_socket_retry)
.not_to receive(:sendmsg_nonblock)
.to receive(:sendmsg_nonblock)
.with('foobar')

subject.write('foobar')
end

it 'does not correctly retry (2)' do
subject.write('foobar')
it 'closes the original socket before reconnecting' do
expect(fake_socket).to receive(:close)

expect(log.string).to match 'Statsd: Datadog::Statsd::UDSConnection::BadSocketError Errno::ECONNREFUSED: Connection refused - closed stream'
subject.write('foobar')
end
end

Expand All @@ -381,8 +365,7 @@
end.not_to raise_error
end

# the retry mechanism is broken; once it's fixed, this test should pass
it 'logs the error message', pending: true do
it 'logs the error message' do
subject.write('foobar')
expect(log.string).to match 'Statsd: RuntimeError yolo'
end
Expand All @@ -401,48 +384,64 @@
end.not_to raise_error
end

# the retry mechanism is broken; once it's fixed, this test should pass
it 'logs the error message', pending: true do
it 'logs the error message' do
subject.write('foobar')
expect(log.string).to match 'Errno::ECONNREFUSED Connection refused - yolo'
expect(log.string).to match 'Statsd: Datadog::Statsd::UDSConnection::BadSocketError Errno::ECONNREFUSED: Connection refused - yolo'
end
end
end
end

context 'when there is no socket (drop strategy)' do
context 'when the socket file is missing (retry strategy)' do
before do
allow(fake_socket)
.to receive(:sendmsg_nonblock)
.and_raise(Errno::ENOENT)
end

it 'sends using the first socket' do
expect(fake_socket)
.to receive(:sendmsg_nonblock)
.with('foobar')
context 'when retrying is working' do
it 'tries with the initial socket' do
expect(fake_socket)
.to receive(:sendmsg_nonblock)
.with('foobar')

subject.write('foobar')
end
subject.write('foobar')
end

it 'retries on the second opened socket' do
expect(fake_socket_retry)
.to receive(:sendmsg_nonblock)
.with('foobar')

it 'ignores the writing failure (message dropped)' do
expect do
subject.write('foobar')
end.not_to raise_error
end
end

it 'does not retry to send message' do
expect(fake_socket_retry)
.not_to receive(:sendmsg_nonblock)
it 'closes the original socket before reconnecting' do
expect(fake_socket).to receive(:close)

subject.write('foobar')
subject.write('foobar')
end
end

# TODO: FIXME: we have to exclude the Errno::ENOENT for the retry strategy
it 'logs the error message', pending: true do
subject.write('foobar')
context 'when retrying fails' do
context 'because the socket file is still missing' do
before do
allow(fake_socket_retry)
.to receive(:sendmsg_nonblock)
.and_raise(Errno::ENOENT)
end

expect(log.string).to match 'Statsd: Errno::ENOENT No such file or directory'
it 'ignores the connection failure' do
expect do
subject.write('foobar')
end.not_to raise_error
end

it 'logs the error message' do
subject.write('foobar')
expect(log.string).to match 'Statsd: Datadog::Statsd::UDSConnection::BadSocketError Errno::ENOENT: No such file or directory'
end
end
end
end

Expand Down
Loading