Skip to content

Commit

Permalink
Initial Bandit.Trace implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
mtrudel committed Jan 10, 2025
1 parent ab734a4 commit 27c4b5c
Showing 1 changed file with 147 additions and 0 deletions.
147 changes: 147 additions & 0 deletions lib/bandit/trace.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
defmodule Bandit.Trace do
@moduledoc """
Helper functions to provide visibility into a running Bandit instance
Intended to be used within an IEx session attached to a running Bandit instance, as follows:
```
iex> Bandit.Trace.start_tracing()
... # Wait for traces to show up whenever exceptions are raised
iex> Bandit.Trace.stop_tracing()
```
By default, `Bandit.Trace` will emit a trace on every exception that is Bandit sees (both those
emitted from within your Plug and also those that are internal to Bandit). These traces consist
of a complete dump of all telemetry events that occur in the offending request's parent
connection.
Tracing imposes a modest but non-zero load; it *should* be safe to run in most production
environments, but it is not intended to run on an ongoing basis.
By default, `Bandit.Trace` maintains a FIFO log of the last 10000 telemetry events that Bandit
has emitted. Events which correlate to the parent connection which have been evicted from this
queue will not be included in this output.
**WARNING** The emitted logs contains a *complete* copy of your request's Plug data, as well as *all* data
sent and received on all requests which are contained in the output. It is therefore of the utmost
importance that you carefully redact the output before sharing it publicly.
"""

defstruct queue: nil, size: 0, max_size: 10_000, trace_on_exception: true

use GenServer

require Logger

@events [
[:bandit, :request, :start],
[:bandit, :request, :stop],
[:bandit, :request, :exception],
[:bandit, :websocket, :start],
[:bandit, :websocket, :stop],
[:thousand_island, :connection, :start],
[:thousand_island, :connection, :stop],
[:thousand_island, :connection, :ready],
[:thousand_island, :connection, :async_recv],
[:thousand_island, :connection, :recv],
[:thousand_island, :connection, :recv_error],
[:thousand_island, :connection, :send],
[:thousand_island, :connection, :send_error],
[:thousand_island, :connection, :sendfile],
[:thousand_island, :connection, :sendfile_error],
[:thousand_island, :connection, :socket_shutdown]
]

@doc """
Start tracing of all Bandit requests
See module documentation for intended usage. Accepts the following options:
* `max_size`: The size of the telemetry event queue to maintain. By default, `Bandit.Trace` maintains a
queue of the last 10000 telemetry events
* `trace_on_exception`: Whether or not to emit traces when an error is raised within
Bandit. Defaults to `true`
"""
def start_tracing(opts \\ []), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__)

@doc """
Stop any active trace session
"""
def stop_tracing(), do: GenServer.stop(__MODULE__)

Check warning on line 70 in lib/bandit/trace.ex

View workflow job for this annotation

GitHub Actions / lint / lint (1.17.x, 27.x)

Do not use parentheses when defining a function which has no arguments.

def handle_event(event, measurements, metadata, pid),
do: GenServer.cast(pid, {:event, {event, measurements, metadata, :os.perf_counter()}})

@doc """
Return the complete queue of telemetry events that `Bandit.Trace` is currently tracking
"""
def get_events, do: GenServer.call(__MODULE__, :get_events)

@impl GenServer
def init(opts) do
:telemetry.attach_many(self(), @events, &__MODULE__.handle_event/4, self())
{:ok, struct!(%__MODULE__{queue: :queue.new()}, opts)}
end

@impl GenServer
def terminate(_, _), do: :telemetry.detach(self())

@impl GenServer
def handle_cast({:event, event}, state) do
state
|> maybe_pop()
|> push(event)
|> tap(&maybe_trace(&1, event))
|> then(&{:noreply, &1})
end

defp maybe_pop(%{size: size, max_size: max_size} = state) when size >= max_size,
do: maybe_pop(%{state | queue: :queue.drop(state.queue), size: size - 1})

defp maybe_pop(state), do: state

defp push(state, event),
do: %{state | queue: :queue.in(event, state.queue), size: state.size + 1}

defp maybe_trace(
%{trace_on_exception: true} = state,
{[:bandit, :request, :exception], _, metadata, _}
) do
connection_span_context = Map.get(metadata, :connection_telemetry_span_context)

IO.puts("=====================================")
IO.puts("Starting telemetry trace for exception")
IO.puts("=====================================")

:queue.to_list(state.queue)
|> Enum.filter(fn {_, _, metadata, _} ->
Map.get(metadata, :telemetry_span_context) == connection_span_context ||
Map.get(metadata, :connection_telemetry_span_context) == connection_span_context
end)
|> format_list()
|> IO.inspect(limit: :infinity, pretty: true, printable_limit: :infinity)

Check warning on line 122 in lib/bandit/trace.ex

View workflow job for this annotation

GitHub Actions / lint / lint (1.17.x, 27.x)

There should be no calls to `IO.inspect/1`.

IO.puts("======================================")
IO.puts("Completed telemetry trace for exception")
IO.puts("======================================")

:ok
end

defp maybe_trace(_state, _event), do: :ok

@impl GenServer
def handle_call(:get_events, _from, state),
do: {:reply, :queue.to_list(state.queue) |> format_list(), state}

defp format_list([]), do: :ok

defp format_list([{_, _, _, start_time} | _] = events),
do: Enum.map(events, &format_tuple(&1, start_time))

defp format_tuple({event, measurements, metadata, time}, start_time) do
time = :erlang.convert_time_unit(time - start_time, :perf_counter, :microsecond)
%{telemetry_span_context: span_id} = metadata
{time, span_id, event, measurements, metadata}
end
end

0 comments on commit 27c4b5c

Please sign in to comment.