Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 94 additions & 47 deletions extra/lib/plausible/stats/exploration.ex
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@ defmodule Plausible.Stats.Exploration do
@max_steps 20
@max_candidates 20

# Default option values for next-step lookups. `interesting_funnel/2` reads
# the `:include_wildcard?` default from this list.
# NOTE(review): presumably `next_steps/3` merges caller opts over these
# defaults before fetching each key with `Keyword.fetch!/2` — confirm.
@next_steps_defaults [search_term: "", direction: :forward, max_candidates: 10]
@next_steps_defaults [
search_term: "",
direction: :forward,
max_candidates: 10,
include_wildcard?: true
]

@doc """
Returns the upper bound applied to the `:max_steps` option of
`interesting_funnel/2`.
"""
@spec max_steps() :: pos_integer()
def max_steps do
  @max_steps
end
Expand All @@ -84,10 +89,11 @@ defmodule Plausible.Stats.Exploration do
direction = Keyword.fetch!(opts, :direction)
search_term = Keyword.fetch!(opts, :search_term)
max_candidates = min(Keyword.fetch!(opts, :max_candidates), @max_candidates)
include_wilcard? = Keyword.fetch!(opts, :include_wildcard?)

query
|> Base.base_event_query()
|> next_steps_query(journey, search_term, direction, max_candidates)
|> next_steps_query(journey, search_term, direction, max_candidates, include_wilcard?)
# We pass the query struct to record query metadata for
# the CH debug console.
|> ClickhouseRepo.all(query: query)
Expand Down Expand Up @@ -133,38 +139,60 @@ defmodule Plausible.Stats.Exploration do
* `:max_steps` - maximum number of funnel steps to build (default: `6`)
* `:max_candidates` - passed to `next_steps/3`, limiting
how many candidate next steps are fetched per step (default: `10`)
* `:include_wildcard?` - passed to `next_steps/3`; controls whether
implicit wildcard pathnames are included in the suggestions
(default: `true`)
"""
@spec interesting_funnel(Query.t(), keyword()) ::
{:ok, [funnel_step()]} | {:error, :not_found}
def interesting_funnel(query, opts \\ []) do
# Clamp the caller-supplied limits to the module-wide maximums.
# NOTE(review): Keyword.get/3 only falls back to its default when the key is
# absent. An explicit `max_steps: nil` yields min(nil, @max_steps), which is
# @max_steps (numbers sort before atoms), not the documented default —
# confirm callers never pass nil here.
max_steps = min(Keyword.get(opts, :max_steps, 6), @max_steps)
max_candidates = min(Keyword.get(opts, :max_candidates, 10), @max_candidates)

case build_interesting_journey(query, max_steps, max_candidates) do
# When the caller does not set the flag, reuse the default declared in
# @next_steps_defaults so both entry points agree.
include_wildcard? =
Keyword.get(
opts,
:include_wildcard?,
Keyword.fetch!(@next_steps_defaults, :include_wildcard?)
)

case build_interesting_journey(query, max_steps, max_candidates, include_wildcard?) do
# An empty journey means not even a first step could be picked.
[] -> {:error, :not_found}
# Otherwise the result of journey_funnel/2 is returned unchanged.
journey -> journey_funnel(query, journey)
end
end

# Entry point of the journey builder: starts the recursion with an empty
# journey and an empty set of already-used step keys.
defp build_interesting_journey(query, max_steps, max_candidates) do
do_build_journey(query, [], MapSet.new(), max_steps, max_candidates)
defp build_interesting_journey(query, max_steps, max_candidates, include_wildcard?) do
do_build_journey(query, [], MapSet.new(), max_steps, max_candidates, include_wildcard?)
end

# Recursively extends `journey` one step at a time.
#
# Terminating clause: stop as soon as the journey holds `max_steps` steps.
defp do_build_journey(_query, journey, _seen, max_steps, _max_candidates)
defp do_build_journey(_query, journey, _seen, max_steps, _max_candidates, _include_wildcard?)
when length(journey) >= max_steps do
journey
end

# Recursive clause: fetch candidates for the next step, pick one that has
# not been used yet, append it and recurse. `seen` holds the normalized
# keys of every step already in the journey.
defp do_build_journey(query, journey, seen, max_steps, max_candidates) do
{:ok, candidates} = next_steps(query, journey, max_candidates: max_candidates)
defp do_build_journey(query, journey, seen, max_steps, max_candidates, include_wildcard?) do
# Assertive match: anything other than {:ok, _} from next_steps/3 raises a
# MatchError here.
{:ok, candidates} =
next_steps(query, journey,
max_candidates: max_candidates,
include_wildcard?: include_wildcard?
)

# NOTE(review): find_unseen_step/2 is defined outside this view; presumably
# it returns the first candidate whose normalized key is not in `seen`, or
# nil when there is none — confirm.
case find_unseen_step(candidates, seen) do
nil ->
# No fresh candidate left: the journey ends early.
journey

step ->
new_seen = MapSet.put(seen, normalize_step_key(step))

# `++` appends in O(length(journey)), and the terminating clause caps
# that length at `max_steps`.
do_build_journey(
query,
journey ++ [step],
new_seen,
max_steps,
max_candidates,
include_wildcard?
)
end
end

Expand All @@ -181,14 +209,7 @@ defmodule Plausible.Stats.Exploration do
defp normalize_pathname("/"), do: "/"
defp normalize_pathname(pathname), do: String.trim_trailing(pathname, "/")

@wildcard_array_join """
if(? = 'pageview', arrayFold(
acc, x -> arrayPushBack(acc, concat(acc[-1], '/', x)),
arraySlice(splitByChar('/', ?) AS split_pathname, 2),
arraySlice(split_pathname, 1, 1)), [?])
"""

defp next_steps_query(query, steps, search_term, direction, max_candidates)
defp next_steps_query(query, steps, search_term, direction, max_candidates, include_wildcard?)
when is_direction(direction) do
next_step_idx = length(steps) + 1
q_steps = steps_query(query, next_step_idx, direction)
Expand All @@ -214,41 +235,13 @@ defmodule Plausible.Stats.Exploration do
from(s in q, where: ^step_condition)
end)

# Expand each (name, pathname, user_id) row into all prefix paths via
# ARRAY JOIN, then aggregate once to get both exact and wildcard visitor
# counts in a single scan of events_v2.
#
# The arrayFold expansion includes the original pathname as the last
# element, so uniqIf(user_id, original_pathname = prefix_pathname) gives the
# exact-match count for free, alongside the wildcard uniq(user_id) and the
# uniq(original_pathname) subpath count — all in one GROUP BY.
#
# Non-pageview events are included in the expansion but produce only a
# single prefix (their exact pathname), so they naturally get
# subpaths_count = 1 and are only emitted as exact rows.
q_per_user_matches =
from(m in q_matches,
select_merge: %{user_id: m.user_id, _sample_factor: fragment("any(?)", m._sample_factor)},
group_by: [selected_as(:name), selected_as(:pathname), m.user_id]
)

q_combined =
from(em in subquery(q_per_user_matches),
join: pname in fragment(@wildcard_array_join, em.name, em.pathname, em.pathname),
on: true,
hints: "ARRAY",
where: selected_as(:pathname) != "" and selected_as(:pathname) != "/",
select: %{
name: em.name,
pathname: selected_as(fragment("?", pname), :pathname),
exact_visitors:
scale_sample(fragment("uniqIf(?, ? = ?)", em.user_id, em.pathname, pname)),
wildcard_visitors:
selected_as(scale_sample(fragment("uniq(?)", em.user_id)), :wildcard_visitors),
subpaths_count: scale_sample(fragment("uniq(?)", em.pathname))
},
group_by: [em.name, selected_as(:pathname)]
)
q_combined = combined_query(q_per_user_matches, include_wildcard?)

# Fan out each q_combined row into up to two output rows (exact + wildcard)
# using ARRAY JOIN over a small boolean array.
Expand All @@ -264,11 +257,12 @@ defmodule Plausible.Stats.Exploration do
is_wildcard in fragment(
"""
arrayFilter(
x -> x = false OR (? = 'pageview' AND ? > 1 AND ? != ?),
x -> x = false OR (? = 'pageview' AND ? != '/' AND ? > 1 AND ? != ?),
[false, true]
)
""",
m.name,
m.pathname,
m.subpaths_count,
m.wildcard_visitors,
m.exact_visitors
Expand Down Expand Up @@ -315,6 +309,59 @@ defmodule Plausible.Stats.Exploration do
|> maybe_search(search_term)
end

# Expand each (name, pathname, user_id) row into all prefix paths via
# ARRAY JOIN, then aggregate once to get both exact and wildcard visitor
# counts in a single scan of events_v2.
#
# The arrayFold expansion includes the original pathname as the last
# element, so uniqIf(user_id, original_pathname = prefix_pathname) gives the
# exact-match count for free, alongside the wildcard uniq(user_id) and the
# uniq(original_pathname) subpath count — all in one GROUP BY.
#
# Non-pageview events are included in the expansion but produce only a
# single prefix (their exact pathname), so they naturally get
# subpaths_count = 1 and are only emitted as exact rows.
#
# The fragment takes three placeholders, in order: event name, pathname,
# pathname. For a pageview the pathname is split on '/', the accumulator is
# seeded with the first split element and every later segment is appended
# as `<previous prefix>/<segment>`. For example '/a/b' splits into
# ['', 'a', 'b'] and folds into ['', '/a', '/a/b']. The leading empty
# prefix is filtered out by the query that joins on this array.
@wildcard_array_join """
if(? = 'pageview', arrayFold(
acc, x -> arrayPushBack(acc, concat(acc[-1], '/', x)),
arraySlice(splitByChar('/', ?) AS split_pathname, 2),
arraySlice(split_pathname, 1, 1)), [?])
"""

# Wildcard-enabled variant: ARRAY JOINs every row of `q_matches` with the
# prefix list produced by @wildcard_array_join and aggregates per
# (event name, prefix pathname).
#
# Selected fields:
#   * `exact_visitors`    - users whose own pathname equals the prefix
#   * `wildcard_visitors` - all users that have the prefix somewhere in their path
#   * `subpaths_count`    - number of distinct original pathnames under the prefix
defp combined_query(q_matches, true = _include_wildcard?) do
from(em in subquery(q_matches),
# The "ARRAY" hint turns this join into a ClickHouse ARRAY JOIN over the
# prefix array; `pname` is bound to one prefix per output row.
join: pname in fragment(@wildcard_array_join, em.name, em.pathname, em.pathname),
on: true,
hints: "ARRAY",
# Drops the empty prefix that the expansion emits as its first element.
where: selected_as(:pathname) != "",
select: %{
name: em.name,
pathname: selected_as(fragment("?", pname), :pathname),
exact_visitors:
scale_sample(fragment("uniqIf(?, ? = ?)", em.user_id, em.pathname, pname)),
wildcard_visitors:
selected_as(scale_sample(fragment("uniq(?)", em.user_id)), :wildcard_visitors),
# NOTE(review): scale_sample is applied to a count of distinct pathnames,
# not visitors. Presumably it multiplies by the sample factor — confirm
# that scaling is intended for a value later compared against `> 1`.
subpaths_count: scale_sample(fragment("uniq(?)", em.pathname))
},
group_by: [em.name, selected_as(:pathname)]
)
end

# Exact-only variant, used when wildcard suggestions are switched off: no
# prefix expansion takes place and every row is grouped under its own
# pathname.
#
# The selected map carries the same keys as the wildcard-enabled clause;
# `wildcard_visitors` simply mirrors `exact_visitors` and `subpaths_count`
# is pinned to 1.
defp combined_query(q_matches, false = _include_wildcard?) do
  from(row in subquery(q_matches),
    group_by: [row.name, selected_as(:pathname)],
    where: selected_as(:pathname) != "",
    select: %{
      name: row.name,
      pathname: selected_as(row.pathname, :pathname),
      exact_visitors:
        selected_as(scale_sample(fragment("uniq(?)", row.user_id)), :exact_visitors),
      wildcard_visitors: selected_as(:exact_visitors),
      subpaths_count: 1
    }
  )
end

defp journey_funnel_query(query, steps, direction) do
q_steps = steps_query(query, length(steps), direction)

Expand Down
10 changes: 8 additions & 2 deletions lib/plausible_web/controllers/api/stats_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ defmodule PlausibleWeb.Api.StatsController do
end

on_ee do
@exploration_wildcard_disabled_flag :exploration_wildcard_disabled

def exploration_next(conn, %{"journey" => steps} = params) do
site = conn.assigns.site
search_term = params["search_term"] || ""
Expand All @@ -147,7 +149,9 @@ defmodule PlausibleWeb.Api.StatsController do
{:ok, next_steps} <-
Plausible.Stats.Exploration.next_steps(query, journey,
search_term: search_term,
direction: direction
direction: direction,
include_wildcard?:
not FunWithFlags.enabled?(@exploration_wildcard_disabled_flag, for: site)
) do
json(conn, next_steps)
else
Expand Down Expand Up @@ -180,7 +184,9 @@ defmodule PlausibleWeb.Api.StatsController do

case Plausible.Stats.Exploration.interesting_funnel(query,
max_steps: params["max_steps"],
max_candidates: params["max_candidates"]
max_candidates: params["max_candidates"],
include_wildcard?:
not FunWithFlags.enabled?(@exploration_wildcard_disabled_flag, for: site)
) do
{:ok, funnel} -> json(conn, funnel)
{:error, :not_found} -> json(conn, [])
Expand Down
Loading
Loading