Skip to content

Commit a2bbaf3

Browse files
committed
Add redirect management, authors, link graph, and measurement (P1.2, P2.1-P2.4)
P1.2: Automatic redirect management - Beacon.Content.Redirect schema with changeset validations - Beacon.Content.RedirectCache (ETS-backed, constant-time lookups) - Beacon.Plug.Redirect in proxy endpoint pipeline (before routing) - CRUD: create_redirect, update_redirect, delete_redirect, list_redirects - Auto-create 301 on page path change at publish time - Chain flattening: A→B + B→C auto-becomes A→C - Circular redirect detection and rejection - Async hit count tracking - Redirects loaded into ETS during site boot P2.1: FAQ schema (continued) - faq_page_schema/1 in Beacon.SEO.JsonLd - Integrated into build/3 — generates FAQPage JSON-LD when faq_items present - Quality gate: only items with non-empty question AND answer P2.2: Author management - Beacon.Content.Author schema (name, slug, bio, job_title, avatar_url, credentials, same_as) - Author CRUD in Content API - person_schema/2 in Beacon.SEO.JsonLd for Person structured data - author_id on Page schema and snapshots P2.3: Internal link graph analysis - Beacon.Content.InternalLink schema - Beacon.SEO.LinkExtractor — parses rendered HTML with Floki - rebuild_links_for_page/3 — extract and store links at publish time - list_orphan_pages/1 — pages with zero inbound links - list_broken_links/1 — links to non-existent paths P2.4: Measurement MVP - Beacon.Content.SEOSnapshot schema - Beacon.SEO.Metrics — computes site-wide SEO health metrics - take_seo_snapshot/1 — upsert daily metrics snapshot - list_seo_snapshots/2 — query snapshots with date range Supporting: - Beacon.Content.ContentDiff — word-level template comparison for substantive edit detection (supports freshness workflow)
1 parent e721fac commit a2bbaf3

15 files changed

Lines changed: 1247 additions & 0 deletions

ai-research.md

Lines changed: 421 additions & 0 deletions
Large diffs are not rendered by default.

lib/beacon/content.ex

Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ defmodule Beacon.Content do
4343
alias Beacon.Content.LayoutEvent
4444
alias Beacon.Content.LayoutSnapshot
4545
alias Beacon.Content.Page
46+
alias Beacon.Content.Author
47+
alias Beacon.Content.InternalLink
4648
alias Beacon.Content.PageEvent
49+
alias Beacon.Content.Redirect
50+
alias Beacon.Content.SEOSnapshot
4751
alias Beacon.Content.PageField
4852
alias Beacon.Content.PageQuery
4953
alias Beacon.Content.PageSnapshot
@@ -4482,6 +4486,9 @@ defmodule Beacon.Content do
44824486
%{site: site} = page
44834487
changeset = Page.update_changeset(page, %{})
44844488

4489+
# Auto-create redirect if page path changed since last publish
4490+
maybe_create_path_redirect(page)
4491+
44854492
transact(repo(site), fn ->
44864493
with {:ok, _changeset} <- validate_page_template(changeset),
44874494
{:ok, event} <- create_page_event(page, "published"),
@@ -4838,4 +4845,343 @@ defmodule Beacon.Content do
48384845
{:noreply, config}
48394846
end
48404847

4848+
# ---------------------------------------------------------------------------
4849+
# Authors
4850+
# ---------------------------------------------------------------------------
4851+
4852+
@doc """
Creates an author.

`attrs` is passed through `Beacon.Types.Attrs.ensure_string_keys/1`, the site is read from it with
`Beacon.Types.Attrs.get_site/1`, and the resulting changeset is inserted using that site's repo.
"""
@doc type: :authors
@spec create_author(map()) :: {:ok, Author.t()} | {:error, Ecto.Changeset.t()}
def create_author(attrs) do
  string_attrs = Beacon.Types.Attrs.ensure_string_keys(attrs)
  site = Beacon.Types.Attrs.get_site(string_attrs)
  site_repo = repo(site)

  changeset = Author.changeset(%Author{}, string_attrs)
  site_repo.insert(changeset)
end
4860+
4861+
@doc """
Updates an author.

Builds `Author.changeset/2` from `author` and `attrs` and persists it with the repo resolved from the author.
"""
@doc type: :authors
@spec update_author(Author.t(), map()) :: {:ok, Author.t()} | {:error, Ecto.Changeset.t()}
def update_author(%Author{} = author, attrs) do
  author_repo = repo(author)
  changeset = Author.changeset(author, attrs)
  author_repo.update(changeset)
end
4867+
4868+
@doc """
Deletes an author.

The repo is resolved from the given author struct.
"""
@doc type: :authors
@spec delete_author(Author.t()) :: {:ok, Author.t()} | {:error, Ecto.Changeset.t()}
def delete_author(%Author{} = author) do
  author_repo = repo(author)
  author_repo.delete(author)
end
4874+
4875+
@doc """
Lists authors for a site, ordered by name.

## Options

  * `:per_page` - maximum number of authors to return, or `:infinity` for no limit. Defaults to `100`.
"""
@doc type: :authors
@spec list_authors(Site.t(), keyword()) :: [Author.t()]
def list_authors(site, opts \\ []) when is_atom(site) do
  base = from(a in Author, where: a.site == ^site, order_by: [asc: a.name])

  query =
    case Keyword.get(opts, :per_page, 100) do
      :infinity -> base
      per_page -> limit(base, ^per_page)
    end

  repo(site).all(query)
end
4884+
4885+
@doc """
Gets an author by ID, scoped to `site`.

Returns `nil` when no author matches.
"""
@doc type: :authors
@spec get_author(Site.t(), String.t()) :: Author.t() | nil
def get_author(site, id) when is_atom(site) do
  clauses = [site: site, id: id]
  repo(site).get_by(Author, clauses)
end
4891+
4892+
@doc "Returns an `Author.changeset/2` for `author` with `attrs` applied, without persisting anything."
@doc type: :authors
def change_author(%Author{} = author, attrs \\ %{}), do: Author.changeset(author, attrs)
4897+
4898+
# ---------------------------------------------------------------------------
4899+
# Internal Link Graph
4900+
# ---------------------------------------------------------------------------
4901+
4902+
@doc """
Rebuilds the internal link graph for a page from rendered HTML.

Links are extracted from `html` with `Beacon.SEO.LinkExtractor.extract/1`. All stored links whose
source is `page_id` are then deleted and the freshly extracted links are inserted. The delete and
the insert run in a single transaction, so a failing insert does not leave the page without links.

Each link's `target_page_id` is resolved by looking up its `target_path` among the currently
published pages of `site`; it is `nil` when no published page has that path.

Always returns `:ok`; database errors raise.
"""
@doc type: :links
@spec rebuild_links_for_page(Site.t(), String.t(), String.t()) :: :ok
def rebuild_links_for_page(site, page_id, html) when is_atom(site) do
  links = Beacon.SEO.LinkExtractor.extract(html)

  # Build a path -> page id lookup for resolving target_page_id
  path_to_id =
    site
    |> list_published_pages(per_page: :infinity)
    |> Map.new(fn published -> {published.path, published.id} end)

  now = DateTime.utc_now() |> DateTime.truncate(:microsecond)

  entries =
    Enum.map(links, fn link ->
      %{
        id: Ecto.UUID.generate(),
        # insert_all targets the table by name, so the site is stored as a plain string
        site: Atom.to_string(site),
        source_page_id: page_id,
        target_page_id: Map.get(path_to_id, link.target_path),
        target_path: link.target_path,
        # keep at most 255 characters of anchor text
        anchor_text: String.slice(link.anchor_text || "", 0..254),
        inserted_at: now
      }
    end)

  # Replace the page's links atomically: if the insert raises, the delete is rolled back too
  {:ok, _result} =
    repo(site).transaction(fn ->
      from(l in InternalLink, where: l.site == ^site and l.source_page_id == ^page_id)
      |> repo(site).delete_all()

      if entries != [] do
        repo(site).insert_all("beacon_internal_links", entries, on_conflict: :nothing)
      end
    end)

  :ok
end
4938+
4939+
@doc """
Lists pages of `site` that are not the target of any stored internal link (orphans).

Pages are returned ordered by path.
"""
@doc type: :links
@spec list_orphan_pages(Site.t()) :: [Page.t()]
def list_orphan_pages(site) when is_atom(site) do
  # NOTE(review): this reads every `Page` row for the site; no published filter is applied here.
  # Confirm whether unpublished pages are meant to be reported as orphans.

  # IDs of pages that at least one internal link resolves to
  linked_page_ids =
    from(link in InternalLink,
      distinct: true,
      where: link.site == ^site,
      where: not is_nil(link.target_page_id),
      select: link.target_page_id
    )

  orphans =
    from(page in Page,
      where: page.site == ^site,
      where: page.id not in subquery(linked_page_ids),
      order_by: [asc: page.path]
    )

  repo(site).all(orphans)
end
4957+
4958+
@doc """
Lists internal links of `site` whose `target_page_id` is `nil`, ordered by target path.

A `nil` `target_page_id` means the link's target path did not resolve to a page when the link was stored.
"""
@doc type: :links
@spec list_broken_links(Site.t()) :: [InternalLink.t()]
def list_broken_links(site) when is_atom(site) do
  broken =
    from(link in InternalLink,
      where: link.site == ^site,
      where: is_nil(link.target_page_id),
      order_by: [asc: link.target_path]
    )

  repo(site).all(broken)
end
4968+
4969+
# ---------------------------------------------------------------------------
4970+
# SEO Measurement
4971+
# ---------------------------------------------------------------------------
4972+
4973+
@doc """
Takes a snapshot of current SEO metrics for a site.

Metrics come from `Beacon.SEO.Metrics.compute/1`. If a snapshot already exists for today's UTC date
it is updated with the new metrics; otherwise a new snapshot is inserted.
"""
@doc type: :seo
@spec take_seo_snapshot(Site.t()) :: {:ok, SEOSnapshot.t()} | {:error, Ecto.Changeset.t()}
def take_seo_snapshot(site) when is_atom(site) do
  metrics = Beacon.SEO.Metrics.compute(site)
  today = Date.utc_today()
  attrs = %{"site" => site, "snapshot_date" => today, "metrics" => metrics}

  # Look up today's snapshot first: update it when present, insert otherwise
  todays_snapshot = repo(site).get_by(SEOSnapshot, site: site, snapshot_date: today)

  if is_nil(todays_snapshot) do
    changeset = SEOSnapshot.changeset(%SEOSnapshot{}, attrs)
    repo(site).insert(changeset)
  else
    changeset = SEOSnapshot.changeset(todays_snapshot, attrs)
    repo(site).update(changeset)
  end
end
4995+
4996+
@doc """
Lists SEO snapshots for a site, newest first.

## Options

  * `:days` - how many days back from today's UTC date to include. Defaults to `30`.
"""
@doc type: :seo
@spec list_seo_snapshots(Site.t(), keyword()) :: [SEOSnapshot.t()]
def list_seo_snapshots(site, opts \\ []) when is_atom(site) do
  window_days = Keyword.get(opts, :days, 30)
  cutoff = Date.add(Date.utc_today(), -window_days)

  recent =
    from(snapshot in SEOSnapshot,
      where: snapshot.site == ^site,
      where: snapshot.snapshot_date >= ^cutoff,
      order_by: [desc: snapshot.snapshot_date]
    )

  repo(site).all(recent)
end
5009+
5010+
# ---------------------------------------------------------------------------
5011+
# Redirects
5012+
# ---------------------------------------------------------------------------
5013+
5014+
@doc """
Creates a redirect, automatically flattening chains and rejecting circulars.

If the requested destination is itself the source of an existing redirect, the new redirect is
pointed at that redirect's destination instead. When following existing redirects from the
destination leads back to the source, an error changeset is returned with an error on
`:destination_path`.

After a successful insert, existing redirects that pointed at the new redirect's source are
re-pointed to its destination and the redirect is stored in `Beacon.Content.RedirectCache`.
"""
@doc type: :redirects
@spec create_redirect(map()) :: {:ok, Redirect.t()} | {:error, Ecto.Changeset.t()}
def create_redirect(attrs) do
  string_attrs = Beacon.Types.Attrs.ensure_string_keys(attrs)
  site = Beacon.Types.Attrs.get_site(string_attrs)

  flattened = flatten_redirect_chain(site, string_attrs)
  changeset = Redirect.changeset(%Redirect{}, flattened)

  if circular_redirect?(site, flattened["source_path"], flattened["destination_path"]) do
    {:error, Ecto.Changeset.add_error(changeset, :destination_path, "creates a circular redirect")}
  else
    # A failed insert falls through `with` and is returned unchanged
    with {:ok, redirect} <- repo(site).insert(changeset) do
      update_existing_redirect_chains(site, redirect)
      Beacon.Content.RedirectCache.put(redirect)
      {:ok, redirect}
    end
  end
end
5044+
5045+
@doc """
Updates an existing redirect.

On success the redirect cache for the redirect's site is invalidated.
"""
@doc type: :redirects
@spec update_redirect(Redirect.t(), map()) :: {:ok, Redirect.t()} | {:error, Ecto.Changeset.t()}
def update_redirect(%Redirect{} = redirect, attrs) do
  redirect_repo = repo(redirect)
  changeset = Redirect.changeset(redirect, attrs)

  with {:ok, updated} <- redirect_repo.update(changeset) do
    Beacon.Content.RedirectCache.invalidate(updated.site)
    {:ok, updated}
  end
end
5058+
5059+
@doc """
Deletes a redirect.

On success the entry for the redirect's site and source path is removed from the redirect cache.
"""
@doc type: :redirects
@spec delete_redirect(Redirect.t()) :: {:ok, Redirect.t()} | {:error, Ecto.Changeset.t()}
def delete_redirect(%Redirect{} = redirect) do
  redirect_repo = repo(redirect)

  with {:ok, deleted} <- redirect_repo.delete(redirect) do
    Beacon.Content.RedirectCache.delete(deleted.site, deleted.source_path)
    {:ok, deleted}
  end
end
5072+
5073+
@doc """
Lists redirects for a site, most recently inserted first.

## Options

  * `:per_page` - page size, or `:infinity` to disable pagination. Defaults to `20`.
  * `:page` - 1-based page number, ignored when `:per_page` is `:infinity`. Defaults to `1`.
  * `:search` - when given, only redirects whose source or destination path contains this
    text (case-insensitive) are returned.
"""
@doc type: :redirects
@spec list_redirects(Site.t(), keyword()) :: [Redirect.t()]
def list_redirects(site, opts \\ []) when is_atom(site) do
  per_page = Keyword.get(opts, :per_page, 20)
  page = Keyword.get(opts, :page, 1)
  search = Keyword.get(opts, :search)

  base = from(r in Redirect, where: r.site == ^site, order_by: [desc: r.inserted_at])

  paginated =
    case per_page do
      :infinity ->
        base

      _ ->
        skip = (page - 1) * per_page
        base |> limit(^per_page) |> offset(^skip)
    end

  filtered =
    if search do
      pattern = "%#{search}%"
      where(paginated, [r], ilike(r.source_path, ^pattern) or ilike(r.destination_path, ^pattern))
    else
      paginated
    end

  repo(site).all(filtered)
end
5088+
5089+
@doc """
Gets a single redirect by ID, scoped to `site`.

Returns `nil` when no redirect matches.
"""
@doc type: :redirects
@spec get_redirect(Site.t(), String.t()) :: Redirect.t() | nil
def get_redirect(site, id) when is_atom(site) do
  clauses = [site: site, id: id]
  repo(site).get_by(Redirect, clauses)
end
5095+
5096+
@doc """
Gets the redirect of `site` whose source path equals `source_path`.

Returns `nil` when no redirect matches.
"""
@doc type: :redirects
@spec get_redirect_by_source(Site.t(), String.t()) :: Redirect.t() | nil
def get_redirect_by_source(site, source_path) when is_atom(site) do
  clauses = [site: site, source_path: source_path]
  repo(site).get_by(Redirect, clauses)
end
5102+
5103+
# Records one hit on the redirect identified by `site` and `source_path`: bumps `hit_count` by one
# and stamps `last_hit_at` with the current UTC time. Returns the `update_all` result.
@doc false
def increment_redirect_hit(site, source_path) do
  hit_query = from(r in Redirect, where: r.site == ^site and r.source_path == ^source_path)

  # `inc:` performs the increment in the database, so concurrent hits are not lost
  repo(site).update_all(hit_query, inc: [hit_count: 1], set: [last_hit_at: DateTime.utc_now()])
end
5110+
5111+
# Returns a `Redirect.changeset/2` for `redirect` with `attrs` applied, without persisting anything.
@doc false
def change_redirect(%Redirect{} = redirect, attrs \\ %{}), do: Redirect.changeset(redirect, attrs)
5115+
5116+
# Auto-create a 301 redirect when a page's path differs from the path of its most recent snapshot.
#
# Returns `:ok` when there is no previous snapshot or the path is unchanged; otherwise returns the
# result of `create_redirect/1`.
@doc false
def maybe_create_path_redirect(page) do
  %{site: site, id: page_id, path: new_path} = page

  case get_last_published_path(site, page_id) do
    nil ->
      :ok

    ^new_path ->
      :ok

    old_path ->
      # The page now lives at `new_path`, so a redirect *from* that path is stale. Leaving it in
      # place would make the old -> new redirect flatten into old -> old (rejected as circular)
      # while the stale redirect keeps sending visitors away from the live page.
      case get_redirect_by_source(site, new_path) do
        %Redirect{} = stale -> delete_redirect(stale)
        nil -> :ok
      end

      create_redirect(%{
        "site" => site,
        "source_path" => old_path,
        "destination_path" => new_path,
        "status_code" => 301
      })
  end
end
5133+
5134+
# Returns the path stored on the most recently inserted snapshot of the page, or `nil` when the
# page has no snapshot.
defp get_last_published_path(site, page_id) do
  latest_path =
    from(snapshot in PageSnapshot,
      where: snapshot.site == ^site,
      where: snapshot.page_id == ^page_id,
      order_by: [desc: snapshot.inserted_at],
      select: snapshot.path,
      limit: 1
    )

  repo(site).one(latest_path)
end
5143+
5144+
# Rewrites `attrs["destination_path"]` to the end of any existing redirect chain starting at it, so
# a new redirect never points at another redirect's source.
#
# `attrs` is returned untouched when the destination is missing or not a string; the lookup would
# otherwise raise on a `nil` comparison and changeset validation would never get to report it.
defp flatten_redirect_chain(site, attrs) do
  case attrs["destination_path"] do
    dest when is_binary(dest) ->
      Map.put(attrs, "destination_path", final_redirect_destination(site, dest, MapSet.new()))

    _missing_or_invalid ->
      attrs
  end
end

# Follows existing redirects from `path` until a path with no redirect is reached. `visited` stops
# the walk if stored redirects already form a loop, so this always terminates.
defp final_redirect_destination(site, path, visited) do
  case get_redirect_by_source(site, path) do
    %Redirect{destination_path: next} when is_binary(next) ->
      if MapSet.member?(visited, next) or next == path do
        path
      else
        final_redirect_destination(site, next, MapSet.put(visited, path))
      end

    _none ->
      path
  end
end
5155+
5156+
# Returns true when following existing redirects from `destination` reaches `source` (or revisits
# any path along the way), i.e. adding source -> destination would close a loop.
defp circular_redirect?(site, source, destination) do
  check_circular(site, destination, MapSet.new([source]))
end
5160+
5161+
# Walks the redirect chain starting at `path`, tracking visited paths.
# Returns true as soon as a path is seen twice, false when the chain ends (no redirect, or nil path).
defp check_circular(_site, nil, _visited), do: false

defp check_circular(site, path, visited) do
  MapSet.member?(visited, path) or
    case get_redirect_by_source(site, path) do
      %Redirect{destination_path: next_path} ->
        check_circular(site, next_path, MapSet.put(visited, path))

      nil ->
        false
    end
end
5176+
5177+
# Re-points every other redirect of `site` whose destination is the new redirect's source so it
# targets the new redirect's destination directly, then invalidates the site's redirect cache.
defp update_existing_redirect_chains(site, new_redirect) do
  %{id: new_id, source_path: source, destination_path: destination} = new_redirect

  chained =
    from(r in Redirect,
      where: r.site == ^site,
      where: r.destination_path == ^source,
      where: r.id != ^new_id
    )

  repo(site).update_all(chained, set: [destination_path: destination])

  Beacon.Content.RedirectCache.invalidate(site)
end
5186+
48415187
end

0 commit comments

Comments
 (0)