Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/plausible/imported/google_analytics4.ex
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
defmodule Plausible.Imported.GoogleAnalytics4 do
@moduledoc """
Import implementation for Google Analytics 4.
Expand Down Expand Up @@ -352,6 +352,6 @@
defp parse_source("duckduckgo"), do: "DuckDuckGo"

defp parse_source(ref) do
Plausible.Ingestion.Source.parse("https://" <> ref)
Plausible.Ingestion.Source.from_referrer("https://" <> ref)
end
end
24 changes: 20 additions & 4 deletions lib/plausible/ingestion/acquisition.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ defmodule Plausible.Ingestion.Acquisition do

Notable differences from GA4 that have been implemented just for Plausible:
1. The @custom_source_categories module attribute contains a list of custom source categories that we have manually
added based on our own judgement and user feedback. For example we treat AI tools (ChatGPT, Perplexity) as search engines.
added based on our own judgement and user feedback. For example we group AI assistants (ChatGPT, Claude, Gemini, etc.)
into their own "AI Assistants" channel.
2. Google is in a privileged position to analyze paid traffic from within their own network. The biggest use-case is auto-tagged adwords campaigns.
We do our best by categorizing as paid search when source is Google and the url has `gclid` parameter. Same for source Bing and `msclkid` url parameter.
3. The @paid_sources module attribute in Plausible.Ingestion.Source contains a list of utm_sources that we will automatically categorize as paid traffic
Expand All @@ -30,13 +31,23 @@ defmodule Plausible.Ingestion.Acquisition do
{"vkontakte", "SOURCE_CATEGORY_SOCIAL"},
{"threads", "SOURCE_CATEGORY_SOCIAL"},
{"ecosia", "SOURCE_CATEGORY_SEARCH"},
{"perplexity", "SOURCE_CATEGORY_SEARCH"},
{"kagi", "SOURCE_CATEGORY_SEARCH"},
{"brave", "SOURCE_CATEGORY_SEARCH"},
{"chatgpt.com", "SOURCE_CATEGORY_SEARCH"},
{"temu.com", "SOURCE_CATEGORY_SHOPPING"},
{"discord", "SOURCE_CATEGORY_SOCIAL"},
{"sogou", "SOURCE_CATEGORY_SEARCH"},
{"microsoft teams", "SOURCE_CATEGORY_SOCIAL"}
{"microsoft teams", "SOURCE_CATEGORY_SOCIAL"},
{"bluesky", "SOURCE_CATEGORY_SOCIAL"},
{"mastodon", "SOURCE_CATEGORY_SOCIAL"},
{"chatgpt", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"claude", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"phind", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"deepseek", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"microsoft copilot", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"grok", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"google gemini", "SOURCE_CATEGORY_AI_ASSISTANTS"},
{"perplexity", "SOURCE_CATEGORY_AI_ASSISTANTS"},

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not strictly related to this PR, but: to reduce the risk of a missed typo causing headaches, perhaps we could consider putting the category string labels in module attributes?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're used in sql eex 😅 - how would you make them available there for compile-time validation?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They could be put under module attribute as a map of %{social: "SOURCE_CATEGORY_SOCIAL", ...} and then exposed via a function, which should be usable during compile time with no issues I think?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, since this is currently mostly a convenience, I'll follow up in a separate change

{"x (twitter)", "SOURCE_CATEGORY_SOCIAL"}
]
@source_categories Application.app_dir(:plausible, "priv/ga4-source-categories.csv")
|> File.read!()
Expand Down Expand Up @@ -69,6 +80,7 @@ defmodule Plausible.Ingestion.Acquisition do
organic_shopping?(source, utm_campaign) -> "Organic Shopping"
organic_social?(source, utm_medium) -> "Organic Social"
organic_video?(source, utm_medium) -> "Organic Video"
ai_assistants?(source) -> "AI Assistants"
search_source?(source) -> "Organic Search"
email?(source, utm_source, utm_medium) -> "Email"
affiliates?(utm_medium) -> "Affiliates"
Expand Down Expand Up @@ -188,6 +200,10 @@ defmodule Plausible.Ingestion.Acquisition do
@source_categories[source] == "SOURCE_CATEGORY_EMAIL"
end

# True when the category recorded for `source` in @source_categories is the
# AI assistants category; any other category (or no entry at all) yields false.
defp ai_assistants?(source) do
  category = @source_categories[source]
  category == "SOURCE_CATEGORY_AI_ASSISTANTS"
end

# Reports whether a campaign name looks shopping-related. It matches when the
# name contains "shopping" anywhere, or contains "shop" either at the very start
# of the string or directly after a character outside a-d / f-z (for example
# "e", a digit or a separator) - so "eshop" and "summer-shop" match while
# "workshop" does not.
defp shopping_campaign?(utm_campaign) do
  shopping_pattern = ~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/
  String.match?(utm_campaign, shopping_pattern)
end
Expand Down
85 changes: 56 additions & 29 deletions lib/plausible/ingestion/source.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ defmodule Plausible.Ingestion.Source do
|> then(&["adwords" | &1])
|> MapSet.new()

# Host suffix => source name overrides. A referrer host matches an entry when it
# equals the key or ends with "." followed by the key (see
# from_custom_source_suffix/1), so every subdomain of a listed host resolves to
# the same source name, e.g. "en.wikipedia.org" -> "Wikipedia".
@custom_source_suffixes %{
"officeapps.live.com" => "Microsoft 365",
"wikipedia.org" => "Wikipedia"
}

@external_resource "priv/ref_inspector/referers.yml"
@referers_yaml Application.app_dir(:plausible, "priv/ref_inspector/referers.yml")

Expand All @@ -36,11 +41,12 @@ defmodule Plausible.Ingestion.Source do
|> Map.put(String.downcase(val), val)
end)

@spec from_custom_sources(String.t()) :: String.t() | nil
for {k, v} <- Enum.sort(lookup) do
def src(unquote(k)), do: unquote(v)
def from_custom_sources(unquote(k)), do: unquote(v)
end

def src(_), do: nil
def from_custom_sources(_), do: nil

def paid_sources() do
@paid_sources |> MapSet.to_list()
Expand All @@ -64,39 +70,60 @@ defmodule Plausible.Ingestion.Source do
perfectly, but at least we're making an effort for the most commonly used ones. For example, `ig -> Instagram` and `adwords -> Google`.
"""
def resolve(request) do
tagged_source =
request.query_params["utm_source"] ||
request.query_params["source"] ||
request.query_params["ref"]

source =
cond do
tagged_source -> tagged_source
has_valid_referral?(request) -> parse(request.referrer)
true -> nil
end

find_mapping(source)
cond do
tagged = tagged_param(request) -> canonical(tagged)
has_valid_referral?(request) -> from_referrer(request.referrer)
true -> nil
end
end

def parse(ref) do
case RefInspector.parse(ref).source do
:unknown ->
uri = URI.parse(String.trim(ref))
format_referrer_host(uri)

source ->
source
@doc """
Resolves a source name from a bare referrer URL.

This is also the entry point used when importing referrers from Google
Analytics, so that imported data is normalized identically to live ingestion.

Resolution order (first match wins):

  1. an exact match of the downcased referrer host in the custom sources,
  2. a suffix match against `@custom_source_suffixes`,
  3. the source reported by `RefInspector`, normalized through our aliases,
  4. the formatted referrer host itself when `RefInspector` does not know it.

Surrounding whitespace is stripped once up front so that host extraction and
the `RefInspector` lookup both operate on the same URL.
"""
@spec from_referrer(String.t()) :: String.t()
def from_referrer(referrer) do
  # Trim once and reuse the result everywhere. Previously only the host
  # extraction saw the trimmed value while RefInspector received the raw string,
  # so a referrer with stray whitespace could miss its RefInspector match and
  # fall back to the bare host.
  referrer = String.trim(referrer)

  host =
    referrer
    |> URI.parse()
    |> format_referrer_host()

  downcased_host = String.downcase(host)

  # Prefer custom source overrides over RefInspector so subdomain matches win
  # (e.g. gemini.google.com resolves to Google Gemini, not Google). Exact host
  # matches take priority, then suffix families, then RefInspector.
  cond do
    name = from_custom_sources(downcased_host) ->
      name

    name = from_custom_source_suffix(downcased_host) ->
      name

    true ->
      case RefInspector.parse(referrer).source do
        # Unknown to RefInspector: report the host (original casing preserved).
        :unknown -> host
        # Normalize RefInspector names through our aliases (e.g. Twitter to X (Twitter)).
        name -> canonical(name)
      end
  end
end

def find_mapping(nil), do: nil
# Looks up `host` in @custom_source_suffixes. An entry applies when the host is
# exactly the suffix or is a subdomain of it (ends with "." <> suffix).
# Returns the mapped source name, or nil when no entry applies.
defp from_custom_source_suffix(host) do
  matching_entry =
    Enum.find(@custom_source_suffixes, fn {suffix, _name} ->
      host == suffix or String.ends_with?(host, "." <> suffix)
    end)

  case matching_entry do
    {_suffix, name} -> name
    nil -> nil
  end
end

def find_mapping(source) do
case src(String.downcase(source)) do
name when is_binary(name) -> name
_ -> source
end
# Returns the explicitly tagged source from the request's query params, checking
# "utm_source", then "source", then "ref". Yields nil when none of them is set.
defp tagged_param(request) do
  params = request.query_params

  params["utm_source"] || params["source"] || params["ref"]
end

# Maps a source name onto its custom-source spelling via a case-insensitive
# lookup (the name is downcased before the lookup). Names without a custom
# mapping are returned unchanged.
defp canonical(source) do
  mapped = source |> String.downcase() |> from_custom_sources()

  mapped || source
end

def format_referrer(request) do
Expand Down
15 changes: 14 additions & 1 deletion lib/plausible_web/plugs/favicon.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,25 @@ defmodule PlausibleWeb.Favicon do
@external_resource @placeholder_icon_location
@custom_icons %{
"Brave" => "search.brave.com",
"Kagi" => "kagi.com",
"Sogou" => "sogou.com",
"Wikipedia" => "en.wikipedia.org",
"Discord" => "discord.com",
"Perplexity" => "perplexity.ai",
"Microsoft Teams" => "microsoft.com",
"LinkedIn" => "linkedin.com"
"LinkedIn" => "linkedin.com",
"Linktree" => "linktr.ee",
"Bluesky" => "bsky.app",
"Mastodon" => "mastodon.social",
"Google Gemini" => "gemini.google.com",
"ChatGPT" => "chatgpt.com",
"Claude" => "claude.ai",
"Phind" => "phind.com",
"DeepSeek" => "deepseek.com",
"Microsoft Copilot" => "copilot.microsoft.com",
"Grok" => "grok.com",
"X (Twitter)" => "x.com",
"Microsoft 365" => "office.com"
}

def init(_) do
Expand Down
Loading
Loading