Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 91 additions & 1 deletion src/project/types/website/website-llms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { basename, join, relative } from "../../../deno_ral/path.ts";
import { existsSync } from "../../../deno_ral/fs.ts";
import { pathWithForwardSlashes } from "../../../core/path.ts";

import { Document, Element } from "../../../core/deno-dom.ts";
import { Document, Element, Node } from "../../../core/deno-dom.ts";
import { execProcess } from "../../../core/process.ts";
import { pandocBinaryPath, resourcePath } from "../../../core/resources.ts";

Expand Down Expand Up @@ -83,9 +83,36 @@ export function llmsHtmlFinalizer(

// Convert HTML to markdown using Pandoc with the llms.lua filter
await convertHtmlToLlmsMarkdown(htmlContent, llmsOutputPath);

// Clean up conditional content markers from the original HTML doc
cleanupConditionalContent(doc);
};
}

/**
 * Strip the llms-txt marker wrappers from the rendered HTML document.
 *
 * Two kinds of marker elements are handled:
 * - `.llms-conditional-content`: removed together with everything inside it.
 * - `.llms-hidden-content`: the wrapper element is removed while its child
 *   nodes are moved, in order, to the position the wrapper occupied.
 *
 * @param doc The HTML document to modify in place.
 */
function cleanupConditionalContent(doc: Document): void {
  // Pass 1: drop every llms-only element outright.
  const llmsOnly = doc.querySelectorAll(".llms-conditional-content");
  for (const node of llmsOnly) {
    (node as Element).remove();
  }

  // Pass 2: dissolve llms-hidden wrappers but keep what they contain.
  const hiddenWrappers = doc.querySelectorAll(".llms-hidden-content");
  for (const node of hiddenWrappers) {
    const wrapper = node as Element;
    const host = wrapper.parentElement;
    if (!host) {
      // Without a parent element there is nowhere to hoist the children to;
      // the wrapper is left untouched.
      continue;
    }

    // Moving a child in front of the wrapper detaches it from the wrapper,
    // so re-reading firstChild walks through all children in document order.
    let child = wrapper.firstChild;
    while (child) {
      host.insertBefore(child as Node, wrapper as Node);
      child = wrapper.firstChild;
    }
    wrapper.remove();
  }
}

/**
* Extract the main content from an HTML document, removing navigation,
* sidebars, footers, scripts, and styles.
Expand All @@ -104,6 +131,7 @@ function extractMainContent(doc: Document): string {
".sidebar",
".quarto-search",
"nav.navbar",
".quarto-page-breadcrumbs",
"script",
"style",
"link[rel='stylesheet']",
Expand All @@ -128,6 +156,9 @@ function extractMainContent(doc: Document): string {
return "";
}

// Preprocess annotated code blocks before converting to markdown
preprocessAnnotatedCodeBlocks(clone, main as Element);

// Return a minimal HTML document with just the content
return `<!DOCTYPE html>
<html>
Expand All @@ -138,6 +169,65 @@ ${main.innerHTML}
</html>`;
}

/**
 * Prepare annotated code blocks inside `container` for markdown conversion.
 *
 * Three in-place rewrites are applied, in this order:
 * 1. Every element carrying a non-empty `data-llms-code-original` attribute
 *    gets the text of its `<code>` element (itself, if it is a `<code>`)
 *    replaced with the attribute value; the attribute is then removed.
 * 2. Elements matching `.code-annotation-gutter` or
 *    `.code-annotation-gutter-bg` are removed.
 * 3. Each `dl.code-annotation-container-grid` is replaced by an `<ol>` with
 *    one `<li>` per `<dd>`; if the list's parent has class `cell-annotation`,
 *    that parent is what gets replaced.
 *
 * @param doc Document used to create the new `<ol>` / `<li>` elements.
 * @param container Element whose descendants are rewritten.
 */
function preprocessAnnotatedCodeBlocks(doc: Document, container: Element): void {
  // Step 1: put the saved source text back into the code element.
  for (const node of container.querySelectorAll("[data-llms-code-original]")) {
    const holder = node as Element;
    const savedSource = holder.getAttribute("data-llms-code-original");
    if (savedSource) {
      // The attribute may sit on the <code> itself or on an ancestor of it.
      let target: Element | null;
      if (holder.tagName === "CODE") {
        target = holder;
      } else {
        target = holder.querySelector("code") as Element | null;
      }
      if (target) {
        // Assigning textContent discards all child markup of the code element.
        target.textContent = savedSource;
      }
      holder.removeAttribute("data-llms-code-original");
    }
  }

  // Step 2: drop the annotation gutter elements.
  for (
    const node of container.querySelectorAll(
      ".code-annotation-gutter, .code-annotation-gutter-bg",
    )
  ) {
    (node as Element).remove();
  }

  // Step 3: turn each annotation definition list into an ordered list that
  // carries only the <dd> contents.
  for (
    const node of container.querySelectorAll(
      "dl.code-annotation-container-grid",
    )
  ) {
    const defList = node as Element;
    const ordered = doc.createElement("ol");
    for (const ddNode of defList.querySelectorAll("dd")) {
      const item = doc.createElement("li");
      item.innerHTML = (ddNode as Element).innerHTML;
      ordered.appendChild(item);
    }

    // Swap out the cell-annotation wrapper when there is one, else the <dl>.
    const wrapper = defList.parentElement;
    const replaced = wrapper && wrapper.classList.contains("cell-annotation")
      ? wrapper
      : defList;
    replaced.parentElement?.replaceChild(ordered, replaced);
  }
}

/**
* Convert HTML content to markdown using Pandoc with the llms.lua filter.
*/
Expand Down
3 changes: 3 additions & 0 deletions src/project/types/website/website.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { projectOffset, projectOutputDir } from "../../project-shared.ts";
import { isHtmlFileOutput } from "../../../config/format.ts";

import {
kFilterParams,
kIncludeInHeader,
kPageTitle,
kTitle,
Expand Down Expand Up @@ -358,6 +359,8 @@ export const websiteProjectType: ProjectType = {

// Add llms.txt finalizer if enabled
if (websiteConfigBoolean(kLlmsTxt, false, project.config)) {
extras[kFilterParams] = extras[kFilterParams] || {};
extras[kFilterParams]["llms-txt"] = true;
extras.html[kHtmlFinalizers]?.push(
llmsHtmlFinalizer(source, project, format),
);
Expand Down
58 changes: 57 additions & 1 deletion src/resources/filters/llms/llms.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ local skippable_classes = {
["quarto-float"] = true,
["quarto-float-fig"] = true,
["figure"] = true,
["llms-conditional-content"] = true,
}
local droppable_classes = {
["navbar-container"] = true,
Expand All @@ -25,6 +26,8 @@ local droppable_classes = {
["listing-categories"] = true,
["quarto-listing-category"] = true, -- category filter sidebar
["listing-category"] = true, -- individual category badges
["quarto-page-breadcrumbs"] = true, -- breadcrumb navigation
["llms-hidden-content"] = true,
}
local droppable_ids = {
["quarto-header"] = true,
Expand Down Expand Up @@ -70,6 +73,53 @@ local function clean_element(el)
end
end

-- Return the title inlines for a single tab nav item: the content of the
-- first Link found in the item's Plain/Para blocks, or nil if there is none.
-- Returning on the first hit guarantees at most one title per nav item.
local function tab_title_from_item(item)
  for _, inner_block in ipairs(item) do
    if inner_block.t == "Plain" or inner_block.t == "Para" then
      for _, inline in ipairs(inner_block.content) do
        if inline.t == "Link" then
          return inline.content
        end
      end
    end
  end
  return nil
end

-- Flatten a panel-tabset Div into a linear sequence of blocks.
--
-- Tab titles are read from the first BulletList in the Div that yields any
-- titles (one title per list item); tab bodies are read from the `tab-pane`
-- Divs nested in `tab-content` Divs. The result pairs them by position: a
-- level-2 Header built from the i-th title followed by the i-th pane's blocks.
--
-- @param div the Div carrying the `panel-tabset` class
-- @return the flattened blocks, or `div.content` unchanged when neither
--         titles nor panes were found
local function handle_tabset(div)
  local titles = {}
  local panes = {}

  for _, block in ipairs(div.content) do
    if block.t == "BulletList" and #titles == 0 then
      -- One title per nav item keeps titles aligned with panes even when an
      -- item holds several blocks that each contain a link.
      for _, item in ipairs(block.content) do
        local title = tab_title_from_item(item)
        if title then
          table.insert(titles, title)
        end
      end
    elseif block.t == "Div" and block.classes:includes("tab-content") then
      for _, inner in ipairs(block.content) do
        if inner.t == "Div" and inner.classes:includes("tab-pane") then
          table.insert(panes, inner.content)
        end
      end
    end
  end

  -- Build output: heading + content for each tab. Iterating to the larger
  -- count keeps unmatched titles or panes instead of dropping them.
  local result = pandoc.Blocks({})
  for i = 1, math.max(#titles, #panes) do
    if titles[i] then
      result:insert(pandoc.Header(2, titles[i]))
    end
    if panes[i] then
      result:extend(panes[i])
    end
  end

  if #result > 0 then
    return result
  end
  -- Fallback: nothing recognisable was found, return the content as-is
  return div.content
end

local function handle_callout(div)
local kind = "NOTE" -- NOTE, TIP, IMPORTANT, WARNING, CAUTION
div.classes:map(function(cls)
Expand Down Expand Up @@ -140,8 +190,10 @@ function Link(link)
return link.content
end

if link.target and link.target:match("%.html$") then
if link.target and (link.target:match("%.html$") or link.target:match("%.html#")) then
link.target = link.target:gsub("%.html#", ".llms.md#")
link.target = link.target:gsub("%.html$", ".llms.md")
link.target = link.target:gsub("^%./", "")
if link.classes:includes("btn") then
link.attr = pandoc.Attr()
end
Expand Down Expand Up @@ -174,6 +226,10 @@ end

function Div(div)

if div.classes:includes("panel-tabset") then
return handle_tabset(div)
end

if div.classes:includes("callout") then
return handle_callout(div)
end
Expand Down
20 changes: 20 additions & 0 deletions src/resources/filters/main.lua
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ import("./quarto-pre/bibliography-formats.lua")
import("./quarto-pre/book-links.lua")
import("./quarto-pre/book-numbering.lua")
import("./quarto-pre/code-annotation.lua")
import("./quarto-pre/llms-code-annotations.lua")
import("./quarto-pre/llms-conditional-content.lua")
import("./quarto-pre/code-filename.lua")
import("./quarto-pre/contentsshortcode.lua")
import("./quarto-pre/engine-escape.lua")
Expand Down Expand Up @@ -321,6 +323,15 @@ local quarto_pre_filters = {
traverser = 'jog',
},

{ name = "pre-llms-conditional-content",
filter = filterIf(
function() return param("llms-txt", false) end,
llms_resolve_conditional_content()
),
flags = { "has_conditional_content" },
traverser = 'jog',
},

{ name = "pre-combined-hidden",
filter = combineFilters({
hidden(),
Expand All @@ -336,6 +347,15 @@ local quarto_pre_filters = {
traverser = 'jog',
},

{ name = "pre-llms-save-code-annotations",
filter = filterIf(
function() return param("llms-txt", false) end,
llms_save_code_annotations()
),
flags = { "has_code_annotations" },
traverser = 'jog',
},

{ name = "pre-code-annotations",
filter = code_annotations(),
flags = { "has_code_annotations" },
Expand Down
16 changes: 16 additions & 0 deletions src/resources/filters/quarto-pre/llms-code-annotations.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- llms-code-annotations.lua
-- Copyright (C) 2020-2026 Posit Software, PBC
--
-- Saves original CodeBlock text before code-annotation.lua strips markers.
-- Only runs when llms-txt is enabled (guarded by filterIf in main.lua).

-- Build the filter table that snapshots annotated code.
-- The returned CodeBlock handler copies the block's text into its
-- `data-llms-code-original` attribute whenever the text contains a `<N>`
-- marker (an angle-bracketed run of digits), and always returns the block.
function llms_save_code_annotations()
  local function save_original(code_block)
    local marker = code_block.text:match("<%d+>")
    if marker then
      code_block.attributes["data-llms-code-original"] = code_block.text
    end
    return code_block
  end

  return { CodeBlock = save_original }
end
56 changes: 56 additions & 0 deletions src/resources/filters/quarto-pre/llms-conditional-content.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- llms-conditional-content.lua
-- Copyright (C) 2020-2026 Posit Software, PBC
--
-- Pre-filter that intercepts ConditionalBlock nodes referencing llms-txt
-- and replaces them with marker Divs so content can be included/excluded
-- from llms.md output independently of the HTML format.
-- Only runs when llms-txt is enabled (guarded by filterIf in main.lua).

-- Build the filter table that tags ConditionalBlock nodes whose visibility
-- for llms-txt differs from the visibility reported by is_visible().
function llms_resolve_conditional_content()
  -- True when `wanted` is an element of the sequence `values`
  -- (a nil `values` counts as empty).
  local function includes(values, wanted)
    if values then
      for _, candidate in ipairs(values) do
        if candidate == wanted then
          return true
        end
      end
    end
    return false
  end

  -- Decide whether a ConditionalBlock is visible for llms-txt output.
  -- Returns nil when neither the when-format nor the unless-format list
  -- mentions llms-txt; otherwise true (include) or false (exclude).
  local function is_llms_visible(tbl)
    local constants = require("modules/constants")
    local condition = tbl.condition
    local in_when = includes(condition[constants.kWhenFormat], "llms-txt")
    local in_unless = includes(condition[constants.kUnlessFormat], "llms-txt")

    if not (in_when or in_unless) then
      return nil
    end

    if tbl.behavior == constants.kContentVisible then
      -- content-visible: shown for llms only when listed under when-format
      return in_when
    end
    -- content-hidden: shown for llms only when listed under unless-format
    return in_unless
  end

  local function resolve(tbl)
    local show_in_llms = is_llms_visible(tbl)
    if show_in_llms == nil then
      return nil
    end

    -- is_visible comes from content-hidden.lua; when it agrees with the llms
    -- decision the block is left untouched.
    if show_in_llms == is_visible(tbl) then
      return nil
    end

    -- Replace the block with a copy of its original node, tagged so later
    -- stages can tell llms-only content from llms-hidden content.
    local marker = tbl.original_node:clone()
    if show_in_llms then
      marker.classes:insert("llms-conditional-content")
    else
      marker.classes:insert("llms-hidden-content")
    end
    return marker
  end

  return { ConditionalBlock = resolve }
end
5 changes: 5 additions & 0 deletions tests/docs/smoke-all/website/llms-txt/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ website:
- href: index.qmd
text: Home
- about.qmd
sidebar:
contents:
- section: Info
contents:
- about.qmd

format:
html:
Expand Down
Loading
Loading