From 83c76d06528e931c1bb3e6eb620f089021ad8a90 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 12:57:00 -0700 Subject: [PATCH 01/10] define search record contract and framework handoff helper --- src/utils/searchRecords.ts | 68 ++++++++++++++++++++++++++++++++++ tests/search-records.test.ts | 72 ++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 src/utils/searchRecords.ts create mode 100644 tests/search-records.test.ts diff --git a/src/utils/searchRecords.ts b/src/utils/searchRecords.ts new file mode 100644 index 000000000..3ff72c43f --- /dev/null +++ b/src/utils/searchRecords.ts @@ -0,0 +1,68 @@ +export type SearchRouteStyle = 'canonical' | 'framework-path' + +export type SearchHierarchy = { + lvl0?: string | null + lvl1?: string | null + lvl2?: string | null + lvl3?: string | null + lvl4?: string | null + lvl5?: string | null + lvl6?: string | null +} + +export type SearchRecord = { + objectID: string + url: string + anchor?: string + urlWithAnchor: string + library: string + framework: string + version: string + routeStyle: SearchRouteStyle + hierarchy: SearchHierarchy + content?: string +} + +export type SearchHitFrameworkContext = { + url: string + framework?: string | null + routeStyle?: SearchRouteStyle | null +} + +function getPathname(url: string) { + try { + return new URL(url, 'https://tanstack.com').pathname + } catch { + return url.split('#')[0]?.split('?')[0] ?? url + } +} + +export function hasFrameworkPath(url: string) { + const segments = getPathname(url).split('/').filter(Boolean) + + for (let index = 0; index < segments.length - 2; index++) { + if ( + segments[index] === 'docs' && + segments[index + 1] === 'framework' && + segments[index + 2] + ) { + return true + } + } + + return false +} + +export function shouldPersistFrameworkForHit(hit: SearchHitFrameworkContext) { + const framework = hit.framework?.trim().toLowerCase() + + if (!framework || framework === 'all') { + return false + } + + if (hit.routeStyle === 'framework-path') { + return false + } + + return !hasFrameworkPath(hit.url) +} diff --git a/tests/search-records.test.ts b/tests/search-records.test.ts new file mode 100644 index 000000000..f25912306 --- /dev/null +++ b/tests/search-records.test.ts @@ -0,0 +1,72 @@ +import assert from 'node:assert/strict' +import { + hasFrameworkPath, + shouldPersistFrameworkForHit, + type SearchHitFrameworkContext, +} from '../src/utils/searchRecords' + +type ShouldPersistCase = { + name: string + hit: SearchHitFrameworkContext + expected: boolean +} + +const shouldPersistCases: Array = [ + { + name: 'canonical framework hit persists framework before navigation', + hit: { + url: 'https://tanstack.com/form/latest/docs/overview#validation', + framework: 'solid', + routeStyle: 'canonical', + }, + expected: true, + }, + { + name: 'all-framework hit does not mutate preference', + hit: { + url: 'https://tanstack.com/form/latest/docs/overview#validation', + framework: 'all', + routeStyle: 'canonical', + }, + expected: false, + }, + { + name: 'framework path hit already carries framework in URL', + hit: { + url: 'https://tanstack.com/query/latest/docs/framework/solid/overview#validation', + framework: 'solid', + }, + expected: false, + }, + { + name: 'framework-path route style does not persist framework', + hit: { + url: 'https://tanstack.com/query/latest/docs/overview#validation', + framework: 'solid', + routeStyle: 'framework-path', + }, + expected: false, + }, +] + +for (const testCase of shouldPersistCases) { + assert.equal( + shouldPersistFrameworkForHit(testCase.hit), + testCase.expected, + testCase.name, + ) +} + +assert.equal( + hasFrameworkPath('/query/latest/docs/framework/solid/overview#validation'), + true, + 'relative framework path is detected', +) + +assert.equal( + hasFrameworkPath('/form/latest/docs/overview#validation'), + false, + 'canonical docs path is not treated as a framework path', +) + +console.log('search-records tests passed') From f96f672e23799dbbd0e4f4af17fc8d08cca9ac1d Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 13:03:25 -0700 Subject: [PATCH 02/10] fix modal filter selection behaviour --- src/components/Dropdown.tsx | 38 ++++++++++++++++++++-------------- src/components/SearchModal.tsx | 18 ++++++++++------ 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/components/Dropdown.tsx b/src/components/Dropdown.tsx index 0a58bbf33..93a8cdd6e 100644 --- a/src/components/Dropdown.tsx +++ b/src/components/Dropdown.tsx @@ -20,6 +20,7 @@ type DropdownContentProps = { className?: string align?: 'start' | 'center' | 'end' sideOffset?: number + portal?: boolean } type DropdownItemProps = { @@ -63,24 +64,29 @@ export function DropdownContent({ className, align = 'end', sideOffset = 6, + portal = true, }: DropdownContentProps) { - return ( - - - {children} - - + const content = ( + + {children} + ) + + if (!portal) { + return content + } + + return {content} } export function DropdownItem({ diff --git a/src/components/SearchModal.tsx b/src/components/SearchModal.tsx index e760bea47..968ff6c00 100644 --- a/src/components/SearchModal.tsx +++ b/src/components/SearchModal.tsx @@ -499,6 +499,7 @@ const Hit = ({ role="option" aria-selected={isFocused} tabIndex={-1} + data-search-hit="true" ref={ref} >
@@ -587,6 +588,7 @@ function LibraryRefinement() { setSelectedLibrary('')} @@ -664,6 +666,7 @@ function FrameworkRefinement() { handleSelect('')} className="font-bold"> All Frameworks @@ -723,8 +726,6 @@ function NoResults({
- ))} + ) + })}
)} From 82e25dfd6ce55be6b984fab5f527b8d28326dc79 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 13:34:56 -0700 Subject: [PATCH 04/10] Add normalized markdown-to-search extraction --- src/utils/markdown/searchExtraction.ts | 524 +++++++++++++++++++++++ tests/markdown-search-extraction.test.ts | 205 +++++++++ 2 files changed, 729 insertions(+) create mode 100644 src/utils/markdown/searchExtraction.ts create mode 100644 tests/markdown-search-extraction.test.ts diff --git a/src/utils/markdown/searchExtraction.ts b/src/utils/markdown/searchExtraction.ts new file mode 100644 index 000000000..5a6cb9f02 --- /dev/null +++ b/src/utils/markdown/searchExtraction.ts @@ -0,0 +1,524 @@ +import rehypeRaw from 'rehype-raw' +import rehypeSlug from 'rehype-slug' +import remarkGfm from 'remark-gfm' +import remarkParse from 'remark-parse' +import remarkRehype from 'remark-rehype' +import { unified } from 'unified' +import { extractCodeMeta } from './plugins/extractCodeMeta' +import { + rehypeParseCommentComponents, + rehypeTransformCommentComponents, + rehypeTransformFrameworkComponents, +} from './plugins' +import { + getInstallCommand, + type InstallMode, + type PackageManager, +} from './installCommand' + +const DEFAULT_PACKAGE_MANAGER: PackageManager = 'npm' + +type HeadingContext = { + anchor?: string + heading: string + level: number +} + +type SectionBuilder = { + framework: string + heading?: HeadingContext + chunks: Array +} + +type TabDescriptor = { + name: string + slug: string +} + +type PackageManagerMeta = { + packagesByFramework: Record>> + mode: InstallMode +} + +export type MarkdownSearchSection = { + framework: string + anchor?: string + heading?: string + level?: number + content: string +} + +export type MarkdownSearchExtraction = { + sections: Array + frameworks: Array +} + +export type MarkdownSearchExtractionOptions = { + packageManager?: PackageManager +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null +} + +function getTagName(node: unknown) { + if (!isRecord(node) || node.type !== 'element') { + return undefined + } + + return typeof node.tagName === 'string' ? node.tagName : undefined +} + +function getChildren(node: unknown) { + if (!isRecord(node) || !Array.isArray(node.children)) { + return [] + } + + return node.children +} + +function getStringProperty(node: unknown, property: string) { + if (!isRecord(node)) { + return undefined + } + + const properties = node.properties + if (!isRecord(properties)) { + return undefined + } + + const value = properties[property] + return typeof value === 'string' ? value : undefined +} + +function parseJsonObject(value: string | undefined) { + if (!value) { + return null + } + + try { + const parsed = JSON.parse(value) + return isRecord(parsed) ? parsed : null + } catch { + return null + } +} + +function normalizeSearchText(value: string) { + return value.replace(/\s+/g, ' ').trim() +} + +function isHeadingNode(node: unknown) { + const tagName = getTagName(node) + if (!tagName || tagName.length !== 2 || tagName[0] !== 'h') { + return false + } + + const level = Number(tagName[1]) + return Number.isInteger(level) && level >= 1 && level <= 6 +} + +function getNodeText(node: unknown): string { + if (!isRecord(node)) { + return '' + } + + if (node.type === 'text' || node.type === 'raw') { + return typeof node.value === 'string' ? node.value : '' + } + + const tagName = getTagName(node) + if (tagName === 'br') { + return '\n' + } + + if (tagName === 'img') { + return getStringProperty(node, 'alt') ?? '' + } + + return getChildren(node).map(getNodeText).join(' ') +} + +function getHeadingContext(node: unknown): HeadingContext | null { + if (!isHeadingNode(node)) { + return null + } + + const tagName = getTagName(node) + if (!tagName) { + return null + } + + const heading = normalizeSearchText(getNodeText(node)) + if (!heading) { + return null + } + + return { + anchor: getStringProperty(node, 'id'), + heading, + level: Number(tagName[1]), + } +} + +function createSectionBuilder( + framework: string, + heading?: HeadingContext, +): SectionBuilder { + return { + framework, + heading, + chunks: [], + } +} + +function flushSection( + sections: Array, + builder: SectionBuilder, +) { + const content = normalizeSearchText(builder.chunks.join('\n')) + if (!content) { + return + } + + sections.push({ + framework: builder.framework, + anchor: builder.heading?.anchor, + heading: builder.heading?.heading, + level: builder.heading?.level, + content, + }) +} + +function isCommentComponent(node: unknown, componentName: string) { + if (getTagName(node) !== 'md-comment-component') { + return false + } + + return ( + getStringProperty(node, 'data-component')?.toLowerCase() === componentName + ) +} + +function isFrameworkComponent(node: unknown) { + return isCommentComponent(node, 'framework') +} + +function isTabsComponent(node: unknown) { + return isCommentComponent(node, 'tabs') +} + +function isInstallMode(value: string): value is InstallMode { + return ( + value === 'install' || + value === 'dev-install' || + value === 'local-install' || + value === 'create' || + value === 'custom' + ) +} + +function parsePackageGroups(value: unknown) { + if (!Array.isArray(value)) { + return null + } + + const groups: Array> = [] + for (const group of value) { + if (!Array.isArray(group)) { + return null + } + + const packages: Array = [] + for (const packageName of group) { + if (typeof packageName !== 'string') { + return null + } + packages.push(packageName) + } + + if (packages.length) { + groups.push(packages) + } + } + + return groups.length ? groups : null +} + +function parsePackageManagerMeta(node: unknown): PackageManagerMeta | null { + const parsed = parseJsonObject( + getStringProperty(node, 'data-package-manager-meta'), + ) + if (!parsed) { + return null + } + + const mode = parsed.mode + if (typeof mode !== 'string' || !isInstallMode(mode)) { + return null + } + + const rawPackagesByFramework = parsed.packagesByFramework + if (!isRecord(rawPackagesByFramework)) { + return null + } + + const packagesByFramework: Record>> = {} + for (const [framework, packageGroups] of Object.entries( + rawPackagesByFramework, + )) { + const parsedPackageGroups = parsePackageGroups(packageGroups) + if (parsedPackageGroups) { + packagesByFramework[framework] = parsedPackageGroups + } + } + + if (!Object.keys(packagesByFramework).length) { + return null + } + + return { + mode, + packagesByFramework, + } +} + +function getPackageManagerCommandsByFramework( + node: unknown, + packageManager: PackageManager, +) { + const meta = parsePackageManagerMeta(node) + if (!meta) { + return null + } + + const commandsByFramework: Record = {} + for (const [framework, packageGroups] of Object.entries( + meta.packagesByFramework, + )) { + const commandText = getInstallCommand( + packageManager, + packageGroups, + meta.mode, + ).join('\n') + + if (commandText) { + commandsByFramework[framework] = commandText + } + } + + return Object.keys(commandsByFramework).length ? commandsByFramework : null +} + +function parseTabs(node: unknown): Array { + const parsed = parseJsonObject(getStringProperty(node, 'data-attributes')) + if (!parsed || !Array.isArray(parsed.tabs)) { + return [] + } + + const tabs: Array = [] + for (const tab of parsed.tabs) { + if (!isRecord(tab)) { + continue + } + + if (typeof tab.name !== 'string' || typeof tab.slug !== 'string') { + continue + } + + tabs.push({ + name: tab.name, + slug: tab.slug, + }) + } + + return tabs +} + +function getTabsText(node: unknown, options: RequiredSearchOptions) { + const tabs = parseTabs(node) + const panels = getChildren(node).filter( + (child) => getTagName(child) === 'md-tab-panel', + ) + + return panels + .map((panel, index) => { + const tabName = tabs[index]?.name + const panelText = getChildrenText(getChildren(panel), options) + return normalizeSearchText([tabName, panelText].filter(Boolean).join(' ')) + }) + .filter(Boolean) + .join('\n') +} + +function getNodeTextForSearch( + node: unknown, + options: RequiredSearchOptions, +): string { + if (isTabsComponent(node)) { + return getTabsText(node, options) + } + + if (isFrameworkComponent(node)) { + return '' + } + + const packageManagerCommands = getPackageManagerCommandsByFramework( + node, + options.packageManager, + ) + if (packageManagerCommands) { + return Object.values(packageManagerCommands).join('\n') + } + + return getNodeText(node) +} + +function getChildrenText( + children: Array, + options: RequiredSearchOptions, +) { + return children.map((child) => getNodeTextForSearch(child, options)).join('\n') +} + +function appendPackageManagerSections( + sections: Array, + commandsByFramework: Record, + heading: HeadingContext | undefined, +) { + for (const [framework, commandText] of Object.entries(commandsByFramework)) { + const content = normalizeSearchText(commandText) + if (!content) { + continue + } + + sections.push({ + framework, + anchor: heading?.anchor, + heading: heading?.heading, + level: heading?.level, + content, + }) + } +} + +function collectSectionsFromChildren( + children: Array, + framework: string, + options: RequiredSearchOptions, + inheritedHeading?: HeadingContext, +) { + const sections: Array = [] + let current = createSectionBuilder(framework, inheritedHeading) + + for (const child of children) { + const packageManagerCommands = getPackageManagerCommandsByFramework( + child, + options.packageManager, + ) + + if (packageManagerCommands) { + if (current.framework !== 'all') { + const commandText = packageManagerCommands[current.framework] + if (commandText) { + current.chunks.push(commandText) + } + continue + } + + flushSection(sections, current) + appendPackageManagerSections( + sections, + packageManagerCommands, + current.heading, + ) + current = createSectionBuilder(framework, current.heading) + continue + } + + if (isFrameworkComponent(child)) { + flushSection(sections, current) + + const heading = current.heading + for (const panel of getChildren(child)) { + if (getTagName(panel) !== 'md-framework-panel') { + continue + } + + const panelFramework = getStringProperty(panel, 'data-framework') + if (!panelFramework) { + continue + } + + sections.push( + ...collectSectionsFromChildren( + getChildren(panel), + panelFramework, + options, + heading, + ), + ) + } + + current = createSectionBuilder(framework, heading) + continue + } + + const heading = getHeadingContext(child) + if (heading) { + flushSection(sections, current) + current = createSectionBuilder(framework, heading) + continue + } + + const text = normalizeSearchText(getNodeTextForSearch(child, options)) + if (text) { + current.chunks.push(text) + } + } + + flushSection(sections, current) + return sections +} + +type RequiredSearchOptions = { + packageManager: PackageManager +} + +async function transformMarkdownForSearch(markdown: string) { + const processor = unified() + .use(remarkParse) + .use(remarkGfm) + .use(remarkRehype, { allowDangerousHtml: true }) + .use(extractCodeMeta) + .use(rehypeRaw) + .use(rehypeParseCommentComponents) + .use(rehypeSlug) + .use(rehypeTransformFrameworkComponents) + .use(rehypeTransformCommentComponents) + + return processor.run(processor.parse(markdown)) +} + +export async function extractMarkdownSearchSections( + markdown: string, + options: MarkdownSearchExtractionOptions = {}, +): Promise { + const tree = await transformMarkdownForSearch(markdown) + const requiredOptions: RequiredSearchOptions = { + packageManager: options.packageManager ?? DEFAULT_PACKAGE_MANAGER, + } + const sections = collectSectionsFromChildren( + getChildren(tree), + 'all', + requiredOptions, + ) + const frameworks = Array.from( + new Set(sections.map((section) => section.framework)), + ) + + return { + sections, + frameworks, + } +} diff --git a/tests/markdown-search-extraction.test.ts b/tests/markdown-search-extraction.test.ts new file mode 100644 index 000000000..d0998535d --- /dev/null +++ b/tests/markdown-search-extraction.test.ts @@ -0,0 +1,205 @@ +import { extractMarkdownSearchSections } from '../src/utils/markdown/searchExtraction' + +function assertEqual(actual: unknown, expected: unknown, message: string) { + if (actual !== expected) { + throw new Error(`${message}: expected ${String(expected)}, got ${String(actual)}`) + } +} + +function assertDeepEqual( + actual: Array, + expected: Array, + message: string, +) { + const actualJson = JSON.stringify(actual) + const expectedJson = JSON.stringify(expected) + + if (actualJson !== expectedJson) { + throw new Error(`${message}: expected ${expectedJson}, got ${actualJson}`) + } +} + +function assertMatch(value: string, pattern: RegExp, message: string) { + if (!pattern.test(value)) { + throw new Error(`${message}: ${pattern} did not match ${value}`) + } +} + +function assertDoesNotMatch(value: string, pattern: RegExp, message: string) { + if (pattern.test(value)) { + throw new Error(`${message}: ${pattern} matched ${value}`) + } +} + +const frameworkMarkdown = ` +# Adapter guide + +Shared setup applies to every framework. + + + +# React + +React adapter only. + +## Client setup + +Use React hooks. + +# Solid + +Solid adapter only. + +## Client setup + +Use Solid signals. + + +` + +const frameworkResult = + await extractMarkdownSearchSections(frameworkMarkdown) +const sharedFrameworkText = frameworkResult.sections + .filter((section) => section.framework === 'all') + .map((section) => section.content) + .join(' ') +const reactFrameworkText = frameworkResult.sections + .filter((section) => section.framework === 'react') + .map((section) => section.content) + .join(' ') +const solidFrameworkText = frameworkResult.sections + .filter((section) => section.framework === 'solid') + .map((section) => section.content) + .join(' ') + +assertMatch(sharedFrameworkText, /Shared setup/, 'shared text indexed') +assertMatch(reactFrameworkText, /React adapter only/, 'react text indexed') +assertMatch(reactFrameworkText, /Use React hooks/, 'react heading content indexed') +assertDoesNotMatch( + reactFrameworkText, + /Solid adapter only/, + 'solid content excluded from react section', +) +assertMatch(solidFrameworkText, /Solid adapter only/, 'solid text indexed') +assertMatch(solidFrameworkText, /Use Solid signals/, 'solid heading content indexed') +assertDoesNotMatch( + solidFrameworkText, + /React adapter only/, + 'react content excluded from solid section', +) +assertDeepEqual( + frameworkResult.frameworks, + ['all', 'react', 'solid'], + 'framework list preserved', +) + +const packageManagerMarkdown = ` +# Install + + + +react: @tanstack/react-query @tanstack/react-query-devtools +solid: @tanstack/solid-query + + +` + +const packageManagerResult = await extractMarkdownSearchSections( + packageManagerMarkdown, +) +const reactInstall = packageManagerResult.sections.find( + (section) => section.framework === 'react', +) +const solidInstall = packageManagerResult.sections.find( + (section) => section.framework === 'solid', +) + +assertEqual(reactInstall?.anchor, 'install', 'install anchor captured') +assertEqual(reactInstall?.heading, 'Install', 'install heading captured') +assertMatch( + reactInstall?.content ?? '', + /npm i -D @tanstack\/react-query @tanstack\/react-query-devtools/, + 'react install command generated', +) +assertMatch( + solidInstall?.content ?? '', + /npm i -D @tanstack\/solid-query/, + 'solid install command generated', +) +assertDoesNotMatch( + reactInstall?.content ?? '', + /react:/, + 'react raw package marker removed', +) +assertDoesNotMatch( + solidInstall?.content ?? '', + /solid:/, + 'solid raw package marker removed', +) + +const tabsMarkdown = ` +# Examples + + + +\`\`\`tsx title="app.tsx" +export const app = true +\`\`\` + +\`\`\`css title="styles.css" +.root { + color: tomato; +} +\`\`\` + + + + + +## Alpha + +Alpha tab content. + +## Beta + +Beta tab content. + + +` + +const tabsResult = await extractMarkdownSearchSections(tabsMarkdown) +const tabsText = tabsResult.sections + .map((section) => section.content) + .join(' ') + +assertMatch(tabsText, /app\.tsx/, 'file tab name indexed') +assertMatch(tabsText, /export const app = true/, 'file tab code indexed') +assertMatch(tabsText, /styles\.css/, 'second file tab name indexed') +assertMatch(tabsText, /color: tomato/, 'second file tab code indexed') +assertMatch(tabsText, /Alpha/, 'default tab name indexed') +assertMatch(tabsText, /Alpha tab content/, 'default tab content indexed') +assertMatch(tabsText, /Beta/, 'second default tab name indexed') +assertMatch(tabsText, /Beta tab content/, 'second default tab content indexed') +assertDoesNotMatch(tabsText, /::start:tabs/, 'raw start marker removed') +assertDoesNotMatch(tabsText, /::end:tabs/, 'raw end marker removed') + +const headingMarkdown = ` +## Search Params + +Heading anchors should survive extraction. +` + +const headingResult = await extractMarkdownSearchSections(headingMarkdown) +const headingSection = headingResult.sections.find( + (section) => section.heading === 'Search Params', +) + +assertEqual(headingSection?.anchor, 'search-params', 'heading anchor captured') +assertEqual(headingSection?.level, 2, 'heading level captured') +assertMatch( + headingSection?.content ?? '', + /Heading anchors/, + 'heading content captured', +) + +console.log('markdown-search-extraction tests passed') From dda670d672c7cc43a7ab9e50f1fac5ad98a12ec8 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 13:58:24 -0700 Subject: [PATCH 05/10] Generate records and upload tooling for new search --- scripts/sync-search-index.ts | 376 ++++++++++++++++++++++++++ src/utils/searchIndexGeneration.ts | 237 ++++++++++++++++ tests/search-index-generation.test.ts | 184 +++++++++++++ 3 files changed, 797 insertions(+) create mode 100644 scripts/sync-search-index.ts create mode 100644 src/utils/searchIndexGeneration.ts create mode 100644 tests/search-index-generation.test.ts diff --git a/scripts/sync-search-index.ts b/scripts/sync-search-index.ts new file mode 100644 index 000000000..835de76cc --- /dev/null +++ b/scripts/sync-search-index.ts @@ -0,0 +1,376 @@ +/// + +import { algoliasearch } from 'algoliasearch' +import matter from 'gray-matter' +import { getBranch, libraries } from '../src/libraries' +import type { LibrarySlim } from '../src/libraries' +import { + buildSearchRecordsForMarkdown, + isExcludedFromSearchIndex, +} from '../src/utils/searchIndexGeneration' +import type { SearchRecord } from '../src/utils/searchRecords' + +const DEFAULT_INDEX_NAME = 'TANSTACK_SG_TEST' +const DEFAULT_SITE_URL = 'https://tanstack.com' +const DEFAULT_PACKAGE_MANAGER = 'npm' + +type GitHubTreeEntry = { + path: string + type: string +} + +type SyncOptions = { + indexName: string + upload: boolean + siteUrl: string + packageManager: 'npm' | 'pnpm' | 'yarn' | 'bun' + libraryIds: Set +} + +function readFlagValue(args: Array, flag: string) { + const index = args.indexOf(flag) + if (index === -1) { + return undefined + } + + return args[index + 1] +} + +function readRepeatedFlagValues(args: Array, flag: string) { + const values: Array = [] + + for (let index = 0; index < args.length; index++) { + if (args[index] === flag && args[index + 1]) { + values.push(args[index + 1]) + } + } + + return values +} + +function parsePackageManager(value: string | undefined) { + if ( + value === 'npm' || + value === 'pnpm' || + value === 'yarn' || + value === 'bun' + ) { + return value + } + + return DEFAULT_PACKAGE_MANAGER +} + +function parseOptions(args: Array): SyncOptions { + return { + indexName: readFlagValue(args, '--index') ?? DEFAULT_INDEX_NAME, + upload: args.includes('--upload'), + siteUrl: readFlagValue(args, '--site-url') ?? DEFAULT_SITE_URL, + packageManager: parsePackageManager(readFlagValue(args, '--package-manager')), + libraryIds: new Set(readRepeatedFlagValues(args, '--library')), + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null +} + +function readString(value: unknown) { + return typeof value === 'string' ? value : undefined +} + +function getBranchSha(value: unknown) { + if (!isRecord(value) || !isRecord(value.commit)) { + return null + } + + return readString(value.commit.sha) ?? null +} + +function getTreeEntries(value: unknown): Array { + if (!isRecord(value) || !Array.isArray(value.tree)) { + return [] + } + + return value.tree.flatMap((entry) => { + if (!isRecord(entry)) { + return [] + } + + const path = readString(entry.path) + const type = readString(entry.type) + if (!path || !type) { + return [] + } + + return [{ path, type }] + }) +} + +async function fetchGitHubJson(url: string) { + const response = await fetch(url, { + headers: { + Accept: 'application/vnd.github+json', + 'User-Agent': 'tanstack-search-index-sync', + }, + }) + + if (response.status === 404) { + return null + } + + if (!response.ok) { + throw new Error(`GitHub request failed with ${response.status}: ${url}`) + } + + return response.json() +} + +async function fetchRawFile(repo: string, branch: string, path: string) { + const url = `https://raw.githubusercontent.com/${repo}/${branch}/${path}` + const response = await fetch(url, { + headers: { + 'User-Agent': 'tanstack-search-index-sync', + }, + }) + + if (response.status === 404) { + return null + } + + if (!response.ok) { + throw new Error(`Raw GitHub request failed with ${response.status}: ${url}`) + } + + return response.text() +} + +async function fetchRecursiveTree(repo: string, branch: string) { + const branchResponse = await fetchGitHubJson( + `https://api.github.com/repos/${repo}/branches/${branch}`, + ) + const branchSha = getBranchSha(branchResponse) + + if (!branchSha) { + return [] + } + + const treeResponse = await fetchGitHubJson( + `https://api.github.com/repos/${repo}/git/trees/${branchSha}?recursive=1`, + ) + + return getTreeEntries(treeResponse) +} + +function getDocsRoot(library: LibrarySlim) { + return library.docsRoot || 'docs' +} + +function getDocsPath(filePath: string, docsRoot: string) { + const prefix = `${docsRoot.replace(/\/+$/g, '')}/` + + if (!filePath.startsWith(prefix)) { + return null + } + + return filePath + .slice(prefix.length) + .replace(/\.md$/i, '') + .replace(/\/index$/i, '') +} + +function fallbackTitle(docsPath: string, library: LibrarySlim) { + const lastSegment = docsPath.split('/').filter(Boolean).at(-1) + if (!lastSegment) { + return library.name + } + + return lastSegment + .split('-') + .filter(Boolean) + .map((part) => `${part[0]?.toUpperCase() ?? ''}${part.slice(1)}`) + .join(' ') +} + +function getFrontmatterTitle(data: Record) { + const title = data.title + return typeof title === 'string' ? title : undefined +} + +async function buildLibraryRecords( + library: LibrarySlim, + options: SyncOptions, +) { + const branch = getBranch(library, 'latest') + const docsRoot = getDocsRoot(library) + const tree = await fetchRecursiveTree(library.repo, branch) + const markdownFiles = tree + .filter((entry) => entry.type === 'blob') + .map((entry) => entry.path) + .filter((path) => path.endsWith('.md')) + .filter((path) => getDocsPath(path, docsRoot) !== null) + + const records: Array = [] + + for (const filePath of markdownFiles) { + const docsPath = getDocsPath(filePath, docsRoot) + if (docsPath === null) { + continue + } + + const markdown = await fetchRawFile(library.repo, branch, filePath) + if (!markdown) { + continue + } + + const parsed = matter(markdown) + const title = getFrontmatterTitle(parsed.data) ?? fallbackTitle(docsPath, library) + const fileRecords = await buildSearchRecordsForMarkdown({ + library, + version: 'latest', + docsPath, + title, + content: parsed.content, + siteUrl: options.siteUrl, + packageManager: options.packageManager, + }) + + records.push(...fileRecords) + } + + return records +} + +function toAlgoliaObject(record: SearchRecord): Record { + return { + objectID: record.objectID, + url: record.url, + anchor: record.anchor, + urlWithAnchor: record.urlWithAnchor, + library: record.library, + framework: record.framework, + version: record.version, + routeStyle: record.routeStyle, + hierarchy: record.hierarchy, + content: record.content, + } +} + +async function uploadRecords(indexName: string, records: Array) { + const appId = process.env.ALGOLIA_APPLICATION_ID + const apiKey = process.env.ALGOLIA_ADMIN_API_KEY + + if (!appId || !apiKey) { + throw new Error( + 'ALGOLIA_APPLICATION_ID and ALGOLIA_ADMIN_API_KEY are required with --upload.', + ) + } + + if (!records.length) { + throw new Error('Refusing to upload an empty search index.') + } + + const client = algoliasearch(appId, apiKey) + + await client.setSettings({ + indexName, + indexSettings: { + attributesForFaceting: [ + 'filterOnly(library)', + 'filterOnly(framework)', + 'filterOnly(version)', + 'filterOnly(routeStyle)', + ], + searchableAttributes: [ + 'unordered(hierarchy.lvl1)', + 'unordered(hierarchy.lvl2)', + 'unordered(hierarchy.lvl3)', + 'unordered(hierarchy.lvl4)', + 'unordered(hierarchy.lvl5)', + 'unordered(hierarchy.lvl6)', + 'content', + ], + attributesToRetrieve: [ + 'hierarchy.lvl1', + 'hierarchy.lvl2', + 'hierarchy.lvl3', + 'hierarchy.lvl4', + 'hierarchy.lvl5', + 'hierarchy.lvl6', + 'url', + 'anchor', + 'urlWithAnchor', + 'content', + 'library', + 'framework', + 'version', + 'routeStyle', + ], + attributesToHighlight: [ + 'hierarchy.lvl1', + 'hierarchy.lvl2', + 'hierarchy.lvl3', + 'hierarchy.lvl4', + 'hierarchy.lvl5', + 'hierarchy.lvl6', + 'content', + ], + attributesToSnippet: ['content:50'], + }, + }) + + await client.replaceAllObjects({ + indexName, + objects: records.map(toAlgoliaObject), + batchSize: 1000, + scopes: ['settings', 'rules', 'synonyms'], + }) +} + +async function main() { + const options = parseOptions(process.argv.slice(2)) + const librariesToIndex = libraries.filter((library) => { + if (library.visible === false || !library.latestVersion) { + return false + } + + if (!options.libraryIds.size) { + return true + } + + return options.libraryIds.has(library.id) + }) + + const records: Array = [] + + for (const library of librariesToIndex) { + console.log(`building ${library.id}`) + const libraryRecords = await buildLibraryRecords(library, options) + records.push(...libraryRecords) + console.log(`- ${library.id}: ${libraryRecords.length} records`) + } + + const excludedRegistryRecords = records.filter((record) => + isExcludedFromSearchIndex(record.urlWithAnchor), + ) + + if (excludedRegistryRecords.length) { + throw new Error( + `Generated ${excludedRegistryRecords.length} excluded registry records.`, + ) + } + + console.log(`index: ${options.indexName}`) + console.log(`records: ${records.length}`) + console.log(options.upload ? 'mode: upload' : 'mode: dry-run') + + if (options.upload) { + await uploadRecords(options.indexName, records) + console.log(`uploaded ${records.length} records`) + } +} + +main().catch((error) => { + console.error(error instanceof Error ? error.message : error) + process.exitCode = 1 +}) diff --git a/src/utils/searchIndexGeneration.ts b/src/utils/searchIndexGeneration.ts new file mode 100644 index 000000000..d6231f009 --- /dev/null +++ b/src/utils/searchIndexGeneration.ts @@ -0,0 +1,237 @@ +import type { LibraryId } from '~/libraries/types' +import type { + SearchHierarchy, + SearchRecord, + SearchRouteStyle, +} from './searchRecords' +import { + extractMarkdownSearchSections, + type MarkdownSearchSection, +} from './markdown/searchExtraction' +import type { PackageManager } from './markdown/installCommand' + +const DEFAULT_SITE_URL = 'https://tanstack.com' +const INTENT_REGISTRY_PATH = '/intent/registry' + +type SearchIndexLibrary = { + id: LibraryId + name: string +} + +export type SearchIndexMarkdownInput = { + library: SearchIndexLibrary + version: string + docsPath: string + title: string + content: string + siteUrl?: string + packageManager?: PackageManager +} + +function normalizeUrlPath(path: string) { + const normalizedPath = `/${path.split('/').filter(Boolean).join('/')}` + + if (normalizedPath === '/') { + return '/' + } + + return normalizedPath.replace(/\/+$/g, '') +} + +function getUrlPathname(url: string) { + try { + return new URL(url, DEFAULT_SITE_URL).pathname + } catch { + return normalizeUrlPath(url.split('#')[0]?.split('?')[0] ?? url) + } +} + +export function isExcludedFromSearchIndex(url: string) { + const pathname = normalizeUrlPath(getUrlPathname(url)) + + return ( + pathname === INTENT_REGISTRY_PATH || + pathname.startsWith(`${INTENT_REGISTRY_PATH}/`) + ) +} + +function normalizeSiteUrl(siteUrl: string | undefined) { + return (siteUrl || DEFAULT_SITE_URL).replace(/\/+$/g, '') +} + +function normalizeDocsPath(docsPath: string) { + return docsPath + .replace(/\.md$/i, '') + .split('/') + .filter(Boolean) + .join('/') + .replace(/\/index$/i, '') +} + +function getRouteStyle(docsPath: string): SearchRouteStyle { + return docsPath.startsWith('framework/') ? 'framework-path' : 'canonical' +} + +function getFrameworkFromDocsPath(docsPath: string) { + const segments = docsPath.split('/').filter(Boolean) + + if (segments[0] !== 'framework') { + return null + } + + return segments[1] ?? null +} + +function buildUrl(input: { + siteUrl?: string + libraryId: LibraryId + version: string + docsPath: string +}) { + const basePath = [ + input.libraryId, + input.version, + 'docs', + ...input.docsPath.split('/').filter(Boolean), + ].join('/') + + return `${normalizeSiteUrl(input.siteUrl)}/${basePath}`.replace(/\/+$/g, '') +} + +function appendAnchor(url: string, anchor: string | undefined) { + if (!anchor) { + return url + } + + return `${url}#${anchor}` +} + +function normalizeText(value: string) { + return value.replace(/\s+/g, ' ').trim() +} + +function buildHierarchy( + libraryName: string, + title: string, + section: MarkdownSearchSection, +): SearchHierarchy { + const normalizedTitle = normalizeText(title) + const normalizedHeading = section.heading + ? normalizeText(section.heading) + : undefined + const hierarchy: SearchHierarchy = { + lvl0: libraryName, + lvl1: normalizedTitle || libraryName, + } + + if (normalizedHeading && normalizedHeading !== normalizedTitle) { + hierarchy.lvl2 = normalizedHeading + } + + return hierarchy +} + +function getRecordFramework( + routeStyle: SearchRouteStyle, + pathFramework: string | null, + sectionFramework: string, +) { + if (routeStyle === 'canonical') { + return sectionFramework + } + + if (!pathFramework) { + return null + } + + if (sectionFramework === 'all') { + return pathFramework + } + + if (sectionFramework === pathFramework) { + return sectionFramework + } + + return null +} + +function buildObjectId(input: { + libraryId: LibraryId + version: string + docsPath: string + framework: string + anchor?: string + index: number +}) { + return [ + input.libraryId, + input.version, + input.framework, + input.docsPath || 'index', + input.anchor || 'root', + String(input.index), + ].join(':') +} + +export async function buildSearchRecordsForMarkdown( + input: SearchIndexMarkdownInput, +): Promise> { + const docsPath = normalizeDocsPath(input.docsPath) + const url = buildUrl({ + siteUrl: input.siteUrl, + libraryId: input.library.id, + version: input.version, + docsPath, + }) + + if (isExcludedFromSearchIndex(url)) { + return [] + } + + const routeStyle = getRouteStyle(docsPath) + const pathFramework = getFrameworkFromDocsPath(docsPath) + const extraction = await extractMarkdownSearchSections( + `# ${input.title}\n${input.content}`, + { packageManager: input.packageManager }, + ) + const records: Array = [] + + for (const section of extraction.sections) { + const framework = getRecordFramework( + routeStyle, + pathFramework, + section.framework, + ) + + if (!framework) { + continue + } + + const urlWithAnchor = appendAnchor(url, section.anchor) + if (isExcludedFromSearchIndex(urlWithAnchor)) { + continue + } + + records.push({ + objectID: buildObjectId({ + libraryId: input.library.id, + version: input.version, + docsPath, + framework, + anchor: section.anchor, + index: records.length, + }), + url, + anchor: section.anchor, + urlWithAnchor, + library: input.library.id, + framework, + version: input.version, + routeStyle, + hierarchy: buildHierarchy(input.library.name, input.title, section), + content: section.content, + }) + } + + return records +} diff --git a/tests/search-index-generation.test.ts b/tests/search-index-generation.test.ts new file mode 100644 index 000000000..d35f5890c --- /dev/null +++ b/tests/search-index-generation.test.ts @@ -0,0 +1,184 @@ +import { + buildSearchRecordsForMarkdown, + isExcludedFromSearchIndex, +} from '../src/utils/searchIndexGeneration' + +function assertEqual(actual: unknown, expected: unknown, message: string) { + if (actual !== expected) { + throw new Error(`${message}: expected ${String(expected)}, got ${String(actual)}`) + } +} + +function assertTruthy(value: unknown, message: string) { + if (!value) { + throw new Error(message) + } +} + +function assertMatch(value: string, pattern: RegExp, message: string) { + if (!pattern.test(value)) { + throw new Error(`${message}: ${pattern} did not match ${value}`) + } +} + +function assertDoesNotInclude( + values: Array, + expected: string, + message: string, +) { + if (values.includes(expected)) { + throw new Error(`${message}: found ${expected}`) + } +} + +const formLibrary: { id: 'form'; name: string } = { + id: 'form', + name: 'TanStack Form', +} + +const canonicalRecords = await buildSearchRecordsForMarkdown({ + library: formLibrary, + version: 'latest', + docsPath: 'overview', + title: 'Overview', + content: ` +Shared overview content. + + + +# React + +React-only canonical content. + +## Validation + +React validation content. + +# Solid + +Solid-only canonical content. + +## Validation + +Solid validation content. + + +`, +}) + +const canonicalFrameworks = canonicalRecords.map((record) => record.framework) +assertTruthy( + canonicalFrameworks.includes('all'), + 'canonical shared record should be framework all', +) +assertTruthy( + canonicalFrameworks.includes('react'), + 'canonical framework block should emit react record', +) +assertTruthy( + canonicalFrameworks.includes('solid'), + 'canonical framework block should emit solid record', +) + +const reactValidationRecord = canonicalRecords.find( + (record) => + record.framework === 'react' && record.hierarchy.lvl2 === 'Validation', +) +assertTruthy(reactValidationRecord, 'react validation record should exist') +assertEqual( + reactValidationRecord?.routeStyle, + 'canonical', + 'canonical route style preserved', +) +assertEqual( + reactValidationRecord?.url, + 'https://tanstack.com/form/latest/docs/overview', + 'canonical URL preserved', +) +assertEqual( + reactValidationRecord?.urlWithAnchor, + 'https://tanstack.com/form/latest/docs/overview#validation', + 'canonical URL anchor preserved', +) +assertMatch( + reactValidationRecord?.content ?? '', + /React validation content/, + 'react content indexed', +) + +const routerLibrary: { id: 'router'; name: string } = { + id: 'router', + name: 'TanStack Router', +} + +const frameworkPathRecords = await buildSearchRecordsForMarkdown({ + library: routerLibrary, + version: 'latest', + docsPath: 'framework/react/quick-start', + title: 'Quick Start', + content: ` +React quick-start content. + +## Install + +Install the React adapter. + + + +# React + +React nested content. + +# Solid + +Solid nested content should not index for the React path. + + +`, +}) + +assertTruthy(frameworkPathRecords.length > 0, 'framework path records emitted') +for (const record of frameworkPathRecords) { + assertEqual(record.framework, 'react', 'framework path scoped to URL framework') + assertEqual( + record.routeStyle, + 'framework-path', + 'framework path route style preserved', + ) + assertEqual( + record.url.startsWith( + 'https://tanstack.com/router/latest/docs/framework/react/quick-start', + ), + true, + 'framework path URL preserved', + ) +} + +const frameworkPathText = frameworkPathRecords + .map((record) => record.content ?? '') + .join(' ') +assertMatch(frameworkPathText, /React nested content/, 'react nested text indexed') +assertDoesNotInclude( + frameworkPathRecords.map((record) => record.framework), + 'solid', + 'solid framework not emitted for react framework path', +) +assertEqual( + isExcludedFromSearchIndex( + 'https://tanstack.com/intent/registry?tab=packages', + ), + true, + 'registry query URL excluded', +) +assertEqual( + isExcludedFromSearchIndex('https://tanstack.com/intent/registry/pkg/SKILL'), + true, + 'registry path URL excluded', +) +assertEqual( + isExcludedFromSearchIndex('https://tanstack.com/intent/latest/docs/registry'), + false, + 'intent docs registry page remains indexable', +) + +console.log('search-index-generation tests passed') From de01c7d00bc022a9f7636bd1b98a7aa928793837 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 15:50:54 -0700 Subject: [PATCH 06/10] update search stuff --- src/components/SearchModal.tsx | 47 +++++++++++++++++++++++++++++----- src/utils/sitemap.ts | 6 ++++- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/components/SearchModal.tsx b/src/components/SearchModal.tsx index d4a25b237..799c7bf53 100644 --- a/src/components/SearchModal.tsx +++ b/src/components/SearchModal.tsx @@ -28,6 +28,10 @@ import { getStoredFrameworkPreference, usePersistFrameworkPreference, } from './FrameworkSelect' +import { + shouldPersistFrameworkForHit, + type SearchRouteStyle, +} from '~/utils/searchRecords' /** * Safely decode HTML entities without using innerHTML. @@ -83,7 +87,10 @@ interface AlgoliaHighlightResult { interface AlgoliaHit extends Record { objectID: string url: string + urlWithAnchor?: string library?: string + framework?: string + routeStyle?: SearchRouteStyle hierarchy: AlgoliaHierarchy content?: string type?: string @@ -329,8 +336,13 @@ function DynamicFilters() { 'hierarchy.lvl5', 'hierarchy.lvl6', 'url', + 'anchor', + 'urlWithAnchor', 'content', 'library', + 'framework', + 'version', + 'routeStyle', ]} attributesToHighlight={[ 'hierarchy.lvl1', @@ -362,6 +374,8 @@ const SafeLink = React.forwardRef( ref: React.Ref, ) => { const isInternal = href?.includes('//tanstack.com') + const internalUrl = href?.split('//tanstack.com')[1] + const [internalPath, internalHash] = internalUrl?.split('#') ?? [] if (!isInternal) { return ( @@ -382,7 +396,8 @@ const SafeLink = React.forwardRef( return ( { const { closeSearch } = useSearchContext() + const persistFramework = usePersistFrameworkPreference() + + const handleActivate = () => { + const framework = hit.framework + if ( + framework && + shouldPersistFrameworkForHit({ + url: hit.url, + framework, + routeStyle: hit.routeStyle, + }) + ) { + persistFramework(framework) + } + + closeSearch() + } const handleKeyDown = (e: React.KeyboardEvent) => { if (e.key === 'Enter' || e.key === ' ') { e.preventDefault() + e.stopPropagation() const link = e.currentTarget as HTMLAnchorElement link.click() - closeSearch() } } const handleClick = () => { - closeSearch() + handleActivate() } const ref = React.useRef(null!) @@ -434,12 +466,13 @@ const Hit = ({ // Get library and framework info for this hit const hitLibrary = hit.library as string | undefined - const hitFramework = frameworkOptions.find((f) => - hit.url.includes(`/framework/${f.value}`), - ) + const hitFramework = + frameworkOptions.find((f) => f.value === hit.framework) ?? + frameworkOptions.find((f) => hit.url.includes(`/framework/${f.value}`)) const hitLibraryInfo = hitLibrary ? libraries.find((l) => l.id === hitLibrary) : null + const hitUrl = hit.urlWithAnchor ?? hit.url // Build hierarchy prefix based on what's filtered const prefixParts: React.ReactNode[] = [] @@ -487,7 +520,7 @@ const Hit = ({ return ( > { ...getLibraryEntries(), ...docsEntries.flat(), ...getBlogEntries(), - ] + ].filter( + (entry) => + entry.path !== '/intent/registry' && + !entry.path.startsWith('/intent/registry/'), + ) return Array.from( new Map(entries.map((entry) => [entry.path, entry])).values(), From 62c8484b09f1f84e2bb262802bc03a8b0bb7cbc6 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 17:18:35 -0700 Subject: [PATCH 07/10] Prune generated search index path for crawler-only indexing --- scripts/sync-search-index.ts | 376 ------------- src/components/SearchModal.tsx | 3 +- src/routes/$libraryId/$version.docs.$.tsx | 11 +- src/utils/docs.functions.ts | 3 + src/utils/markdown/filterFrameworkContent.ts | 47 ++ src/utils/markdown/searchExtraction.ts | 524 ------------------- src/utils/searchIndexGeneration.ts | 237 --------- src/utils/searchRecords.ts | 27 +- tests/filter-framework-content.test.ts | 71 +++ tests/markdown-search-extraction.test.ts | 205 -------- tests/search-index-generation.test.ts | 184 ------- 11 files changed, 133 insertions(+), 1555 deletions(-) delete mode 100644 scripts/sync-search-index.ts delete mode 100644 src/utils/markdown/searchExtraction.ts delete mode 100644 src/utils/searchIndexGeneration.ts create mode 100644 tests/filter-framework-content.test.ts delete mode 100644 tests/markdown-search-extraction.test.ts delete mode 100644 tests/search-index-generation.test.ts diff --git a/scripts/sync-search-index.ts b/scripts/sync-search-index.ts deleted file mode 100644 index 835de76cc..000000000 --- a/scripts/sync-search-index.ts +++ /dev/null @@ -1,376 +0,0 @@ -/// - -import { algoliasearch } from 'algoliasearch' -import matter from 'gray-matter' -import { getBranch, libraries } from '../src/libraries' -import type { LibrarySlim } from '../src/libraries' -import { - buildSearchRecordsForMarkdown, - isExcludedFromSearchIndex, -} from '../src/utils/searchIndexGeneration' -import type { SearchRecord } from '../src/utils/searchRecords' - -const DEFAULT_INDEX_NAME = 'TANSTACK_SG_TEST' -const DEFAULT_SITE_URL = 'https://tanstack.com' -const DEFAULT_PACKAGE_MANAGER = 'npm' - -type GitHubTreeEntry = { - path: string - type: string -} - -type SyncOptions = { - indexName: string - upload: boolean - siteUrl: string - packageManager: 'npm' | 'pnpm' | 'yarn' | 'bun' - libraryIds: Set -} - -function readFlagValue(args: Array, flag: string) { - const index = args.indexOf(flag) - if (index === -1) { - return undefined - } - - return args[index + 1] -} - -function readRepeatedFlagValues(args: Array, flag: string) { - const values: Array = [] - - for (let index = 0; index < args.length; index++) { - if (args[index] === flag && args[index + 1]) { - values.push(args[index + 1]) - } - } - - return values -} - -function parsePackageManager(value: string | undefined) { - if ( - value === 'npm' || - value === 'pnpm' || - value === 'yarn' || - value === 'bun' - ) { - return value - } - - return DEFAULT_PACKAGE_MANAGER -} - -function parseOptions(args: Array): SyncOptions { - return { - indexName: readFlagValue(args, '--index') ?? DEFAULT_INDEX_NAME, - upload: args.includes('--upload'), - siteUrl: readFlagValue(args, '--site-url') ?? DEFAULT_SITE_URL, - packageManager: parsePackageManager(readFlagValue(args, '--package-manager')), - libraryIds: new Set(readRepeatedFlagValues(args, '--library')), - } -} - -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null -} - -function readString(value: unknown) { - return typeof value === 'string' ? value : undefined -} - -function getBranchSha(value: unknown) { - if (!isRecord(value) || !isRecord(value.commit)) { - return null - } - - return readString(value.commit.sha) ?? null -} - -function getTreeEntries(value: unknown): Array { - if (!isRecord(value) || !Array.isArray(value.tree)) { - return [] - } - - return value.tree.flatMap((entry) => { - if (!isRecord(entry)) { - return [] - } - - const path = readString(entry.path) - const type = readString(entry.type) - if (!path || !type) { - return [] - } - - return [{ path, type }] - }) -} - -async function fetchGitHubJson(url: string) { - const response = await fetch(url, { - headers: { - Accept: 'application/vnd.github+json', - 'User-Agent': 'tanstack-search-index-sync', - }, - }) - - if (response.status === 404) { - return null - } - - if (!response.ok) { - throw new Error(`GitHub request failed with ${response.status}: ${url}`) - } - - return response.json() -} - -async function fetchRawFile(repo: string, branch: string, path: string) { - const url = `https://raw.githubusercontent.com/${repo}/${branch}/${path}` - const response = await fetch(url, { - headers: { - 'User-Agent': 'tanstack-search-index-sync', - }, - }) - - if (response.status === 404) { - return null - } - - if (!response.ok) { - throw new Error(`Raw GitHub request failed with ${response.status}: ${url}`) - } - - return response.text() -} - -async function fetchRecursiveTree(repo: string, branch: string) { - const branchResponse = await fetchGitHubJson( - `https://api.github.com/repos/${repo}/branches/${branch}`, - ) - const branchSha = getBranchSha(branchResponse) - - if (!branchSha) { - return [] - } - - const treeResponse = await fetchGitHubJson( - `https://api.github.com/repos/${repo}/git/trees/${branchSha}?recursive=1`, - ) - - return getTreeEntries(treeResponse) -} - -function getDocsRoot(library: LibrarySlim) { - return library.docsRoot || 'docs' -} - -function getDocsPath(filePath: string, docsRoot: string) { - const prefix = `${docsRoot.replace(/\/+$/g, '')}/` - - if (!filePath.startsWith(prefix)) { - return null - } - - return filePath - .slice(prefix.length) - .replace(/\.md$/i, '') - .replace(/\/index$/i, '') -} - -function fallbackTitle(docsPath: string, library: LibrarySlim) { - const lastSegment = docsPath.split('/').filter(Boolean).at(-1) - if (!lastSegment) { - return library.name - } - - return lastSegment - .split('-') - .filter(Boolean) - .map((part) => `${part[0]?.toUpperCase() ?? ''}${part.slice(1)}`) - .join(' ') -} - -function getFrontmatterTitle(data: Record) { - const title = data.title - return typeof title === 'string' ? title : undefined -} - -async function buildLibraryRecords( - library: LibrarySlim, - options: SyncOptions, -) { - const branch = getBranch(library, 'latest') - const docsRoot = getDocsRoot(library) - const tree = await fetchRecursiveTree(library.repo, branch) - const markdownFiles = tree - .filter((entry) => entry.type === 'blob') - .map((entry) => entry.path) - .filter((path) => path.endsWith('.md')) - .filter((path) => getDocsPath(path, docsRoot) !== null) - - const records: Array = [] - - for (const filePath of markdownFiles) { - const docsPath = getDocsPath(filePath, docsRoot) - if (docsPath === null) { - continue - } - - const markdown = await fetchRawFile(library.repo, branch, filePath) - if (!markdown) { - continue - } - - const parsed = matter(markdown) - const title = getFrontmatterTitle(parsed.data) ?? fallbackTitle(docsPath, library) - const fileRecords = await buildSearchRecordsForMarkdown({ - library, - version: 'latest', - docsPath, - title, - content: parsed.content, - siteUrl: options.siteUrl, - packageManager: options.packageManager, - }) - - records.push(...fileRecords) - } - - return records -} - -function toAlgoliaObject(record: SearchRecord): Record { - return { - objectID: record.objectID, - url: record.url, - anchor: record.anchor, - urlWithAnchor: record.urlWithAnchor, - library: record.library, - framework: record.framework, - version: record.version, - routeStyle: record.routeStyle, - hierarchy: record.hierarchy, - content: record.content, - } -} - -async function uploadRecords(indexName: string, records: Array) { - const appId = process.env.ALGOLIA_APPLICATION_ID - const apiKey = process.env.ALGOLIA_ADMIN_API_KEY - - if (!appId || !apiKey) { - throw new Error( - 'ALGOLIA_APPLICATION_ID and ALGOLIA_ADMIN_API_KEY are required with --upload.', - ) - } - - if (!records.length) { - throw new Error('Refusing to upload an empty search index.') - } - - const client = algoliasearch(appId, apiKey) - - await client.setSettings({ - indexName, - indexSettings: { - attributesForFaceting: [ - 'filterOnly(library)', - 'filterOnly(framework)', - 'filterOnly(version)', - 'filterOnly(routeStyle)', - ], - searchableAttributes: [ - 'unordered(hierarchy.lvl1)', - 'unordered(hierarchy.lvl2)', - 'unordered(hierarchy.lvl3)', - 'unordered(hierarchy.lvl4)', - 'unordered(hierarchy.lvl5)', - 'unordered(hierarchy.lvl6)', - 'content', - ], - attributesToRetrieve: [ - 'hierarchy.lvl1', - 'hierarchy.lvl2', - 'hierarchy.lvl3', - 'hierarchy.lvl4', - 'hierarchy.lvl5', - 'hierarchy.lvl6', - 'url', - 'anchor', - 'urlWithAnchor', - 'content', - 'library', - 'framework', - 'version', - 'routeStyle', - ], - attributesToHighlight: [ - 'hierarchy.lvl1', - 'hierarchy.lvl2', - 'hierarchy.lvl3', - 'hierarchy.lvl4', - 'hierarchy.lvl5', - 'hierarchy.lvl6', - 'content', - ], - attributesToSnippet: ['content:50'], - }, - }) - - await client.replaceAllObjects({ - indexName, - objects: records.map(toAlgoliaObject), - batchSize: 1000, - scopes: ['settings', 'rules', 'synonyms'], - }) -} - -async function main() { - const options = parseOptions(process.argv.slice(2)) - const librariesToIndex = libraries.filter((library) => { - if (library.visible === false || !library.latestVersion) { - return false - } - - if (!options.libraryIds.size) { - return true - } - - return options.libraryIds.has(library.id) - }) - - const records: Array = [] - - for (const library of librariesToIndex) { - console.log(`building ${library.id}`) - const libraryRecords = await buildLibraryRecords(library, options) - records.push(...libraryRecords) - console.log(`- ${library.id}: ${libraryRecords.length} records`) - } - - const excludedRegistryRecords = records.filter((record) => - isExcludedFromSearchIndex(record.urlWithAnchor), - ) - - if (excludedRegistryRecords.length) { - throw new Error( - `Generated ${excludedRegistryRecords.length} excluded registry records.`, - ) - } - - console.log(`index: ${options.indexName}`) - console.log(`records: ${records.length}`) - console.log(options.upload ? 'mode: upload' : 'mode: dry-run') - - if (options.upload) { - await uploadRecords(options.indexName, records) - console.log(`uploaded ${records.length} records`) - } -} - -main().catch((error) => { - console.error(error instanceof Error ? error.message : error) - process.exitCode = 1 -}) diff --git a/src/components/SearchModal.tsx b/src/components/SearchModal.tsx index 799c7bf53..68e1c7324 100644 --- a/src/components/SearchModal.tsx +++ b/src/components/SearchModal.tsx @@ -30,7 +30,6 @@ import { } from './FrameworkSelect' import { shouldPersistFrameworkForHit, - type SearchRouteStyle, } from '~/utils/searchRecords' /** @@ -90,7 +89,7 @@ interface AlgoliaHit extends Record { urlWithAnchor?: string library?: string framework?: string - routeStyle?: SearchRouteStyle + routeStyle?: string hierarchy: AlgoliaHierarchy content?: string type?: string diff --git a/src/routes/$libraryId/$version.docs.$.tsx b/src/routes/$libraryId/$version.docs.$.tsx index ee525cec5..07b8fa3a7 100644 --- a/src/routes/$libraryId/$version.docs.$.tsx +++ b/src/routes/$libraryId/$version.docs.$.tsx @@ -61,19 +61,28 @@ export const Route = createFileRoute('/$libraryId/$version/docs/$')({ } }, head: ({ loaderData, params }) => { - const { libraryId } = params + const { libraryId, version, _splat: docsPath } = params const library = findLibrary(libraryId) if (!library) { throw notFound() } + const frameworkVariantLinks = (loaderData?.frameworks ?? []).map( + (framework) => ({ + rel: 'alternate', + type: 'text/markdown', + href: `/${libraryId}/${version}/docs/${docsPath}.md?framework=${framework}`, + }), + ) + return { meta: seo({ title: `${loaderData?.title} | ${library.name} Docs`, description: loaderData?.description, noindex: library.visible === false, }), + links: frameworkVariantLinks, } }, component: Docs, diff --git a/src/utils/docs.functions.ts b/src/utils/docs.functions.ts index dbfeea200..5b930be64 100644 --- a/src/utils/docs.functions.ts +++ b/src/utils/docs.functions.ts @@ -10,6 +10,7 @@ import { isRecoverableGitHubContentError, } from '~/utils/documents.server' import { renderMarkdownToRsc } from './markdown' +import { extractFrameworksFromMarkdown } from './markdown/filterFrameworkContent' import { getCachedDocsArtifact } from './github-content-cache.server' import { buildRedirectManifest, type RedirectManifestEntry } from './redirects' import { removeLeadingSlash } from './utils' @@ -239,6 +240,7 @@ export const fetchDocs = createServerFn({ method: 'GET' }) contentRsc, title: frontMatter.data?.title ?? 'Content temporarily unavailable', description, + frameworks: extractFrameworksFromMarkdown(frontMatter.content), filePath, headings, frontmatter: frontMatter.data, @@ -255,6 +257,7 @@ export const fetchDocsPage = createServerFn({ method: 'GET' }) description: doc.description, filePath: doc.filePath, frontmatter: doc.frontmatter, + frameworks: doc.frameworks, headings: doc.headings, title: doc.title, } diff --git a/src/utils/markdown/filterFrameworkContent.ts b/src/utils/markdown/filterFrameworkContent.ts index 66fa8d724..ba423b6f9 100644 --- a/src/utils/markdown/filterFrameworkContent.ts +++ b/src/utils/markdown/filterFrameworkContent.ts @@ -31,6 +31,53 @@ type FilterOptions = { keepMarkers?: boolean } +export function extractFrameworksFromMarkdown(markdown: string): Array { + const frameworks: Array = [] + const seen = new Set() + + const addFramework = (framework: string) => { + const normalizedFramework = framework.trim().toLowerCase() + if (!normalizedFramework || seen.has(normalizedFramework)) { + return + } + + seen.add(normalizedFramework) + frameworks.push(normalizedFramework) + } + + const frameworkBlockRegex = + /([\s\S]*?)/gi + let frameworkBlockMatch: RegExpExecArray | null + + while ((frameworkBlockMatch = frameworkBlockRegex.exec(markdown)) !== null) { + for (const section of splitByFrameworkHeadings( + frameworkBlockMatch[1] ?? '', + )) { + addFramework(section.framework) + } + } + + const tabsBlockRegex = + /([\s\S]*?)/gi + let tabsBlockMatch: RegExpExecArray | null + + while ((tabsBlockMatch = tabsBlockRegex.exec(markdown)) !== null) { + const attrs = tabsBlockMatch[1] ?? '' + const variant = parseAttribute(attrs, 'variant') + if (variant !== 'package-manager' && variant !== 'package-managers') { + continue + } + + for (const framework of Object.keys( + parseFrameworkLines(tabsBlockMatch[2] ?? ''), + )) { + addFramework(framework) + } + } + + return frameworks +} + /** * Filters framework-specific content and package-manager tabs from raw markdown. * If no framework is specified, returns markdown unchanged. diff --git a/src/utils/markdown/searchExtraction.ts b/src/utils/markdown/searchExtraction.ts deleted file mode 100644 index 5a6cb9f02..000000000 --- a/src/utils/markdown/searchExtraction.ts +++ /dev/null @@ -1,524 +0,0 @@ -import rehypeRaw from 'rehype-raw' -import rehypeSlug from 'rehype-slug' -import remarkGfm from 'remark-gfm' -import remarkParse from 'remark-parse' -import remarkRehype from 'remark-rehype' -import { unified } from 'unified' -import { extractCodeMeta } from './plugins/extractCodeMeta' -import { - rehypeParseCommentComponents, - rehypeTransformCommentComponents, - rehypeTransformFrameworkComponents, -} from './plugins' -import { - getInstallCommand, - type InstallMode, - type PackageManager, -} from './installCommand' - -const DEFAULT_PACKAGE_MANAGER: PackageManager = 'npm' - -type HeadingContext = { - anchor?: string - heading: string - level: number -} - -type SectionBuilder = { - framework: string - heading?: HeadingContext - chunks: Array -} - -type TabDescriptor = { - name: string - slug: string -} - -type PackageManagerMeta = { - packagesByFramework: Record>> - mode: InstallMode -} - -export type MarkdownSearchSection = { - framework: string - anchor?: string - heading?: string - level?: number - content: string -} - -export type MarkdownSearchExtraction = { - sections: Array - frameworks: Array -} - -export type MarkdownSearchExtractionOptions = { - packageManager?: PackageManager -} - -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null -} - -function getTagName(node: unknown) { - if (!isRecord(node) || node.type !== 'element') { - return undefined - } - - return typeof node.tagName === 'string' ? node.tagName : undefined -} - -function getChildren(node: unknown) { - if (!isRecord(node) || !Array.isArray(node.children)) { - return [] - } - - return node.children -} - -function getStringProperty(node: unknown, property: string) { - if (!isRecord(node)) { - return undefined - } - - const properties = node.properties - if (!isRecord(properties)) { - return undefined - } - - const value = properties[property] - return typeof value === 'string' ? value : undefined -} - -function parseJsonObject(value: string | undefined) { - if (!value) { - return null - } - - try { - const parsed = JSON.parse(value) - return isRecord(parsed) ? parsed : null - } catch { - return null - } -} - -function normalizeSearchText(value: string) { - return value.replace(/\s+/g, ' ').trim() -} - -function isHeadingNode(node: unknown) { - const tagName = getTagName(node) - if (!tagName || tagName.length !== 2 || tagName[0] !== 'h') { - return false - } - - const level = Number(tagName[1]) - return Number.isInteger(level) && level >= 1 && level <= 6 -} - -function getNodeText(node: unknown): string { - if (!isRecord(node)) { - return '' - } - - if (node.type === 'text' || node.type === 'raw') { - return typeof node.value === 'string' ? node.value : '' - } - - const tagName = getTagName(node) - if (tagName === 'br') { - return '\n' - } - - if (tagName === 'img') { - return getStringProperty(node, 'alt') ?? '' - } - - return getChildren(node).map(getNodeText).join(' ') -} - -function getHeadingContext(node: unknown): HeadingContext | null { - if (!isHeadingNode(node)) { - return null - } - - const tagName = getTagName(node) - if (!tagName) { - return null - } - - const heading = normalizeSearchText(getNodeText(node)) - if (!heading) { - return null - } - - return { - anchor: getStringProperty(node, 'id'), - heading, - level: Number(tagName[1]), - } -} - -function createSectionBuilder( - framework: string, - heading?: HeadingContext, -): SectionBuilder { - return { - framework, - heading, - chunks: [], - } -} - -function flushSection( - sections: Array, - builder: SectionBuilder, -) { - const content = normalizeSearchText(builder.chunks.join('\n')) - if (!content) { - return - } - - sections.push({ - framework: builder.framework, - anchor: builder.heading?.anchor, - heading: builder.heading?.heading, - level: builder.heading?.level, - content, - }) -} - -function isCommentComponent(node: unknown, componentName: string) { - if (getTagName(node) !== 'md-comment-component') { - return false - } - - return ( - getStringProperty(node, 'data-component')?.toLowerCase() === componentName - ) -} - -function isFrameworkComponent(node: unknown) { - return isCommentComponent(node, 'framework') -} - -function isTabsComponent(node: unknown) { - return isCommentComponent(node, 'tabs') -} - -function isInstallMode(value: string): value is InstallMode { - return ( - value === 'install' || - value === 'dev-install' || - value === 'local-install' || - value === 'create' || - value === 'custom' - ) -} - -function parsePackageGroups(value: unknown) { - if (!Array.isArray(value)) { - return null - } - - const groups: Array> = [] - for (const group of value) { - if (!Array.isArray(group)) { - return null - } - - const packages: Array = [] - for (const packageName of group) { - if (typeof packageName !== 'string') { - return null - } - packages.push(packageName) - } - - if (packages.length) { - groups.push(packages) - } - } - - return groups.length ? groups : null -} - -function parsePackageManagerMeta(node: unknown): PackageManagerMeta | null { - const parsed = parseJsonObject( - getStringProperty(node, 'data-package-manager-meta'), - ) - if (!parsed) { - return null - } - - const mode = parsed.mode - if (typeof mode !== 'string' || !isInstallMode(mode)) { - return null - } - - const rawPackagesByFramework = parsed.packagesByFramework - if (!isRecord(rawPackagesByFramework)) { - return null - } - - const packagesByFramework: Record>> = {} - for (const [framework, packageGroups] of Object.entries( - rawPackagesByFramework, - )) { - const parsedPackageGroups = parsePackageGroups(packageGroups) - if (parsedPackageGroups) { - packagesByFramework[framework] = parsedPackageGroups - } - } - - if (!Object.keys(packagesByFramework).length) { - return null - } - - return { - mode, - packagesByFramework, - } -} - -function getPackageManagerCommandsByFramework( - node: unknown, - packageManager: PackageManager, -) { - const meta = parsePackageManagerMeta(node) - if (!meta) { - return null - } - - const commandsByFramework: Record = {} - for (const [framework, packageGroups] of Object.entries( - meta.packagesByFramework, - )) { - const commandText = getInstallCommand( - packageManager, - packageGroups, - meta.mode, - ).join('\n') - - if (commandText) { - commandsByFramework[framework] = commandText - } - } - - return Object.keys(commandsByFramework).length ? commandsByFramework : null -} - -function parseTabs(node: unknown): Array { - const parsed = parseJsonObject(getStringProperty(node, 'data-attributes')) - if (!parsed || !Array.isArray(parsed.tabs)) { - return [] - } - - const tabs: Array = [] - for (const tab of parsed.tabs) { - if (!isRecord(tab)) { - continue - } - - if (typeof tab.name !== 'string' || typeof tab.slug !== 'string') { - continue - } - - tabs.push({ - name: tab.name, - slug: tab.slug, - }) - } - - return tabs -} - -function getTabsText(node: unknown, options: RequiredSearchOptions) { - const tabs = parseTabs(node) - const panels = getChildren(node).filter( - (child) => getTagName(child) === 'md-tab-panel', - ) - - return panels - .map((panel, index) => { - const tabName = tabs[index]?.name - const panelText = getChildrenText(getChildren(panel), options) - return normalizeSearchText([tabName, panelText].filter(Boolean).join(' ')) - }) - .filter(Boolean) - .join('\n') -} - -function getNodeTextForSearch( - node: unknown, - options: RequiredSearchOptions, -): string { - if (isTabsComponent(node)) { - return getTabsText(node, options) - } - - if (isFrameworkComponent(node)) { - return '' - } - - const packageManagerCommands = getPackageManagerCommandsByFramework( - node, - options.packageManager, - ) - if (packageManagerCommands) { - return Object.values(packageManagerCommands).join('\n') - } - - return getNodeText(node) -} - -function getChildrenText( - children: Array, - options: RequiredSearchOptions, -) { - return children.map((child) => getNodeTextForSearch(child, options)).join('\n') -} - -function appendPackageManagerSections( - sections: Array, - commandsByFramework: Record, - heading: HeadingContext | undefined, -) { - for (const [framework, commandText] of Object.entries(commandsByFramework)) { - const content = normalizeSearchText(commandText) - if (!content) { - continue - } - - sections.push({ - framework, - anchor: heading?.anchor, - heading: heading?.heading, - level: heading?.level, - content, - }) - } -} - -function collectSectionsFromChildren( - children: Array, - framework: string, - options: RequiredSearchOptions, - inheritedHeading?: HeadingContext, -) { - const sections: Array = [] - let current = createSectionBuilder(framework, inheritedHeading) - - for (const child of children) { - const packageManagerCommands = getPackageManagerCommandsByFramework( - child, - options.packageManager, - ) - - if (packageManagerCommands) { - if (current.framework !== 'all') { - const commandText = packageManagerCommands[current.framework] - if (commandText) { - current.chunks.push(commandText) - } - continue - } - - flushSection(sections, current) - appendPackageManagerSections( - sections, - packageManagerCommands, - current.heading, - ) - current = createSectionBuilder(framework, current.heading) - continue - } - - if (isFrameworkComponent(child)) { - flushSection(sections, current) - - const heading = current.heading - for (const panel of getChildren(child)) { - if (getTagName(panel) !== 'md-framework-panel') { - continue - } - - const panelFramework = getStringProperty(panel, 'data-framework') - if (!panelFramework) { - continue - } - - sections.push( - ...collectSectionsFromChildren( - getChildren(panel), - panelFramework, - options, - heading, - ), - ) - } - - current = createSectionBuilder(framework, heading) - continue - } - - const heading = getHeadingContext(child) - if (heading) { - flushSection(sections, current) - current = createSectionBuilder(framework, heading) - continue - } - - const text = normalizeSearchText(getNodeTextForSearch(child, options)) - if (text) { - current.chunks.push(text) - } - } - - flushSection(sections, current) - return sections -} - -type RequiredSearchOptions = { - packageManager: PackageManager -} - -async function transformMarkdownForSearch(markdown: string) { - const processor = unified() - .use(remarkParse) - .use(remarkGfm) - .use(remarkRehype, { allowDangerousHtml: true }) - .use(extractCodeMeta) - .use(rehypeRaw) - .use(rehypeParseCommentComponents) - .use(rehypeSlug) - .use(rehypeTransformFrameworkComponents) - .use(rehypeTransformCommentComponents) - - return processor.run(processor.parse(markdown)) -} - -export async function extractMarkdownSearchSections( - markdown: string, - options: MarkdownSearchExtractionOptions = {}, -): Promise { - const tree = await transformMarkdownForSearch(markdown) - const requiredOptions: RequiredSearchOptions = { - packageManager: options.packageManager ?? DEFAULT_PACKAGE_MANAGER, - } - const sections = collectSectionsFromChildren( - getChildren(tree), - 'all', - requiredOptions, - ) - const frameworks = Array.from( - new Set(sections.map((section) => section.framework)), - ) - - return { - sections, - frameworks, - } -} diff --git a/src/utils/searchIndexGeneration.ts b/src/utils/searchIndexGeneration.ts deleted file mode 100644 index d6231f009..000000000 --- a/src/utils/searchIndexGeneration.ts +++ /dev/null @@ -1,237 +0,0 @@ -import type { LibraryId } from '~/libraries/types' -import type { - SearchHierarchy, - SearchRecord, - SearchRouteStyle, -} from './searchRecords' -import { - extractMarkdownSearchSections, - type MarkdownSearchSection, -} from './markdown/searchExtraction' -import type { PackageManager } from './markdown/installCommand' - -const DEFAULT_SITE_URL = 'https://tanstack.com' -const INTENT_REGISTRY_PATH = '/intent/registry' - -type SearchIndexLibrary = { - id: LibraryId - name: string -} - -export type SearchIndexMarkdownInput = { - library: SearchIndexLibrary - version: string - docsPath: string - title: string - content: string - siteUrl?: string - packageManager?: PackageManager -} - -function normalizeUrlPath(path: string) { - const normalizedPath = `/${path.split('/').filter(Boolean).join('/')}` - - if (normalizedPath === '/') { - return '/' - } - - return normalizedPath.replace(/\/+$/g, '') -} - -function getUrlPathname(url: string) { - try { - return new URL(url, DEFAULT_SITE_URL).pathname - } catch { - return normalizeUrlPath(url.split('#')[0]?.split('?')[0] ?? url) - } -} - -export function isExcludedFromSearchIndex(url: string) { - const pathname = normalizeUrlPath(getUrlPathname(url)) - - return ( - pathname === INTENT_REGISTRY_PATH || - pathname.startsWith(`${INTENT_REGISTRY_PATH}/`) - ) -} - -function normalizeSiteUrl(siteUrl: string | undefined) { - return (siteUrl || DEFAULT_SITE_URL).replace(/\/+$/g, '') -} - -function normalizeDocsPath(docsPath: string) { - return docsPath - .replace(/\.md$/i, '') - .split('/') - .filter(Boolean) - .join('/') - .replace(/\/index$/i, '') -} - -function getRouteStyle(docsPath: string): SearchRouteStyle { - return docsPath.startsWith('framework/') ? 'framework-path' : 'canonical' -} - -function getFrameworkFromDocsPath(docsPath: string) { - const segments = docsPath.split('/').filter(Boolean) - - if (segments[0] !== 'framework') { - return null - } - - return segments[1] ?? null -} - -function buildUrl(input: { - siteUrl?: string - libraryId: LibraryId - version: string - docsPath: string -}) { - const basePath = [ - input.libraryId, - input.version, - 'docs', - ...input.docsPath.split('/').filter(Boolean), - ].join('/') - - return `${normalizeSiteUrl(input.siteUrl)}/${basePath}`.replace(/\/+$/g, '') -} - -function appendAnchor(url: string, anchor: string | undefined) { - if (!anchor) { - return url - } - - return `${url}#${anchor}` -} - -function normalizeText(value: string) { - return value.replace(/\s+/g, ' ').trim() -} - -function buildHierarchy( - libraryName: string, - title: string, - section: MarkdownSearchSection, -): SearchHierarchy { - const normalizedTitle = normalizeText(title) - const normalizedHeading = section.heading - ? normalizeText(section.heading) - : undefined - const hierarchy: SearchHierarchy = { - lvl0: libraryName, - lvl1: normalizedTitle || libraryName, - } - - if (normalizedHeading && normalizedHeading !== normalizedTitle) { - hierarchy.lvl2 = normalizedHeading - } - - return hierarchy -} - -function getRecordFramework( - routeStyle: SearchRouteStyle, - pathFramework: string | null, - sectionFramework: string, -) { - if (routeStyle === 'canonical') { - return sectionFramework - } - - if (!pathFramework) { - return null - } - - if (sectionFramework === 'all') { - return pathFramework - } - - if (sectionFramework === pathFramework) { - return sectionFramework - } - - return null -} - -function buildObjectId(input: { - libraryId: LibraryId - version: string - docsPath: string - framework: string - anchor?: string - index: number -}) { - return [ - input.libraryId, - input.version, - input.framework, - input.docsPath || 'index', - input.anchor || 'root', - String(input.index), - ].join(':') -} - -export async function buildSearchRecordsForMarkdown( - input: SearchIndexMarkdownInput, -): Promise> { - const docsPath = normalizeDocsPath(input.docsPath) - const url = buildUrl({ - siteUrl: input.siteUrl, - libraryId: input.library.id, - version: input.version, - docsPath, - }) - - if (isExcludedFromSearchIndex(url)) { - return [] - } - - const routeStyle = getRouteStyle(docsPath) - const pathFramework = getFrameworkFromDocsPath(docsPath) - const extraction = await extractMarkdownSearchSections( - `# ${input.title}\n${input.content}`, - { packageManager: input.packageManager }, - ) - const records: Array = [] - - for (const section of extraction.sections) { - const framework = getRecordFramework( - routeStyle, - pathFramework, - section.framework, - ) - - if (!framework) { - continue - } - - const urlWithAnchor = appendAnchor(url, section.anchor) - if (isExcludedFromSearchIndex(urlWithAnchor)) { - continue - } - - records.push({ - objectID: buildObjectId({ - libraryId: input.library.id, - version: input.version, - docsPath, - framework, - anchor: section.anchor, - index: records.length, - }), - url, - anchor: section.anchor, - urlWithAnchor, - library: input.library.id, - framework, - version: input.version, - routeStyle, - hierarchy: buildHierarchy(input.library.name, input.title, section), - content: section.content, - }) - } - - return records -} diff --git a/src/utils/searchRecords.ts b/src/utils/searchRecords.ts index 3ff72c43f..497da1487 100644 --- a/src/utils/searchRecords.ts +++ b/src/utils/searchRecords.ts @@ -1,32 +1,7 @@ -export type SearchRouteStyle = 'canonical' | 'framework-path' - -export type SearchHierarchy = { - lvl0?: string | null - lvl1?: string | null - lvl2?: string | null - lvl3?: string | null - lvl4?: string | null - lvl5?: string | null - lvl6?: string | null -} - -export type SearchRecord = { - objectID: string - url: string - anchor?: string - urlWithAnchor: string - library: string - framework: string - version: string - routeStyle: SearchRouteStyle - hierarchy: SearchHierarchy - content?: string -} - export type SearchHitFrameworkContext = { url: string framework?: string | null - routeStyle?: SearchRouteStyle | null + routeStyle?: string | null } function getPathname(url: string) { diff --git a/tests/filter-framework-content.test.ts b/tests/filter-framework-content.test.ts new file mode 100644 index 000000000..e7af89d5d --- /dev/null +++ b/tests/filter-framework-content.test.ts @@ -0,0 +1,71 @@ +import { extractFrameworksFromMarkdown } from '../src/utils/markdown/filterFrameworkContent' + +function assertEqual(actual: unknown, expected: unknown, message: string) { + const actualJson = JSON.stringify(actual) + const expectedJson = JSON.stringify(expected) + if (actualJson !== expectedJson) { + throw new Error(`${message}: expected ${expectedJson}, got ${actualJson}`) + } +} + +const frameworkBlockMarkdown = ` +Shared content. + + + +# React + +React content. + +# Solid + +Solid content. + +# Vue + +Vue content. + + +` + +assertEqual( + extractFrameworksFromMarkdown(frameworkBlockMarkdown), + ['react', 'solid', 'vue'], + 'framework block frameworks extracted', +) + +const packageManagerMarkdown = ` + +react: @tanstack/react-query +solid: @tanstack/solid-query +react: @tanstack/react-query-devtools + +` + +assertEqual( + extractFrameworksFromMarkdown(packageManagerMarkdown), + ['react', 'solid'], + 'package manager frameworks extracted and deduped', +) + +const mixedMarkdown = ` + +# Svelte +Svelte content. +# React +React content. + + + +vue: @tanstack/vue-form +svelte: @tanstack/svelte-form + +` + +assertEqual( + extractFrameworksFromMarkdown(mixedMarkdown), + ['svelte', 'react', 'vue'], + 'mixed framework sources preserve first-seen order', +) + +console.log('filter-framework-content tests passed') diff --git a/tests/markdown-search-extraction.test.ts b/tests/markdown-search-extraction.test.ts deleted file mode 100644 index d0998535d..000000000 --- a/tests/markdown-search-extraction.test.ts +++ /dev/null @@ -1,205 +0,0 @@ -import { extractMarkdownSearchSections } from '../src/utils/markdown/searchExtraction' - -function assertEqual(actual: unknown, expected: unknown, message: string) { - if (actual !== expected) { - throw new Error(`${message}: expected ${String(expected)}, got ${String(actual)}`) - } -} - -function assertDeepEqual( - actual: Array, - expected: Array, - message: string, -) { - const actualJson = JSON.stringify(actual) - const expectedJson = JSON.stringify(expected) - - if (actualJson !== expectedJson) { - throw new Error(`${message}: expected ${expectedJson}, got ${actualJson}`) - } -} - -function assertMatch(value: string, pattern: RegExp, message: string) { - if (!pattern.test(value)) { - throw new Error(`${message}: ${pattern} did not match ${value}`) - } -} - -function assertDoesNotMatch(value: string, pattern: RegExp, message: string) { - if (pattern.test(value)) { - throw new Error(`${message}: ${pattern} matched ${value}`) - } -} - -const frameworkMarkdown = ` -# Adapter guide - -Shared setup applies to every framework. - - - -# React - -React adapter only. - -## Client setup - -Use React hooks. - -# Solid - -Solid adapter only. - -## Client setup - -Use Solid signals. - - -` - -const frameworkResult = - await extractMarkdownSearchSections(frameworkMarkdown) -const sharedFrameworkText = frameworkResult.sections - .filter((section) => section.framework === 'all') - .map((section) => section.content) - .join(' ') -const reactFrameworkText = frameworkResult.sections - .filter((section) => section.framework === 'react') - .map((section) => section.content) - .join(' ') -const solidFrameworkText = frameworkResult.sections - .filter((section) => section.framework === 'solid') - .map((section) => section.content) - .join(' ') - -assertMatch(sharedFrameworkText, /Shared setup/, 'shared text indexed') -assertMatch(reactFrameworkText, /React adapter only/, 'react text indexed') -assertMatch(reactFrameworkText, /Use React hooks/, 'react heading content indexed') -assertDoesNotMatch( - reactFrameworkText, - /Solid adapter only/, - 'solid content excluded from react section', -) -assertMatch(solidFrameworkText, /Solid adapter only/, 'solid text indexed') -assertMatch(solidFrameworkText, /Use Solid signals/, 'solid heading content indexed') -assertDoesNotMatch( - solidFrameworkText, - /React adapter only/, - 'react content excluded from solid section', -) -assertDeepEqual( - frameworkResult.frameworks, - ['all', 'react', 'solid'], - 'framework list preserved', -) - -const packageManagerMarkdown = ` -# Install - - - -react: @tanstack/react-query @tanstack/react-query-devtools -solid: @tanstack/solid-query - - -` - -const packageManagerResult = await extractMarkdownSearchSections( - packageManagerMarkdown, -) -const reactInstall = packageManagerResult.sections.find( - (section) => section.framework === 'react', -) -const solidInstall = packageManagerResult.sections.find( - (section) => section.framework === 'solid', -) - -assertEqual(reactInstall?.anchor, 'install', 'install anchor captured') -assertEqual(reactInstall?.heading, 'Install', 'install heading captured') -assertMatch( - reactInstall?.content ?? '', - /npm i -D @tanstack\/react-query @tanstack\/react-query-devtools/, - 'react install command generated', -) -assertMatch( - solidInstall?.content ?? '', - /npm i -D @tanstack\/solid-query/, - 'solid install command generated', -) -assertDoesNotMatch( - reactInstall?.content ?? '', - /react:/, - 'react raw package marker removed', -) -assertDoesNotMatch( - solidInstall?.content ?? '', - /solid:/, - 'solid raw package marker removed', -) - -const tabsMarkdown = ` -# Examples - - - -\`\`\`tsx title="app.tsx" -export const app = true -\`\`\` - -\`\`\`css title="styles.css" -.root { - color: tomato; -} -\`\`\` - - - - - -## Alpha - -Alpha tab content. - -## Beta - -Beta tab content. - - -` - -const tabsResult = await extractMarkdownSearchSections(tabsMarkdown) -const tabsText = tabsResult.sections - .map((section) => section.content) - .join(' ') - -assertMatch(tabsText, /app\.tsx/, 'file tab name indexed') -assertMatch(tabsText, /export const app = true/, 'file tab code indexed') -assertMatch(tabsText, /styles\.css/, 'second file tab name indexed') -assertMatch(tabsText, /color: tomato/, 'second file tab code indexed') -assertMatch(tabsText, /Alpha/, 'default tab name indexed') -assertMatch(tabsText, /Alpha tab content/, 'default tab content indexed') -assertMatch(tabsText, /Beta/, 'second default tab name indexed') -assertMatch(tabsText, /Beta tab content/, 'second default tab content indexed') -assertDoesNotMatch(tabsText, /::start:tabs/, 'raw start marker removed') -assertDoesNotMatch(tabsText, /::end:tabs/, 'raw end marker removed') - -const headingMarkdown = ` -## Search Params - -Heading anchors should survive extraction. -` - -const headingResult = await extractMarkdownSearchSections(headingMarkdown) -const headingSection = headingResult.sections.find( - (section) => section.heading === 'Search Params', -) - -assertEqual(headingSection?.anchor, 'search-params', 'heading anchor captured') -assertEqual(headingSection?.level, 2, 'heading level captured') -assertMatch( - headingSection?.content ?? '', - /Heading anchors/, - 'heading content captured', -) - -console.log('markdown-search-extraction tests passed') diff --git a/tests/search-index-generation.test.ts b/tests/search-index-generation.test.ts deleted file mode 100644 index d35f5890c..000000000 --- a/tests/search-index-generation.test.ts +++ /dev/null @@ -1,184 +0,0 @@ -import { - buildSearchRecordsForMarkdown, - isExcludedFromSearchIndex, -} from '../src/utils/searchIndexGeneration' - -function assertEqual(actual: unknown, expected: unknown, message: string) { - if (actual !== expected) { - throw new Error(`${message}: expected ${String(expected)}, got ${String(actual)}`) - } -} - -function assertTruthy(value: unknown, message: string) { - if (!value) { - throw new Error(message) - } -} - -function assertMatch(value: string, pattern: RegExp, message: string) { - if (!pattern.test(value)) { - throw new Error(`${message}: ${pattern} did not match ${value}`) - } -} - -function assertDoesNotInclude( - values: Array, - expected: string, - message: string, -) { - if (values.includes(expected)) { - throw new Error(`${message}: found ${expected}`) - } -} - -const formLibrary: { id: 'form'; name: string } = { - id: 'form', - name: 'TanStack Form', -} - -const canonicalRecords = await buildSearchRecordsForMarkdown({ - library: formLibrary, - version: 'latest', - docsPath: 'overview', - title: 'Overview', - content: ` -Shared overview content. - - - -# React - -React-only canonical content. - -## Validation - -React validation content. - -# Solid - -Solid-only canonical content. - -## Validation - -Solid validation content. - - -`, -}) - -const canonicalFrameworks = canonicalRecords.map((record) => record.framework) -assertTruthy( - canonicalFrameworks.includes('all'), - 'canonical shared record should be framework all', -) -assertTruthy( - canonicalFrameworks.includes('react'), - 'canonical framework block should emit react record', -) -assertTruthy( - canonicalFrameworks.includes('solid'), - 'canonical framework block should emit solid record', -) - -const reactValidationRecord = canonicalRecords.find( - (record) => - record.framework === 'react' && record.hierarchy.lvl2 === 'Validation', -) -assertTruthy(reactValidationRecord, 'react validation record should exist') -assertEqual( - reactValidationRecord?.routeStyle, - 'canonical', - 'canonical route style preserved', -) -assertEqual( - reactValidationRecord?.url, - 'https://tanstack.com/form/latest/docs/overview', - 'canonical URL preserved', -) -assertEqual( - reactValidationRecord?.urlWithAnchor, - 'https://tanstack.com/form/latest/docs/overview#validation', - 'canonical URL anchor preserved', -) -assertMatch( - reactValidationRecord?.content ?? '', - /React validation content/, - 'react content indexed', -) - -const routerLibrary: { id: 'router'; name: string } = { - id: 'router', - name: 'TanStack Router', -} - -const frameworkPathRecords = await buildSearchRecordsForMarkdown({ - library: routerLibrary, - version: 'latest', - docsPath: 'framework/react/quick-start', - title: 'Quick Start', - content: ` -React quick-start content. - -## Install - -Install the React adapter. - - - -# React - -React nested content. - -# Solid - -Solid nested content should not index for the React path. - - -`, -}) - -assertTruthy(frameworkPathRecords.length > 0, 'framework path records emitted') -for (const record of frameworkPathRecords) { - assertEqual(record.framework, 'react', 'framework path scoped to URL framework') - assertEqual( - record.routeStyle, - 'framework-path', - 'framework path route style preserved', - ) - assertEqual( - record.url.startsWith( - 'https://tanstack.com/router/latest/docs/framework/react/quick-start', - ), - true, - 'framework path URL preserved', - ) -} - -const frameworkPathText = frameworkPathRecords - .map((record) => record.content ?? '') - .join(' ') -assertMatch(frameworkPathText, /React nested content/, 'react nested text indexed') -assertDoesNotInclude( - frameworkPathRecords.map((record) => record.framework), - 'solid', - 'solid framework not emitted for react framework path', -) -assertEqual( - isExcludedFromSearchIndex( - 'https://tanstack.com/intent/registry?tab=packages', - ), - true, - 'registry query URL excluded', -) -assertEqual( - isExcludedFromSearchIndex('https://tanstack.com/intent/registry/pkg/SKILL'), - true, - 'registry path URL excluded', -) -assertEqual( - isExcludedFromSearchIndex('https://tanstack.com/intent/latest/docs/registry'), - false, - 'intent docs registry page remains indexable', -) - -console.log('search-index-generation tests passed') From 71920e5fad083851946e229d0d85f16f07f45021 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 17:32:25 -0700 Subject: [PATCH 08/10] fix filtering --- src/components/SearchModal.tsx | 86 ++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/src/components/SearchModal.tsx b/src/components/SearchModal.tsx index 68e1c7324..5f550e633 100644 --- a/src/components/SearchModal.tsx +++ b/src/components/SearchModal.tsx @@ -12,7 +12,6 @@ import { SearchBox, Snippet, Configure, - useMenu, useInstantSearch, useInfiniteHits, } from 'react-instantsearch' @@ -171,13 +170,11 @@ const SearchFiltersContext = React.createContext<{ libraryItems: Array<{ value: string label: string - count: number isRefined: boolean }> frameworkItems: Array<{ value: string label: string - count: number isRefined: boolean }> } | null>(null) @@ -208,19 +205,6 @@ function SearchFiltersProvider({ children }: { children: React.ReactNode }) { const [selectedFramework, setSelectedFramework] = React.useState(getInitialFramework) - // Use useMenu just to get the list of available options and counts - // We do NOT use refine() because facet filters don't support OR logic - // Instead, we build custom filter strings via Configure component - const { items: rawLibraryItems } = useMenu({ - attribute: 'library', - limit: 50, - }) - - const { items: rawFrameworkItems } = useMenu({ - attribute: 'framework', - limit: 50, - }) - // Auto-select based on current page URL const pathname = useRouterState({ select: (state) => state.location.pathname, @@ -259,22 +243,56 @@ function SearchFiltersProvider({ children }: { children: React.ReactNode }) { } }, [pathname, getInitialFramework]) - // Sort items by their defined order and filter out "all" from display - const libraryItems = [...rawLibraryItems] - .filter((item) => item.value !== 'all') - .sort((a, b) => { - const aIndex = libraries.findIndex((l) => l.id === a.value) - const bIndex = libraries.findIndex((l) => l.id === b.value) - return aIndex - bIndex - }) - - const frameworkItems = [...rawFrameworkItems] - .filter((item) => item.value !== 'all') - .sort((a, b) => { - const aIndex = frameworkOptions.findIndex((f) => f.value === a.value) - const bIndex = frameworkOptions.findIndex((f) => f.value === b.value) - return aIndex - bIndex - }) + const searchableLibraries = React.useMemo( + () => + libraries.filter( + (library) => library.visible !== false && library.latestVersion, + ), + [], + ) + + const selectedLibraryInfo = React.useMemo( + () => searchableLibraries.find((library) => library.id === selectedLibrary), + [searchableLibraries, selectedLibrary], + ) + + const availableFrameworkValues = React.useMemo(() => { + if (selectedLibraryInfo) { + return selectedLibraryInfo.frameworks + } + + return Array.from( + new Set(searchableLibraries.flatMap((library) => library.frameworks)), + ) + }, [searchableLibraries, selectedLibraryInfo]) + + React.useEffect(() => { + if (!selectedLibraryInfo || !selectedFramework) { + return + } + + if ( + !selectedLibraryInfo.frameworks.some( + (framework) => framework === selectedFramework, + ) + ) { + setSelectedFramework('') + } + }, [selectedFramework, selectedLibraryInfo]) + + const libraryItems = searchableLibraries.map((library) => ({ + value: library.id, + label: library.id, + isRefined: library.id === selectedLibrary, + })) + + const frameworkItems = frameworkOptions + .filter((framework) => availableFrameworkValues.includes(framework.value)) + .map((framework) => ({ + value: framework.value, + label: framework.label, + isRefined: framework.value === selectedFramework, + })) // Wrapper functions that just update state (no Algolia refine) const selectLibrary = React.useCallback((value: string) => { @@ -641,9 +659,6 @@ function LibraryRefinement() { {item.label.toUpperCase()} - - ({item.count}) -
) })} @@ -714,7 +729,6 @@ function FrameworkRefinement() { {fw && {fw.label}} {capitalize(item.label)} - ({item.count})
) })} From 586d1818e3334ae39230d4f9085970c22ea4ce25 Mon Sep 17 00:00:00 2001 From: Sarah Gerrard Date: Wed, 29 Apr 2026 18:04:44 -0700 Subject: [PATCH 09/10] fix badges and sitemap --- src/components/SearchModal.tsx | 49 +++++++------ src/libraries/libraries.ts | 53 ++++++++++++-- src/libraries/types.ts | 2 +- src/utils/docs.functions.ts | 125 ++++++++++++++++++--------------- src/utils/documents.server.ts | 2 +- src/utils/sitemap.ts | 25 ++++--- 6 files changed, 164 insertions(+), 92 deletions(-) diff --git a/src/components/SearchModal.tsx b/src/components/SearchModal.tsx index 5f550e633..8e5e7c671 100644 --- a/src/components/SearchModal.tsx +++ b/src/components/SearchModal.tsx @@ -158,6 +158,27 @@ const searchClient = liteClient( 'FQ0DQ6MA3C', '10c34d6a5c89f6048cf644d601e65172', ) +const searchIndexName = 'tanstack-test' + +function buildSearchFilters({ + selectedLibrary, + selectedFramework, +}: { + selectedLibrary: string + selectedFramework: string +}) { + const filterParts: string[] = ['(version:latest OR version:all)'] + + if (selectedLibrary) { + filterParts.push(`library:${selectedLibrary}`) + } + + if (selectedFramework) { + filterParts.push(`(framework:${selectedFramework} OR framework:all)`) + } + + return filterParts.join(' AND ') +} // Context to share filter state between components const SearchFiltersContext = React.createContext<{ @@ -325,24 +346,6 @@ function SearchFiltersProvider({ children }: { children: React.ReactNode }) { function DynamicFilters() { const { selectedLibrary, selectedFramework } = useSearchFilters() - // Build filter string - // - Always filter to latest version OR "all" (for core pages) - // - When library selected: scope strictly to that library - // - When framework selected: include that framework OR "all" (for integration pages) - const filterParts: string[] = [] - - // Version filter: include latest OR "all" (core pages) - filterParts.push('(version:latest OR version:all)') - - if (selectedLibrary) { - filterParts.push(`library:${selectedLibrary}`) - } - - if (selectedFramework) { - // Include selected framework OR "all" (integration pages, core pages) - filterParts.push(`(framework:${selectedFramework} OR framework:all)`) - } - return ( ) } @@ -500,7 +503,8 @@ const Hit = ({ @@ -887,7 +891,7 @@ export function SearchModal() { Search TanStack docs - +
@@ -1020,7 +1024,8 @@ function SearchResults({ focusedIndex }: { focusedIndex: number }) { setSelectedLibrary(lib.id) }} className={twMerge( - 'px-2 py-1 text-xs font-black uppercase rounded text-white transition-opacity hover:opacity-80', + 'px-2 py-1 text-xs font-black uppercase rounded transition-opacity hover:opacity-80', + lib.badgeTextStyle ?? 'text-white', lib.bgStyle, )} > diff --git a/src/libraries/libraries.ts b/src/libraries/libraries.ts index a68835646..dbf759cac 100644 --- a/src/libraries/libraries.ts +++ b/src/libraries/libraries.ts @@ -32,7 +32,7 @@ export const query: LibrarySlim = { defaultDocs: 'framework/react/overview', sitemap: { includeLandingPage: true, - includeTopLevelDocsPages: true, + includeDocsPages: true, }, installPath: 'framework/$framework/installation', legacyPackages: ['react-query'], @@ -223,7 +223,7 @@ export const router: LibrarySlim = { docsRoot: 'docs/router', sitemap: { includeLandingPage: true, - includeTopLevelDocsPages: true, + includeDocsPages: true, }, legacyPackages: ['react-location'], hideCodesandboxUrl: true, @@ -292,7 +292,7 @@ export const start: LibrarySlim = { defaultDocs: 'framework/react/overview', sitemap: { includeLandingPage: true, - includeTopLevelDocsPages: true, + includeDocsPages: true, }, installPath: 'framework/$framework/build-from-scratch', embedEditor: 'codesandbox', @@ -338,7 +338,7 @@ export const table: LibrarySlim = { defaultDocs: 'introduction', sitemap: { includeLandingPage: true, - includeTopLevelDocsPages: true, + includeDocsPages: true, }, corePackageName: '@tanstack/table-core', legacyPackages: ['react-table'], @@ -411,7 +411,7 @@ export const form: LibrarySlim = { ogImage: 'https://github.com/tanstack/form/raw/main/media/repo-header.png', sitemap: { includeLandingPage: true, - includeTopLevelDocsPages: true, + includeDocsPages: true, }, } @@ -441,6 +441,10 @@ export const virtual: LibrarySlim = { ogImage: 'https://github.com/tanstack/query/raw/main/media/header.png', defaultDocs: 'introduction', legacyPackages: ['react-virtual'], + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const ranger: LibrarySlim = { @@ -455,6 +459,7 @@ export const ranger: LibrarySlim = { borderStyle: 'border-black/50 dark:border-gray-100/50', textStyle: 'text-black dark:text-gray-100', textColor: 'text-black dark:text-gray-100', + badgeTextStyle: 'text-white dark:text-gray-900', colorFrom: 'from-black dark:from-gray-100', colorTo: 'to-gray-600 dark:to-gray-400', accentColorFrom: 'from-blue-500', @@ -470,6 +475,10 @@ export const ranger: LibrarySlim = { availableVersions: ['v0'], scarfId: 'dd278e06-bb3f-420c-85c6-6e42d14d8f61', ogImage: 'https://github.com/tanstack/ranger/raw/main/media/headerv1.png', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const store: LibrarySlim = { @@ -496,6 +505,10 @@ export const store: LibrarySlim = { scarfId: '302d0fef-cb3f-43c6-b45c-f055b9745edb', ogImage: 'https://github.com/tanstack/store/raw/main/media/repo-header.png', defaultDocs: 'overview', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const pacer: LibrarySlim = { @@ -524,6 +537,10 @@ export const pacer: LibrarySlim = { scarfId: '302d0fef-cb3f-43c6-b45c-f055b9745edb', ogImage: 'https://github.com/tanstack/pacer/raw/main/media/repo-header.png', defaultDocs: 'overview', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const hotkeys: LibrarySlim = { @@ -551,6 +568,10 @@ export const hotkeys: LibrarySlim = { availableVersions: ['v0'], ogImage: 'https://github.com/tanstack/hotkeys/raw/main/media/repo-header.png', defaultDocs: 'overview', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const db: LibrarySlim = { @@ -579,6 +600,7 @@ export const db: LibrarySlim = { defaultDocs: 'overview', sitemap: { includeLandingPage: true, + includeDocsPages: true, }, } @@ -606,6 +628,10 @@ export const ai: LibrarySlim = { availableVersions: ['v0'], ogImage: 'https://github.com/tanstack/ai/raw/main/media/repo-header.png', defaultDocs: 'getting-started/overview', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const intent: LibrarySlim = { @@ -631,6 +657,10 @@ export const intent: LibrarySlim = { availableVersions: ['v0'], ogImage: 'https://github.com/tanstack/intent/raw/main/media/repo-header.png', defaultDocs: 'overview', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const config: LibrarySlim = { @@ -646,6 +676,7 @@ export const config: LibrarySlim = { borderStyle: 'border-black/50 dark:border-gray-100/50', textStyle: 'text-black dark:text-gray-100', textColor: 'text-black dark:text-gray-100', + badgeTextStyle: 'text-white dark:text-gray-900', colorFrom: 'from-black dark:from-gray-100', colorTo: 'to-gray-600 dark:to-gray-400', accentColorFrom: 'from-blue-500', @@ -660,6 +691,10 @@ export const config: LibrarySlim = { latestBranch: 'main', availableVersions: ['v0'], ogImage: 'https://github.com/tanstack/config/raw/main/media/repo-header.png', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const devtools: LibrarySlim = { @@ -691,6 +726,10 @@ export const devtools: LibrarySlim = { availableVersions: ['v0'], ogImage: 'https://github.com/tanstack/devtools/raw/main/media/repo-header.png', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const mcp: LibrarySlim = { @@ -751,6 +790,10 @@ export const cli: LibrarySlim = { availableVersions: ['v0'], ogImage: 'https://github.com/tanstack/cli/raw/main/media/repo-header.png', defaultDocs: 'overview', + sitemap: { + includeLandingPage: true, + includeDocsPages: true, + }, } export const libraries: LibrarySlim[] = [ diff --git a/src/libraries/types.ts b/src/libraries/types.ts index 12c9a6f4b..8002e950a 100644 --- a/src/libraries/types.ts +++ b/src/libraries/types.ts @@ -82,7 +82,7 @@ export type LibrarySlim = { visible?: boolean sitemap?: { includeLandingPage?: boolean - includeTopLevelDocsPages?: boolean + includeDocsPages?: boolean } } diff --git a/src/utils/docs.functions.ts b/src/utils/docs.functions.ts index 5b930be64..2b266c7dd 100644 --- a/src/utils/docs.functions.ts +++ b/src/utils/docs.functions.ts @@ -8,6 +8,7 @@ import { fetchApiContents, fetchRepoFile, isRecoverableGitHubContentError, + shouldUseLocalDocsFiles, } from '~/utils/documents.server' import { renderMarkdownToRsc } from './markdown' import { extractFrameworksFromMarkdown } from './markdown/filterFrameworkContent' @@ -116,11 +117,79 @@ function isDocsManifest(value: unknown): value is DocsManifest { ) } +async function buildDocsManifest({ + repo, + branch, + docsRoot, +}: { + repo: string + branch: string + docsRoot: string +}): Promise { + const nodes = await fetchApiContents(repo, branch, docsRoot) + + if (!nodes) { + return { paths: [], redirects: {} } + } + + const markdownFiles = flattenDocsNodes(nodes).filter((node) => + node.path.endsWith('.md'), + ) + const paths = new Set() + const redirects: Array = [] + + for (const node of markdownFiles) { + const canonicalPath = getCanonicalDocsPath(node.path, docsRoot) + + if (canonicalPath === null) { + continue + } + + paths.add(canonicalPath) + + const file = await fetchRepoFile(repo, branch, node.path) + + if (!file) { + continue + } + + const frontMatter = extractFrontMatter(file) + + for (const redirectFrom of frontMatter.data.redirectFrom ?? []) { + const normalizedRedirect = normalizeDocsRedirectPath( + redirectFrom, + docsRoot, + ) + + if (!normalizedRedirect || normalizedRedirect === canonicalPath) { + continue + } + + redirects.push({ + from: normalizedRedirect, + to: canonicalPath, + source: node.path, + }) + } + } + + return { + paths: Array.from(paths), + redirects: buildRedirectManifest(redirects, { + label: `docs redirects for ${repo}@${branch}:${docsRoot}`, + }), + } +} + export const fetchDocsManifest = createServerFn({ method: 'GET' }) .inputValidator(docsManifestInput) .handler(async ({ data }) => { const { repo, branch, docsRoot } = data + if (shouldUseLocalDocsFiles()) { + return buildDocsManifest({ repo, branch, docsRoot }) + } + return getCachedDocsArtifact({ repo, gitRef: branch, @@ -128,61 +197,7 @@ export const fetchDocsManifest = createServerFn({ method: 'GET' }) artifactType: 'docs-manifest', artifactKey: 'default', isValue: isDocsManifest, - build: async () => { - const nodes = await fetchApiContents(repo, branch, docsRoot) - - if (!nodes) { - return { paths: [], redirects: {} } - } - - const markdownFiles = flattenDocsNodes(nodes).filter((node) => - node.path.endsWith('.md'), - ) - const paths = new Set() - const redirects: Array = [] - - for (const node of markdownFiles) { - const canonicalPath = getCanonicalDocsPath(node.path, docsRoot) - - if (canonicalPath === null) { - continue - } - - paths.add(canonicalPath) - - const file = await fetchRepoFile(repo, branch, node.path) - - if (!file) { - continue - } - - const frontMatter = extractFrontMatter(file) - - for (const redirectFrom of frontMatter.data.redirectFrom ?? []) { - const normalizedRedirect = normalizeDocsRedirectPath( - redirectFrom, - docsRoot, - ) - - if (!normalizedRedirect || normalizedRedirect === canonicalPath) { - continue - } - - redirects.push({ - from: normalizedRedirect, - to: canonicalPath, - source: node.path, - }) - } - } - - return { - paths: Array.from(paths), - redirects: buildRedirectManifest(redirects, { - label: `docs redirects for ${repo}@${branch}:${docsRoot}`, - }), - } - }, + build: () => buildDocsManifest({ repo, branch, docsRoot }), }) }) diff --git a/src/utils/documents.server.ts b/src/utils/documents.server.ts index 0dff458d7..42833b569 100644 --- a/src/utils/documents.server.ts +++ b/src/utils/documents.server.ts @@ -43,7 +43,7 @@ export function isRecoverableGitHubContentError( ) } -function shouldUseLocalDocsFiles() { +export function shouldUseLocalDocsFiles() { if (process.env.NODE_ENV !== 'development') { return false } diff --git a/src/utils/sitemap.ts b/src/utils/sitemap.ts index 1529e5050..6b3be4f82 100644 --- a/src/utils/sitemap.ts +++ b/src/utils/sitemap.ts @@ -9,8 +9,6 @@ export type SitemapEntry = { lastModified?: string } -const MAX_DOCS_SITEMAP_DEPTH = 3 - const HIGH_VALUE_NON_DOC_PAGES = [ '/', '/blog', @@ -22,6 +20,14 @@ const HIGH_VALUE_NON_DOC_PAGES = [ '/paid-support', ] as const satisfies ReadonlyArray +const LOW_VALUE_DOCS_SITEMAP_SEGMENTS = new Set(['examples', 'community']) + +const LOW_VALUE_DOCS_SITEMAP_SLUGS = new Set([ + 'community-resources', + 'contributors', + 'npm-stats', +]) + function trimTrailingSlash(url: string) { return url.replace(/\/$/, '') } @@ -48,16 +54,19 @@ function getLibraryEntries(): Array { ) { return [] } - const basePath = `/${library.id}/latest` return [{ path: basePath }] }) } -function isTopLevelDocsSlug(slug: string) { - const segments = slug.split('/') +function isHighValueDocsSlug(slug: string) { + const segments = slug.split('/').filter(Boolean) - return segments.length <= MAX_DOCS_SITEMAP_DEPTH + return ( + segments.length > 0 && + !segments.some((segment) => LOW_VALUE_DOCS_SITEMAP_SEGMENTS.has(segment)) && + !LOW_VALUE_DOCS_SITEMAP_SLUGS.has(slug) + ) } async function getLibraryDocsEntries( @@ -66,7 +75,7 @@ async function getLibraryDocsEntries( if ( library.visible === false || !library.latestVersion || - library.sitemap?.includeTopLevelDocsPages !== true + library.sitemap?.includeDocsPages !== true ) { return [] } @@ -81,7 +90,7 @@ async function getLibraryDocsEntries( return manifest.paths .filter(Boolean) - .filter(isTopLevelDocsSlug) + .filter(isHighValueDocsSlug) .map((slug) => ({ path: `/${library.id}/latest/docs/${slug}`, })) From 5b18585f81a414b4d34fb5bcfe7e2154744f1e31 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 01:08:53 +0000 Subject: [PATCH 10/10] ci: apply automated fixes --- src/components/SearchModal.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/components/SearchModal.tsx b/src/components/SearchModal.tsx index 8e5e7c671..b47dd7e31 100644 --- a/src/components/SearchModal.tsx +++ b/src/components/SearchModal.tsx @@ -27,9 +27,7 @@ import { getStoredFrameworkPreference, usePersistFrameworkPreference, } from './FrameworkSelect' -import { - shouldPersistFrameworkForHit, -} from '~/utils/searchRecords' +import { shouldPersistFrameworkForHit } from '~/utils/searchRecords' /** * Safely decode HTML entities without using innerHTML. @@ -891,7 +889,10 @@ export function SearchModal() { Search TanStack docs - +