feat(search): custom tokenizer
This commit is contained in:
parent
7e93c8529a
commit
ebdb9de9ba
@ -19,18 +19,54 @@ export const feedback = `<a href="/feedback" class="feedback-footer">Made with
|
|||||||
export const search: DefaultTheme.Config['search'] = {
|
export const search: DefaultTheme.Config['search'] = {
|
||||||
options: {
|
options: {
|
||||||
miniSearch: {
|
miniSearch: {
|
||||||
|
options: {
|
||||||
|
tokenize: (text) => text.split(/[\n\r #%*,=/:;?[\]{}()&]+/u), // simplified charset: removed [-_.@] and non-english chars (diacritics etc.)
|
||||||
|
processTerm: (term, fieldName) => {
|
||||||
|
term = term
|
||||||
|
.trim()
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/^\.+/, '')
|
||||||
|
.replace(/\.+$/, '')
|
||||||
|
const stopWords = [
|
||||||
|
'frontmatter',
|
||||||
|
'$frontmatter.synopsis',
|
||||||
|
'and',
|
||||||
|
'about',
|
||||||
|
'but',
|
||||||
|
'now',
|
||||||
|
'the',
|
||||||
|
'with',
|
||||||
|
'you'
|
||||||
|
]
|
||||||
|
if (term.length < 2 || stopWords.includes(term)) return false
|
||||||
|
|
||||||
|
if (fieldName === 'text') {
|
||||||
|
const parts = term.split('.')
|
||||||
|
if (parts.length > 1) {
|
||||||
|
const newTerms = [term, ...parts]
|
||||||
|
.filter((t) => t.length >= 2)
|
||||||
|
.filter((t) => !stopWords.includes(t))
|
||||||
|
return newTerms
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return term
|
||||||
|
}
|
||||||
|
},
|
||||||
searchOptions: {
|
searchOptions: {
|
||||||
combineWith: 'AND',
|
combineWith: 'AND',
|
||||||
fuzzy: false,
|
fuzzy: true,
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
boostDocument: (
|
boostDocument: (
|
||||||
_,
|
documentId,
|
||||||
term,
|
term,
|
||||||
storedFields: Record<string, string | string[]>
|
storedFields: Record<string, string | string[]>
|
||||||
) => {
|
) => {
|
||||||
const titles = (storedFields?.titles as string[])
|
const titles = (storedFields?.titles as string[])
|
||||||
.filter((t) => Boolean(t))
|
.filter((t) => Boolean(t))
|
||||||
.map((t) => t.toLowerCase())
|
.map((t) => t.toLowerCase())
|
||||||
|
// Downrank posts
|
||||||
|
if (documentId.match(/\/posts/)) return -5
|
||||||
|
|
||||||
// Uprate if term appears in titles. Add bonus for higher levels (i.e. lower index)
|
// Uprate if term appears in titles. Add bonus for higher levels (i.e. lower index)
|
||||||
const titleIndex =
|
const titleIndex =
|
||||||
titles
|
titles
|
||||||
|
Loading…
Reference in New Issue
Block a user