/* $OpenBSD$ */ /* * Copyright (c) 2026 Nicholas Marriott * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include "tmux.h" /* * Fuzzy matching in the style of fzf. The pattern is split into groups by | * and each group is split on spaces into terms. A row matches if any group * matches; within a group all positive terms must match and all inverse terms * must not match. * * Plain positive terms are fuzzy subsequences. A leading ' makes a term an * exact substring match, ^ anchors a term at the start and $ anchors it at * the end. A leading ! inverts the term. Plain inverse terms are exact * substring matches rather than inverse fuzzy matches, like fzf. * * Both the pattern and the text are UTF-8. The text may contain tmux style * directives (#[...]); these and their contents are invisible to matching and * occupy no columns, but align= styles do move the surrounding text and are * accounted for exactly as format_draw lays it out (the no-list layout, see * format_draw_none). Matching is smart-case: case is ignored unless the pattern * contains an uppercase character (ASCII case folding only; other characters * are compared exactly by their UTF-8 data). * * On a match a bitstr_t of the requested display width is returned with a bit * set for every column occupied by a matched character, so the caller can * highlight them; NULL is returned if there is no match. A cheap fzf-style * score (matches at the start, after word boundaries and in contiguous runs * score higher) is also produced so callers can rank best-match-first. */ #define FUZZY_BONUS_EXACT 1000 #define FUZZY_BONUS_PREFIX 200 #define FUZZY_BONUS_SUFFIX 100 #define FUZZY_BONUS_START 12 #define FUZZY_BONUS_BOUNDARY 8 #define FUZZY_BONUS_CONSECUTIVE 6 #define FUZZY_PENALTY_LEADING 1 #define FUZZY_PENALTY_LEADING_MAX 10 #define FUZZY_PENALTY_GAP 1 /* A single visible character of the text. */ struct fuzzy_char { enum style_align align; struct utf8_data ud; /* original UTF-8 data */ u_int width; /* display width */ u_int offset; /* within its alignment */ }; /* One parsed query term. */ struct fuzzy_term { int inverse; int exact; int prefix; int suffix; const char *text; size_t len; }; /* Is this character a word boundary, so a match after it scores higher? */ static int fuzzy_is_boundary(const struct utf8_data *ud) { static const char *boundary = " -_/.:"; if (ud->size != 1) return (0); return (strchr(boundary, ud->data[0]) != NULL); } /* * Compare two characters, folding ASCII case if wanted. UTF-8 is compared * directly without case folding. */ static int fuzzy_char_equal(const struct utf8_data *a, const struct utf8_data *b, int fold) { if (fold && a->size == 1 && b->size == 1 && a->data[0] < 0x80 && b->data[0] < 0x80) return (tolower(a->data[0]) == tolower(b->data[0])); return (a->size == b->size && memcmp(a->data, b->data, a->size) == 0); } /* Map a style alignment onto one of the four layout columns. */ static enum style_align fuzzy_align(enum style_align align) { if (align == STYLE_ALIGN_DEFAULT) return (STYLE_ALIGN_LEFT); return (align); } /* Add a visible character to the array, updating the alignment width. */ static void fuzzy_add(struct fuzzy_char **cs, u_int *ncs, u_int *alloc, enum style_align a, const struct utf8_data *ud, u_int *widths) { struct fuzzy_char *fc; if (*ncs == *alloc) { *alloc = (*alloc == 0) ? 64 : *alloc * 2; *cs = xreallocarray(*cs, *alloc, sizeof **cs); } fc = &(*cs)[(*ncs)++]; fc->align = a; memcpy(&fc->ud, ud, sizeof fc->ud); fc->width = ud->width; fc->offset = widths[a]; widths[a] += ud->width; } /* Decode a character as UTF-8. */ static const char * fuzzy_decode_one(const char *cp, const char *end, struct utf8_data *ud) { enum utf8_state more; const char *start = cp; if ((more = utf8_open(ud, (u_char)*cp)) == UTF8_MORE) { while (++cp != end && more == UTF8_MORE) more = utf8_append(ud, (u_char)*cp); if (more == UTF8_DONE) return (cp); cp = start; } utf8_set(ud, (u_char)*cp); return (cp + 1); } /* * Scan the text into an array of visible characters, skipping styles and * recording the alignment and intra-alignment offset of each. Returns the * array and its length and fills in the per-alignment widths. */ static struct fuzzy_char * fuzzy_scan(const char *text, u_int *ncs, u_int *widths) { struct fuzzy_char *cs = NULL; u_int alloc = 0, n, leading, i; enum style_align current = STYLE_ALIGN_LEFT; struct style sy; const char *cp = text, *textend = text + strlen(text); const char *end; struct utf8_data ud, hash, bracket; char *tmp; *ncs = 0; memset(widths, 0, sizeof *widths * (STYLE_ALIGN_ABSOLUTE_CENTRE + 1)); style_set(&sy, &grid_default_cell); utf8_set(&hash, '#'); utf8_set(&bracket, '['); while (*cp != '\0') { /* Handle a run of #s, which may introduce a style. */ if (*cp == '#') { for (n = 0; cp[n] == '#'; n++) /* nothing */; if (cp[n] != '[') { /* Escaped #s: ##->#, so half (rounded up). */ leading = (n % 2 == 0) ? n / 2 : n / 2 + 1; for (i = 0; i < leading; i++) { fuzzy_add(&cs, ncs, &alloc, current, &hash, widths); } cp += n; continue; } /* Even count: all #s escaped, the [ is literal. */ for (i = 0; i < n / 2; i++) fuzzy_add(&cs, ncs, &alloc, current, &hash, widths); if (n % 2 == 0) { fuzzy_add(&cs, ncs, &alloc, current, &bracket, widths); cp += n + 1; continue; } /* Odd count: this is a style, find and parse it. */ end = format_skip(cp + n + 1, "]"); if (end == NULL) break; tmp = xstrndup(cp + n + 1, end - (cp + n + 1)); if (style_parse(&sy, &grid_default_cell, tmp) == 0) current = fuzzy_align(sy.align); free(tmp); cp = end + 1; continue; } /* Decode one character, multibyte or single byte. */ cp = fuzzy_decode_one(cp, textend, &ud); /* * Skip non-printable single bytes (control characters and raw * bytes left over from a failed decode); keep printable ASCII * and any decoded UTF-8. */ if (ud.size == 1 && (ud.data[0] <= 0x1f || ud.data[0] >= 0x7f)) continue; fuzzy_add(&cs, ncs, &alloc, current, &ud, widths); } return (cs); } /* * Work out the display column of a visible character given the trimmed widths * and start columns of each alignment. Returns 0 and sets the column if the * character is visible, otherwise returns -1. */ static int fuzzy_column(const struct fuzzy_char *fc, const u_int *start, const u_int *src, const u_int *vis, u_int *column) { enum style_align a = fc->align; if (fc->offset < src[a] || fc->offset >= src[a] + vis[a]) return (-1); *column = start[a] + (fc->offset - src[a]); return (0); } /* Decode a UTF-8 term into an array of characters. */ static u_int fuzzy_decode(const char *tok, size_t len, struct utf8_data *out) { const char *cp = tok, *end = tok + len; u_int n = 0; while (cp != end) cp = fuzzy_decode_one(cp, end, &out[n++]); return (n); } /* Add the score for a fuzzy token matched at the given positions. */ static int fuzzy_score_positions(const u_int *pos, u_int npos, const struct fuzzy_char *cs) { u_int i, gap, span; int score = 0; if (npos == 0) return (0); if (pos[0] == 0) score += FUZZY_BONUS_START; else { if (fuzzy_is_boundary(&cs[pos[0] - 1].ud)) score += FUZZY_BONUS_BOUNDARY; if (pos[0] < FUZZY_PENALTY_LEADING_MAX) score -= pos[0] * FUZZY_PENALTY_LEADING; else { score -= FUZZY_PENALTY_LEADING_MAX * FUZZY_PENALTY_LEADING; } } for (i = 1; i < npos; i++) { if (pos[i] == pos[i - 1] + 1) score += FUZZY_BONUS_CONSECUTIVE; else if (fuzzy_is_boundary(&cs[pos[i] - 1].ud)) score += FUZZY_BONUS_BOUNDARY; } span = pos[npos - 1] - pos[0] + 1; gap = span - npos; score -= gap * FUZZY_PENALTY_GAP; return (score); } /* * Match a token as a subsequence of the visible characters. Returns if the * token matches. */ static int fuzzy_match_fuzzy(const struct utf8_data *tok, u_int toklen, struct fuzzy_char *cs, u_int ncs, int fold, int *score, char *matched) { u_int pi, ci, *pos; int found, value; if (toklen == 0 || ncs == 0) return (0); pos = xcalloc(toklen, sizeof *pos); /* First find a subsequence from the start. */ ci = 0; for (pi = 0; pi < toklen; pi++) { while (ci != ncs && !fuzzy_char_equal(&tok[pi], &cs[ci].ud, fold)) ci++; if (ci == ncs) { free(pos); return (0); } pos[pi] = ci++; } /* Then compact it backwards to prefer a shorter span. */ ci = pos[toklen - 1]; for (pi = toklen; pi > 0; pi--) { found = 0; for (;;) { if (fuzzy_char_equal(&tok[pi - 1], &cs[ci].ud, fold)) { pos[pi - 1] = ci; found = 1; break; } if (ci == 0) break; ci--; } if (!found) { free(pos); return (0); } if (pi != 1) ci--; } value = fuzzy_score_positions(pos, toklen, cs); *score += value; for (pi = 0; pi < toklen; pi++) matched[pos[pi]] = 1; free(pos); return (1); } /* Score an exact, prefix or suffix match. */ static int fuzzy_score_exact(u_int start, u_int toklen, u_int ncs, const struct fuzzy_char *cs, int prefix, int suffix) { int score; score = FUZZY_BONUS_EXACT + toklen * FUZZY_BONUS_CONSECUTIVE; if (prefix) score += FUZZY_BONUS_PREFIX; if (suffix) score += FUZZY_BONUS_SUFFIX; if (start == 0) score += FUZZY_BONUS_START; else if (fuzzy_is_boundary(&cs[start - 1].ud)) score += FUZZY_BONUS_BOUNDARY; if (start < FUZZY_PENALTY_LEADING_MAX) score -= start * FUZZY_PENALTY_LEADING; else score -= FUZZY_PENALTY_LEADING_MAX * FUZZY_PENALTY_LEADING; if (!prefix && !suffix) score -= ncs - (start + toklen); return (score); } /* Match an exact, prefix or suffix term against the visible characters. */ static int fuzzy_match_exact(const struct utf8_data *tok, u_int toklen, struct fuzzy_char *cs, u_int ncs, int fold, int prefix, int suffix, int *score, char *matched) { u_int start, end, i, j, best = 0; int ok, found = 0, value, bestscore = 0; if (toklen == 0 || toklen > ncs) return (0); if (prefix && suffix) { if (toklen != ncs) return (0); start = 0; end = 1; } else if (prefix) { start = 0; end = 1; } else if (suffix) { start = ncs - toklen; end = start + 1; } else { start = 0; end = ncs - toklen + 1; } for (i = start; i < end; i++) { ok = 1; for (j = 0; j < toklen; j++) { if (!fuzzy_char_equal(&tok[j], &cs[i + j].ud, fold)) { ok = 0; break; } } if (!ok) continue; value = fuzzy_score_exact(i, toklen, ncs, cs, prefix, suffix); if (!found || value > bestscore) { found = 1; best = i; bestscore = value; } } if (!found) return (0); *score += bestscore; if (matched != NULL) { for (i = 0; i < toklen; i++) matched[best + i] = 1; } return (1); } /* Parse one term. */ static int fuzzy_parse_term(const char *start, const char *end, struct fuzzy_term *term) { memset(term, 0, sizeof *term); if (start == end) return (0); if (*start == '!') { term->inverse = 1; start++; } if (start == end) return (0); if (*start == '\'') { term->exact = 1; start++; } else if (*start == '^') { term->exact = 1; term->prefix = 1; start++; } if (start == end) return (0); if (end[-1] == '$') { term->exact = 1; term->suffix = 1; end--; } if (start == end) return (0); if (term->inverse) term->exact = 1; term->text = start; term->len = end - start; return (1); } /* Match one parsed term. */ static int fuzzy_match_term(const struct fuzzy_term *term, struct utf8_data *tok, struct fuzzy_char *cs, u_int ncs, int fold, int *score, char *matched) { u_int toklen; int value = 0, matched_term; toklen = fuzzy_decode(term->text, term->len, tok); if (term->exact) { matched_term = fuzzy_match_exact(tok, toklen, cs, ncs, fold, term->prefix, term->suffix, &value, term->inverse ? NULL : matched); } else { matched_term = fuzzy_match_fuzzy(tok, toklen, cs, ncs, fold, &value, term->inverse ? NULL : matched); } if (term->inverse) return (!matched_term); if (!matched_term) return (0); *score += value; return (1); } /* Match one AND group of terms. */ static int fuzzy_match_group(const char *start, const char *end, struct utf8_data *tok, struct fuzzy_char *cs, u_int ncs, int fold, int *score, char *matched) { const char *cp = start, *sp; struct fuzzy_term term; int any = 0; *score = 0; while (cp != end) { while (cp != end && *cp == ' ') cp++; if (cp == end) break; sp = cp; while (cp != end && *cp != ' ') cp++; if (!fuzzy_parse_term(sp, cp, &term)) return (0); any = 1; if (!fuzzy_match_term(&term, tok, cs, ncs, fold, score, matched)) return (0); } return (any); } /* * Fuzzy match pattern against text, which is drawn into a region of the given * display width. Returns a bitstr_t of width bits with a bit set for each * column occupied by a matched character, or NULL if there is no match. A * higher returned score is better. */ bitstr_t * fuzzy_match(const char *pattern, const char *text, u_int width, u_int *score) { struct fuzzy_char *cs; char *matched = NULL, *best = NULL, *groupmatched; struct utf8_data *tok; bitstr_t *mask; u_int ncs, i, j, column; u_int widths[STYLE_ALIGN_ABSOLUTE_CENTRE + 1]; u_int start[STYLE_ALIGN_ABSOLUTE_CENTRE + 1]; u_int src[STYLE_ALIGN_ABSOLUTE_CENTRE + 1]; u_int vis[STYLE_ALIGN_ABSOLUTE_CENTRE + 1]; u_int wl, wc, wr, wa; const char *cp, *sp; int bestscore = 0, groupscore, found = 0, fold; if (width == 0) return (NULL); /* An empty query matches everything, with nothing highlighted. */ for (cp = pattern; *cp == ' ' || *cp == '|'; cp++) /* nothing */; if (*cp == '\0') { if (score != NULL) *score = 0; return (bit_alloc(width)); } /* Smart-case: fold unless the pattern has an uppercase character. */ fold = 1; for (cp = pattern; *cp != '\0'; cp++) { if (*cp >= 'A' && *cp <= 'Z') { fold = 0; break; } } /* Scan the text into visible characters. */ cs = fuzzy_scan(text, &ncs, widths); matched = xcalloc(ncs == 0 ? 1 : ncs, sizeof *matched); best = xcalloc(ncs == 0 ? 1 : ncs, sizeof *best); tok = xreallocarray(NULL, strlen(pattern) + 1, sizeof *tok); /* Match each |-separated group and keep the best-scoring one. */ cp = pattern; while (*cp != '\0') { while (*cp == ' ' || *cp == '|') cp++; if (*cp == '\0') break; sp = cp; while (*cp != '\0' && *cp != '|') cp++; memset(matched, 0, ncs == 0 ? 1 : ncs); groupmatched = matched; if (fuzzy_match_group(sp, cp, tok, cs, ncs, fold, &groupscore, groupmatched)) { if (!found || groupscore > bestscore) { found = 1; bestscore = groupscore; memcpy(best, matched, ncs == 0 ? 1 : ncs); } } } free(tok); if (!found) { free(best); free(matched); free(cs); return (NULL); } /* * Work out the trimmed widths and start columns of each alignment, * mirroring format_draw_none. */ wl = widths[STYLE_ALIGN_LEFT]; wc = widths[STYLE_ALIGN_CENTRE]; wr = widths[STYLE_ALIGN_RIGHT]; wa = widths[STYLE_ALIGN_ABSOLUTE_CENTRE]; while (wl + wc + wr > width) { if (wc > 0) wc--; else if (wr > 0) wr--; else wl--; } if (wa > width) wa = width; start[STYLE_ALIGN_LEFT] = 0; src[STYLE_ALIGN_LEFT] = 0; vis[STYLE_ALIGN_LEFT] = wl; start[STYLE_ALIGN_RIGHT] = width - wr; src[STYLE_ALIGN_RIGHT] = widths[STYLE_ALIGN_RIGHT] - wr; vis[STYLE_ALIGN_RIGHT] = wr; start[STYLE_ALIGN_CENTRE] = wl + ((width - wr) - wl) / 2 - wc / 2; src[STYLE_ALIGN_CENTRE] = widths[STYLE_ALIGN_CENTRE] / 2 - wc / 2; vis[STYLE_ALIGN_CENTRE] = wc; start[STYLE_ALIGN_ABSOLUTE_CENTRE] = (width - wa) / 2; src[STYLE_ALIGN_ABSOLUTE_CENTRE] = 0; vis[STYLE_ALIGN_ABSOLUTE_CENTRE] = wa; /* Set a bit for each column of each matched character. */ mask = bit_alloc(width); for (i = 0; i < ncs; i++) { if (!best[i]) continue; if (fuzzy_column(&cs[i], start, src, vis, &column) != 0) continue; for (j = 0; j < cs[i].width && column + j < width; j++) bit_set(mask, column + j); } free(best); free(matched); free(cs); if (score != NULL) *score = (bestscore < 0) ? 0 : (u_int)bestscore; return (mask); }