import { distance } from 'fastest-levenshtein';

// NOTE: this function is covered by tests in Search.test.ts. Re-run them after
// any change here.
/**
 * Breaks a free-text query into normalized search tokens.
 *
 * The query is trimmed and lower-cased, every character outside the allowed
 * set (`a-z`, the `À-ƿ` and `а-џ` ranges, digits, space, hyphen and comma) is
 * removed, and the remainder is split on spaces and commas. Empty pieces are
 * discarded, so the result never contains an empty string.
 *
 * @param query - Raw text to tokenize.
 * @returns The lower-case tokens in their original order.
 */
export const tokenize = (query: string) => {
    const normalized = query.trim().toLowerCase();

    // Drop everything that is not a letter from the supported ranges, a digit,
    // or one of the separator/hyphen characters.
    // reference: https://unicode-table.com/en/#045F
    const cleaned = normalized.replace(/[^a-zÀ-ƿа-џ0-9 \-,]/g, '');

    const tokens: string[] = [];
    for (const piece of cleaned.split(/[ ,]/g)) {
        const candidate = piece.trim();
        if (candidate.length > 0) {
            tokens.push(candidate);
        }
    }
    return tokens;
};

// this method is covered by tests in Search.test.ts. If you make any changes
// please make sure that tests pass as well
/**
 * Rates how well `text` matches a list of already-tokenized search terms.
 *
 * `text` is tokenized with `tokenize`, then every search token contributes:
 *  - 2   when it equals one of the text tokens (full match),
 *  - 1   when it is a substring of one of the text tokens (partial match),
 *  - 0.5 when it is longer than 4 characters and within a small Levenshtein
 *        distance of a text token or of that token's same-length prefix,
 *  - 0   otherwise.
 * A search token longer than 3 characters that matched in any way earns an
 * extra point.
 *
 * @param text - The text being searched.
 * @param lowerCaseTokens - Search tokens, expected to be lower-cased already
 *   (e.g. the output of `tokenize`).
 * @returns The sum of the per-token scores; 0 when nothing matched.
 */
export const score = (text: string, lowerCaseTokens: string[]): number => {
    const lowerCaseTokenizedText = tokenize(text);
    let totalScore = 0;
    for (const searchToken of lowerCaseTokens) {
        let thisWordScore = 0;
        if (lowerCaseTokenizedText.includes(searchToken)) {
            // full text match
            thisWordScore = 2;
        } else if (
            lowerCaseTokenizedText.some((tokenizedText) =>
                tokenizedText.includes(searchToken),
            )
        ) {
            // partial text match
            thisWordScore = 1;
        } else if (searchToken.length > 4) {
            // levenshtein fuzzy match
            // do it only for words longer than 4 characters. Also, years are
            // 4 characters, and we don't want fuzzy match for years.
            const minDiff = lowerCaseTokenizedText.reduce(
                (min, lowerCaseToken) =>
                    Math.min(
                        min,
                        distance(lowerCaseToken, searchToken),
                        distance(
                            // look into the beginning of the word with the same
                            // length as the input string
                            lowerCaseToken.slice(0, searchToken.length),
                            searchToken,
                        ),
                    ),
                // stays Infinity when the text has no tokens, so nothing
                // fuzzy-matches
                Infinity,
            );

            // We wanted to be very conservative with fuzzy search results and
            // include only fuzzy results where levenshtein distance is 1
            // character for short and medium size words, and 2 chars for longer
            // words.
            if (minDiff === 1 || (minDiff === 2 && searchToken.length > 7)) {
                // fuzzy match
                thisWordScore = 0.5;
            }
        }
        if (searchToken.length > 3 && thisWordScore !== 0) {
            // bonus for long words
            thisWordScore += 1;
        }
        totalScore += thisWordScore;
    }
    return totalScore;
};
