/*
 * Decompiled with CFR 0.152.
 */
package com.teamscale.index.code_clones.normalization;

import com.teamscale.index.code_clones.core.Unit;
import com.teamscale.index.code_clones.core.WiaWordUnit;
import eu.cqse.check.framework.core.WorkItemLocationBuilder;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.TokenStreamTextUtils;
import eu.cqse.check.framework.util.tokens.TokenPattern;
import eu.cqse.check.framework.util.tokens.TokenPatternMatch;
import eu.cqse.check.framework.util.tokens.WiaTokenUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.conqat.engine.commons.findings.location.ManualTestCaseTextRegionLocation;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.string.LineOffsetConverter;
import org.conqat.lib.commons.string.StringUtils;

/**
 * Turns the token stream of a work item that describes a manual test case into
 * a flat list of word-level {@link Unit}s.
 *
 * <p>
 * Every test step is expected to consist of an {@code action} and a
 * {@code check} string literal. The text of each literal is split into words;
 * each word is stripped of non-alphanumeric characters and lower-cased; empty
 * results and stop words are dropped; and each remaining word is prefixed with
 * a marker that records whether it came from the action or the check field.
 */
public class ManualTestCloneNormalizer {
    private static final Logger LOGGER = LogManager.getLogger();

    /** Splits a text at word boundaries (Unicode-aware character classes). */
    private static final Pattern WORD_SPLIT_PATTERN = Pattern.compile("\\b+", Pattern.UNICODE_CHARACTER_CLASS);

    /** Index of the match group that holds the action string literal. */
    private static final int ACTION_GROUP = 0;

    /** Index of the match group that holds the check string literal. */
    private static final int CHECK_GROUP = 1;

    /**
     * Matches one test step: {@code action = "<literal>"} followed (after
     * skipping up to the next key token) by {@code check = "<literal>"}. The two
     * string literals are captured in {@link #ACTION_GROUP} and
     * {@link #CHECK_GROUP}.
     */
    private static final TokenPattern TEST_STEP_DECOMPOSE_PATTERN = new TokenPattern()
            .regex("^action$")
            .sequence(new Object[]{ETokenType.EQ})
            .sequence(new Object[]{ETokenType.STRING_LITERAL})
            .group(ACTION_GROUP)
            .skipUntil(new Object[]{ETokenType.KEY})
            .regex("^check$")
            .sequence(new Object[]{ETokenType.EQ})
            .sequence(new Object[]{ETokenType.STRING_LITERAL})
            .group(CHECK_GROUP);

    /** Matches every character that is not alphanumeric (Unicode-aware). */
    private static final Pattern CHARACTER_REMOVAL_PATTERN = Pattern.compile("[^\\p{Alnum}]", Pattern.UNICODE_CHARACTER_CLASS);

    /** Lower-case words that are ignored when creating units. */
    private static final Set<String> STOP_WORDS = CollectionUtils.asHashSet(new String[]{
            "a", "about", "also", "although", "among", "amongst", "amoungst", "an", "and", "another",
            "anyhow", "anyone", "anything", "anyway", "anywhere", "around", "as", "at", "besides", "but",
            "by", "he", "hence", "her", "hereafter", "hereby", "herein", "hereupon", "hers", "herself",
            "him", "himself", "his", "how", "however", "in", "indeed", "into", "it", "its",
            "itself", "many", "me", "meanwhile", "mine", "moreover", "my", "myself", "namely", "never",
            "nevertheless", "nobody", "none", "noone", "nothing", "nowhere", "of", "off", "on", "only",
            "onto", "or", "otherwise", "our", "ours", "ourselves", "perhaps", "please", "rather", "seem",
            "seemed", "seeming", "seems", "serious", "several", "she", "since", "sincere", "so", "somehow",
            "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "than", "that", "the",
            "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein",
            "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus",
            "to", "too", "un", "until", "up", "upon", "us", "very", "via", "we",
            "well", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby",
            "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole",
            "whom", "whose", "why", "with", "within", "without", "would", "yet", "you", "your",
            "yours", "yourself", "yourselves"});

    /**
     * Removes all non-alphanumeric characters from the word and lower-cases the
     * remainder.
     *
     * <p>
     * Lower-casing uses {@link Locale#ROOT} so that the result does not depend
     * on the JVM's default locale (e.g. a Turkish locale would otherwise map
     * {@code "I"} to a dotless {@code "ı"}, defeating the stop-word lookup and
     * producing different units on different machines).
     */
    private static String normalizeWord(String word) {
        return CHARACTER_REMOVAL_PATTERN.matcher(word).replaceAll("").toLowerCase(Locale.ROOT);
    }

    /** Returns whether the (already normalized) word is a stop word. */
    private static boolean isStopWord(String word) {
        return STOP_WORDS.contains(word);
    }

    /** Returns whether the (already normalized) word is empty or a stop word. */
    private static boolean isIgnoredWord(String word) {
        return StringUtils.isEmpty(word) || isStopWord(word);
    }

    /** Splits the sentence at word boundaries. */
    private static String[] splitIntoWords(String sentence) {
        return WORD_SPLIT_PATTERN.split(sentence);
    }

    /**
     * Prepends {@code $ACTION$} or {@code $CHECK$} to the word, depending on
     * which field of the test step it originates from.
     */
    private static String addFieldPrefix(String word, boolean isAction) {
        if (isAction) {
            return "$ACTION$" + word;
        }
        return "$CHECK$" + word;
    }

    /** Finds all non-overlapping test step matches in the token stream. */
    private static List<TokenPatternMatch> findTestSteps(List<IToken> tokens) {
        return TEST_STEP_DECOMPOSE_PATTERN.findNonOverlappingMatches(tokens);
    }

    /**
     * Creates one {@link WiaWordUnit} per non-ignored word of the given string
     * literal token and appends it to {@code units}.
     *
     * @param uniformPath
     *            uniform path passed on to the created locations
     * @param units
     *            list the new units are appended to
     * @param indexInElement
     *            index to assign to the first created unit; incremented for each
     *            further unit
     * @param itemToken
     *            string literal token holding the action or check text
     * @param workItemId
     *            id of the work item, passed on to the created locations
     * @param stepIndex
     *            index of the test step the token belongs to
     * @param isAction
     *            {@code true} if the token is the action of the step,
     *            {@code false} if it is the check
     * @return the index to use for the next unit
     */
    private static int addUnitsForItem(String uniformPath, List<Unit> units, int indexInElement, IToken itemToken, String workItemId, int stepIndex, boolean isAction) {
        // NOTE(review): the +1 on the offset presumably skips the opening quote
        // and the +1 on the line presumably converts to a 1-based line number -
        // confirm against WorkItemLocationBuilder.
        int rawStartOffset = itemToken.getOffset() + 1;
        int rawStartLine = itemToken.getLineNumber() + 1;
        String tokenText = WiaTokenUtils.removeStringQuotes(itemToken.getText());
        LineOffsetConverter lineConverter = new LineOffsetConverter(tokenText);

        // Offset of the current word within the unquoted token text. It is
        // advanced for ignored parts as well, so that offsets stay accurate.
        int currentFieldOffset = 0;
        for (String word : splitIntoWords(tokenText)) {
            String normalizedWord = normalizeWord(word);
            if (!isIgnoredWord(normalizedWord)) {
                // inclusive end offset of the word
                int fieldEndOffset = currentFieldOffset + word.length() - 1;
                ManualTestCaseTextRegionLocation location = WorkItemLocationBuilder.forTestStepRaw(uniformPath, workItemId, stepIndex, rawStartOffset, rawStartLine, currentFieldOffset, fieldEndOffset, lineConverter.getLine(currentFieldOffset), lineConverter.getLine(fieldEndOffset), isAction);
                units.add(new WiaWordUnit(addFieldPrefix(normalizedWord, isAction), word, indexInElement++, location));
            }
            currentFieldOffset += word.length();
        }
        return indexInElement;
    }

    /**
     * Creates the normalized word units for all test steps found in the given
     * token stream. For each step, the units of the action are added before the
     * units of the check.
     *
     * @param tokens
     *            token stream of the work item
     * @param uniformPath
     *            uniform path of the work item, used for locations and logging
     * @return the created units; an empty list (after logging an error) if no
     *         internal id can be determined from the token stream
     */
    public static List<Unit> getNormalizedUnits(List<IToken> tokens, String uniformPath) {
        Optional<String> workItemId = WiaTokenUtils.getInternalId(tokens);
        if (workItemId.isEmpty()) {
            LOGGER.error("Provided token stream for '{}' is not a valid work item representation", uniformPath);
            return CollectionUtils.emptyList();
        }
        String internalId = workItemId.get();

        int indexInElement = 0;
        int stepIndex = 0;
        List<Unit> units = new ArrayList<>();
        for (TokenPatternMatch match : findTestSteps(tokens)) {
            List<IToken> actionTokens = match.groupTokens(ACTION_GROUP);
            List<IToken> checkTokens = match.groupTokens(CHECK_GROUP);
            CCSMAssert.isTrue(actionTokens.size() == 1, () -> "Expected exactly one action token, but found %d: %s".formatted(actionTokens.size(), TokenStreamTextUtils.concatTokenTexts(actionTokens, " ")));
            CCSMAssert.isTrue(checkTokens.size() == 1, () -> "Expected exactly one check token, but found %d: %s".formatted(checkTokens.size(), TokenStreamTextUtils.concatTokenTexts(checkTokens, " ")));
            indexInElement = addUnitsForItem(uniformPath, units, indexInElement, actionTokens.get(0), internalId, stepIndex, true);
            indexInElement = addUnitsForItem(uniformPath, units, indexInElement, checkTokens.get(0), internalId, stepIndex, false);
            ++stepIndex;
        }
        return units;
    }
}

