/*
 * Decompiled with CFR 0.152.
 */
package eu.cqse.check.framework.scanner.ambiguous_language;

import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.ambiguous_language.CLikeLanguageDetector;
import eu.cqse.check.framework.scanner.ambiguous_language.KubernetesLanguageDetector;
import eu.cqse.check.framework.scanner.ambiguous_language.MatlabLanguageDetector;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.conqat.engine.resource.util.UniformPathUtils;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.string.StringUtils;
import org.conqat.lib.commons.utils.UtilsInstantiationNotSupportedException;
import org.jetbrains.annotations.VisibleForTesting;
import org.jspecify.annotations.NonNull;

/**
 * Static helpers that pick the {@link ELanguage} for a file based on its uniform path, the
 * languages configured by the caller and, where the extension alone is not conclusive, the file
 * content.
 */
public final class AmbiguousLanguageResolutionUtils {

    /**
     * Preference order used by {@link #decideForAmbiguousLanguage(Set, String)} when more than one
     * candidate language remains; earlier entries win.
     */
    private static final List<ELanguage> AMBIGUOUS_LANGUAGES_PRIORITY_LIST = Arrays.asList(ELanguage.C, ELanguage.CPP, ELanguage.MATLAB, ELanguage.OBJECTIVE_C, ELanguage.OBJECTIVE_CPP, ELanguage.LINE, ELanguage.KUBERNETES);

    /** Keywords whose presence makes a {@code .cls} file count as Visual Basic. */
    private static final Pattern VISUAL_BASIC_CONTENT_PATTERN = Pattern.compile("\\W(End Module|End Sub|imports|Dim|BEGIN)\\W");

    /** Lower-case file name suffixes that are always mapped to {@link ELanguage#LINE}. */
    private static final String[] KNOWN_TEXT_FILE_EXTENSIONS = new String[]{".csv", ".txt", ".html", ".ecuextract", ".ecuextract.variant", ".arxml", ".ecuconfig"};

    /** Matches a single ASCII character; used to count ASCII characters in a content sample. */
    private static final Pattern ASCII_CHARACTER_PATTERN = Pattern.compile("\\p{ASCII}");

    /** Matches an {@code #include} or {@code #import} directive at the start of a line. */
    private static final Pattern OBJECTIVE_C_INCLUDE_PATTERN = Pattern.compile("(^|\\n)\\s*#(include|import)\\s");

    /** Number of leading characters that are inspected by the binary-content heuristic. */
    private static final int BINARY_DETECTION_SAMPLE_LENGTH = 200;

    /** A content sample with a lower share of ASCII characters than this is treated as binary. */
    private static final double MIN_ASCII_RATIO_FOR_TEXT = 0.75;

    /**
     * Determines the language of the file identified by the given uniform path.
     *
     * @param configuredLanguages the languages the caller has configured; used to break ties
     * @param uniformPath the uniform path of the file
     * @param fileContent the textual content of the file
     * @return the chosen language, or {@code null} if the file has no extension or an extension
     *         that is not mapped to any language and its content looks binary (an error is
     *         logged in that case)
     */
    public static ELanguage getLanguageFromUniformPath(Set<ELanguage> configuredLanguages, String uniformPath, String fileContent) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale, so that
        // extension matching behaves the same everywhere (e.g. for 'I' under a Turkish locale).
        String elementName = UniformPathUtils.getElementName(uniformPath).toLowerCase(Locale.ROOT);
        if (!elementName.contains(".")) {
            return determineLanguageForMissingFileExtension(configuredLanguages, uniformPath, fileContent);
        }
        if (StringUtils.endsWithOneOf(elementName, KNOWN_TEXT_FILE_EXTENSIONS)) {
            return ELanguage.LINE;
        }

        String extension = UniformPathUtils.getExtension(elementName);
        CCSMAssert.isNotNull(extension);
        // NOTE(review): the comparison below includes the leading dot - confirm that
        // UniformPathUtils.getExtension returns the extension in that form.
        if (extension.equals(".cls")) {
            if (VISUAL_BASIC_CONTENT_PATTERN.matcher(fileContent).find()) {
                return ELanguage.VB;
            }
            return ELanguage.LINE;
        }

        Set<ELanguage> languagesForFileExtension = ELanguage.getAllLanguagesForExtension(extension);
        if (languagesForFileExtension.isEmpty()) {
            return determineLanguageForUnknownFileExtension(uniformPath, fileContent);
        }
        if (languagesForFileExtension.size() == 1) {
            return languagesForFileExtension.iterator().next();
        }
        return decideForLanguage(configuredLanguages, fileContent, languagesForFileExtension);
    }

    /**
     * Handles file names that contain no dot: likely-binary content is rejected with
     * {@code null}, a single configured language is used as is, {@link ELanguage#CPP} is
     * preferred if it is configured, and {@link ELanguage#TEXT} is the fallback.
     */
    private static ELanguage determineLanguageForMissingFileExtension(Set<ELanguage> configuredLanguages, String uniformPath, String fileContent) {
        if (isLikelyBinaryContent(fileContent)) {
            logErrorAboutBinaryContent(uniformPath);
            return null;
        }
        if (configuredLanguages.size() == 1) {
            return configuredLanguages.iterator().next();
        }
        if (configuredLanguages.contains(ELanguage.CPP)) {
            return ELanguage.CPP;
        }
        return ELanguage.TEXT;
    }

    /**
     * Chooses among several languages registered for the same extension. Only languages that are
     * also configured are considered; if none is configured, the smallest language in natural
     * enum order is returned.
     */
    private static ELanguage decideForLanguage(Set<ELanguage> configuredLanguages, String fileContent, Set<ELanguage> languagesForFileExtension) {
        Set<ELanguage> languageCandidates = CollectionUtils.intersectionSet(languagesForFileExtension, configuredLanguages);
        if (languageCandidates.isEmpty()) {
            return languagesForFileExtension.stream().sorted().findFirst().orElseThrow(() -> new AssertionError("Expected languagesForFileExtension to not be empty"));
        }
        return decideForAmbiguousLanguage(languageCandidates, fileContent);
    }

    /**
     * Handles extensions that are not mapped to any language: {@code null} (plus an error log
     * entry) for likely-binary content, {@link ELanguage#LINE} otherwise.
     */
    private static ELanguage determineLanguageForUnknownFileExtension(String uniformPath, String fileContent) {
        if (isLikelyBinaryContent(fileContent)) {
            logErrorAboutBinaryContent(uniformPath);
            return null;
        }
        return ELanguage.LINE;
    }

    /** Logs an error stating that the file at the given uniform path is ignored. */
    private static void logErrorAboutBinaryContent(String uniformPath) {
        LogManager.getLogger().error("Ignoring file with unsupported extension and likely binary content: " + uniformPath + ". If this file is a valid code file that should be analyzed, please use an explicit language mapping in the connector configuration.");
    }

    /**
     * Heuristic binary check on the first {@value #BINARY_DETECTION_SAMPLE_LENGTH} characters of
     * the content. Content whose sample is shorter than that is never considered binary;
     * otherwise the content is considered binary if the share of ASCII characters in the sample
     * is below {@value #MIN_ASCII_RATIO_FOR_TEXT}.
     */
    private static boolean isLikelyBinaryContent(String fileContent) {
        String contentStart = StringUtils.getFirstCharacters(fileContent, BINARY_DETECTION_SAMPLE_LENGTH);
        if (contentStart.length() < BINARY_DETECTION_SAMPLE_LENGTH) {
            return false;
        }
        long numAsciiCharacters = 0L;
        Matcher matcher = ASCII_CHARACTER_PATTERN.matcher(contentStart);
        while (matcher.find()) {
            ++numAsciiCharacters;
        }
        return (double) numAsciiCharacters / (double) contentStart.length() < MIN_ASCII_RATIO_FOR_TEXT;
    }

    /**
     * Picks one language out of several candidates by scanning the file content for
     * language-specific patterns.
     *
     * <p>The decision order is: {@link ELanguage#OBJECTIVE_CPP} if it is a candidate and was
     * detected (detecting both Objective-C and C++ patterns counts as Objective-C++), then
     * {@link ELanguage#OBJECTIVE_C} if it is a candidate and was detected, then the first detected
     * candidate in priority order, then the first candidate in priority order, and finally the
     * first remaining candidate in enum declaration order.
     *
     * <p>The given set is not modified.
     *
     * @param languageCandidates the languages to choose from; must not be empty
     * @param fileContent the textual content of the file
     * @return the chosen language
     */
    @VisibleForTesting
    public static ELanguage decideForAmbiguousLanguage(Set<ELanguage> languageCandidates, String fileContent) {
        CCSMAssert.isNotEmpty(languageCandidates, "languageCandidates was empty");
        // Work on a private copy: the caller's set must not be changed (and may be unmodifiable).
        // An EnumSet additionally iterates in enum declaration order, which makes the
        // last-resort fallback at the end of this method deterministic.
        EnumSet<ELanguage> candidates = EnumSet.copyOf(languageCandidates);

        // An #include/#import directive rules out Matlab when an Objective-C flavor is possible.
        boolean objectiveCFlavorPossible = candidates.contains(ELanguage.OBJECTIVE_C) || candidates.contains(ELanguage.OBJECTIVE_CPP);
        if (candidates.contains(ELanguage.MATLAB) && objectiveCFlavorPossible && OBJECTIVE_C_INCLUDE_PATTERN.matcher(fileContent).find()) {
            candidates.remove(ELanguage.MATLAB);
        }

        EnumSet<ELanguage> patternsToScanFor = determinePatternsToScanFor(candidates);
        EnumSet<ELanguage> foundLanguagePatterns = scanCodeForLanguageSpecificPatterns(fileContent, patternsToScanFor);
        if (foundLanguagePatterns.contains(ELanguage.OBJECTIVE_C) && foundLanguagePatterns.contains(ELanguage.CPP)) {
            foundLanguagePatterns.add(ELanguage.OBJECTIVE_CPP);
        }

        Set<ELanguage> foundLanguageCandidates = CollectionUtils.intersectionSet(candidates, foundLanguagePatterns);
        if (foundLanguageCandidates.contains(ELanguage.OBJECTIVE_CPP)) {
            return ELanguage.OBJECTIVE_CPP;
        }
        if (foundLanguageCandidates.contains(ELanguage.OBJECTIVE_C)) {
            return ELanguage.OBJECTIVE_C;
        }

        Optional<ELanguage> detectedByPriority = firstInPriorityOrder(foundLanguageCandidates);
        if (detectedByPriority.isPresent()) {
            return detectedByPriority.get();
        }
        Optional<ELanguage> candidateByPriority = firstInPriorityOrder(candidates);
        if (candidateByPriority.isPresent()) {
            return candidateByPriority.get();
        }
        // Never empty here: the input was non-empty and Matlab is only removed when an
        // Objective-C flavor is present as well.
        return candidates.iterator().next();
    }

    /** Returns the first entry of the priority list that is contained in the given set, if any. */
    private static Optional<ELanguage> firstInPriorityOrder(Set<ELanguage> languages) {
        return AMBIGUOUS_LANGUAGES_PRIORITY_LIST.stream().filter(languages::contains).findFirst();
    }

    /**
     * Derives which pattern groups have to be scanned for the given candidates. Objective-C++ as a
     * candidate requires both the C++ and the Objective-C patterns.
     */
    private static @NonNull EnumSet<ELanguage> determinePatternsToScanFor(Set<ELanguage> languageCandidates) {
        boolean objectiveCppIsCandidate = languageCandidates.contains(ELanguage.OBJECTIVE_CPP);
        EnumSet<ELanguage> patternsToScanFor = EnumSet.noneOf(ELanguage.class);
        if (objectiveCppIsCandidate || languageCandidates.contains(ELanguage.CPP)) {
            patternsToScanFor.add(ELanguage.CPP);
        }
        if (objectiveCppIsCandidate || languageCandidates.contains(ELanguage.OBJECTIVE_C)) {
            patternsToScanFor.add(ELanguage.OBJECTIVE_C);
        }
        if (languageCandidates.contains(ELanguage.MATLAB)) {
            patternsToScanFor.add(ELanguage.MATLAB);
        }
        if (languageCandidates.contains(ELanguage.KUBERNETES)) {
            patternsToScanFor.add(ELanguage.KUBERNETES);
        }
        return patternsToScanFor;
    }

    /**
     * Runs the language detectors selected by {@code patternsToSearchFor} on the file content.
     *
     * @return a new, modifiable set with the languages whose patterns were found
     */
    private static EnumSet<ELanguage> scanCodeForLanguageSpecificPatterns(String fileContent, EnumSet<ELanguage> patternsToSearchFor) {
        EnumSet<ELanguage> foundLanguagePatterns = EnumSet.noneOf(ELanguage.class);
        if (patternsToSearchFor.isEmpty()) {
            return foundLanguagePatterns;
        }
        if (patternsToSearchFor.contains(ELanguage.MATLAB) && MatlabLanguageDetector.containsLikelyMatlabCode(fileContent)) {
            foundLanguagePatterns.add(ELanguage.MATLAB);
        }
        if (patternsToSearchFor.contains(ELanguage.CPP) || patternsToSearchFor.contains(ELanguage.OBJECTIVE_C)) {
            foundLanguagePatterns.addAll(CLikeLanguageDetector.scanForClikeLanguagePatterns(fileContent, patternsToSearchFor));
        }
        if (patternsToSearchFor.contains(ELanguage.KUBERNETES) && KubernetesLanguageDetector.isLikelyKubernetesContent(fileContent)) {
            foundLanguagePatterns.add(ELanguage.KUBERNETES);
        }
        return foundLanguagePatterns;
    }

    /** Utility class; instantiation is not supported. */
    private AmbiguousLanguageResolutionUtils() {
        throw new UtilsInstantiationNotSupportedException();
    }
}

