/*
 * Decompiled with CFR 0.152.
 */
package com.teamscale.index.binary_size.common_substrings;

import com.google.common.annotations.VisibleForTesting;
import com.teamscale.core.analysis.AnalysisStep;
import com.teamscale.core.analysis.EAnalysisStepParameter;
import com.teamscale.core.analysis.EIndexAccessMode;
import com.teamscale.core.analysis.IndexAccess;
import com.teamscale.core.analysis.StepParameter;
import com.teamscale.core.analysis.configuration.model.CodeScopeAware;
import com.teamscale.index.architecture.assessment.TypeToComponentMapper;
import com.teamscale.index.architecture.scope.ArchitectureDefinition;
import com.teamscale.index.architecture.scope.ArchitectureDefinitionParser;
import com.teamscale.index.architecture.scope.ComponentNode;
import com.teamscale.index.binary_size.common_substrings.StringLiteralSequence;
import com.teamscale.index.binary_size.common_substrings.StringLiteralSequenceExtractor;
import com.teamscale.index.binary_size.common_substrings.StringLiteralSubstringCloneIndex;
import com.teamscale.index.binary_size.common_substrings.SubstringLiteralCloneClass;
import com.teamscale.index.code_clones.CloneComponentHelper;
import com.teamscale.index.code_clones.core.Clone;
import com.teamscale.index.code_clones.suffixtree.CloneDetectingSuffixTree;
import com.teamscale.index.code_clones.suffixtree.ICloneReporter;
import com.teamscale.index.findings.FindingsSynchronizingAnalyzingStepBase;
import com.teamscale.index.resource.TokenElementIndex;
import com.teamscale.index.resource.TokenElementInfo;
import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.LanguageGroups;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.conqat.engine.commons.findings.location.ElementLocation;
import org.conqat.engine.commons.findings.location.TextRegionLocation;
import org.conqat.engine.core.core.ConQATException;
import org.conqat.engine.index.shared.IndexFinding;
import org.conqat.engine.persistence.store.StorageException;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.collections.ListMap;
import org.conqat.lib.commons.collections.Pair;
import org.conqat.lib.commons.collections.SetMap;
import org.conqat.lib.commons.markup.MarkupUtils;
import org.conqat.lib.commons.string.StringUtils;
import org.jspecify.annotations.NonNull;

@AnalysisStep(hints={EAnalysisStepParameter.MERGE_INPUT_DELTAS})
public class CommonSubstringsAnalysisStep
extends FindingsSynchronizingAnalyzingStepBase {
    /** Logger used to report problems when the configured architecture cannot be found or parsed. */
    private static final Logger LOGGER = LogManager.getLogger();
    /** First string argument passed when constructing each finding of this step. */
    public static final String FINDINGS_GROUP = "Substring clone";
    /** Second string argument passed when constructing each finding of this step. */
    public static final String FINDING_CATEGORY = "Efficiency";
    /** Partition name handed to the findings synchronization at the end of {@link #execute()}. */
    @VisibleForTesting
    public static final String SUBSTRING_CLONE_FINDINGS_PARTITION = "substring-clones";
    /** Only token elements whose language is in this set are analyzed for string literals. */
    public static final EnumSet<ELanguage> SUPPORTED_LANGUAGES = LanguageGroups.C_AND_DERIVATIVES;
    /** Index holding the stored string literals and the clone classes per uniform path. */
    @IndexAccess(indexName="cpp-string-literals", value=EIndexAccessMode.READ_WRITE)
    private StringLiteralSubstringCloneIndex stringLiteralIndex;
    /** Name of the optional step parameter that selects the architecture file. */
    public static final String ARCHITECTURE_PATH_FOR_INTRA_COMPONENT_ANALYSIS_PARAMETER_NAME = "architecture-path-for-intra-component-analysis";
    /** Human-readable description of the architecture path parameter. */
    public static final String ARCHITECTURE_PATH_FOR_INTRA_COMPONENT_ANALYSIS_PARAMETER_DESCRIPTION = "The path of a teamscale architecture in the project. If given, the analysis will report substring clones only if all clones of the clone class are in the same architecture component.\nNote that the analysis expects that each file is mapped to at most one component. If a file is mapped to more than one component, the analysis ignores all but the alphabetically first component.\n";
    /** Path of the architecture file; the empty string (default) disables the per-component analysis. */
    @StepParameter(value="architecture-path-for-intra-component-analysis", optional=true)
    private String architecturePathForIntraComponentAnalysis = "";
    /** Name of the optional step parameter for the minimum clone length. */
    public static final String MINIMUM_SUBSTRING_CLONE_LENGTH_PARAMETER_NAME = "minimum-substring-clone-length";
    /** Human-readable description of the minimum clone length parameter. */
    public static final String MINIMUM_SUBSTRING_CLONE_LENGTH_PARAMETER_DESCRIPTION = "Configures the minimum length of reported clones (in number of characters). Clones that are shorter than the configured length are ignored.\n";
    /** Clone classes whose cloned string has fewer characters than this are dropped by the clone reporter. */
    @StepParameter(value="minimum-substring-clone-length", optional=true)
    private int minimumSubstringCloneLength = 8;
    /** Name of the optional step parameter for the minimum potential savings. */
    public static final String MINIMUM_POTENTIAL_SAVINGS_PARAMETER_NAME = "minimum-potential-savings";
    /** Human-readable description of the minimum potential savings parameter. */
    public static final String MINIMUM_POTENTIAL_SAVINGS_PARAMETER_DESCRIPTION = "Configures the minimum number of (estimated) bytes that can be saved per reported clone group. Groups with less potential saving will not be reported.\n\nTo compute the saving, we assume that the common substring will be extracted to one shared constant. Therefore, it will be stored only once in the binary.\nAt the clone sites, the cost decreases by the length of the clone and increases for an additional null terminator and string-concatenation operators.\n";
    /** Clone classes whose computed potential savings are below this value are dropped by the clone reporter. */
    @StepParameter(value="minimum-potential-savings", optional=true)
    private int minimumPotentialSavings = 10;
    /**
     * Matches the separators used to tokenize a literal: either a run of whitespace, or a run made of
     * non-word characters and backslash-escaped word characters. Both the separators and the text
     * between them become tokens.
     */
    private static final Pattern SUBSTRING_TOKEN_SPLIT_PATTERN = Pattern.compile("\\s+|(\\\\\\w|\\W)+");

    /**
     * Runs the substring clone analysis for the current content delta.
     * <p>
     * The stored string literals are refreshed first, then the clone classes touched by the delta
     * are recomputed and written to the index, and finally the resulting findings are synchronized
     * for every path that was analyzed or received a finding.
     *
     * @throws Exception if any of the involved index operations fails
     */
    public void execute() throws Exception {
        List<String> deletedPaths = this.contentDelta.getDeletedKeysAsStrings();
        List<String> addedOrChangedKeys = this.contentDelta.getAddedOrChangedKeysAsStrings();

        Map<String, TokenElementInfo> changedElements = CommonSubstringsAnalysisStep.loadChangedTokenElements(addedOrChangedKeys, this.contentIndex);
        Map<String, List<StringLiteralSequence>> literalsByPath = this.collectAllStringLiteralsAndUpdateIndex(deletedPaths, changedElements);
        PathsToAnalyze paths = this.determinePathsToAnalyze(changedElements, addedOrChangedKeys, deletedPaths);

        AnalysisResults analysisResults = this.computeFindingsAndNewCloneClasses(literalsByPath, paths);
        this.stringLiteralIndex.updateCloneClasses(analysisResults.cloneClasses, deletedPaths);

        // Findings are synchronized for changed files, their former clone partners, and every
        // file that received a finding in this run.
        HashSet<String> analyzedFiles = new HashSet<String>(paths.addedOrChangedPaths());
        analyzedFiles.addAll(paths.pathsThatSharedCloneClassesWithChangedOrDeletedFiles());
        analyzedFiles.addAll(analysisResults.findings.keySet());

        CodeScopeAware<ListMap<String, IndexFinding>> scopedFindings = CodeScopeAware.defaultCodeScopeWithValue(analysisResults.buildFindingsAsListMap());
        this.synchronizeFindings(scopedFindings, SUBSTRING_CLONE_FINDINGS_PARTITION, analyzedFiles);
    }

    /**
     * Determines which paths have to be re-analyzed for the current delta.
     * <p>
     * If the configured architecture file itself was touched in this commit, every token element of
     * a supported language is treated as changed and the set of former clone partners is emptied.
     *
     * @param changedTokenElements the added or changed token elements of supported languages, keyed by uniform path
     * @param addedOrChangedKeysAsStrings all added or changed keys of the delta
     * @param deletedKeysAsStrings all deleted keys of the delta
     * @return the changed paths together with the unchanged paths that shared clone classes with changed or deleted paths
     * @throws StorageException if the index cannot be read
     */
    private PathsToAnalyze determinePathsToAnalyze(Map<String, TokenElementInfo> changedTokenElements, List<String> addedOrChangedKeysAsStrings, List<String> deletedKeysAsStrings) throws StorageException {
        Set<String> pathsToReanalyze = changedTokenElements.keySet();
        Set<String> changedOrDeleted = CollectionUtils.unionSet(addedOrChangedKeysAsStrings, deletedKeysAsStrings);
        Set<String> formerClonePartners = new HashSet<String>(this.stringLiteralIndex.getPathsThatShareCloneClassesWithPaths(new ArrayList<String>(changedOrDeleted)));

        if (this.configuredArchitectureChangedInCurrentCommit()) {
            // The component assignment may differ for any file now, so treat all supported files as changed.
            Set<String> allSupportedPaths = new HashSet<String>(pathsToReanalyze);
            for (Pair<?, ?> pathAndElement : this.contentIndex.getAllTokenElements()) {
                TokenElementInfo elementInfo = (TokenElementInfo) pathAndElement.getSecond();
                if (SUPPORTED_LANGUAGES.contains(elementInfo.getLanguage())) {
                    allSupportedPaths.add((String) pathAndElement.getFirst());
                }
            }
            pathsToReanalyze = allSupportedPaths;
            changedOrDeleted.addAll(allSupportedPaths);
            formerClonePartners.clear();
        }

        // Only paths that are neither changed nor deleted themselves remain as clone partners.
        formerClonePartners.removeAll(changedOrDeleted);
        return new PathsToAnalyze(pathsToReanalyze, formerClonePartners);
    }

    /**
     * Brings the string literal index up to date and returns all known string literals.
     * <p>
     * Literals of deleted paths are removed from the index before the stored literals are loaded;
     * literals of the changed files are then extracted, stored, and merged over the loaded ones.
     *
     * @param deletedKeysAsStrings uniform paths whose literals are removed from the index
     * @param changedTokenElements added or changed token elements whose literals are re-extracted
     * @return the string literals per uniform path after applying the delta
     * @throws StorageException if the index cannot be read or written
     */
    private @NonNull Map<String, List<StringLiteralSequence>> collectAllStringLiteralsAndUpdateIndex(List<String> deletedKeysAsStrings, Map<String, TokenElementInfo> changedTokenElements) throws StorageException {
        this.stringLiteralIndex.removeStringLiteralsForUniformPaths(deletedKeysAsStrings);

        // Raw type kept on purpose: the exact generic type returned by toMap() is declared elsewhere.
        Map literalsByPath = this.stringLiteralIndex.loadStoredStringLiterals().toMap();

        Map<String, List<StringLiteralSequence>> freshLiterals = CommonSubstringsAnalysisStep.collectStringLiterals(changedTokenElements.values());
        this.stringLiteralIndex.storeStringLiteralsForUniformPaths(freshLiterals);

        // Freshly extracted literals replace whatever was stored for the same path.
        literalsByPath.putAll(freshLiterals);
        return literalsByPath;
    }

    /**
     * Checks whether the configured architecture file is part of the current content delta.
     *
     * @return {@code true} if an architecture path is configured and that path is among the deleted
     *         or the added/changed keys of the delta; {@code false} otherwise
     */
    private boolean configuredArchitectureChangedInCurrentCommit() {
        String architecturePath = this.architecturePathForIntraComponentAnalysis;
        if (architecturePath.isEmpty()) {
            return false;
        }
        if (this.contentDelta.getDeletedKeysAsStrings().contains(architecturePath)) {
            return true;
        }
        return this.contentDelta.getAddedOrChangedKeysAsStrings().contains(architecturePath);
    }

    /**
     * Computes findings and clone classes, either globally or separately per architecture component.
     * <p>
     * Without a usable architecture (none configured, not found, or not parseable) one global
     * analysis over all string literals is run. Otherwise the paths with string literals are split
     * per component and each component is analyzed on its own, with all of its paths passed as the
     * changed files and the component name attached to the results.
     *
     * @param allStringLiterals all string literals per uniform path
     * @param pathsToAnalyze the paths affected by the current delta
     * @return the merged results of the global analysis or of all per-component analyses
     * @throws StorageException if the architecture or index data cannot be loaded
     */
    private AnalysisResults computeFindingsAndNewCloneClasses(Map<String, List<StringLiteralSequence>> allStringLiterals, PathsToAnalyze pathsToAnalyze) throws StorageException {
        Optional<ArchitectureDefinition> architecture = Optional.empty();
        if (!this.architecturePathForIntraComponentAnalysis.isEmpty()) {
            architecture = this.loadArchitecture(this.architecturePathForIntraComponentAnalysis);
        }

        AnalysisResults results = new AnalysisResults();
        Set<String> formerClonePartners = pathsToAnalyze.pathsThatSharedCloneClassesWithChangedOrDeletedFiles();

        if (architecture.isEmpty()) {
            results.addAll(this.determineAndAddCloneClassesFromFiles(pathsToAnalyze.addedOrChangedPaths(), allStringLiterals, formerClonePartners, ""));
            return results;
        }

        CloneComponentHelper componentHelper = new CloneComponentHelper(architecture.get(), this.getParallelTaskExecutor());
        Map<ComponentNode, Set<String>> pathsPerComponent = CommonSubstringsAnalysisStep.splitPathsPerFirstContainingArchitectureComponent(componentHelper, allStringLiterals.keySet());
        for (Map.Entry<ComponentNode, Set<String>> componentEntry : pathsPerComponent.entrySet()) {
            Set<String> uniformPathsInComponent = componentEntry.getValue();

            // Restrict the literals to this component so that clones are only detected within it.
            Map<String, List<StringLiteralSequence>> literalsInComponent = new HashMap<>();
            for (String uniformPath : uniformPathsInComponent) {
                literalsInComponent.put(uniformPath, allStringLiterals.get(uniformPath));
            }

            results.addAll(this.determineAndAddCloneClassesFromFiles(uniformPathsInComponent, literalsInComponent, formerClonePartners, componentEntry.getKey().getName()));
        }
        return results;
    }

    /**
     * Assigns each path to exactly one architecture component.
     * <p>
     * Components are visited in ascending order of their name; a path that is mapped to several
     * components is kept only for the first component visited. The path sets obtained from the
     * component mapping are modified in place.
     *
     * @param componentHelper helper used to create the component mapping for the paths
     * @param pathsWithStringLiterals the uniform paths to distribute
     * @return for every mapped component the paths that were not claimed by an earlier component
     */
    private static Map<ComponentNode, Set<String>> splitPathsPerFirstContainingArchitectureComponent(CloneComponentHelper componentHelper, Set<String> pathsWithStringLiterals) {
        TypeToComponentMapper.MappingResult mapping = componentHelper.createComponentMappingForPaths(pathsWithStringLiterals);
        SetMap<ComponentNode, String> componentsToTypes = mapping.componentsToTypes();
        List<ComponentNode> componentsByName = CollectionUtils.sort(componentsToTypes.getKeys(), Comparator.comparing(ComponentNode::getName));

        Map<ComponentNode, Set<String>> pathsPerComponent = new HashMap<>();
        Set<String> alreadyAssigned = new HashSet<>();
        for (ComponentNode component : componentsByName) {
            Set<String> remainingPaths = componentsToTypes.getCollection(component);
            remainingPaths.removeAll(alreadyAssigned);
            alreadyAssigned.addAll(remainingPaths);
            pathsPerComponent.put(component, remainingPaths);
        }
        return pathsPerComponent;
    }

    /**
     * Loads and parses the architecture definition stored at the given path.
     * <p>
     * A missing file and a parse failure are both logged as errors and reported as an empty result.
     *
     * @param architecturePath uniform path of the architecture file
     * @return the parsed architecture, or an empty optional if it is missing or cannot be parsed
     * @throws StorageException if the content index cannot be read
     */
    private Optional<ArchitectureDefinition> loadArchitecture(String architecturePath) throws StorageException {
        TokenElementInfo architectureFile = this.contentIndex.getTokenElement(architecturePath);
        if (architectureFile == null) {
            LOGGER.error("Did not find architecture at path {}. Switching to global substring clone analysis.", architecturePath);
            return Optional.empty();
        }

        Optional<ArchitectureDefinition> parsedArchitecture;
        try {
            ArchitectureDefinitionParser parser = new ArchitectureDefinitionParser(architectureFile.getUniformPath(), architectureFile.getText());
            parsedArchitecture = Optional.of(parser.parse());
        }
        catch (ConQATException e) {
            LOGGER.error("Could not parse architecture {}. Switching to global substring clone analysis.", architecturePath, e);
            parsedArchitecture = Optional.empty();
        }
        return parsedArchitecture;
    }

    /**
     * Loads the token elements for the given paths and keeps those of a supported language.
     *
     * @param changedUniformPaths uniform paths of the added or changed elements
     * @param contentIndex index to load the token elements from
     * @return the supported token elements keyed by their uniform path
     * @throws StorageException if the content index cannot be read
     */
    private static Map<String, TokenElementInfo> loadChangedTokenElements(List<String> changedUniformPaths, TokenElementIndex contentIndex) throws StorageException {
        Map<String, TokenElementInfo> supportedElementsByPath = new HashMap<>();
        for (TokenElementInfo candidate : contentIndex.getTokenElements(changedUniformPaths)) {
            if (SUPPORTED_LANGUAGES.contains(candidate.getLanguage())) {
                supportedElementsByPath.put(candidate.getUniformPath(), candidate);
            }
        }
        return supportedElementsByPath;
    }

    /**
     * Detects clone classes in the given literals and builds results for the affected paths.
     * <p>
     * A path is considered affected if it is one of the given former clone partners, or if it
     * contains a clone of a clone class that also has a clone in one of the changed files.
     *
     * @param contentChangedFiles uniform paths regarded as changed
     * @param allStringLiterals the string literals to search for clones, keyed by uniform path
     * @param pathsThatSharedCloneClassesWithChangedOrDeletedFiles paths that previously shared a clone class with a changed or deleted file
     * @param architectureComponentName name of the analyzed component, or the empty string for the global analysis
     * @return findings and clone classes for all affected paths
     * @throws StorageException declared by the clone detection
     */
    private AnalysisResults determineAndAddCloneClassesFromFiles(Set<String> contentChangedFiles, Map<String, List<StringLiteralSequence>> allStringLiterals, Set<String> pathsThatSharedCloneClassesWithChangedOrDeletedFiles, String architectureComponentName) throws StorageException {
        List<SuffixTreeCloneClass> detectedCloneClasses = this.findSubstringCloneClassesInLiterals(allStringLiterals);

        // First pass: collect every path whose findings may differ from the previous run.
        Set<String> affectedPaths = new HashSet<>(pathsThatSharedCloneClassesWithChangedOrDeletedFiles);
        for (SuffixTreeCloneClass detected : detectedCloneClasses) {
            if (detected.hasCloneInUniformPath(contentChangedFiles)) {
                affectedPaths.addAll(detected.getUniformPathsOfClones());
            }
        }

        // Second pass: build results for every clone class that touches an affected path.
        AnalysisResults collectedResults = new AnalysisResults();
        for (SuffixTreeCloneClass detected : detectedCloneClasses) {
            if (detected.hasCloneInUniformPath(affectedPaths)) {
                collectedResults.addAll(CommonSubstringsAnalysisStep.buildFindingsAndCloneClasses(detected, affectedPaths, architectureComponentName));
            }
        }
        return collectedResults;
    }

    /**
     * Runs the suffix-tree based clone detection over all given string literals.
     * <p>
     * The literals are tokenized and concatenated into one token sequence, with a sentinel token
     * appended after each literal.
     *
     * @param allStringLiterals the string literals per uniform path
     * @return the clone classes that the reporter collected
     * @throws StorageException declared by the clone detection
     */
    private List<SuffixTreeCloneClass> findSubstringCloneClassesInLiterals(Map<String, List<StringLiteralSequence>> allStringLiterals) throws StorageException {
        ArrayList<SubstringToken> tokenSequence = new ArrayList<>();
        for (List<StringLiteralSequence> literalsOfFile : allStringLiterals.values()) {
            for (StringLiteralSequence literal : literalsOfFile) {
                tokenSequence.addAll(CommonSubstringsAnalysisStep.tokenizeStringLiteral(literal));
                tokenSequence.add(SubstringToken.createSentinelToken());
            }
        }

        CloneDetectingSuffixTree suffixTree = new CloneDetectingSuffixTree(tokenSequence);
        SubStringCloneReporter cloneCollector = new SubStringCloneReporter(tokenSequence);
        suffixTree.findClones(1, cloneCollector);
        return cloneCollector.clonesClasses;
    }

    /**
     * Creates the findings and clone class entries for one detected clone class.
     * <p>
     * A finding is created for each clone location whose uniform path is contained in
     * {@code pathsWhereFindingsMightHaveChanged}. Every finding lists the locations of all clones of
     * the class as sibling locations and carries the same properties.
     *
     * @param suffixTreeCloneClass the detected clone class
     * @param pathsWhereFindingsMightHaveChanged uniform paths for which results are produced
     * @param architectureComponentName name of the analyzed component, or the empty string if none
     * @return the findings and clone classes per uniform path
     */
    private static AnalysisResults buildFindingsAndCloneClasses(SuffixTreeCloneClass suffixTreeCloneClass, Set<String> pathsWhereFindingsMightHaveChanged, String architectureComponentName) {
        String clonedString = suffixTreeCloneClass.clonedString;
        SubstringLiteralCloneClass cloneClass = new SubstringLiteralCloneClass(architectureComponentName, clonedString);

        // The property values are of mixed type (numbers and text), hence the Object value type.
        Map<String, Object> findingProperties = new HashMap<>();
        findingProperties.put(FindingPropertyNames.CLONE_LENGTH, clonedString.length());
        findingProperties.put(FindingPropertyNames.NUMBER_OF_CLONES, suffixTreeCloneClass.startPositionsInWord.size());
        findingProperties.put(FindingPropertyNames.POTENTIAL_SAVINGS, suffixTreeCloneClass.computePotentialSavingsOfCloneClass());
        findingProperties.put(FindingPropertyNames.CLONED_CHARS, MarkupUtils.formatAsSourceCode(clonedString));
        if (!architectureComponentName.isEmpty()) {
            findingProperties.put(FindingPropertyNames.ARCHITECTURE_COMPONENT, architectureComponentName);
        }

        // The message is identical for all clones of the class, so build it once.
        String findingMessage = "Substring clone " + MarkupUtils.formatAsSourceCode("\"" + clonedString + "\"");
        List<TextRegionLocation> cloneSiblingLocations = suffixTreeCloneClass.computeLocationsForClones();

        AnalysisResults results = new AnalysisResults();
        for (TextRegionLocation location : cloneSiblingLocations) {
            String uniformPath = location.getUniformPath();
            // Skip unaffected paths before constructing a finding that would be discarded anyway.
            if (!pathsWhereFindingsMightHaveChanged.contains(uniformPath)) {
                continue;
            }
            IndexFinding finding = new IndexFinding(FINDINGS_GROUP, FINDING_CATEGORY, findingMessage, (ElementLocation)location);
            finding.addSiblingLocations(cloneSiblingLocations);
            finding.addProperties(findingProperties);
            results.addCloneClass(uniformPath, cloneClass);
            results.addFinding(uniformPath, finding);
        }
        return results;
    }

    /**
     * Computes the potential savings of a clone class from the positions of its clones.
     * <p>
     * A clone that spans its complete literal (bounded by a sentinel or the start of the word on the
     * left and by a sentinel on the right) contributes nothing. Every other clone contributes the
     * length of the cloned string minus an overhead of 1, or 2 if the clone touches neither end of
     * its literal. If no clone spans a complete literal, the length of the cloned string plus one is
     * subtracted once. If all clones span their complete literal, the result is 0.
     *
     * @param clonedString the text shared by all clones
     * @param cloneStartingIndexes index of the first token of each clone in {@code word}
     * @param cloneLength number of tokens of each clone
     * @param word the token sequence the clones were detected in
     * @return the computed savings; may be negative
     */
    private static int computePotentialSavingsOfCloneClass(String clonedString, List<Integer> cloneStartingIndexes, int cloneLength, List<SubstringToken> word) {
        int cloneCharacters = clonedString.length();
        int savings = 0;
        boolean someLiteralEqualsClone = false;
        boolean everyLiteralEqualsClone = true;

        for (int cloneStart : cloneStartingIndexes) {
            int indexBehindClone = cloneStart + cloneLength;
            boolean cloneAtLiteralStart = cloneStart == 0 || word.get(cloneStart - 1).isSentinel();
            boolean cloneAtLiteralEnd = indexBehindClone == word.size() - 1 || word.get(indexBehindClone).isSentinel();

            if (cloneAtLiteralStart && cloneAtLiteralEnd) {
                someLiteralEqualsClone = true;
            } else {
                everyLiteralEqualsClone = false;
                int overhead = 1;
                if (!cloneAtLiteralStart) {
                    overhead++;
                }
                if (cloneAtLiteralEnd) {
                    overhead--;
                }
                savings += cloneCharacters - overhead;
            }
        }

        if (everyLiteralEqualsClone) {
            return 0;
        }
        if (!someLiteralEqualsClone) {
            // No literal already consists of exactly the cloned text, so account for it once more.
            savings -= cloneCharacters + 1;
        }
        return savings;
    }

    /**
     * Extracts the string literal sequences of the given token elements.
     * <p>
     * Elements of unsupported languages are mapped to an empty list instead of being skipped.
     *
     * @param tokenElements the token elements to inspect
     * @return the extracted string literals keyed by the uniform path of their element
     */
    private static Map<String, List<StringLiteralSequence>> collectStringLiterals(Collection<TokenElementInfo> tokenElements) {
        Map<String, List<StringLiteralSequence>> literalsByPath = new HashMap<>();
        for (TokenElementInfo element : tokenElements) {
            List<StringLiteralSequence> literalsOfElement;
            if (SUPPORTED_LANGUAGES.contains(element.getLanguage())) {
                literalsOfElement = StringLiteralSequenceExtractor.collectStringLiteralSequencesFromTokenElement(element);
            } else {
                literalsOfElement = new ArrayList<StringLiteralSequence>();
            }
            literalsByPath.put(element.getUniformPath(), literalsOfElement);
        }
        return literalsByPath;
    }

    /**
     * Splits the text of a string literal into tokens.
     * <p>
     * Each match of {@code SUBSTRING_TOKEN_SPLIT_PATTERN} becomes a token, and so does each
     * non-empty stretch of text between two matches, before the first match, or after the last
     * match. Every token records its offset within the literal text.
     *
     * @param stringLiteral the literal to tokenize
     * @return the tokens in the order in which they occur in the literal text
     */
    private static List<SubstringToken> tokenizeStringLiteral(StringLiteralSequence stringLiteral) {
        String text = stringLiteral.literalText;
        List<SubstringToken> tokens = new ArrayList<>();
        Matcher separators = SUBSTRING_TOKEN_SPLIT_PATTERN.matcher(text);

        int unconsumedFrom = 0;
        while (separators.find()) {
            int separatorStart = separators.start();
            if (unconsumedFrom < separatorStart) {
                // Text between the previous separator and this one.
                tokens.add(new SubstringToken(text.substring(unconsumedFrom, separatorStart), unconsumedFrom, stringLiteral));
            }
            tokens.add(new SubstringToken(separators.group(), separatorStart, stringLiteral));
            unconsumedFrom = separators.end();
        }

        if (unconsumedFrom < text.length()) {
            // Trailing text after the last separator.
            tokens.add(new SubstringToken(text.substring(unconsumedFrom), unconsumedFrom, stringLiteral));
        }
        return tokens;
    }

    /**
     * Test hook that tokenizes a plain text as if it were a string literal.
     * <p>
     * The text is wrapped into a literal with the fixed path {@code test_path.c} and a single
     * positioning info entry.
     *
     * @param text the text to tokenize
     * @return the texts of the resulting tokens, in order
     */
    @VisibleForTesting
    public static List<String> tokenizeStringLiteralForTesting(String text) {
        int textLength = text.length();
        StringLiteralSequence.StringLiteralPositioningInfo wholeText = new StringLiteralSequence.StringLiteralPositioningInfo(0, 0, textLength, textLength, false, 1);
        StringLiteralSequence literal = new StringLiteralSequence(text, "test_path.c", new StringLiteralSequence.StringLiteralPositioningInfo[]{wholeText});
        List<SubstringToken> tokens = CommonSubstringsAnalysisStep.tokenizeStringLiteral(literal);
        return CollectionUtils.map(tokens, substringToken -> substringToken.tokenText);
    }

    /**
     * The paths that are relevant for analyzing one content delta.
     *
     * @param addedOrChangedPaths uniform paths that are treated as added or changed
     * @param pathsThatSharedCloneClassesWithChangedOrDeletedFiles uniform paths that, according to the
     *        string literal index, shared a clone class with a changed or deleted path and are not
     *        changed or deleted themselves
     */
    private record PathsToAnalyze(Set<String> addedOrChangedPaths, Set<String> pathsThatSharedCloneClassesWithChangedOrDeletedFiles) {
    }

    /**
     * Mutable accumulator for the findings and clone classes produced by the analysis, both keyed by
     * uniform path.
     */
    private static class AnalysisResults {
        /** Findings per uniform path. */
        private final Map<String, ArrayList<IndexFinding>> findings = new HashMap<>();
        /** Clone classes per uniform path. */
        private final Map<String, ArrayList<SubstringLiteralCloneClass>> cloneClasses = new HashMap<>();

        private AnalysisResults() {
        }

        /** Appends the finding to the findings of the given uniform path. */
        private void addFinding(String uniformPath, IndexFinding finding) {
            this.findings.computeIfAbsent(uniformPath, path -> new ArrayList<>()).add(finding);
        }

        /** Appends the clone class to the clone classes of the given uniform path. */
        private void addCloneClass(String uniformPath, SubstringLiteralCloneClass cloneClass) {
            this.cloneClasses.computeIfAbsent(uniformPath, path -> new ArrayList<>()).add(cloneClass);
        }

        /**
         * Merges all findings and clone classes of the given results into this instance.
         *
         * @param results the results to merge; they are not modified
         */
        private void addAll(AnalysisResults results) {
            for (Map.Entry<String, ArrayList<IndexFinding>> entry : results.findings.entrySet()) {
                this.findings.computeIfAbsent(entry.getKey(), path -> new ArrayList<>()).addAll(entry.getValue());
            }
            for (Map.Entry<String, ArrayList<SubstringLiteralCloneClass>> entry : results.cloneClasses.entrySet()) {
                this.cloneClasses.computeIfAbsent(entry.getKey(), path -> new ArrayList<>()).addAll(entry.getValue());
            }
        }

        /**
         * Copies the collected findings into a new {@link ListMap}.
         *
         * @return a new list map containing all findings per uniform path
         */
        public ListMap<String, IndexFinding> buildFindingsAsListMap() {
            ListMap<String, IndexFinding> result = new ListMap<>();
            for (Map.Entry<String, ArrayList<IndexFinding>> findingsInPath : this.findings.entrySet()) {
                result.addAll(findingsInPath.getKey(), findingsInPath.getValue());
            }
            return result;
        }
    }

    /**
     * A clone class as reported by the suffix tree: a set of equally long token ranges of the
     * analyzed word that share the same text.
     */
    private static class SuffixTreeCloneClass {
        /** The complete token sequence in which the clones were detected. */
        private final List<SubstringToken> word;
        /** Index of the first token of each clone within {@link #word}. */
        private final List<Integer> startPositionsInWord;
        /** Number of tokens of each clone. */
        private final int length;
        /** Concatenated text of the tokens of the first clone. */
        private final String clonedString;

        private SuffixTreeCloneClass(List<Integer> startPositionsInWord, int length, List<SubstringToken> word) {
            this.word = word;
            this.startPositionsInWord = startPositionsInWord;
            this.length = length;
            this.clonedString = SuffixTreeCloneClass.buildClonedString(startPositionsInWord, length, word);
        }

        /** Returns the uniform path of the literal that the clone starting at the given position belongs to. */
        private String uniformPathOfCloneAt(int startPosition) {
            return this.word.get(startPosition).originLiteral.uniformPath;
        }

        /** Returns the uniform paths of the literals that contain a clone of this class. */
        private Set<String> getUniformPathsOfClones() {
            Set<String> uniformPaths = new HashSet<>();
            for (int startPosition : this.startPositionsInWord) {
                uniformPaths.add(this.uniformPathOfCloneAt(startPosition));
            }
            return uniformPaths;
        }

        /**
         * Checks whether at least one clone of this class lies in one of the given paths.
         *
         * @param contentChangedFiles the uniform paths to check against
         * @return {@code true} if any clone starts in a literal of one of the given paths
         */
        public boolean hasCloneInUniformPath(Set<String> contentChangedFiles) {
            for (int startPosition : this.startPositionsInWord) {
                if (contentChangedFiles.contains(this.uniformPathOfCloneAt(startPosition))) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Builds one location per clone, spanning from the offset of the clone's first token to the
         * offset of the last character of its last token within the origin literal.
         *
         * @return the locations in the order of the start positions
         */
        public List<TextRegionLocation> computeLocationsForClones() {
            List<TextRegionLocation> locations = new ArrayList<>(this.startPositionsInWord.size());
            for (int startPosition : this.startPositionsInWord) {
                SubstringToken cloneStartToken = this.word.get(startPosition);
                SubstringToken cloneEndToken = this.word.get(startPosition + this.length - 1);
                int endOffsetInLiteral = cloneEndToken.offsetInLiteral + cloneEndToken.tokenText.length() - 1;
                locations.add(StringLiteralSequenceExtractor.buildLocationForOffsetsInLiteral(cloneStartToken.originLiteral, cloneStartToken.offsetInLiteral, endOffsetInLiteral));
            }
            return locations;
        }

        /**
         * Concatenates the token texts of the first clone.
         *
         * @param startPositionsInWord start positions of the clones; only the first one is used
         * @param length number of tokens of the clone
         * @param word the token sequence containing the clones
         * @return the text covered by the first clone
         */
        private static String buildClonedString(List<Integer> startPositionsInWord, int length, List<SubstringToken> word) {
            int firstStart = startPositionsInWord.get(0);
            List<SubstringToken> tokens = word.subList(firstStart, firstStart + length);
            return StringUtils.concat((Iterable)CollectionUtils.map(tokens, token -> token.tokenText), (String)"");
        }

        /**
         * Computes the potential savings of this clone class.
         * <p>
         * Delegates to the shared helper of the enclosing class so that the value reported in the
         * finding is always the one used when filtering clone classes.
         *
         * @return the computed savings; may be negative
         */
        private int computePotentialSavingsOfCloneClass() {
            return CommonSubstringsAnalysisStep.computePotentialSavingsOfCloneClass(this.clonedString, this.startPositionsInWord, this.length, this.word);
        }
    }

    /**
     * One token of a tokenized string literal, or a sentinel separating two literals.
     * <p>
     * Regular tokens are equal if their texts are equal, regardless of where they originate from.
     * A sentinel (a token without text) is only equal to itself.
     */
    private static class SubstringToken {
        /** Text of the token; {@code null} marks a sentinel. */
        private final String tokenText;
        /** Literal this token was cut from; {@code null} for a sentinel. */
        private final StringLiteralSequence originLiteral;
        /** Offset of the first character of the token within the literal text. */
        private final int offsetInLiteral;

        public SubstringToken(String tokenText, int offsetInLiteral, StringLiteralSequence originLiteral) {
            this.tokenText = tokenText;
            this.offsetInLiteral = offsetInLiteral;
            this.originLiteral = originLiteral;
        }

        /** Creates a new sentinel token, i.e. a token without text and without origin literal. */
        public static SubstringToken createSentinelToken() {
            return new SubstringToken(null, 0, null);
        }

        @Override
        public boolean equals(Object o) {
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            if (this.isSentinel()) {
                // Sentinels use identity semantics so that they never match each other.
                return this == o;
            }
            return this.tokenText.equals(((SubstringToken)o).tokenText);
        }

        /** Returns whether this token is a sentinel, i.e. has no text. */
        private boolean isSentinel() {
            return this.tokenText == null;
        }

        @Override
        public int hashCode() {
            if (this.isSentinel()) {
                // Matches the identity semantics of equals(); previously the identity hash was
                // computed but discarded, so all sentinels shared the hash code 0.
                return System.identityHashCode(this);
            }
            return this.tokenText.hashCode();
        }
    }

    /**
     * Clone reporter that collects the clone classes reported by the suffix tree and keeps those
     * that satisfy the configured minimum length and minimum potential savings.
     */
    private class SubStringCloneReporter
    implements ICloneReporter {
        /** The token sequence the suffix tree was built from. */
        private final List<SubstringToken> word;
        /** Start positions of the clone class that is currently being reported. */
        List<Integer> startPositionsInWord;
        /** Length (in tokens) of the clone class that is currently being reported. */
        int length;
        /** All clone classes accepted so far. */
        List<SuffixTreeCloneClass> clonesClasses = new ArrayList<SuffixTreeCloneClass>();

        public SubStringCloneReporter(List<SubstringToken> word) {
            this.word = word;
            this.startPositionsInWord = new ArrayList<Integer>();
            this.length = 0;
        }

        /** Resets the collected start positions and remembers the length of the new clone class. */
        @Override
        public boolean startCloneClass(int normalizedLength, int numberOfClones) {
            this.length = normalizedLength;
            this.startPositionsInWord = new ArrayList<Integer>(numberOfClones);
            return true;
        }

        /** Records the start position of a clone of the current clone class; always returns {@code null}. */
        @Override
        public Clone addClone(int startPosition, int length) {
            this.startPositionsInWord.add(startPosition);
            return null;
        }

        /**
         * Finishes the current clone class and stores it if it passes all filters.
         *
         * @return {@code true} if the clone class was stored, {@code false} if it was discarded
         */
        @Override
        public boolean completeCloneClass() {
            List<Integer> distinctLiteralStarts = SubStringCloneReporter.filterClonesFromSameLiterals(this.startPositionsInWord, this.word);
            this.startPositionsInWord = distinctLiteralStarts;
            if (distinctLiteralStarts.size() < 2) {
                return false;
            }

            String clonedString = SuffixTreeCloneClass.buildClonedString(distinctLiteralStarts, this.length, this.word);
            boolean longEnough = clonedString.length() >= CommonSubstringsAnalysisStep.this.minimumSubstringCloneLength;
            if (!longEnough) {
                return false;
            }

            int potentialSavings = CommonSubstringsAnalysisStep.computePotentialSavingsOfCloneClass(clonedString, distinctLiteralStarts, this.length, this.word);
            if (potentialSavings < CommonSubstringsAnalysisStep.this.minimumPotentialSavings) {
                return false;
            }

            this.clonesClasses.add(new SuffixTreeCloneClass(distinctLiteralStarts, this.length, this.word));
            return true;
        }

        /**
         * Drops start positions whose origin literal equals the origin literal of the previously
         * kept position, after sorting the positions in ascending order.
         *
         * @param startPositionsInWord the start positions of all clones of a clone class
         * @param word the token sequence the positions refer to
         * @return the sorted start positions without consecutive positions from an equal literal
         */
        private static @NonNull List<Integer> filterClonesFromSameLiterals(List<Integer> startPositionsInWord, List<SubstringToken> word) {
            List<Integer> keptStartPositions = new ArrayList<Integer>();
            StringLiteralSequence previousLiteral = null;
            for (int startPosition : CollectionUtils.sort(startPositionsInWord)) {
                StringLiteralSequence literalOfClone = word.get(startPosition).originLiteral;
                boolean sameLiteralAsPrevious = previousLiteral != null && previousLiteral.equals(literalOfClone);
                if (!sameLiteralAsPrevious) {
                    keptStartPositions.add(startPosition);
                    previousLiteral = literalOfClone;
                }
            }
            return keptStartPositions;
        }
    }

    /** Names of the properties that are attached to the findings created by this analysis step. */
    public static final class FindingPropertyNames {
        /** Property holding the number of characters of the cloned string. */
        public static final String CLONE_LENGTH = "Clone Length";
        /** Property holding the number of clones in the clone class. */
        public static final String NUMBER_OF_CLONES = "Number of Clones";
        /** Property holding the computed potential savings of the clone class. */
        public static final String POTENTIAL_SAVINGS = "Potential Savings";
        /** Property holding the cloned string, formatted as source code. */
        public static final String CLONED_CHARS = "Cloned Characters";
        /** Property holding the architecture component name; only set when the name is not empty. */
        public static final String ARCHITECTURE_COMPONENT = "Architecture Component";
    }
}

