aboutsummaryrefslogtreecommitdiff
path: root/include/clang/Analysis/CloneDetection.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/clang/Analysis/CloneDetection.h')
-rw-r--r--include/clang/Analysis/CloneDetection.h288
1 files changed, 288 insertions, 0 deletions
diff --git a/include/clang/Analysis/CloneDetection.h b/include/clang/Analysis/CloneDetection.h
new file mode 100644
index 000000000000..51cad7a96d6f
--- /dev/null
+++ b/include/clang/Analysis/CloneDetection.h
@@ -0,0 +1,288 @@
+//===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// /file
+/// This file defines classes for searching and anlyzing source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
+#define LLVM_CLANG_AST_CLONEDETECTION_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringMap.h"
+
+#include <vector>
+
+namespace clang {
+
+class Stmt;
+class Decl;
+class VarDecl;
+class ASTContext;
+class CompoundStmt;
+
+/// \brief Identifies a list of statements.
+///
+/// Can either identify a single arbitrary Stmt object, a continuous sequence of
+/// child statements inside a CompoundStmt or no statements at all.
+class StmtSequence {
+ /// If this object identifies a sequence of statements inside a CompoundStmt,
+ /// S points to this CompoundStmt. If this object only identifies a single
+ /// Stmt, then S is a pointer to this Stmt.
+ const Stmt *S;
+
+ /// The related ASTContext for S.
+ ASTContext *Context;
+
+ /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
+ /// instance is representing the CompoundStmt children inside the array
+ /// [StartIndex, EndIndex).
+ unsigned StartIndex;
+ unsigned EndIndex;
+
+public:
+ /// \brief Constructs a StmtSequence holding multiple statements.
+ ///
+ /// The resulting StmtSequence identifies a continuous sequence of statements
+ /// in the body of the given CompoundStmt. Which statements of the body should
+ /// be identified needs to be specified by providing a start and end index
+ /// that describe a non-empty sub-array in the body of the given CompoundStmt.
+ ///
+ /// \param Stmt A CompoundStmt that contains all statements in its body.
+ /// \param Context The ASTContext for the given CompoundStmt.
+ /// \param StartIndex The inclusive start index in the children array of
+ /// \p Stmt
+ /// \param EndIndex The exclusive end index in the children array of \p Stmt.
+ StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
+ unsigned StartIndex, unsigned EndIndex);
+
+ /// \brief Constructs a StmtSequence holding a single statement.
+ ///
+ /// \param Stmt An arbitrary Stmt.
+ /// \param Context The ASTContext for the given Stmt.
+ StmtSequence(const Stmt *Stmt, ASTContext &Context);
+
+ /// \brief Constructs an empty StmtSequence.
+ StmtSequence();
+
+ typedef const Stmt *const *iterator;
+
+ /// Returns an iterator pointing to the first statement in this sequence.
+ iterator begin() const;
+
+ /// Returns an iterator pointing behind the last statement in this sequence.
+ iterator end() const;
+
+ /// Returns the first statement in this sequence.
+ ///
+ /// This method should only be called on a non-empty StmtSequence object.
+ const Stmt *front() const {
+ assert(!empty());
+ return begin()[0];
+ }
+
+ /// Returns the last statement in this sequence.
+ ///
+ /// This method should only be called on a non-empty StmtSequence object.
+ const Stmt *back() const {
+ assert(!empty());
+ return begin()[size() - 1];
+ }
+
+ /// Returns the number of statements this object holds.
+ unsigned size() const {
+ if (holdsSequence())
+ return EndIndex - StartIndex;
+ if (S == nullptr)
+ return 0;
+ return 1;
+ }
+
+ /// Returns true if and only if this StmtSequence contains no statements.
+ bool empty() const { return size() == 0; }
+
+ /// Returns the related ASTContext for the stored Stmts.
+ ASTContext &getASTContext() const {
+ assert(Context);
+ return *Context;
+ }
+
+ /// Returns true if this objects holds a list of statements.
+ bool holdsSequence() const { return EndIndex != 0; }
+
+ /// Returns the start sourcelocation of the first statement in this sequence.
+ ///
+ /// This method should only be called on a non-empty StmtSequence object.
+ SourceLocation getStartLoc() const;
+
+ /// Returns the end sourcelocation of the last statement in this sequence.
+ ///
+ /// This method should only be called on a non-empty StmtSequence object.
+ SourceLocation getEndLoc() const;
+
+ /// Returns the source range of the whole sequence - from the beginning
+ /// of the first statement to the end of the last statement.
+ SourceRange getSourceRange() const;
+
+ bool operator==(const StmtSequence &Other) const {
+ return std::tie(S, StartIndex, EndIndex) ==
+ std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+ }
+
+ bool operator!=(const StmtSequence &Other) const {
+ return std::tie(S, StartIndex, EndIndex) !=
+ std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+ }
+
+ /// Returns true if and only if this sequence covers a source range that
+ /// contains the source range of the given sequence \p Other.
+ ///
+ /// This method should only be called on a non-empty StmtSequence object
+ /// and passed a non-empty StmtSequence object.
+ bool contains(const StmtSequence &Other) const;
+};
+
+/// \brief Searches for clones in source code.
+///
+/// First, this class needs a translation unit which is passed via
+/// \p analyzeTranslationUnit . It will then generate and store search data
+/// for all statements inside the given translation unit.
+/// Afterwards the generated data can be used to find code clones by calling
+/// \p findClones .
+///
+/// This class only searches for clones in exectuable source code
+/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
+/// are not supported.
+class CloneDetector {
+public:
+ typedef unsigned DataPiece;
+
+ /// Holds the data about a StmtSequence that is needed during the search for
+ /// code clones.
+ struct CloneSignature {
+ /// \brief The hash code of the StmtSequence.
+ ///
+ /// The initial clone groups that are formed during the search for clones
+ /// consist only of Sequences that share the same hash code. This makes this
+ /// value the central part of this heuristic that is needed to find clones
+ /// in a performant way. For this to work, the type of this variable
+ /// always needs to be small and fast to compare.
+ ///
+ /// Also, StmtSequences that are clones of each others have to share
+ /// the same hash code. StmtSequences that are not clones of each other
+ /// shouldn't share the same hash code, but if they do, it will only
+ /// degrade the performance of the hash search but doesn't influence
+ /// the correctness of the result.
+ size_t Hash;
+
+ /// \brief The complexity of the StmtSequence.
+ ///
+ /// This value gives an approximation on how many direct or indirect child
+ /// statements are contained in the related StmtSequence. In general, the
+ /// greater this value, the greater the amount of statements. However, this
+ /// is only an approximation and the actual amount of statements can be
+ /// higher or lower than this value. Statements that are generated by the
+ /// compiler (e.g. macro expansions) for example barely influence the
+ /// complexity value.
+ ///
+ /// The main purpose of this value is to filter clones that are too small
+ /// and therefore probably not interesting enough for the user.
+ unsigned Complexity;
+
+ /// \brief Creates an empty CloneSignature without any data.
+ CloneSignature() : Complexity(1) {}
+
+ CloneSignature(llvm::hash_code Hash, unsigned Complexity)
+ : Hash(Hash), Complexity(Complexity) {}
+ };
+
+ /// Holds group of StmtSequences that are clones of each other and the
+ /// complexity value (see CloneSignature::Complexity) that all stored
+ /// StmtSequences have in common.
+ struct CloneGroup {
+ std::vector<StmtSequence> Sequences;
+ CloneSignature Signature;
+
+ CloneGroup() {}
+
+ CloneGroup(const StmtSequence &Seq, CloneSignature Signature)
+ : Signature(Signature) {
+ Sequences.push_back(Seq);
+ }
+
+ /// \brief Returns false if and only if this group should be skipped when
+ /// searching for clones.
+ bool isValid() const {
+ // A clone group with only one member makes no sense, so we skip them.
+ return Sequences.size() > 1;
+ }
+ };
+
+ /// \brief Generates and stores search data for all statements in the body of
+ /// the given Decl.
+ void analyzeCodeBody(const Decl *D);
+
+ /// \brief Stores the CloneSignature to allow future querying.
+ void add(const StmtSequence &S, const CloneSignature &Signature);
+
+ /// \brief Searches the provided statements for clones.
+ ///
+ /// \param Result Output parameter that is filled with a list of found
+ /// clone groups. Each group contains multiple StmtSequences
+ /// that were identified to be clones of each other.
+ /// \param MinGroupComplexity Only return clones which have at least this
+ /// complexity value.
+ /// \param CheckPatterns Returns only clone groups in which the referenced
+ /// variables follow the same pattern.
+ void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity,
+ bool CheckPatterns = true);
+
+ /// \brief Describes two clones that reference their variables in a different
+ /// pattern which could indicate a programming error.
+ struct SuspiciousClonePair {
+ /// \brief Utility class holding the relevant information about a single
+ /// clone in this pair.
+ struct SuspiciousCloneInfo {
+ /// The variable which referencing in this clone was against the pattern.
+ const VarDecl *Variable;
+ /// Where the variable was referenced.
+ const Stmt *Mention;
+ /// The variable that should have been referenced to follow the pattern.
+ /// If Suggestion is a nullptr then it's not possible to fix the pattern
+ /// by referencing a different variable in this clone.
+ const VarDecl *Suggestion;
+ SuspiciousCloneInfo(const VarDecl *Variable, const Stmt *Mention,
+ const VarDecl *Suggestion)
+ : Variable(Variable), Mention(Mention), Suggestion(Suggestion) {}
+ SuspiciousCloneInfo() {}
+ };
+ /// The first clone in the pair which always has a suggested variable.
+ SuspiciousCloneInfo FirstCloneInfo;
+ /// This other clone in the pair which can have a suggested variable.
+ SuspiciousCloneInfo SecondCloneInfo;
+ };
+
+ /// \brief Searches the provided statements for pairs of clones that don't
+ /// follow the same pattern when referencing variables.
+ /// \param Result Output parameter that will contain the clone pairs.
+ /// \param MinGroupComplexity Only clone pairs in which the clones have at
+ /// least this complexity value.
+ void findSuspiciousClones(std::vector<SuspiciousClonePair> &Result,
+ unsigned MinGroupComplexity);
+
+private:
+ /// Stores all encountered StmtSequences alongside their CloneSignature.
+ std::vector<std::pair<CloneSignature, StmtSequence>> Sequences;
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_AST_CLONEDETECTION_H