diff options
2272 files changed, 97497 insertions, 30871 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 75b80757038d..4a8d3339df77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -402,6 +402,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) FILES_MATCHING PATTERN "CMakeFiles" EXCLUDE PATTERN "*.inc" + PATTERN "*.h" ) endif() @@ -440,6 +441,11 @@ if(CLANG_ENABLE_STATIC_ANALYZER) add_definitions(-DCLANG_ENABLE_STATIC_ANALYZER) endif() +set(OPENMP_DEFAULT_LIB "" CACHE STRING "OpenMP library used by default for -fopenmp.") +if(OPENMP_DEFAULT_LIB) + add_definitions(-DOPENMP_DEFAULT_LIB=${OPENMP_DEFAULT_LIB}) +endif() + # Clang version information set(CLANG_EXECUTABLE_VERSION "${CLANG_VERSION_MAJOR}.${CLANG_VERSION_MINOR}" CACHE STRING diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index b58014fee1aa..905303fe11c2 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -12,6 +12,10 @@ N: Aaron Ballman E: aaron@aaronballman.com D: Clang attributes +N: Alexey Bataev +E: a.bataev@hotmail.com +D: OpenMP support + N: Chandler Carruth E: chandlerc@gmail.com E: chandlerc@google.com @@ -23,7 +27,11 @@ D: Debug Information, autotools/configure/make build, inline assembly N: Doug Gregor E: dgregor@apple.com -D: All parts of Clang not covered by someone else +D: Emeritus owner + +N: Reid Kleckner +E: rnk@google.com +D: Microsoft C++ ABI compatibility and general Windows support N: Anton Korobeynikov E: anton@korobeynikov.info @@ -43,4 +51,4 @@ D: Compiler driver N: Richard Smith E: richard@metafoo.co.uk -D: Clang Semantic Analysis (tools/clang/lib/Sema/* tools/clang/include/clang/Sema/*) +D: All parts of Clang not covered by someone else diff --git a/LICENSE.TXT b/LICENSE.TXT index 3b1153db44e8..fc4afae584b0 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -4,7 +4,7 @@ LLVM Release License University of Illinois/NCSA Open Source License -Copyright (c) 2007-2014 University of Illinois at Urbana-Champaign. +Copyright (c) 2007-2015 University of Illinois at Urbana-Champaign. All rights reserved. 
Developed by: diff --git a/bindings/python/clang/cindex.py b/bindings/python/clang/cindex.py index 5792effea59b..f5caca8572cb 100644 --- a/bindings/python/clang/cindex.py +++ b/bindings/python/clang/cindex.py @@ -1476,6 +1476,18 @@ class Cursor(Structure): """ return TokenGroup.get_tokens(self._tu, self.extent) + def get_field_offsetof(self): + """Returns the offsetof the FIELD_DECL pointed by this Cursor.""" + return conf.lib.clang_Cursor_getOffsetOfField(self) + + def is_anonymous(self): + """ + Check if the record is anonymous. + """ + if self.kind == CursorKind.FIELD_DECL: + return self.type.get_declaration().is_anonymous() + return conf.lib.clang_Cursor_isAnonymous(self) + def is_bitfield(self): """ Check if the field is a bitfield. @@ -1884,6 +1896,21 @@ class Type(Structure): return RefQualifierKind.from_id( conf.lib.clang_Type_getCXXRefQualifier(self)) + def get_fields(self): + """Return an iterator for accessing the fields of this type.""" + + def visitor(field, children): + assert field != conf.lib.clang_getNullCursor() + + # Create reference to TU so it isn't GC'd before Cursor. + field._tu = self._tu + fields.append(field) + return 1 # continue + fields = [] + conf.lib.clang_Type_visitFields(self, + callbacks['fields_visit'](visitor), fields) + return iter(fields) + @property def spelling(self): """Retrieve the spelling of this Type.""" @@ -2780,6 +2807,7 @@ class Token(Structure): callbacks['translation_unit_includes'] = CFUNCTYPE(None, c_object_p, POINTER(SourceLocation), c_uint, py_object) callbacks['cursor_visit'] = CFUNCTYPE(c_int, Cursor, Cursor, py_object) +callbacks['fields_visit'] = CFUNCTYPE(c_int, Cursor, py_object) # Functions strictly alphabetical order. 
functionList = [ @@ -3367,6 +3395,10 @@ functionList = [ [Cursor, c_uint], c_ulonglong), + ("clang_Cursor_isAnonymous", + [Cursor], + bool), + ("clang_Cursor_isBitField", [Cursor], bool), @@ -3381,6 +3413,10 @@ functionList = [ _CXString, _CXString.from_result), + ("clang_Cursor_getOffsetOfField", + [Cursor], + c_longlong), + ("clang_Type_getAlignOf", [Type], c_longlong), @@ -3401,6 +3437,10 @@ functionList = [ ("clang_Type_getCXXRefQualifier", [Type], c_uint), + + ("clang_Type_visitFields", + [Type, callbacks['fields_visit'], py_object], + c_uint), ] class LibclangError(Exception): diff --git a/bindings/python/tests/cindex/test_type.py b/bindings/python/tests/cindex/test_type.py index a02c06fe5a13..f3dadf999bd8 100644 --- a/bindings/python/tests/cindex/test_type.py +++ b/bindings/python/tests/cindex/test_type.py @@ -363,6 +363,7 @@ def test_offset(): """Ensure Cursor.get_record_field_offset works in anonymous records""" source=""" struct Test { + struct {int a;} typeanon; struct { int bariton; union { @@ -371,15 +372,23 @@ struct Test { }; int bar; };""" - tries=[(['-target','i386-linux-gnu'],(4,16,0,32,64)), - (['-target','nvptx64-unknown-unknown'],(8,24,0,32,64)), - (['-target','i386-pc-win32'],(8,16,0,32,64)), - (['-target','msp430-none-none'],(2,14,0,32,64))] + tries=[(['-target','i386-linux-gnu'],(4,16,0,32,64,96)), + (['-target','nvptx64-unknown-unknown'],(8,24,0,32,64,96)), + (['-target','i386-pc-win32'],(8,16,0,32,64,96)), + (['-target','msp430-none-none'],(2,14,0,32,64,96))] for flags, values in tries: - align,total,bariton,foo,bar = values + align,total,f1,bariton,foo,bar = values tu = get_tu(source) teststruct = get_cursor(tu, 'Test') - fields = list(teststruct.get_children()) + children = list(teststruct.get_children()) + fields = list(teststruct.type.get_fields()) + assert children[0].kind == CursorKind.STRUCT_DECL + assert children[0].spelling != "typeanon" + assert children[1].spelling == "typeanon" + assert fields[0].kind == CursorKind.FIELD_DECL + 
assert fields[1].kind == CursorKind.FIELD_DECL + assert fields[1].is_anonymous() + assert teststruct.type.get_offset("typeanon") == f1 assert teststruct.type.get_offset("bariton") == bariton assert teststruct.type.get_offset("foo") == foo assert teststruct.type.get_offset("bar") == bar diff --git a/docs/AddressSanitizer.rst b/docs/AddressSanitizer.rst index cbdd7c65e847..617543334d05 100644 --- a/docs/AddressSanitizer.rst +++ b/docs/AddressSanitizer.rst @@ -23,8 +23,7 @@ Typical slowdown introduced by AddressSanitizer is **2x**. How to build ============ -Follow the `clang build instructions <../get_started.html>`_. CMake build is -supported. +Build LLVM/Clang with `CMake <http://llvm.org/docs/CMake.html>`_. Usage ===== diff --git a/docs/AutomaticReferenceCounting.rst b/docs/AutomaticReferenceCounting.rst index 1457b6082d15..2faed2379164 100644 --- a/docs/AutomaticReferenceCounting.rst +++ b/docs/AutomaticReferenceCounting.rst @@ -594,7 +594,9 @@ retainable pointer type <arc.misc.c-retainable>` and it is: * a message send, and the declared method either has the ``cf_returns_not_retained`` attribute or it has neither the ``cf_returns_retained`` attribute nor a :ref:`selector family - <arc.method-families>` that implies a retained result. + <arc.method-families>` that implies a retained result, or +* :when-revised:`[beginning LLVM 3.6]` :revision:`a load from a` ``const`` + :revision:`non-system global variable.` An expression is :arc-term:`known retained` if it is an rvalue of :ref:`C retainable pointer type <arc.misc.c-retainable>` and it is: @@ -631,6 +633,12 @@ retain-agnostic, the conversion is treated as a ``__bridge`` cast. to an ObjC-typed local, and then calling ``CFRelease`` when done --- are a bit too likely to be accidentally accepted, leading to mysterious behavior. 
+ For loads from ``const`` global variables of :ref:`C retainable pointer type + <arc.misc.c-retainable>`, it is reasonable to assume that global system + constants were initialized with true constants (e.g. string literals), but + user constants might have been initialized with something dynamically + allocated, using a global initializer. + .. _arc.objects.restrictions.conversion-exception-contextual: Conversion from retainable object pointer type in certain contexts diff --git a/docs/ClangFormatStyleOptions.rst b/docs/ClangFormatStyleOptions.rst index ce6fae19c09b..c06a8c76ecd5 100644 --- a/docs/ClangFormatStyleOptions.rst +++ b/docs/ClangFormatStyleOptions.rst @@ -155,10 +155,23 @@ the configuration (without a prefix: ``Auto``). This applies to round brackets (parentheses), angle brackets and square brackets. This will result in formattings like - \code - someLongFunction(argument1, - argument2); - \endcode + + .. code-block:: c++ + + someLongFunction(argument1, + argument2); + +**AlignConsecutiveAssignments** (``bool``) + If ``true``, aligns consecutive assignments. + + This will align the assignment operators of consecutive lines. This + will result in formattings like + + .. code-block:: c++ + + int aaaa = 12; + int b = 23; + int ccc = 23; **AlignEscapedNewlinesLeft** (``bool``) If ``true``, aligns escaped newlines as far left as possible. @@ -330,10 +343,11 @@ the configuration (without a prefix: ``Auto``). instead of as function calls. These are expected to be macros of the form: - \code - FOREACH(<variable-declaration>, ...) - <loop-body> - \endcode + + .. code-block:: c++ + + FOREACH(<variable-declaration>, ...) + <loop-body> For example: BOOST_FOREACH. diff --git a/docs/ControlFlowIntegrity.rst b/docs/ControlFlowIntegrity.rst new file mode 100644 index 000000000000..915385b7b197 --- /dev/null +++ b/docs/ControlFlowIntegrity.rst @@ -0,0 +1,145 @@ +====================== +Control Flow Integrity +====================== + +.. 
toctree:: + :hidden: + + ControlFlowIntegrityDesign + +.. contents:: + :local: + +Introduction +============ + +Clang includes an implementation of a number of control flow integrity (CFI) +schemes, which are designed to abort the program upon detecting certain forms +of undefined behavior that can potentially allow attackers to subvert the +program's control flow. These schemes have been optimized for performance, +allowing developers to enable them in release builds. + +To enable Clang's available CFI schemes, use the flag ``-fsanitize=cfi``. +As currently implemented, CFI relies on link-time optimization (LTO); the CFI +schemes imply ``-flto``, and the linker used must support LTO, for example +via the `gold plugin`_. To allow the checks to be implemented efficiently, +the program must be structured such that certain object files are compiled +with CFI enabled, and are statically linked into the program. This may +preclude the use of shared libraries in some cases. + +Clang currently implements forward-edge CFI for member function calls and +bad cast checking. More schemes are under development. + +.. _gold plugin: http://llvm.org/docs/GoldPlugin.html + +Forward-Edge CFI for Virtual Calls +---------------------------------- + +This scheme checks that virtual calls take place using a vptr of the correct +dynamic type; that is, the dynamic type of the called object must be a +derived class of the static type of the object used to make the call. +This CFI scheme can be enabled on its own using ``-fsanitize=cfi-vcall``. + +For this scheme to work, all translation units containing the definition +of a virtual member function (whether inline or not) must be compiled +with ``-fsanitize=cfi-vcall`` enabled and be statically linked into the +program. Classes in the C++ standard library (under namespace ``std``) are +exempted from checking, and therefore programs may be linked against a +pre-built standard library, but this may change in the future. 
+ +Performance +~~~~~~~~~~~ + +A performance overhead of less than 1% has been measured by running the +Dromaeo benchmark suite against an instrumented version of the Chromium +web browser. Another good performance benchmark for this mechanism is the +virtual-call-heavy SPEC 2006 xalancbmk. + +Note that this scheme has not yet been optimized for binary size; an increase +of up to 15% has been observed for Chromium. + +Bad Cast Checking +----------------- + +This scheme checks that pointer casts are made to an object of the correct +dynamic type; that is, the dynamic type of the object must be a derived class +of the pointee type of the cast. The checks are currently only introduced +where the class being casted to is a polymorphic class. + +Bad casts are not in themselves control flow integrity violations, but they +can also create security vulnerabilities, and the implementation uses many +of the same mechanisms. + +There are two types of bad cast that may be forbidden: bad casts +from a base class to a derived class (which can be checked with +``-fsanitize=cfi-derived-cast``), and bad casts from a pointer of +type ``void*`` or another unrelated type (which can be checked with +``-fsanitize=cfi-unrelated-cast``). + +The difference between these two types of casts is that the first is defined +by the C++ standard to produce an undefined value, while the second is not +in itself undefined behavior (it is well defined to cast the pointer back +to its original type). + +If a program as a matter of policy forbids the second type of cast, that +restriction can normally be enforced. However it may in some cases be necessary +for a function to perform a forbidden cast to conform with an external API +(e.g. the ``allocate`` member function of a standard library allocator). Such +functions may be blacklisted using a :doc:`SanitizerSpecialCaseList`. 
+ +For this scheme to work, all translation units containing the definition +of a virtual member function (whether inline or not) must be compiled with +``-fsanitize=cfi-derived-cast`` or ``-fsanitize=cfi-unrelated-cast`` enabled +and be statically linked into the program. Classes in the C++ standard library +(under namespace ``std``) are exempted from checking, and therefore programs +may be linked against a pre-built standard library, but this may change in +the future. + +Non-Virtual Member Function Call Checking +----------------------------------------- + +This scheme checks that non-virtual calls take place using an object of +the correct dynamic type; that is, the dynamic type of the called object +must be a derived class of the static type of the object used to make the +call. The checks are currently only introduced where the object is of a +polymorphic class type. This CFI scheme can be enabled on its own using +``-fsanitize=cfi-nvcall``. + +For this scheme to work, all translation units containing the definition +of a virtual member function (whether inline or not) must be compiled +with ``-fsanitize=cfi-nvcall`` enabled and be statically linked into the +program. Classes in the C++ standard library (under namespace ``std``) are +exempted from checking, and therefore programs may be linked against a +pre-built standard library, but this may change in the future. + +.. _cfi-strictness: + +Strictness +~~~~~~~~~~ + +If a class has a single non-virtual base and does not introduce or override +virtual member functions or fields other than an implicitly defined virtual +destructor, it will have the same layout and virtual function semantics as +its base. By default, casts to such classes are checked as if they were made +to the least derived such class. 
+ +Casting an instance of a base class to such a derived class is technically +undefined behavior, but it is a relatively common hack for introducing +member functions on class instances with specific properties that works under +most compilers and should not have security implications, so we allow it by +default. It can be disabled with ``-fsanitize=cfi-cast-strict``. + +Design +------ + +Please refer to the :doc:`design document<ControlFlowIntegrityDesign>`. + +Publications +------------ + +`Control-Flow Integrity: Principles, Implementations, and Applications <http://research.microsoft.com/pubs/64250/ccs05.pdf>`_. +Martin Abadi, Mihai Budiu, Úlfar Erlingsson, Jay Ligatti. + +`Enforcing Forward-Edge Control-Flow Integrity in GCC & LLVM <http://www.pcc.me.uk/~peter/acad/usenix14.pdf>`_. +Caroline Tice, Tom Roeder, Peter Collingbourne, Stephen Checkoway, +Úlfar Erlingsson, Luis Lozano, Geoff Pike. diff --git a/docs/ControlFlowIntegrityDesign.rst b/docs/ControlFlowIntegrityDesign.rst new file mode 100644 index 000000000000..89aa038d003e --- /dev/null +++ b/docs/ControlFlowIntegrityDesign.rst @@ -0,0 +1,275 @@ +=========================================== +Control Flow Integrity Design Documentation +=========================================== + +This page documents the design of the :doc:`ControlFlowIntegrity` schemes +supported by Clang. + +Forward-Edge CFI for Virtual Calls +================================== + +This scheme works by allocating, for each static type used to make a virtual +call, a region of read-only storage in the object file holding a bit vector +that maps onto the region of storage used for those virtual tables. Each +set bit in the bit vector corresponds to the `address point`_ for a virtual +table compatible with the static type for which the bit vector is being built. + +For example, consider the following three C++ classes: + +.. 
code-block:: c++ + + struct A { + virtual void f1(); + virtual void f2(); + virtual void f3(); + }; + + struct B : A { + virtual void f1(); + virtual void f2(); + virtual void f3(); + }; + + struct C : A { + virtual void f1(); + virtual void f2(); + virtual void f3(); + }; + +The scheme will cause the virtual tables for A, B and C to be laid out +consecutively: + +.. csv-table:: Virtual Table Layout for A, B, C + :header: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + + A::offset-to-top, &A::rtti, &A::f1, &A::f2, &A::f3, B::offset-to-top, &B::rtti, &B::f1, &B::f2, &B::f3, C::offset-to-top, &C::rtti, &C::f1, &C::f2, &C::f3 + +The bit vector for static types A, B and C will look like this: + +.. csv-table:: Bit Vectors for A, B, C + :header: Class, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + + A, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 + B, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 + C, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 + +Bit vectors are represented in the object file as byte arrays. By loading +from indexed offsets into the byte array and applying a mask, a program can +test bits from the bit set with a relatively short instruction sequence. Bit +vectors may overlap so long as they use different bits. For the full details, +see the `ByteArrayBuilder`_ class. + +In this case, assuming A is laid out at offset 0 in bit 0, B at offset 0 in +bit 1 and C at offset 0 in bit 2, the byte array would look like this: + +.. code-block:: c++ + + char bits[] = { 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 5, 0, 0 }; + +To emit a virtual call, the compiler will assemble code that checks that +the object's virtual table pointer is in-bounds and aligned and that the +relevant bit is set in the bit vector. + +For example on x86 a typical virtual call may look like this: + +.. 
code-block:: none + + ca7fbb: 48 8b 0f mov (%rdi),%rcx + ca7fbe: 48 8d 15 c3 42 fb 07 lea 0x7fb42c3(%rip),%rdx + ca7fc5: 48 89 c8 mov %rcx,%rax + ca7fc8: 48 29 d0 sub %rdx,%rax + ca7fcb: 48 c1 c0 3d rol $0x3d,%rax + ca7fcf: 48 3d 7f 01 00 00 cmp $0x17f,%rax + ca7fd5: 0f 87 36 05 00 00 ja ca8511 + ca7fdb: 48 8d 15 c0 0b f7 06 lea 0x6f70bc0(%rip),%rdx + ca7fe2: f6 04 10 10 testb $0x10,(%rax,%rdx,1) + ca7fe6: 0f 84 25 05 00 00 je ca8511 + ca7fec: ff 91 98 00 00 00 callq *0x98(%rcx) + [...] + ca8511: 0f 0b ud2 + +The compiler relies on co-operation from the linker in order to assemble +the bit vectors for the whole program. It currently does this using LLVM's +`bit sets`_ mechanism together with link-time optimization. + +.. _address point: https://mentorembedded.github.io/cxx-abi/abi.html#vtable-general +.. _bit sets: http://llvm.org/docs/BitSets.html +.. _ByteArrayBuilder: http://llvm.org/docs/doxygen/html/structllvm_1_1ByteArrayBuilder.html + +Optimizations +------------- + +The scheme as described above is the fully general variant of the scheme. +Most of the time we are able to apply one or more of the following +optimizations to improve binary size or performance. + +In fact, if you try the above example with the current version of the +compiler, you will probably find that it will not use the described virtual +table layout or machine instructions. Some of the optimizations we are about +to introduce cause the compiler to use a different layout or a different +sequence of machine instructions. + +Stripping Leading/Trailing Zeros in Bit Vectors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a bit vector contains leading or trailing zeros, we can strip them from +the vector. The compiler will emit code to check if the pointer is in range +of the region covered by ones, and perform the bit vector check using a +truncated version of the bit vector. For example, the bit vectors for our +example class hierarchy will be emitted like this: + +.. 
csv-table:: Bit Vectors for A, B, C + :header: Class, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + + A, , , 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, , + B, , , , , , , , 1, , , , , , , + C, , , , , , , , , , , , , 1, , + +Short Inline Bit Vectors +~~~~~~~~~~~~~~~~~~~~~~~~ + +If the vector is sufficiently short, we can represent it as an inline constant +on x86. This saves us a few instructions when reading the correct element +of the bit vector. + +If the bit vector fits in 32 bits, the code looks like this: + +.. code-block:: none + + dc2: 48 8b 03 mov (%rbx),%rax + dc5: 48 8d 15 14 1e 00 00 lea 0x1e14(%rip),%rdx + dcc: 48 89 c1 mov %rax,%rcx + dcf: 48 29 d1 sub %rdx,%rcx + dd2: 48 c1 c1 3d rol $0x3d,%rcx + dd6: 48 83 f9 03 cmp $0x3,%rcx + dda: 77 2f ja e0b <main+0x9b> + ddc: ba 09 00 00 00 mov $0x9,%edx + de1: 0f a3 ca bt %ecx,%edx + de4: 73 25 jae e0b <main+0x9b> + de6: 48 89 df mov %rbx,%rdi + de9: ff 10 callq *(%rax) + [...] + e0b: 0f 0b ud2 + +Or if the bit vector fits in 64 bits: + +.. code-block:: none + + 11a6: 48 8b 03 mov (%rbx),%rax + 11a9: 48 8d 15 d0 28 00 00 lea 0x28d0(%rip),%rdx + 11b0: 48 89 c1 mov %rax,%rcx + 11b3: 48 29 d1 sub %rdx,%rcx + 11b6: 48 c1 c1 3d rol $0x3d,%rcx + 11ba: 48 83 f9 2a cmp $0x2a,%rcx + 11be: 77 35 ja 11f5 <main+0xb5> + 11c0: 48 ba 09 00 00 00 00 movabs $0x40000000009,%rdx + 11c7: 04 00 00 + 11ca: 48 0f a3 ca bt %rcx,%rdx + 11ce: 73 25 jae 11f5 <main+0xb5> + 11d0: 48 89 df mov %rbx,%rdi + 11d3: ff 10 callq *(%rax) + [...] + 11f5: 0f 0b ud2 + +If the bit vector consists of a single bit, there is only one possible +virtual table, and the check can consist of a single equality comparison: + +.. code-block:: none + + 9a2: 48 8b 03 mov (%rbx),%rax + 9a5: 48 8d 0d a4 13 00 00 lea 0x13a4(%rip),%rcx + 9ac: 48 39 c8 cmp %rcx,%rax + 9af: 75 25 jne 9d6 <main+0x86> + 9b1: 48 89 df mov %rbx,%rdi + 9b4: ff 10 callq *(%rax) + [...] 
+ 9d6: 0f 0b ud2 + +Virtual Table Layout +~~~~~~~~~~~~~~~~~~~~ + +The compiler lays out classes of disjoint hierarchies in separate regions +of the object file. At worst, bit vectors in disjoint hierarchies only +need to cover their disjoint hierarchy. But the closer that classes in +sub-hierarchies are laid out to each other, the smaller the bit vectors for +those sub-hierarchies need to be (see "Stripping Leading/Trailing Zeros in Bit +Vectors" above). The `GlobalLayoutBuilder`_ class is responsible for laying +out the globals efficiently to minimize the sizes of the underlying bitsets. + +.. _GlobalLayoutBuilder: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/LowerBitSets.h?view=markup + +Alignment +~~~~~~~~~ + +If all gaps between address points in a particular bit vector are multiples +of powers of 2, the compiler can compress the bit vector by strengthening +the alignment requirements of the virtual table pointer. For example, given +this class hierarchy: + +.. code-block:: c++ + + struct A { + virtual void f1(); + virtual void f2(); + }; + + struct B : A { + virtual void f1(); + virtual void f2(); + virtual void f3(); + virtual void f4(); + virtual void f5(); + virtual void f6(); + }; + + struct C : A { + virtual void f1(); + virtual void f2(); + }; + +The virtual tables will be laid out like this: + +.. csv-table:: Virtual Table Layout for A, B, C + :header: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + + A::offset-to-top, &A::rtti, &A::f1, &A::f2, B::offset-to-top, &B::rtti, &B::f1, &B::f2, &B::f3, &B::f4, &B::f5, &B::f6, C::offset-to-top, &C::rtti, &C::f1, &C::f2 + +Notice that each address point for A is separated by 4 words. This lets us +emit a compressed bit vector for A that looks like this: + +.. csv-table:: + :header: 2, 6, 10, 14 + + 1, 1, 0, 1 + +At call sites, the compiler will strengthen the alignment requirements by +using a different rotate count. 
For example, on a 64-bit machine where the +address points are 4-word aligned (as in A from our example), the ``rol`` +instruction may look like this: + +.. code-block:: none + + dd2: 48 c1 c1 3b rol $0x3b,%rcx + +Padding to Powers of 2 +~~~~~~~~~~~~~~~~~~~~~~ + +Of course, this alignment scheme works best if the address points are +in fact aligned correctly. To make this more likely to happen, we insert +padding between virtual tables that in many cases aligns address points to +a power of 2. Specifically, our padding aligns virtual tables to the next +highest power of 2 bytes; because address points for specific base classes +normally appear at fixed offsets within the virtual table, this normally +has the effect of aligning the address points as well. + +This scheme introduces tradeoffs between decreased space overhead for +instructions and bit vectors and increased overhead in the form of padding. We +therefore limit the amount of padding so that we align to no more than 128 +bytes. This number was found experimentally to provide a good tradeoff. + +Eliminating Bit Vector Checks for All-Ones Bit Vectors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the bit vector is all ones, the bit vector check is redundant; we simply +need to check that the address is in range and well aligned. This is more +likely to occur if the virtual tables are padded. diff --git a/docs/InternalsManual.rst b/docs/InternalsManual.rst index 502cae44b336..7f2a8fafccc0 100644 --- a/docs/InternalsManual.rst +++ b/docs/InternalsManual.rst @@ -1323,11 +1323,13 @@ range of iterators over declarations of "``f``". ``DeclContext`` manages multiply-defined declaration contexts internally. The function ``DeclContext::getPrimaryContext`` retrieves the "primary" context for a given ``DeclContext`` instance, which is the ``DeclContext`` responsible for -maintaining the lookup table used for the semantics-centric view. 
Given the -primary context, one can follow the chain of ``DeclContext`` nodes that define -additional declarations via ``DeclContext::getNextContext``. Note that these -functions are used internally within the lookup and insertion methods of the -``DeclContext``, so the vast majority of clients can ignore them. +maintaining the lookup table used for the semantics-centric view. Given a +DeclContext, one can obtain the set of declaration contexts that are semantically +connected to this declaration context, in source order, including this context +(which will be the only result, for non-namespace contexts) via +``DeclContext::collectAllContexts``. Note that these functions are used +internally within the lookup and insertion methods of the ``DeclContext``, so +the vast majority of clients can ignore them. .. _CFG: @@ -1619,192 +1621,304 @@ How to change Clang How to add an attribute ----------------------- +Attributes are a form of metadata that can be attached to a program construct, +allowing the programmer to pass semantic information along to the compiler for +various uses. For example, attributes may be used to alter the code generation +for a program construct, or to provide extra semantic information for static +analysis. This document explains how to add a custom attribute to Clang. +Documentation on existing attributes can be found `here +<//clang.llvm.org/docs/AttributeReference.html>`_. Attribute Basics ^^^^^^^^^^^^^^^^ - -Attributes in clang come in two forms: parsed form, and semantic form. Both -forms are represented via a tablegen definition of the attribute, specified in -Attr.td. +Attributes in Clang are handled in three stages: parsing into a parsed attribute +representation, conversion from a parsed attribute into a semantic attribute, +and then the semantic handling of the attribute. 
+ +Parsing of the attribute is determined by the various syntactic forms attributes +can take, such as GNU, C++11, and Microsoft style attributes, as well as other +information provided by the table definition of the attribute. Ultimately, the +parsed representation of an attribute object is an ``AttributeList`` object. +These parsed attributes chain together as a list of parsed attributes attached +to a declarator or declaration specifier. The parsing of attributes is handled +automatically by Clang, except for attributes spelled as keywords. When +implementing a keyword attribute, the parsing of the keyword and creation of the +``AttributeList`` object must be done manually. + +Eventually, ``Sema::ProcessDeclAttributeList()`` is called with a ``Decl`` and +an ``AttributeList``, at which point the parsed attribute can be transformed +into a semantic attribute. The process by which a parsed attribute is converted +into a semantic attribute depends on the attribute definition and semantic +requirements of the attribute. The end result, however, is that the semantic +attribute object is attached to the ``Decl`` object, and can be obtained by a +call to ``Decl::getAttr<T>()``. + +The structure of the semantic attribute is also governed by the attribute +definition given in Attr.td. This definition is used to automatically generate +functionality used for the implementation of the attribute, such as a class +derived from ``clang::Attr``, information for the parser to use, automated +semantic checking for some attributes, etc. ``include/clang/Basic/Attr.td`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The first step to adding a new attribute to Clang is to add its definition to +`include/clang/Basic/Attr.td +<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?view=markup>`_. +This tablegen definition must derive from the ``Attr`` (tablegen, not +semantic) type, or one of its derivatives. 
Most attributes will derive from the +``InheritableAttr`` type, which specifies that the attribute can be inherited by +later redeclarations of the ``Decl`` it is associated with. +``InheritableParamAttr`` is similar to ``InheritableAttr``, except that the +attribute is written on a parameter instead of a declaration. If the attribute +is intended to apply to a type instead of a declaration, such an attribute +should derive from ``TypeAttr``, and will generally not be given an AST +representation. (Note that this document does not cover the creation of type +attributes.) An attribute that inherits from ``IgnoredAttr`` is parsed, but will +generate an ignored attribute diagnostic when used, which may be useful when an +attribute is supported by another vendor but not supported by clang. + +The definition will specify several key pieces of information, such as the +semantic name of the attribute, the spellings the attribute supports, the +arguments the attribute expects, and more. Most members of the ``Attr`` tablegen +type do not require definitions in the derived definition as the defaults +suffice. However, every attribute must specify at least a spelling list, a +subject list, and a documentation list. + +Spellings +~~~~~~~~~ +All attributes are required to specify a spelling list that denotes the ways in +which the attribute can be spelled. For instance, a single semantic attribute +may have a keyword spelling, as well as a C++11 spelling and a GNU spelling. An +empty spelling list is also permissible and may be useful for attributes which +are created implicitly. The following spellings are accepted: + + ============ ================================================================ + Spelling Description + ============ ================================================================ + ``GNU`` Spelled with a GNU-style ``__attribute__((attr))`` syntax and + placement. + ``CXX11`` Spelled with a C++-style ``[[attr]]`` syntax. 
If the attribute + is meant to be used by Clang, it should set the namespace to + ``"clang"``. + ``Declspec`` Spelled with a Microsoft-style ``__declspec(attr)`` syntax. + ``Keyword`` The attribute is spelled as a keyword, and requires custom + parsing. + ``GCC`` Specifies two spellings: the first is a GNU-style spelling, and + the second is a C++-style spelling with the ``gnu`` namespace. + Attributes should only specify this spelling for attributes + supported by GCC. + ``Pragma`` The attribute is spelled as a ``#pragma``, and requires custom + processing within the preprocessor. If the attribute is meant to + be used by Clang, it should set the namespace to ``"clang"``. + Note that this spelling is not used for declaration attributes. + ============ ================================================================ + +Subjects +~~~~~~~~ +Attributes appertain to one or more ``Decl`` subjects. If the attribute attempts +to attach to a subject that is not in the subject list, a diagnostic is issued +automatically. Whether the diagnostic is a warning or an error depends on how +the attribute's ``SubjectList`` is defined, but the default behavior is to warn. +The diagnostics displayed to the user are automatically determined based on the +subjects in the list, but a custom diagnostic parameter can also be specified in +the ``SubjectList``. The diagnostics generated for subject list violations are +either ``diag::warn_attribute_wrong_decl_type`` or +``diag::err_attribute_wrong_decl_type``, and the parameter enumeration is found +in `include/clang/Sema/AttributeList.h +<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/AttributeList.h?view=markup>`_. +If a previously unused Decl node is added to the ``SubjectList``, the logic used +to automatically determine the diagnostic parameter in `utils/TableGen/ClangAttrEmitter.cpp +<http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp?view=markup>`_ +may need to be updated. 
+ +By default, all subjects in the SubjectList must either be a Decl node defined +in ``DeclNodes.td``, or a statement node defined in ``StmtNodes.td``. However, +more complex subjects can be created by creating a ``SubsetSubject`` object. +Each such object has a base subject which it appertains to (which must be a +Decl or Stmt node, and not a SubsetSubject node), and some custom code which is +called when determining whether an attribute appertains to the subject. For +instance, a ``NonBitField`` SubsetSubject appertains to a ``FieldDecl``, and +tests whether the given FieldDecl is a bit field. When a SubsetSubject is +specified in a SubjectList, a custom diagnostic parameter must also be provided. -First, add your attribute to the `include/clang/Basic/Attr.td -<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?view=markup>`_ -file. - -Each attribute gets a ``def`` inheriting from ``Attr`` or one of its -subclasses. ``InheritableAttr`` means that the attribute also applies to -subsequent declarations of the same name. ``InheritableParamAttr`` is similar -to ``InheritableAttr``, except that the attribute is written on a parameter -instead of a declaration, type or statement. Attributes inheriting from -``TypeAttr`` are pure type attributes which generally are not given a -representation in the AST. Attributes inheriting from ``TargetSpecificAttr`` -are attributes specific to one or more target architectures. An attribute that -inherits from ``IgnoredAttr`` is parsed, but will generate an ignored attribute -diagnostic when used. The attribute type may be useful when an attribute is -supported by another vendor, but not supported by clang. - -``Spellings`` lists the strings that can appear in ``__attribute__((here))`` or -``[[here]]``. All such strings will be synonymous. 
Possible ``Spellings`` -are: ``GNU`` (for use with GNU-style __attribute__ spellings), ``Declspec`` -(for use with Microsoft Visual Studio-style __declspec spellings), ``CXX11` -(for use with C++11-style [[foo]] and [[foo::bar]] spellings), and ``Keyword`` -(for use with attributes that are implemented as keywords, like C++11's -``override`` or ``final``). If you want to allow the ``[[]]`` C++11 syntax, you -have to define a list of ``Namespaces``, which will let users write -``[[namespace::spelling]]``. Using the empty string for a namespace will allow -users to write just the spelling with no "``::``". Attributes which g++-4.8 -or later accepts should also have a ``CXX11<"gnu", "spelling">`` spelling. - -``Subjects`` restricts what kinds of AST node to which this attribute can -appertain (roughly, attach). The subjects are specified via a ``SubjectList``, -which specify the list of subjects. Additionally, subject-related diagnostics -can be specified to be warnings or errors, with the default being a warning. -The diagnostics displayed to the user are automatically determined based on -the subjects in the list, but a custom diagnostic parameter can also be -specified in the ``SubjectList``. The diagnostics generated for subject list -violations are either ``diag::warn_attribute_wrong_decl_type`` or -``diag::err_attribute_wrong_decl_type``, and the parameter enumeration is -found in `include/clang/Sema/AttributeList.h -<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/AttributeList.h?view=markup>`_ -If you add new Decl nodes to the ``SubjectList``, you may need to update the -logic used to automatically determine the diagnostic parameter in `utils/TableGen/ClangAttrEmitter.cpp -<http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp?view=markup>`_. 
- -Diagnostic checking for attribute subject lists is automated except when +Diagnostic checking for attribute subject lists is automated except when ``HasCustomParsing`` is set to ``1``. -By default, all subjects in the SubjectList must either be a Decl node defined -in ``DeclNodes.td``, or a statement node defined in ``StmtNodes.td``. However, -more complex subjects can be created by creating a ``SubsetSubject`` object. -Each such object has a base subject which it appertains to (which must be a -Decl or Stmt node, and not a SubsetSubject node), and some custom code which is -called when determining whether an attribute appertains to the subject. For -instance, a ``NonBitField`` SubsetSubject appertains to a ``FieldDecl``, and -tests whether the given FieldDecl is a bit field. When a SubsetSubject is -specified in a SubjectList, a custom diagnostic parameter must also be provided. - -``Args`` names the arguments the attribute takes, in order. If ``Args`` is +Documentation +~~~~~~~~~~~~~ +All attributes must have some form of documentation associated with them. +Documentation is table generated on the public web server by a server-side +process that runs daily. Generally, the documentation for an attribute is a +stand-alone definition in `include/clang/Basic/AttrDocs.td +<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AttrDocs.td?view=markup>`_ +that is named after the attribute being documented. + +If the attribute is not for public consumption, or is an implicitly-created +attribute that has no visible spelling, the documentation list can specify the +``Undocumented`` object. Otherwise, the attribute should have its documentation +added to AttrDocs.td. + +Documentation derives from the ``Documentation`` tablegen type. All derived +types must specify a documentation category and the actual documentation itself. +Additionally, it can specify a custom heading for the attribute, though a +default heading will be chosen when possible. 
+ +There are four predefined documentation categories: ``DocCatFunction`` for +attributes that appertain to function-like subjects, ``DocCatVariable`` for +attributes that appertain to variable-like subjects, ``DocCatType`` for type +attributes, and ``DocCatStmt`` for statement attributes. A custom documentation +category should be used for groups of attributes with similar functionality. +Custom categories are good for providing overview information for the attributes +grouped under it. For instance, the consumed annotation attributes define a +custom category, ``DocCatConsumed``, that explains what consumed annotations are +at a high level. + +Documentation content (whether it is for an attribute or a category) is written +using reStructuredText (RST) syntax. + +After writing the documentation for the attribute, it should be locally tested +to ensure that there are no issues generating the documentation on the server. +Local testing requires a fresh build of clang-tblgen. To generate the attribute +documentation, execute the following command:: + + clang-tblgen -gen-attr-docs -I /path/to/clang/include /path/to/clang/include/clang/Basic/Attr.td -o /path/to/clang/docs/AttributeReference.rst + +When testing locally, *do not* commit changes to ``AttributeReference.rst``. +This file is generated by the server automatically, and any changes made to this +file will be overwritten. + +Arguments +~~~~~~~~~ +Attributes may optionally specify a list of arguments that can be passed to the +attribute. Attribute arguments specify both the parsed form and the semantic +form of the attribute. For example, if ``Args`` is ``[StringArgument<"Arg1">, IntArgument<"Arg2">]`` then -``__attribute__((myattribute("Hello", 3)))`` will be a valid use. Attribute -arguments specify both the parsed form and the semantic form of the attribute. 
-The previous example shows an attribute which requires two attributes while -parsing, and the Attr subclass' constructor for the attribute will require a -string and integer argument. - -Diagnostic checking for argument counts is automated except when -``HasCustomParsing`` is set to ``1``, or when the attribute uses an optional or -variadic argument. Diagnostic checking for argument semantics is not automated. - -If the parsed form of the attribute is more complex, or differs from the -semantic form, the ``HasCustomParsing`` bit can be set to ``1`` for the class, -and the parsing code in `Parser::ParseGNUAttributeArgs -<http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/ParseDecl.cpp?view=markup>`_ -can be updated for the special case. Note that this only applies to arguments -with a GNU spelling -- attributes with a __declspec spelling currently ignore +``__attribute__((myattribute("Hello", 3)))`` will be a valid use; it requires +two arguments while parsing, and the Attr subclass' constructor for the +semantic attribute will require a string and integer argument. + +All arguments have a name and a flag that specifies whether the argument is +optional. The associated C++ type of the argument is determined by the argument +definition type. If the existing argument types are insufficient, new types can +be created, but it requires modifying `utils/TableGen/ClangAttrEmitter.cpp +<http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp?view=markup>`_ +to properly support the type. + +Other Properties +~~~~~~~~~~~~~~~~ +The ``Attr`` definition has other members which control the behavior of the +attribute. Many of them are special-purpose and beyond the scope of this +document, however a few deserve mention. 
+ +If the parsed form of the attribute is more complex, or differs from the +semantic form, the ``HasCustomParsing`` bit can be set to ``1`` for the class, +and the parsing code in `Parser::ParseGNUAttributeArgs() +<http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/ParseDecl.cpp?view=markup>`_ +can be updated for the special case. Note that this only applies to arguments +with a GNU spelling -- attributes with a __declspec spelling currently ignore this flag and are handled by ``Parser::ParseMicrosoftDeclSpec``. -Custom accessors can be generated for an attribute based on the spelling list -for that attribute. For instance, if an attribute has two different spellings: -'Foo' and 'Bar', accessors can be created: -``[Accessor<"isFoo", [GNU<"Foo">]>, Accessor<"isBar", [GNU<"Bar">]>]`` -These accessors will be generated on the semantic form of the attribute, -accepting no arguments and returning a Boolean. +Note that setting this member to 1 will opt out of common attribute semantic +handling, requiring extra implementation efforts to ensure the attribute +appertains to the appropriate subject, etc. -Attributes which do not require an AST node should set the ``ASTNode`` field to -``0`` to avoid polluting the AST. Note that anything inheriting from -``TypeAttr`` or ``IgnoredAttr`` automatically do not generate an AST node. All -other attributes generate an AST node by default. The AST node is the semantic +If the attribute should not be propagated from a template declaration to an +instantiation of the template, set the ``Clone`` member to 0. By default, all +attributes will be cloned to template instantiations. + +Attributes that do not require an AST node should set the ``ASTNode`` field to +``0`` to avoid polluting the AST. Note that anything inheriting from +``TypeAttr`` or ``IgnoredAttr`` automatically do not generate an AST node. All +other attributes generate an AST node by default. The AST node is the semantic representation of the attribute. 
-Attributes which do not require custom semantic handling should set the -``SemaHandler`` field to ``0``. Note that anything inheriting from -``IgnoredAttr`` automatically do not get a semantic handler. All other -attributes are assumed to use a semantic handler by default. Attributes -without a semantic handler are not given a parsed attribute Kind enumeration. - -The ``LangOpts`` field can be used to specify a list of language options -required by the attribute. For instance, all of the CUDA-specific attributes -specify ``[CUDA]`` for the ``LangOpts`` field, and when the CUDA language -option is not enabled, an "attribute ignored" warning diagnostic is emitted. -Since language options are not table generated nodes, new language options must -be created manually and should specify the spelling used by ``LangOptions`` class. - -Target-specific attribute sometimes share a spelling with other attributes in -different targets. For instance, the ARM and MSP430 targets both have an -attribute spelled ``GNU<"interrupt">``, but with different parsing and semantic -requirements. To support this feature, an attribute inheriting from -``TargetSpecificAttribute`` make specify a ``ParseKind`` field. This field -should be the same value between all arguments sharing a spelling, and -corresponds to the parsed attribute's Kind enumeration. This allows attributes -to share a parsed attribute kind, but have distinct semantic attribute classes. -For instance, ``AttributeList::AT_Interrupt`` is the shared parsed attribute -kind, but ARMInterruptAttr and MSP430InterruptAttr are the semantic attributes -generated. - -By default, when declarations are merging attributes, an attribute will not be -duplicated. However, if an attribute can be duplicated during this merging -stage, set ``DuplicatesAllowedWhileMerging`` to ``1``, and the attribute will +The ``LangOpts`` field specifies a list of language options required by the +attribute. 
For instance, all of the CUDA-specific attributes specify ``[CUDA]`` +for the ``LangOpts`` field, and when the CUDA language option is not enabled, an |