src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2013-12-22 00:07:40 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2013-12-22 00:07:40 +0000
commit	bfef399519ca9b8a4b4c6b563253bad7e0eeffe0 (patch)
tree	df8df0b0067b381eab470a3b8f28d14a552a6340 /lib/Format/Encoding.h
parent	6a0372513edbc473b538d2f724efac50405d6fef (diff)
download	src-bfef399519ca9b8a4b4c6b563253bad7e0eeffe0.tar.gz src-bfef399519ca9b8a4b4c6b563253bad7e0eeffe0.zip

Vendor import of clang release_34 branch r197841 (effectively, 3.4 RC3) [tag: vendor/clang/clang-release_34-r197841]

https://llvm.org/svn/llvm-project/cfe/branches/release_34@197841

Notes

Notes:
svn path=/vendor/clang/dist/; revision=259701
svn path=/vendor/clang/clang-release_34-r197841/; revision=259703; tag=vendor/clang/clang-release_34-r197841

Diffstat (limited to 'lib/Format/Encoding.h')

-rw-r--r--

lib/Format/Encoding.h

144

1 files changed, 144 insertions, 0 deletions

diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h
new file mode 100644
index 000000000000..356334d5376f
--- /dev/null
+++ b/lib/Format/Encoding.h

@@ -0,0 +1,144 @@

+//===--- Encoding.h - Format C++ code -------------------------------------===//

+//

+// The LLVM Compiler Infrastructure

+//

+// This file is distributed under the University of Illinois Open Source

+// License. See LICENSE.TXT for details.

+//

+//===----------------------------------------------------------------------===//

+///

+/// \file

+/// \brief Contains functions for text encoding manipulation. Supports UTF-8,

+/// 8-bit encodings and escape sequences in C++ string literals.

+///

+//===----------------------------------------------------------------------===//

+#ifndef LLVM_CLANG_FORMAT_ENCODING_H

+#define LLVM_CLANG_FORMAT_ENCODING_H

+#include "clang/Basic/LLVM.h"

+#include "llvm/Support/ConvertUTF.h"

+#include "llvm/Support/Unicode.h"

+namespace clang {

+namespace format {

+namespace encoding {

+enum Encoding { // The encodings this header distinguishes when measuring text.

+ Encoding_UTF8, // Returned by detectEncoding() when the text is a legal UTF-8 string.

+ Encoding_Unknown // We treat all other encodings as 8-bit encodings.

+};

+/// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8,

+/// it is considered UTF8, otherwise we treat it as some 8-bit encoding.

+inline Encoding detectEncoding(StringRef Text) { // Result is Encoding_UTF8 or Encoding_Unknown.

+ const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin()); // Start of the bytes to validate, viewed as UTF8 units.

+ const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end()); // End of the byte range handed to the validator.

+ if (::isLegalUTF8String(&Ptr, BufEnd)) // Global-namespace helper, presumably from llvm/Support/ConvertUTF.h; receives Ptr by address.

+ return Encoding_UTF8;

+ return Encoding_Unknown; // NOTE(review): the closing '}' of this function is absent from this rendering of the patch.

+inline unsigned getCodePointCountUTF8(StringRef Text) { // Counts the code points in Text, treating it as UTF-8.

+ unsigned CodePoints = 0;

+ for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) { // Each iteration skips one whole sequence, sized from its first byte.

+ ++CodePoints;

+ } // NOTE(review): assumes getNumBytesForUTF8 never returns 0, otherwise i would not advance - TODO confirm.

+ return CodePoints; // NOTE(review): the closing '}' of this function is absent from this rendering of the patch.

+/// \brief Gets the number of code points in the Text using the specified

+/// Encoding.

+inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { // Dispatches on Encoding; see the cases below.

+ switch (Encoding) {

+ case Encoding_UTF8: // Multi-byte aware count.

+ return getCodePointCountUTF8(Text);

+ default: // Any other encoding: one byte is counted as one code point.

+ return Text.size();

+ } // NOTE(review): the closing '}' of this function is absent from this rendering of the patch.

+/// \brief Returns the number of columns required to display the \p Text on a

+/// generic Unicode-capable terminal. Text is assumed to use the specified

+/// \p Encoding.

+inline unsigned columnWidth(StringRef Text, Encoding Encoding) { // Falls back to the byte count when no UTF-8 width is available.

+ if (Encoding == Encoding_UTF8) {

+ int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); // Signed result: a negative value is not used as a width.

+ if (ContentWidth >= 0)

+ return ContentWidth;

+ } // A negative ContentWidth falls through to the byte-count fallback below.

+ return Text.size(); // Non-UTF-8 text, or UTF-8 text without a usable width: one column per byte.

+/// \brief Returns the number of columns required to display the \p Text,
+/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
+/// text is assumed to use the specified \p Encoding.
+///
+/// Each tab advances the running column (StartColumn plus the width seen so
+/// far) to the next multiple of \p TabWidth. A \p TabWidth of 0 is accepted
+/// and makes every tab contribute zero columns; without that guard the
+/// tab-stop computation would take a remainder by zero.
+///
+/// \returns the width of \p Text only; \p StartColumn is used for tab-stop
+/// alignment but is not included in the result.
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+                                    unsigned TabWidth, Encoding Encoding) {
+  unsigned TotalWidth = 0;
+  StringRef Tail = Text;
+  for (;;) {
+    StringRef::size_type TabPos = Tail.find('\t');
+    // No more tabs: the rest of the text is measured as-is.
+    if (TabPos == StringRef::npos)
+      return TotalWidth + columnWidth(Tail, Encoding);
+    // Width of the tab-free run in front of the tab. columnWidth() returns
+    // unsigned, so no sign check is needed here.
+    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+    // Advance to the next tab stop; skipped for TabWidth == 0 to avoid
+    // undefined behaviour from '% 0'.
+    if (TabWidth)
+      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+    Tail = Tail.substr(TabPos + 1);
+  }
+}

+/// \brief Gets the number of bytes in a sequence representing a single

+/// codepoint and starting with FirstChar in the specified Encoding.

+inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { // Only FirstChar is inspected; following bytes are not validated here.

+ switch (Encoding) {

+ case Encoding_UTF8: // Length is derived from the first byte of the sequence.

+ return getNumBytesForUTF8(FirstChar);

+ default: // Any other encoding is treated as one byte per code point.

+ return 1;

+ } // NOTE(review): the closing '}' of this function is absent from this rendering of the patch.

+inline bool isOctDigit(char c) { return '0' <= c && c <= '7'; } // True exactly for the characters '0' through '7'.

+inline bool isHexDigit(char c) { // True for '0'-'9', 'a'-'f' and 'A'-'F'; false for every other character.

+ return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||

+ ('A' <= c && c <= 'F'); // NOTE(review): the closing '}' of this function is absent from this rendering of the patch.

+/// \brief Gets the length of an escape sequence inside a C++ string literal.

+/// Text should span from the beginning of the escape sequence (starting with a

+/// backslash) to the end of the string literal.

+inline unsigned getEscapeSequenceLength(StringRef Text) { // Result is a length in bytes, counted from the backslash.

+ assert(Text[0] == '\\'); // Precondition: Text begins with the backslash.

+ if (Text.size() < 2)

+ return 1; // Only the backslash itself is present.

+ switch (Text[1]) {

+ case 'u':

+ return 6; // Fixed length: backslash, 'u' and four more characters. NOTE(review): returned without checking Text.size() or the digits.

+ case 'U':

+ return 10; // Fixed length: backslash, 'U' and eight more characters. NOTE(review): same lack of checks as for 'u'.

+ case 'x': {

+ unsigned I = 2; // Point after '\x'.

+ while (I < Text.size() && isHexDigit(Text[I])) // Consume hex digits with no upper limit on their number.

+ ++I;

+ return I; // Is 2 when no hex digit follows '\x'.

+ }

+ default:

+ if (isOctDigit(Text[1])) {

+ unsigned I = 1; // Index of the first octal digit, right after the backslash.

+ while (I < Text.size() && I < 4 && isOctDigit(Text[I])) // At most three octal digits (indices 1 to 3).

+ ++I;

+ return I;

+ }

+ return 2; // Any other escape: the backslash plus one character.

+ } // NOTE(review): the closing '}' of this function is absent from this rendering of the patch.

+} // namespace encoding

+} // namespace format

+} // namespace clang

+#endif // LLVM_CLANG_FORMAT_ENCODING_H