aboutsummaryrefslogtreecommitdiff
path: root/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2013-04-08 18:45:10 +0000
committerDimitry Andric <dim@FreeBSD.org>2013-04-08 18:45:10 +0000
commit809500fc2c13c8173a16b052304d983864e4a1e1 (patch)
tree4fc2f184c499d106f29a386c452b49e5197bf63d /utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
parentbe7c9ec198dcdb5bf73a35bfbb00b3333cb87909 (diff)
downloadsrc-809500fc2c13c8173a16b052304d983864e4a1e1.tar.gz
src-809500fc2c13c8173a16b052304d983864e4a1e1.zip
Vendor import of clang trunk r178860:vendor/clang/clang-trunk-r178860
Notes
Notes: svn path=/vendor/clang/dist/; revision=249261 svn path=/vendor/clang/clang-trunk-r178860/; revision=249262; tag=vendor/clang/clang-trunk-r178860
Diffstat (limited to 'utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp')
-rw-r--r--utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp85
1 files changed, 85 insertions, 0 deletions
diff --git a/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
new file mode 100644
index 000000000000..bfdb268b63ba
--- /dev/null
+++ b/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
@@ -0,0 +1,85 @@
+//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits an efficient function to translate HTML named
+// character references to UTF-8 sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <vector>
+
+using namespace llvm;
+
+/// \brief Render the UTF-8 encoding of a code point as a C string literal.
+///
+/// The literal is streamed into \p CLiteral: an opening double quote, one
+/// "\x" hexadecimal escape per encoded byte, and a closing double quote.
+///
+/// \returns true on success, false when ConvertCodePointToUTF8 rejects
+/// \p CodePoint (in which case nothing is written).
+static bool translateCodePointToUTF8(unsigned CodePoint,
+                                     SmallVectorImpl<char> &CLiteral) {
+  char Encoded[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+  // The converter advances this pointer past the last byte it produced.
+  char *EncodedEnd = Encoded;
+  if (!ConvertCodePointToUTF8(CodePoint, EncodedEnd))
+    return false;
+
+  raw_svector_ostream OS(CLiteral);
+  OS << "\"";
+  for (const char *Byte = Encoded; Byte != EncodedEnd; ++Byte) {
+    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
+    const unsigned char Value = static_cast<unsigned char>(*Byte);
+    OS << "\\x";
+    OS.write_hex(Value);
+  }
+  OS << "\"";
+
+  return true;
+}
+
+namespace clang {
+/// \brief Emit the translateHTMLNamedCharacterReferenceToUTF8() function.
+///
+/// For every record derived from the "NCR" class, the record's "Spelling"
+/// is mapped to a return statement that yields the UTF-8 encoding of its
+/// "CodePoint", written as a C string literal.  The mapping is emitted as a
+/// StringMatcher over the generated function's "Name" parameter, followed by
+/// a final "return StringRef();".
+///
+/// Records whose code point is out of range or cannot be encoded are reported
+/// as errors through SrcMgr and are left out of the generated matcher.
+///
+/// \param Records the records searched for "NCR" definitions.
+/// \param OS the stream that receives the generated source.
+void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
+                                                  raw_ostream &OS) {
+  // Largest value that is a Unicode code point.
+  const int64_t MaxCodePoint = 0x10FFFF;
+
+  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
+  std::vector<StringMatcher::StringPair> NameToUTF8;
+  SmallString<32> CLiteral;
+  for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
+       I != E; ++I) {
+    Record &Tag = **I;
+    std::string Spelling = Tag.getValueAsString("Spelling");
+    const int64_t CodePoint = Tag.getValueAsInt("CodePoint");
+    CLiteral.clear();
+    CLiteral.append("return ");
+    // Range-check the 64-bit value before narrowing it to the 'unsigned'
+    // taken by translateCodePointToUTF8; otherwise a negative or over-wide
+    // value would be truncated to some other, valid code point and silently
+    // accepted.
+    if (CodePoint < 0 || CodePoint > MaxCodePoint ||
+        !translateCodePointToUTF8(static_cast<unsigned>(CodePoint),
+                                  CLiteral)) {
+      SrcMgr.PrintMessage(Tag.getLoc().front(),
+                          SourceMgr::DK_Error,
+                          Twine("invalid code point"));
+      continue;
+    }
+    CLiteral.append(";");
+
+    StringMatcher::StringPair Match(Spelling, CLiteral.str());
+    NameToUTF8.push_back(Match);
+  }
+
+  emitSourceFileHeader("HTML named character reference to UTF-8 "
+                       "translation", OS);
+
+  OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
+        "                                             StringRef Name) {\n";
+  StringMatcher("Name", NameToUTF8, OS).Emit();
+  OS << "  return StringRef();\n"
+     << "}\n\n";
+}
+
+} // end namespace clang
+