From eac9cc9d6712c4417ce57cb0aedeb8f0a39667c5 Mon Sep 17 00:00:00 2001
From: Nguyen Anh Quynh <aquynh@gmail.com>
Date: Sat, 10 Sep 2016 02:04:54 +0800
Subject: [PATCH] Support new syntax option RADIX16 (see issue #11)

---
 include/keystone/keystone.h              | 11 ++++++-----
 llvm/include/llvm/MC/MCAsmInfo.h         |  5 +++++
 llvm/include/llvm/MC/MCParser/AsmLexer.h |  1 +
 llvm/keystone/ks.cpp                     |  7 +++++++
 llvm/lib/MC/MCParser/AsmLexer.cpp        | 11 +++++++++--
 samples/sample.c                         |  9 +++++++++
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/include/keystone/keystone.h b/include/keystone/keystone.h
index 2a885d0..5420c41 100644
--- a/include/keystone/keystone.h
+++ b/include/keystone/keystone.h
@@ -149,11 +149,12 @@ typedef enum ks_opt_type {
 
 // Runtime option value (associated with ks_opt_type above)
 typedef enum ks_opt_value {
-	KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX).
-	KS_OPT_SYNTAX_ATT   = 1 << 1, // X86 ATT asm syntax (KS_OPT_SYNTAX).
-	KS_OPT_SYNTAX_NASM  = 1 << 2, // X86 Nasm syntax (KS_OPT_SYNTAX).
-	KS_OPT_SYNTAX_MASM  = 1 << 3, // X86 Masm syntax (KS_OPT_SYNTAX) - unsupported yet.
-	KS_OPT_SYNTAX_GAS   = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX).
+	KS_OPT_SYNTAX_INTEL =   1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX).
+	KS_OPT_SYNTAX_ATT   =   1 << 1, // X86 ATT asm syntax (KS_OPT_SYNTAX).
+	KS_OPT_SYNTAX_NASM  =   1 << 2, // X86 Nasm syntax (KS_OPT_SYNTAX).
+	KS_OPT_SYNTAX_MASM  =   1 << 3, // X86 Masm syntax (KS_OPT_SYNTAX) - unsupported yet.
+	KS_OPT_SYNTAX_GAS   =   1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX).
+	KS_OPT_SYNTAX_RADIX16 = 1 << 5, // All immediates are in hex format (e.g. 12 means 0x12)
 } ks_opt_value;
 
 
diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 0b6613a..a061946 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -151,6 +151,9 @@ protected:
   /// Which dialect of an assembler variant to use.  Defaults to 0
   unsigned AssemblerDialect;
 
+  /// Default radix for immediate values.
+  unsigned Radix;
+
   /// This is true if the assembler allows @ characters in symbol names.
   /// Defaults to false.
   bool AllowAtInName;
@@ -474,6 +477,8 @@ public:
   const char *getCode64Directive() const { return Code64Directive; }
   unsigned getAssemblerDialect() const { return AssemblerDialect; }
   void setAssemblerDialect(unsigned v) { AssemblerDialect = v; }
+  void setRadix(unsigned v) { Radix = v; }
+  unsigned getRadix() const { return Radix; }
   bool doesAllowAtInName() const { return AllowAtInName; }
   bool supportsNameQuoting() const { return SupportsQuotedNames; }
   bool doesSupportDataRegionDirectives() const {
diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 1bb6d21..6fc909b 100644
--- a/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -30,6 +30,7 @@ class AsmLexer : public MCAsmLexer {
   const char *CurPtr;
   StringRef CurBuf;
   bool isAtStartOfLine;
+  unsigned defaultRadix;
 
   void operator=(const AsmLexer&) = delete;
   AsmLexer(const AsmLexer&) = delete;
diff --git a/llvm/keystone/ks.cpp b/llvm/keystone/ks.cpp
index 9b1b63b..fbd38ca 100644
--- a/llvm/keystone/ks.cpp
+++ b/llvm/keystone/ks.cpp
@@ -495,11 +495,18 @@ ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value)
             switch(value) {
                 default:
                     return KS_ERR_OPT_INVALID;
+                case KS_OPT_SYNTAX_RADIX16: // default syntax is Intel
+                case KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16:
+                case KS_OPT_SYNTAX_INTEL | KS_OPT_SYNTAX_RADIX16:
+                    ks->MAI->setRadix(16); // fall through: also apply the Intel/NASM settings below
                 case KS_OPT_SYNTAX_NASM:
                 case KS_OPT_SYNTAX_INTEL:
                     ks->syntax = (ks_opt_value)value;
                     ks->MAI->setAssemblerDialect(1);
                     break;
+                case KS_OPT_SYNTAX_GAS | KS_OPT_SYNTAX_RADIX16:
+                case KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16:
+                    ks->MAI->setRadix(16); // fall through: also apply the GAS/AT&T settings below
                 case KS_OPT_SYNTAX_GAS:
                 case KS_OPT_SYNTAX_ATT:
                     ks->syntax = (ks_opt_value)value;
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index ebcb922..0eb6c3b 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -25,6 +25,7 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
   CurPtr = nullptr;
   isAtStartOfLine = true;
   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
+  defaultRadix = MAI.getRadix();
 }
 
 AsmLexer::~AsmLexer() {
@@ -259,6 +260,10 @@ AsmToken AsmLexer::LexDigit()
   // Decimal integer: [1-9][0-9]*
   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
     unsigned Radix = doLookAhead(CurPtr, 10);
+
+    if (defaultRadix == 16)
+      Radix = 16;
+
     bool isHex = Radix == 16;
     // Check for floating point literals.
     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@@ -274,8 +279,10 @@ AsmToken AsmLexer::LexDigit()
                            "invalid hexdecimal number");
 
     // Consume the [bB][hH].
-    if (Radix == 2 || Radix == 16)
-      ++CurPtr;
+    if (defaultRadix != 16) {
+      if (Radix == 2 || Radix == 16)
+        ++CurPtr;
+    }
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores type
     // suffices on integer literals.
diff --git a/samples/sample.c b/samples/sample.c
index 0077a3d..d540f65 100644
--- a/samples/sample.c
+++ b/samples/sample.c
@@ -53,6 +53,15 @@ int main(int argc, char **argv)
     test_ks(KS_ARCH_X86, KS_MODE_32, "add %ecx, %eax", KS_OPT_SYNTAX_ATT);
     test_ks(KS_ARCH_X86, KS_MODE_64, "add %rcx, %rax", KS_OPT_SYNTAX_ATT);
 
+    test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 0x15", 0);
+    test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15h", 0);
+    test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", 0);
+
+    // RADIX16 with Intel syntax (the default syntax)
+    test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", KS_OPT_SYNTAX_RADIX16);
+    // RADIX16 with AT&T syntax
+    test_ks(KS_ARCH_X86, KS_MODE_32, "add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT);
+
     // ARM
     test_ks(KS_ARCH_ARM, KS_MODE_ARM, "sub r1, r2, r5", 0);
     test_ks(KS_ARCH_ARM, KS_MODE_ARM + KS_MODE_BIG_ENDIAN, "sub r1, r2, r5", 0);
-- 
GitLab