diff --git a/include/keystone/keystone.h b/include/keystone/keystone.h index 2a885d04c0190d9c1ceedc327b2d23a66784beaf..5420c41418d467ff8e8002703778a3fc5a4a5d25 100644 --- a/include/keystone/keystone.h +++ b/include/keystone/keystone.h @@ -149,11 +149,12 @@ typedef enum ks_opt_type { // Runtime option value (associated with ks_opt_type above) typedef enum ks_opt_value { - KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX). - KS_OPT_SYNTAX_ATT = 1 << 1, // X86 ATT asm syntax (KS_OPT_SYNTAX). - KS_OPT_SYNTAX_NASM = 1 << 2, // X86 Nasm syntax (KS_OPT_SYNTAX). - KS_OPT_SYNTAX_MASM = 1 << 3, // X86 Masm syntax (KS_OPT_SYNTAX) - unsupported yet. - KS_OPT_SYNTAX_GAS = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_ATT = 1 << 1, // X86 ATT asm syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_NASM = 1 << 2, // X86 Nasm syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_MASM = 1 << 3, // X86 Masm syntax (KS_OPT_SYNTAX) - unsupported yet. + KS_OPT_SYNTAX_GAS = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_RADIX16 = 1 << 5, // Immediates are read as hexadecimal, e.g. 12 means 0x12; may be OR-ed with a syntax value (KS_OPT_SYNTAX). } ks_opt_value; diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 0b6613ae62cb78b3e15f559e2a70598100749946..a0619461c7cb2ac106d1e506288556635ae4d6e4 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -151,6 +151,9 @@ protected: /// Which dialect of an assembler variant to use. Defaults to 0 unsigned AssemblerDialect; + /// Radix the lexer applies to integer immediates; 16 makes it parse them as hexadecimal. + unsigned Radix; /* NOTE(review): no initialization of Radix is visible in this patch - confirm the constructor sets it */ + /// This is true if the assembler allows @ characters in symbol names. /// Defaults to false. 
bool AllowAtInName; @@ -474,6 +477,8 @@ public: const char *getCode64Directive() const { return Code64Directive; } unsigned getAssemblerDialect() const { return AssemblerDialect; } void setAssemblerDialect(unsigned v) { AssemblerDialect = v; } + void setRadix(unsigned v) { Radix = v; } + unsigned getRadix() const { return Radix; } bool doesAllowAtInName() const { return AllowAtInName; } bool supportsNameQuoting() const { return SupportsQuotedNames; } bool doesSupportDataRegionDirectives() const { diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h index 1bb6d212784eec327ae9fb2de4d1510a6c67fbec..6fc909b0569e48438c21e37878f44f00b9dd8f1e 100644 --- a/llvm/include/llvm/MC/MCParser/AsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h @@ -30,6 +30,7 @@ class AsmLexer : public MCAsmLexer { const char *CurPtr; StringRef CurBuf; bool isAtStartOfLine; + unsigned defaultRadix; /* copied from MCAsmInfo::getRadix() when the lexer is constructed */ void operator=(const AsmLexer&) = delete; AsmLexer(const AsmLexer&) = delete; diff --git a/llvm/keystone/ks.cpp b/llvm/keystone/ks.cpp index 9b1b63b7613440aa37c6c08dc75754fe60dab4e4..fbd38ca21e10277cb73199da99a26c88ef9a23cd 100644 --- a/llvm/keystone/ks.cpp +++ b/llvm/keystone/ks.cpp @@ -495,11 +495,18 @@ ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value) switch(value) { default: return KS_ERR_OPT_INVALID; + case KS_OPT_SYNTAX_RADIX16: // RADIX16 on its own: the default (Intel) syntax applies + case KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16: + case KS_OPT_SYNTAX_INTEL | KS_OPT_SYNTAX_RADIX16: + ks->MAI->setRadix(16); /* intentional fall through into the NASM/Intel setup. NOTE(review): the plain NASM/Intel cases below never reset the radix - confirm that is intended */ case KS_OPT_SYNTAX_NASM: case KS_OPT_SYNTAX_INTEL: ks->syntax = (ks_opt_value)value; ks->MAI->setAssemblerDialect(1); break; + case KS_OPT_SYNTAX_GAS | KS_OPT_SYNTAX_RADIX16: + case KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16: + ks->MAI->setRadix(16); /* intentional fall through into the GAS/ATT setup */ case KS_OPT_SYNTAX_GAS: case KS_OPT_SYNTAX_ATT: ks->syntax = (ks_opt_value)value; diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index 
ebcb922a33a9b2d9d468ad2198fd760113cc56df..0eb6c3b5a0eb841144ee48af4c92ef29129474fb 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -25,6 +25,7 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { CurPtr = nullptr; isAtStartOfLine = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); + defaultRadix = MAI.getRadix(); /* snapshot taken once, at construction */ } AsmLexer::~AsmLexer() { @@ -259,6 +260,10 @@ AsmToken AsmLexer::LexDigit() // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doLookAhead(CurPtr, 10); + + if (defaultRadix == 16) /* radix-16 mode overrides whatever the look-ahead detected */ + Radix = 16; + bool isHex = Radix == 16; // Check for floating point literals. if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { @@ -274,8 +279,10 @@ AsmToken AsmLexer::LexDigit() "invalid hexdecimal number"); // Consume the [bB][hH]. - if (Radix == 2 || Radix == 16) - ++CurPtr; + if (defaultRadix != 16) { /* radix-16 mode never skips a suffix character here */ + if (Radix == 2 || Radix == 16) + ++CurPtr; + } // The darwin/x86 (and x86-64) assembler accepts and ignores type // suffices on integer literals. 
diff --git a/samples/sample.c b/samples/sample.c index 0077a3d316593b5e0cbd67427239b12a4c096452..d540f65a74012c77605a70a6bc7485b26a225241 100644 --- a/samples/sample.c +++ b/samples/sample.c @@ -53,6 +53,15 @@ int main(int argc, char **argv) test_ks(KS_ARCH_X86, KS_MODE_32, "add %ecx, %eax", KS_OPT_SYNTAX_ATT); test_ks(KS_ARCH_X86, KS_MODE_64, "add %rcx, %rax", KS_OPT_SYNTAX_ATT); + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 0x15", 0); /* without RADIX16: immediate written with a 0x prefix */ + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15h", 0); /* without RADIX16: immediate written with an h suffix */ + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", 0); /* without RADIX16: plain digits */ + + // RADIX16 on its own (default Intel syntax): the immediate 15 is meant to be read as 0x15 + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", KS_OPT_SYNTAX_RADIX16); + // RADIX16 combined with AT&T syntax: the immediate $15 is meant to be read as 0x15 + test_ks(KS_ARCH_X86, KS_MODE_32, "add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT); + // ARM test_ks(KS_ARCH_ARM, KS_MODE_ARM, "sub r1, r2, r5", 0); test_ks(KS_ARCH_ARM, KS_MODE_ARM + KS_MODE_BIG_ENDIAN, "sub r1, r2, r5", 0);