Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Z
Zipr Toolchain
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Open Source Software
Zipr Toolchain
Commits
a485336d
Commit
a485336d
authored
11 years ago
by
jdh8d
Browse files
Options
Downloads
Patches
Plain Diff
Fixed jmp label insns to work for 64-bit, where label may not be in range
Former-commit-id: 5d0d140f3e89e3e6cc457f692a7644c65112cdbc
parent
5c729099
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libIRDB/src/core/generate_spri.cpp
+150
-85
150 additions, 85 deletions
libIRDB/src/core/generate_spri.cpp
tools/spasm/spasm.cpp
+29
-16
29 additions, 16 deletions
tools/spasm/spasm.cpp
with
179 additions
and
101 deletions
libIRDB/src/core/generate_spri.cpp
+
150
−
85
View file @
a485336d
...
...
@@ -49,10 +49,32 @@ static string addressify(Instruction_t* insn);
//
// determine if this branch has a short offset that can't be represented as a long branch
//
static
int
needs_short_branch_rewrite
(
const
DISASM
&
disasm
)
static
bool
needs_short_branch_rewrite
(
Instruction_t
*
newinsn
,
const
DISASM
&
disasm
)
{
return
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"jecxz"
)
||
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"loop"
)
||
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"loopne"
)
||
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"loope"
)
;
if
(
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"jecxz"
)
||
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"loop"
)
||
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"loopne"
)
||
strstr
(
disasm
.
Instruction
.
Mnemonic
,
"loope"
)
)
return
true
;
/* 64-bit has more needs than this */
if
(
sizeof
(
void
*
)
!=
8
)
return
false
;
if
(
disasm
.
Instruction
.
BranchType
==
0
)
/* non-branches, jumps, calls and returns don't need this rewrite */
return
false
;
if
(
disasm
.
Instruction
.
BranchType
==
JmpType
)
return
false
;
if
(
disasm
.
Instruction
.
BranchType
==
CallType
)
return
false
;
if
(
disasm
.
Instruction
.
BranchType
==
RetType
)
return
false
;
/* all other branches (on x86-64) need further checking */
if
(
!
newinsn
->
GetTarget
())
/* no specified target, no need to modify it */
return
false
;
string
new_target
=
labelfy
(
newinsn
->
GetTarget
());
if
(
new_target
.
c_str
()[
0
]
==
'0'
)
/* if we're jumping back to the base instruction */
return
true
;
return
false
;
}
...
...
@@ -181,6 +203,14 @@ static string get_short_branch_label(Instruction_t *newinsn)
return
"sj_"
+
labelfy
(
newinsn
);
}
/*
 * get_data_label - build the data label for an instruction.
 *
 * Returns "da_" followed by labelfy(newinsn), or an empty string when
 * newinsn is null.
 */
static string get_data_label(Instruction_t *newinsn)
{
	if (newinsn)
		return string("da_") + labelfy(newinsn);

	/* no instruction, no label */
	return string();
}
static
string
getPostCallbackLabel
(
Instruction_t
*
newinsn
)
{
if
(
!
newinsn
)
...
...
@@ -195,6 +225,122 @@ static void emit_relocation(FileIR_t* fileIRp, ostream& fout, int offset, string
fout
<<
"
\t
"
<<
labelfy
(
insn
)
<<
" rl "
<<
offset
<<
" "
<<
type
<<
" "
<<
URLToFile
(
fileIRp
->
GetFile
()
->
GetURL
())
<<
endl
;
}
/*
 * covert_jump_for_64bit - reroute a branch to an absolute address through a
 * data slot.
 *
 * newinsn:    instruction being emitted; used only to derive the data label.
 * final:      in/out assembly text.  On rewrite, everything from the first
 *             occurrence of new_target onward is replaced by
 *             " qword [ rel <datalabel>]", and a second line
 *             "\t<datalabel> ** dq <that remaining text>" is appended.
 * new_target: target text that was substituted into final.
 *
 * final is left untouched when this tool is built with 4-byte pointers, or
 * when new_target does not begin with '0' (i.e. it is a label rather than
 * an address).
 */
void covert_jump_for_64bit(Instruction_t* newinsn, string &final, string new_target)
{
	/* skip for x86-32, and skip for labeled addresses */
	if (sizeof(void*) == 4 || new_target.c_str()[0] != '0')
		return;

	const string datalabel = get_data_label(newinsn);

	/*
	 * convert a "call <addr>" into
	 *     "call qword [rel data_label] \n data_label ** dq <addr>"
	 *
	 * NOTE(review): the find() result is not checked.  If new_target does
	 * not occur in final, substr(start) throws std::out_of_range.
	 */
	const int start = static_cast<int>(final.find(new_target, 0));
	const string head = final.substr(0, start);
	const string tail = final.substr(start);

	final = head + " qword [ rel " + datalabel + "]\n\t" + datalabel + " ** dq " + tail;
}
/*
 * emit_jump - write one direct jump/call/branch instruction to fout, plus a
 * relocation line where one is required.
 *
 * fileIRp:         passed through to emit_relocation; its file URL is also
 *                  used by the sanity assert in the "no target" path.
 * fout:            stream receiving the instruction text.
 * disasm:          disassembly of the instruction; CompleteInstr,
 *                  Argument1.ArgMnemonic, Argument1.SegmentReg and
 *                  Instruction.Opcode are read.
 * newinsn:         instruction being emitted; its target comes from GetTarget().
 * old_insn:        not referenced by this function.
 * original_target: output; written only when the short-branch rewrite
 *                  applies.  It receives the label of the real target, or
 *                  the original address text if there is no target.
 */
void emit_jump(FileIR_t* fileIRp, ostream& fout, DISASM& disasm, Instruction_t* newinsn, Instruction_t *old_insn, string & original_target)
{
	/* the value is not used below */
	const string label = labelfy(newinsn);
	const string complete_instr(disasm.CompleteInstr);
	const string address_string(disasm.Argument1.ArgMnemonic);

	/* these three forms have a 1-byte opcode */
	const bool one_byte_opcode =
		disasm.Instruction.Opcode == 0xeb ||	/* jmp with 8-bit addr -- should be recompiled to 32-bit */
		disasm.Instruction.Opcode == 0xe8 ||	/* call with 32-bit addr */
		disasm.Instruction.Opcode == 0xe9;	/* jmp with 32-bit addr */

	if (!newinsn->GetTarget() && !needs_short_branch_rewrite(newinsn, disasm))
	{
		/*
		 * this instruction has a target, but it's not in the DB, so
		 * we'll just emit the instruction and let it go back to the
		 * application text.
		 */
		fout << complete_instr << endl;

		/* just ignore these bogus instructions; they get no relocation */
		if (complete_instr == "call 0x00000000" || complete_instr == "jmp 0x00000000")
			return;

		/* everything else needs relocation info */
		if (one_byte_opcode)
		{
			emit_relocation(fileIRp, fout, 1, "32-bit", newinsn);
		}
		else
		{
			/* assert this is the "main" file and no relocation is necessary. */
			assert(strstr(fileIRp->GetFile()->GetURL().c_str(), "a.ncexe") != 0);
		}
		return;
	}

	/* we have a target instruction in the database, or a short branch: make the target symbolic */
	string new_target;
	if (newinsn->GetTarget())
		new_target = labelfy(newinsn->GetTarget());

	/* if this is a short branch, write this branch to jump to the next insn */
	if (needs_short_branch_rewrite(newinsn, disasm))
	{
		new_target = get_short_branch_label(newinsn);

		/* also record the real target if it's a short branch */
		if (newinsn->GetTarget())
			original_target = labelfy(newinsn->GetTarget());
		else
			original_target = address_string;
	}

	/*
	 * find the old target in the disassembled string and splice the new
	 * target in its place.
	 *
	 * NOTE(review): the find() result is not checked for "not found".
	 */
	const int start = static_cast<int>(complete_instr.find(address_string, 0));
	const string before = complete_instr.substr(0, start);
	const string after = complete_instr.substr(start + address_string.length());
	string rewritten = before + new_target + after;

	/* sanity, no segment registers for absolute mode */
	assert(disasm.Argument1.SegmentReg == 0);

	covert_jump_for_64bit(newinsn, rewritten, new_target);

	fout << rewritten << endl;

	/*
	 * if we're jumping to an absolute address versus a label, we will need
	 * a relocation for this jump instruction.  Other jcc's often use a
	 * 2-byte opcode for far jmps (which is what spri will emit), so the
	 * relocation offset is 2 for them and 1 for the 1-byte forms.
	 */
	if (new_target.c_str()[0] == '0')
		emit_relocation(fileIRp, fout, one_byte_opcode ? 1 : 2, "32-bit", newinsn);
}
//
// emit this instruction as spri code.
//
...
...
@@ -275,88 +421,7 @@ static string emit_spri_instruction(FileIR_t* fileIRp, Instruction_t *newinsn, o
(
disasm
.
Argument1
.
ArgType
&
CONSTANT_TYPE
)
!=
0
// and has a constant argument type 1
)
{
/* if we have a target instruction in the database */
if
(
newinsn
->
GetTarget
()
||
needs_short_branch_rewrite
(
disasm
))
{
/* change the target to be symbolic */
/* first get the new target */
string
new_target
;
if
(
newinsn
->
GetTarget
())
new_target
=
labelfy
(
newinsn
->
GetTarget
());
/* if this is a short branch, write this branch to jump to the next insn */
if
(
needs_short_branch_rewrite
(
disasm
))
{
new_target
=
get_short_branch_label
(
newinsn
);
/* also get the real target if it's a short branch */
if
(
newinsn
->
GetTarget
())
original_target
=
labelfy
(
newinsn
->
GetTarget
());
else
original_target
=
address_string
;
}
/* find the location in the disassembled string of the old target */
int
start
=
complete_instr
.
find
(
address_string
,
0
);
/* and build up a new string that has the label of the target instead of the address */
string
final
=
complete_instr
.
substr
(
0
,
start
)
+
new_target
+
complete_instr
.
substr
(
start
+
address_string
.
length
());
/* sanity, no segment registers for absolute mode */
assert
(
disasm
.
Argument1
.
SegmentReg
==
0
);
fout
<<
final
<<
endl
;
if
(
new_target
.
c_str
()[
0
]
==
'0'
)
{
// if we're jumping to an absolute address versus a label, we will need a relocation for this jump instruction
if
(
disasm
.
Instruction
.
Opcode
==
0xeb
||
// jmp with 8-bit addr -- should be recompiled to 32-bit
disasm
.
Instruction
.
Opcode
==
0xe8
||
// 0xe8: call with 32-bit addr
disasm
.
Instruction
.
Opcode
==
0xe9
// 0xe9: jmp with 32-bit addr
)
{
/* jumps have a 1-byte opcode */
emit_relocation
(
fileIRp
,
fout
,
1
,
"32-bit"
,
newinsn
);
}
else
{
/* other jcc's often use a 2-byte opcode for far jmps (which is what spri will emit) */
emit_relocation
(
fileIRp
,
fout
,
2
,
"32-bit"
,
newinsn
);
}
}
}
else
/* this instruction has a target, but it's not in the DB */
{
/* so we'll just emit the instruction and let it go back to the application text. */
fout
<<
complete_instr
<<
endl
;
// needs relocation info.
if
(
complete_instr
.
compare
(
"call 0x00000000"
)
==
0
||
complete_instr
.
compare
(
"jmp 0x00000000"
)
==
0
)
{
// just ignore these bogus instructions.
}
else
{
if
(
disasm
.
Instruction
.
Opcode
==
0xeb
||
// jmp with 8-bit addr
disasm
.
Instruction
.
Opcode
==
0xe8
||
// 0xe8: call with 32-bit addr
disasm
.
Instruction
.
Opcode
==
0xe9
// 0xe9: jmp with 32-bit addr
)
{
emit_relocation
(
fileIRp
,
fout
,
1
,
"32-bit"
,
newinsn
);
}
else
{
// assert this is the "main" file and no relocation is necessary.
assert
(
strstr
(
fileIRp
->
GetFile
()
->
GetURL
().
c_str
(),
"a.ncexe"
)
!=
0
);
}
}
}
emit_jump
(
fileIRp
,
fout
,
disasm
,
newinsn
,
old_insn
,
original_target
);
}
else
{
...
...
This diff is collapsed.
Click to expand it.
tools/spasm/spasm.cpp
+
29
−
16
View file @
a485336d
...
...
@@ -2,6 +2,7 @@
#include
<vector>
#include
<regex.h>
#include
<iostream>
#include
<ios>
#include
<sstream>
#include
<fstream>
#include
<map>
...
...
@@ -10,6 +11,8 @@
#include
<climits>
#include
<cstring>
#include
<assert.h>
#include
<stdint.h>
#include
<algorithm>
#include
"ben_lib.h"
...
...
@@ -51,11 +54,11 @@ typedef struct bin_instruction {
static
u
nsigned
in
t
const
ORG_PC
=
0xff000000
;
static
u
intptr_
t
const
ORG_PC
=
0xff000000
;
//padding is added to the ORG_PC for the first vpc
//the padding amount is [0-PC_PADDING_MAX), i.e., not inclusive of PC_PADDING_MAX
static
unsigned
int
const
PC_PADDING_MAX
=
8001
;
static
u
nsigned
in
t
vpc
=
ORG_PC
;
static
u
intptr_
t
vpc
=
ORG_PC
;
static
map
<
string
,
string
>
symMap
;
static
map
<
string
,
string
>
callbackMap
;
...
...
@@ -90,7 +93,7 @@ static void resolveSymbols(const string &mapFile);
//static vector<bin_instruction_t> parseBin(const string &binFile);
//static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector<spasmline_t> &spasmlines, const string &symbolFilename);
//static void printVector(const string &outputFile, const vector<string> &lines);
static
int
getSymbolAddress
(
const
string
&
symbolFilename
,
const
string
&
symbol
)
throw
(
exception
);
static
u
int
ptr_t
getSymbolAddress
(
const
string
&
symbolFilename
,
const
string
&
symbol
)
throw
(
exception
);
//
// @todo: need to cache results
...
...
@@ -106,13 +109,13 @@ static string getCallbackAddress(const string &symbolFilename, const string &sym
}
static
int
getSymbolAddress
(
const
string
&
symbolFilename
,
const
string
&
symbol
)
throw
(
exception
)
static
u
int
ptr_t
getSymbolAddress
(
const
string
&
symbolFilename
,
const
string
&
symbol
)
throw
(
exception
)
{
string
symbolFullName
=
symbolFilename
+
"+"
+
symbol
;
map
<
string
,
string
>::
iterator
callbackMapIterator
;
if
(
callbackMap
.
find
(
symbolFullName
)
!=
callbackMap
.
end
())
{
return
strtol
(
callbackMap
[
symbolFullName
].
c_str
(),
NULL
,
16
);
return
(
uintptr_t
)
strto
ul
l
(
callbackMap
[
symbolFullName
].
c_str
(),
NULL
,
16
);
}
// nm -a stratafier.o.exe | egrep " integer_overflow_detector$" | cut -f1 -d' '
...
...
@@ -138,7 +141,7 @@ static int getSymbolAddress(const string &symbolFilename, const string &symbol)
callbackMap
[
symbolFullName
]
=
addressString
;
return
strtol
(
addressString
.
c_str
(),
NULL
,
16
);
return
(
uintptr_t
)
strto
ul
l
(
addressString
.
c_str
(),
NULL
,
16
);
}
bool
fexists
(
const
string
&
filename
)
...
...
@@ -156,9 +159,16 @@ void a2bspri(const vector<string> &input,const string &outFilename, const string
srand
(
time
(
0
));
vpc
+=
rand
()
%
PC_PADDING_MAX
;
/* make start at 0xff00000000000000 for x86-64 */
if
(
sizeof
(
void
*
)
==
8
)
{
vpc
<<=
32
;
vpc
+=
rand
();
}
else
vpc
+=
rand
()
%
PC_PADDING_MAX
;
cout
<<
"VPC init loc: "
<<
hex
<<
vpc
<<
endl
;
cout
<<
"VPC init loc: "
<<
hex
<<
nouppercase
<<
vpc
<<
endl
;
for
(
unsigned
int
i
=
0
;
i
<
input
.
size
();
i
++
)
{
...
...
@@ -357,7 +367,7 @@ static void assemble(const string &assemblyFile)
nasm_bit_width
=
"BITS 32"
;
asmFile
<<
nasm_bit_width
<<
endl
;
asmFile
<<
"ORG 0x"
<<
hex
<<
vpc
<<
endl
;
asmFile
<<
"ORG 0x"
<<
hex
<<
nouppercase
<<
vpc
<<
endl
;
asmFile
<<
"[map symbols "
<<
assemblyFile
<<
".map]"
<<
endl
;
spasmline_t
sline
;
...
...
@@ -506,24 +516,27 @@ static void resolveSymbols(const string &mapFile)
//and the third is the symbol.
char
*
endptr
;
char
*
tok_c_str
=
const_cast
<
char
*>
(
tokens
[
0
].
c_str
());
long
long
addrval
;
addrval
=
strtoll
(
tok_c_str
,
&
endptr
,
16
);
uintptr_t
addrval
;
addrval
=
(
uintptr_t
)
strto
u
ll
(
tok_c_str
,
&
endptr
,
16
);
if
((
errno
==
ERANGE
&&
(
addrval
==
LLONG_MAX
||
addrval
==
LLONG_MIN
))
||
((
errno
!=
0
&&
addrval
==
0
)
||
endptr
==
tok_c_str
))
if
((
errno
==
ERANGE
&&
(
addrval
==
(
uintptr_t
)
U
LLONG_MAX
||
addrval
==
(
uintptr_t
)
0
))
||
((
errno
!=
0
&&
addrval
==
(
uintptr_t
)
0
)
||
endptr
==
tok_c_str
))
{
continue
;
}
tok_c_str
=
const_cast
<
char
*>
(
tokens
[
1
].
c_str
());
addrval
=
strtoll
(
tok_c_str
,
&
endptr
,
16
);
addrval
=
(
uintptr_t
)
strto
u
ll
(
tok_c_str
,
&
endptr
,
16
);
if
((
errno
==
ERANGE
&&
(
addrval
==
LLONG_MAX
||
addrval
==
LLONG_MIN
))
||
((
errno
!=
0
&&
addrval
==
0
)
||
endptr
==
tok_c_str
))
if
((
errno
==
ERANGE
&&
(
addrval
==
(
uintptr_t
)
U
LLONG_MAX
||
addrval
==
(
uintptr_t
)
0
))
||
((
errno
!=
0
&&
addrval
==
(
uintptr_t
)
0
)
||
endptr
==
tok_c_str
))
{
continue
;
}
// convert tokens[1] to lower case
transform
(
tokens
[
1
].
begin
(),
tokens
[
1
].
end
(),
tokens
[
1
].
begin
(),
::
tolower
);
if
(
symMap
.
find
(
tokens
[
2
])
!=
symMap
.
end
())
{
symMap
[
tokens
[
2
]]
=
tokens
[
1
];
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment