diff --git a/.gitattributes b/.gitattributes
index 57cc5bc08abae8f9f73cc4677f524b541037fc63..c4e35bc6c1ad70449407412d32fe7a89f2a7e604 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -228,6 +228,9 @@ xform/instrmap_hash.h -text
 xform/instruction_descriptor.cpp -text
 xform/instruction_descriptor.h -text
 xform/instrument.h -text
+xform/kmd9q_p1xform/Makefile -text
+xform/kmd9q_p1xform/p1_transform_v2.cpp -text
+xform/kmd9q_p1xform/p1_transform_v2.h -text
 xform/null_transform.cpp -text
 xform/null_transform.h -text
 xform/p1_transform.cpp -text
diff --git a/xform/function_descriptor.cpp b/xform/function_descriptor.cpp
index 0f39a37227e76b37871c70b6d22c0f6242fc37e2..77c98626e90260d11dcb352e4186049856ae1e4b 100644
--- a/xform/function_descriptor.cpp
+++ b/xform/function_descriptor.cpp
@@ -8,6 +8,7 @@ wahoo::Function::Function()
   m_address = -1;
   m_size = -1;
   m_isSafe = false;
+  m_useFP = false;
   m_outArgsRegionSize = 0;
   m_functionID = -1;
 }
@@ -18,6 +19,7 @@ wahoo::Function::Function(string p_name, app_iaddr_t p_start, int p_size)
   m_address = p_start;
   m_size = p_size;
   m_isSafe = false;
+  m_useFP = false;
   m_outArgsRegionSize = 0;
   m_functionID = -1;
 }
diff --git a/xform/function_descriptor.h b/xform/function_descriptor.h
index 095e795980fec19e43d89b1eb17d19d22caaaab2..ef544dc5070377649b3494309ad6b384a99c7f50 100644
--- a/xform/function_descriptor.h
+++ b/xform/function_descriptor.h
@@ -36,6 +36,9 @@ class Function
     void setOutArgsRegionSize(int p_size) { m_outArgsRegionSize = p_size; }
     int getOutArgsRegionSize() { return m_outArgsRegionSize; }
 
+    void setUseFramePointer(bool p_useFP) { m_useFP = p_useFP; }
+    bool getUseFramePointer() { return m_useFP; }
+
     void addInstruction(wahoo::Instruction *);
     void addStackAllocationInstruction(wahoo::Instruction *);
     void addStackDeallocationInstruction(wahoo::Instruction *);
@@ -58,6 +61,7 @@ class Function
     app_iaddr_t m_address;
     int m_size;
     bool m_isSafe;
+    bool m_useFP;
     int m_outArgsRegionSize;
 
     vector<wahoo::Instruction*> m_allInstructions;
diff --git a/xform/kmd9q_p1xform/Makefile b/xform/kmd9q_p1xform/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..888dcd651628ae3692a4f714ff552da4ee5b0a92
--- /dev/null
+++ b/xform/kmd9q_p1xform/Makefile
@@ -0,0 +1,22 @@
+
+.SUFFIXES: .exe .cpp
+
+PROGS=p1_transform_v2.exe
+
+all: $(PROGS)
+
+#$(PROGS): ../lib/libIRDB.a
+# Pattern rule: a suffix rule may not list prerequisites, so the header dependency needs %-syntax.
+%.exe: %.cpp p1_transform_v2.h
+	g++ -g $< -I../../libIRDB/include/ -I../../beaengine/include -L ../../libIRDB/lib/ -lIRDB -lpqxx -L ../../beaengine/lib/Linux.gnu.Debug -lBeaEngine_s_d -o $@
+
+#print_variant: print_variant.cpp ../lib/libIRDB.a
+#	g++ -g print_variant.cpp -I../include/ -L ../lib/ -lIRDB -lpqxx -o print_variant
+#
+#
+#list_programs: list_programs.cpp ../lib/libIRDB.a
+#	g++ -g list_programs.cpp -I../include/ -L ../lib/ -lIRDB -lpqxx -o list_programs
+
+clean:
+	rm -f $(PROGS)
+
diff --git a/xform/kmd9q_p1xform/p1_transform_v2.cpp b/xform/kmd9q_p1xform/p1_transform_v2.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..1750713fc7c759c1b739ce4fa8aa9eee82353614
--- /dev/null
+++ b/xform/kmd9q_p1xform/p1_transform_v2.cpp
@@ -0,0 +1,419 @@
+#include "beaengine/BeaEngine.h"
+#include "p1_transform_v2.h"
+
+using namespace libIRDB;
+using namespace std;
+
+namespace
+{
+	// Number of regmatch_t slots handed to regexec(); every pattern compiled
+	// in the constructor has fewer capture groups than this.
+	const int MAX_MATCHES = 10;
+
+	// Compile `pattern` into `re`. On failure, report `what` and terminate:
+	// the transform cannot run without its patterns.
+	void compileOrDie(regex_t *re, const char *pattern, const char *what)
+	{
+		if (regcomp(re, pattern, REG_EXTENDED | REG_ICASE) != 0)
+		{
+			fprintf(stderr,"Error: regular expression for %s failed to compile\n", what);
+			exit(1);
+		}
+	}
+
+	// Return the text of one capture group, or "" if the group did not match.
+	string matchedGroup(const string &text, const regmatch_t &m)
+	{
+		if (m.rm_so < 0 || m.rm_eo < m.rm_so)
+			return string();
+		return text.substr(m.rm_so, m.rm_eo - m.rm_so);
+	}
+
+	// Parse `text` as a hexadecimal constant (optional sign, optional 0x
+	// prefix or h suffix, surrounding blanks). Returns false for anything
+	// else, e.g. a register name: sscanf("%x") would read "eax" as 0xea.
+	bool parseHexConstant(const string &text, unsigned &value)
+	{
+		const char *begin = text.c_str();
+		while (*begin == ' ' || *begin == '\t')
+			++begin;
+		char *end = NULL;
+		const unsigned long parsed = strtoul(begin, &end, 16);
+		if (end == begin)
+			return false;
+		// "18h" is accepted only with a leading digit, so that the register
+		// name "ah" is not mistaken for 0xa.
+		if ((*end == 'h' || *end == 'H') && *begin >= '0' && *begin <= '9')
+			++end;
+		while (*end == ' ' || *end == '\t')
+			++end;
+		if (*end != '\0')
+			return false;
+		value = static_cast<unsigned>(parsed);
+		return true;
+	}
+
+	// Spell `value` as a 0x-prefixed hexadecimal literal.
+	string hexLiteral(unsigned value)
+	{
+		char text[32];
+		snprintf(text, sizeof(text), "0x%x", value);
+		return string(text);
+	}
+
+	// Record the current encoding of `instr` in `undoList`, then re-assemble
+	// it from `asmText`. Returns false if the assembler rejects the text.
+	bool reassemble(Instruction_t *instr, const string &asmText, map<Instruction_t*, string> &undoList)
+	{
+		char text[2048];
+		snprintf(text, sizeof(text), "%s", asmText.c_str());
+		undoList[instr] = instr->GetDataBits();
+		return instr->Assemble(text);
+	}
+
+	// Log an operand that is not a plain constant. Always returns false so
+	// that rewrite() can write `return rejectOperand(...)`.
+	bool rejectOperand(Function_t *f, const string &instrText)
+	{
+		fprintf(stderr,"Could not process function <%s>: unsupported stack operand in <%s>\n", f->GetName().c_str(), instrText.c_str());
+		return false;
+	}
+
+	// Wrap `arg` in single quotes so that the shell passes it through as one
+	// literal word; embedded single quotes are spelled '\''.
+	string shellQuote(const string &arg)
+	{
+		string quoted = "'";
+		for (string::size_type i = 0; i < arg.size(); ++i)
+		{
+			if (arg[i] == '\'')
+				quoted += "'\\''";
+			else
+				quoted += arg[i];
+		}
+		quoted += "'";
+		return quoted;
+	}
+}
+
+// Compile the patterns used to recognise stack-related instructions.
+// Exits the process if any pattern fails to compile.
+P1Transform::P1Transform()
+{
+	/* # mov dword [ebp+eax*4-0x70] , edx */
+	compileOrDie(&m_fancy_ebp_pattern, "(.*)(ebp[+].*-)(.*)(])(.*)", "<ebp+<stuff>*4-K>");
+
+	/* match <anything>[ebp-<K>]<anything>*/
+	compileOrDie(&m_stack_ebp_pattern, "(.+)[[:blank:]]*ebp[[:blank:]]*-(.+)[[:blank:]]*(])(.*)", "<ebp-K>");
+
+	/* match <anything>[esp+<K>]<anything>*/
+	compileOrDie(&m_stack_esp_pattern, "(.+)[[:blank:]]*esp[[:blank:]]*[+](.+)[[:blank:]]*(])(.*)", "<esp+K>");
+
+	/* stack allocation: match sub esp , K*/
+	compileOrDie(&m_stack_alloc_pattern, "[[:blank:]]*sub[[:blank:]]*esp[[:blank:]]*,[[:blank:]]*(.+)", "<sub esp, K>");
+
+	/* stack deallocation: match add esp , K*/
+	compileOrDie(&m_stack_dealloc_pattern, "[[:blank:]]*add[[:blank:]]*esp[[:blank:]]*,[[:blank:]]*(.+)", "<add esp, K>");
+
+	/* match lea <anything> dword [<stuff>]*/
+	compileOrDie(&m_lea_hack_pattern, "(.*lea.*,.*)dword(.*)", "lea hack");
+}
+
+// Release the six patterns compiled by the constructor.
+// m_fancy_esp_pattern is never compiled, so it must not be freed.
+P1Transform::~P1Transform()
+{
+	regfree(&m_fancy_ebp_pattern);
+	regfree(&m_stack_ebp_pattern);
+	regfree(&m_stack_esp_pattern);
+	regfree(&m_stack_alloc_pattern);
+	regfree(&m_stack_dealloc_pattern);
+	regfree(&m_lea_hack_pattern);
+}
+
+// Pad the stack frame of `f`: every "sub esp, K" / "add esp, K" grows by the
+// padding, and [ebp-K], [ebp+<stuff>-K] and (when the function does not use a
+// frame pointer) [esp+K] references at or beyond the outgoing-argument
+// region are shifted by the same amount.
+//
+// virp     - not used
+// f        - function whose instructions are re-assembled in place
+// undoList - receives the original data bits of every instruction touched
+//
+// Returns true only if an allocation site and a deallocation site
+// ("add esp, K" or "leave") were seen and every re-assembly succeeded.
+bool P1Transform::rewrite(libIRDB::VariantIR_t *virp, libIRDB::Function_t *f, std::map<Instruction_t*, std::string> & undoList)
+{
+	const int stack_frame_padding = getStackFramePadding(f);
+
+	bool stackAlloc = false;
+	bool stackDealloc = false;
+
+	for(
+		set<Instruction_t*>::const_iterator it=f->GetInstructions().begin();
+		it!=f->GetInstructions().end();
+		++it)
+	{
+		Instruction_t* instr=*it;
+
+		DISASM disasm;
+		instr->Disassemble(disasm);
+		string text = disasm.CompleteInstr;
+
+		regmatch_t pmatch[MAX_MATCHES];
+		memset(pmatch, 0, sizeof(pmatch));
+
+		/* AllocSite Section: sub esp, K */
+		if(regexec(&m_stack_alloc_pattern, text.c_str(), 5, pmatch, 0)==0)
+		{
+			unsigned frameSize = 0;
+			if (!parseHexConstant(matchedGroup(text, pmatch[1]), frameSize))
+				return rejectOperand(f, text);
+			if (!reassemble(instr, "sub esp, " + hexLiteral(frameSize + stack_frame_padding), undoList))
+				return false;
+			stackAlloc=true;
+		}
+		/* matches dealloc site: add esp, K */
+		else if(regexec(&m_stack_dealloc_pattern, text.c_str(), 5, pmatch, 0)==0)
+		{
+			unsigned frameSize = 0;
+			if (!parseHexConstant(matchedGroup(text, pmatch[1]), frameSize))
+				return rejectOperand(f, text);
+			if (!reassemble(instr, "add esp, " + hexLiteral(frameSize + stack_frame_padding), undoList))
+				return false;
+			instr->SetComment(instr->GetComment() + " p1-xformed");
+			stackDealloc = true;
+		}
+		else if (strstr(text.c_str(), "leave"))
+		{
+			stackDealloc = true;
+		}
+		else
+		{
+			/* lea <anything> dword [<stuff>]: drop the "dword" keyword */
+			if(regexec(&m_lea_hack_pattern, text.c_str(), 5, pmatch, 0)==0)
+				text = matchedGroup(text, pmatch[1]) + matchedGroup(text, pmatch[2]);
+
+			if(regexec(&m_stack_ebp_pattern, text.c_str(), 5, pmatch, 0)==0)
+			{
+				/* groups: 1 = prefix, 2 = K, 3 = "]", 4 = suffix */
+				unsigned offset = 0;
+				if (!parseHexConstant(matchedGroup(text, pmatch[2]), offset))
+					return rejectOperand(f, text);
+				offset += stack_frame_padding;
+				const string rewritten = matchedGroup(text, pmatch[1]) + "ebp - " + hexLiteral(offset)
+					+ matchedGroup(text, pmatch[3]) + matchedGroup(text, pmatch[4]);
+				if (!reassemble(instr, rewritten, undoList))
+					return false;
+				instr->SetComment(instr->GetComment() + " p1-xformed");
+			}
+			else if(regexec(&m_fancy_ebp_pattern, text.c_str(), 8, pmatch, 0)==0)
+			{
+				/* groups: 1 = prefix, 2 = "ebp+<stuff>-", 3 = K, 4 = "]", 5 = suffix */
+				unsigned offset = 0;
+				if (!parseHexConstant(matchedGroup(text, pmatch[3]), offset))
+					return rejectOperand(f, text);
+				offset += stack_frame_padding;
+				const string rewritten = matchedGroup(text, pmatch[1]) + matchedGroup(text, pmatch[2]) + hexLiteral(offset)
+					+ matchedGroup(text, pmatch[4]) + matchedGroup(text, pmatch[5]);
+				if (!reassemble(instr, rewritten, undoList))
+					return false;
+				instr->SetComment(instr->GetComment() + " p1-xformed");
+			}
+			else if(regexec(&m_stack_esp_pattern, text.c_str(), 5, pmatch, 0)==0 && !f->GetUseFramePointer())
+			{
+				/* groups: 1 = prefix, 2 = K, 3 = "]", 4 = suffix */
+				unsigned originalOffset = 0;
+				if (!parseHexConstant(matchedGroup(text, pmatch[2]), originalOffset))
+					return rejectOperand(f, text);
+				const string rewritten = matchedGroup(text, pmatch[1]) + "esp + " + hexLiteral(originalOffset + stack_frame_padding)
+					+ matchedGroup(text, pmatch[3]) + matchedGroup(text, pmatch[4]);
+
+				/* offsets inside the outgoing-argument region stay put */
+				const int sizeOutArgs = f->GetOutArgsRegionSize();
+				if (originalOffset >= static_cast<unsigned>(sizeOutArgs)){
+					if (!reassemble(instr, rewritten, undoList))
+						return false;
+					instr->SetComment(instr->GetComment() + " p1-xformed");
+				}
+			}
+		}
+	}
+
+	if (!stackAlloc)
+	{
+		fprintf(stderr,"Could not process function <%s>: no stack allocation routine found\n", f->GetName().c_str());
+		return false;
+	}
+	if (!stackDealloc)
+	{
+		fprintf(stderr,"Could not process function <%s>: no stack deallocation routine found\n", f->GetName().c_str());
+		return false;
+	}
+
+	fprintf(stderr,"Setting rewriteFunction to true; should rewite this function %s\n", f->GetName().c_str());
+	return true;
+}
+
+// return stack frame size padding
+// make sure we pad by at least 8 bytes
+int P1Transform::getStackFramePadding(libIRDB::Function_t *p_fn)
+{
+	// @todo: add some random variation
+	int stack_frame_padding = p_fn->GetStackFrameSize();
+	if (stack_frame_padding < 8) stack_frame_padding = 8;
+
+	return stack_frame_padding;
+}
+
+// Roll back: restore the saved data bits of every instruction in undoList.
+static void undo(const map<libIRDB::Instruction_t*, string> &undoList)
+{
+	for(
+		map<Instruction_t*, std::string>::const_iterator mit=undoList.begin();
+		mit != undoList.end();
+		++mit)
+	{
+		mit->first->SetDataBits(mit->second);
+	}
+}
+
+// Usage: <vid> <p1.candidates>
+// For every function of variant <vid> whose name occurs in the candidates
+// file: apply the P1 transform, emit p1.xform/<function>/a.irdb.aspri, run
+// $PEASOUP_HOME/tools/p1xform_v2.sh, and write the variant back to the
+// database only if that script exits with status 0; otherwise roll back.
+int main(int argc, char **argv)
+{
+	if(argc!=3)
+	{
+		cerr<<"Usage: [the executable] <vid> [p1.candidates]"<<endl;
+		exit(-1);
+	}
+
+	VariantID_t *pidp=NULL;
+	VariantIR_t *virp=NULL;
+
+	int progid = atoi(argv[1]);
+
+	// A function is a candidate if its name occurs in any of these lines.
+	// NOTE(review): this is a substring test, so a line listing "foobar" also
+	// selects a function named "foo"; confirm whether exact match is wanted.
+	vector<string> candidateLines;
+	{
+		ifstream candidateFile(argv[2]);
+		if (!candidateFile.is_open())
+			fprintf(stderr, "P1: Could not open: %s\n", argv[2]);
+		string line;
+		while (getline(candidateFile, line))
+			candidateLines.push_back(line);
+	}
+
+	//setup the interface to the sql server
+	pqxxDB_t pqxx_interface;
+	BaseObj_t::SetInterface(&pqxx_interface);
+
+	try
+	{
+		// read the variant ID using variant id number = atoi(argv[1])
+		pidp=new VariantID_t(progid);
+
+		// verify that we read it correctly.
+		assert(pidp->IsRegistered()==true);
+
+		// read the IR from the db
+		virp=new VariantIR_t(*pidp);
+	}
+	catch (DatabaseError_t &pnide)
+	{
+		cout<<"Unexpected database error: "<<pnide<<endl;
+		exit(-1);
+	}
+
+	P1Transform p1GoodTransform;
+
+	vector<std::string> functionsTransformed;
+	try {
+		//iterate through the functions that compose a particular variant
+		for(
+			set<Function_t*>::const_iterator it=virp->GetFunctions().begin();
+			it!=virp->GetFunctions().end();
+			++it
+			)
+		{
+			Function_t* func=*it;
+			const string funcName = func->GetName();
+
+			bool isCandidate = false;
+			for (vector<string>::const_iterator c = candidateLines.begin(); c != candidateLines.end() && !isCandidate; ++c)
+				isCandidate = (c->find(funcName) != string::npos);
+			if(!isCandidate)
+				continue;
+
+			map<libIRDB::Instruction_t*, std::string> undoList;
+			fprintf(stderr, "P1: Looking at function: %s size: %lu\n", funcName.c_str(), static_cast<unsigned long>(undoList.size()));
+
+			//perform the p1 transform on the given variant's function
+			if (!p1GoodTransform.rewrite(virp,func,undoList))
+			{
+				fprintf(stderr,"P1: %lu instructions to rollback for function %s\n", static_cast<unsigned long>(undoList.size()), funcName.c_str());
+				undo(undoList);
+				continue;
+			}
+
+			const string dirname = "p1.xform/" + funcName;
+			const string mkdirCmd = "mkdir -p " + shellQuote(dirname);
+			if (system(mkdirCmd.c_str()) != 0)
+				fprintf(stderr, "P1: Could not create: %s\n", dirname.c_str());
+
+			const string filename = dirname + "/a.irdb.aspri";
+			ofstream aspriFile;
+			aspriFile.open(filename.c_str());
+			if(!aspriFile.is_open())
+			{
+				fprintf(stderr, "P1: Could not open: %s\n", filename.c_str());
+				// the untested rewrite must not reach a later WriteToDB()
+				undo(undoList);
+				continue;
+			}
+
+			fprintf(stderr, "P1: generating aspri file: %s\n", filename.c_str());
+			virp->generate_spri(aspriFile); // p1.xform/<function_name>/a.irdb.aspri
+			aspriFile.close();
+
+			//This script generates the aspri and bspri files; it also runs BED
+			char progidText[32];
+			snprintf(progidText, sizeof(progidText), "%d", progid);
+			const string bedCmd = string("$PEASOUP_HOME/tools/p1xform_v2.sh ") + progidText + " " + shellQuote(funcName);
+
+			//If OK=BED(func), then commit
+			fprintf (stderr, "P1: about to execute: %s\n", bedCmd.c_str());
+
+			const int rt=system(bedCmd.c_str());
+			const bool bedPassed = (rt != -1) && WIFEXITED(rt) && WEXITSTATUS(rt) == 0;
+
+			if(bedPassed){
+				//Run BED; if passed, commit to DB
+				virp->WriteToDB();
+				functionsTransformed.push_back(funcName);
+			}
+			else {
+				undo(undoList);
+			}
+		}
+		pqxx_interface.Commit();
+	}
+	catch (DatabaseError_t &pnide)
+	{
+		cout<<"Unexpected database error: "<<pnide<<endl;
+		exit(-1);
+	}
+
+	cout << "List of functions transformed:" << endl;
+	for (vector<string>::size_type i = 0; i < functionsTransformed.size(); ++i)
+		cout << "function: " << functionsTransformed[i] << endl;
+
+	return 0;
+}
+
+
diff --git a/xform/kmd9q_p1xform/p1_transform_v2.h b/xform/kmd9q_p1xform/p1_transform_v2.h
new file mode 100755
index 0000000000000000000000000000000000000000..7e4b87d40fcc720f190f8e1516f9d793b7e8f99e
--- /dev/null
+++ b/xform/kmd9q_p1xform/p1_transform_v2.h
@@ -0,0 +1,48 @@
+#ifndef P1_TRANSFORM_V2_H
+#define P1_TRANSFORM_V2_H
+
+#include <libIRDB.hpp>
+#include <iostream>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <regex.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <fstream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+// P1 stack-padding transform: rewrites the stack-frame instructions of one
+// function at a time (see rewrite()).
+class P1Transform
+{
+	public:
+		P1Transform();
+		~P1Transform();
+
+		// NOTE(review): declared but not defined in p1_transform_v2.cpp.
+		void rewrite(char *);
+		bool rewrite(libIRDB::VariantIR_t*, libIRDB::Function_t*, std::map< libIRDB::Instruction_t*, std::string>&);
+
+	private:
+		// Not copyable: the destructor frees the compiled patterns.
+		P1Transform(const P1Transform&);
+		P1Transform& operator=(const P1Transform&);
+
+		int getStackFramePadding(libIRDB::Function_t*);
+
+		// regex patterns for detecting and transforming stack instruction references
+		regex_t m_stack_ebp_pattern;
+		regex_t m_stack_alloc_pattern;
+		regex_t m_stack_dealloc_pattern;
+		regex_t m_lea_hack_pattern;
+		regex_t m_fancy_ebp_pattern;
+		regex_t m_stack_esp_pattern;
+		regex_t m_fancy_esp_pattern;	// never compiled or used
+
+};
+
+#endif
diff --git a/xform/rewriter.cpp b/xform/rewriter.cpp
index 93c68e94e77d1a72b23fb7f07bfbee073c104b32..b8ef7a093311c7d1dbbc26a7e32be5cb3c9903f2 100644
--- a/xform/rewriter.cpp
+++ b/xform/rewriter.cpp
@@ -109,6 +109,9 @@ void Rewriter::readAnnotationFile(char p_filename[])
     else
       fn->setUnsafe();
 
+    if (strstr(remainder, "USEFP"))
+      fn->setUseFramePointer(true);
+
     m_functions[addr] = fn;
 
     line++;