diff --git a/include/elfwrite.h b/include/elfwrite.h index 4025208c018c38733725b3dfa85af9c38023baa4..2bebecbd29d02e105447147e5cc8bfb0bd9629bd 100644 --- a/include/elfwrite.h +++ b/include/elfwrite.h @@ -10,93 +10,47 @@ #endif -class ElfWriter +class ElfWriter : public ExeWriter /* ELF-specific writer layered on the ExeWriter base, which now receives firp/write_sections/bss_opts */ { protected: - class StringTable_t - { - public: - - StringTable_t() { } ; - void AddString(const std::string &s) - { - if(locations.find(s)!=locations.end()) - return; - - locations[s]=table.size(); - table+=s; - table+='\0'; - } - void Write(FILE* fout) const + class StringTable_t { - fwrite(table.c_str(), table.size(), 1, fout); - } - std::size_t size() const { return table.size(); } - std::size_t location(const std::string &s) const { return locations.at(s); } - - private: - - std::string table; - std::map<std::string,std::size_t> locations; - }; + public: - class PageData_t - { - - public: - PageData_t() : m_perms(0), is_relro(false), data(PAGE_SIZE), inuse(PAGE_SIZE) { } - - void union_permissions(int p_perms) { m_perms|=p_perms; } - - bool is_zero_initialized() const - { - for(unsigned int i=0;i<data.size();i++) - { - if(data.at(i)!=0) - return false; - } - return true; + StringTable_t() { } ; + void AddString(const std::string &s) + { + if(locations.find(s)!=locations.end()) + return; + + locations[s]=table.size(); + table+=s; + table+='\0'; } - - int m_perms; - bool is_relro; - - std::vector<unsigned char> data; - std::vector<bool> inuse; - }; - class LoadSegment_t - { - public: - LoadSegment_t() :filesz(0), memsz(0), filepos(0), start_page(0), m_perms(0) { } - - LoadSegment_t( unsigned int p_filesz, unsigned int p_memsz, unsigned int p_filepos, unsigned int p_start_page, unsigned int p_m_perms) - : - filesz(p_filesz), - memsz(p_memsz), - filepos(p_filepos), - start_page(p_start_page), - m_perms(p_m_perms) + void Write(FILE* fout) const { - + fwrite(table.c_str(), table.size(), 1, fout); } + std::size_t size() const { return table.size(); } + std::size_t location(const std::string 
&s) const { return locations.at(s); } + + private: + std::string table; + std::map<std::string,std::size_t> locations; + }; - unsigned int filesz; - unsigned int memsz; - unsigned int filepos; - unsigned int start_page; - unsigned int m_perms; - - }; - typedef std::vector<LoadSegment_t*> LoadSegmentVector_t; - - typedef std::map<IRDB_SDK::VirtualOffset_t, PageData_t> PageMap_t; public: - ElfWriter(IRDB_SDK::FileIR_t* firp, bool write_sections, bool bss_opts) : m_firp(firp), m_write_sections(write_sections), m_bss_opts(bss_opts) { } + ElfWriter(IRDB_SDK::FileIR_t* firp, bool write_sections, bool bss_opts) + : + ExeWriter(firp,write_sections,bss_opts) + { + } + virtual ~ElfWriter() {} - void Write(const ELFIO::elfio *elfiop, IRDB_SDK::FileIR_t* firp, const std::string &out_file, const std::string &infile); + void Write(const EXEIO::exeio *exeiop, const std::string &out_file, const std::string &infile) override; /* implements the pure-virtual ExeWriter::Write; 'override' makes the compiler reject any signature drift */ protected: @@ -108,40 +62,8 @@ class ElfWriter virtual void CreateNewPhdrs(const IRDB_SDK::VirtualOffset_t &min_addr, const IRDB_SDK::VirtualOffset_t &max_addr)=0; virtual void WriteElf(FILE* fout)=0; virtual void AddSections(FILE* fout)=0; - - - PageMap_t pagemap; - LoadSegmentVector_t segvec; - - template <class T> static T page_align(const T& in) - { - - return in&~(PAGE_SIZE-1); - } - - - - protected: - IRDB_SDK::FileIR_t* m_firp; - bool m_write_sections; - bool m_bss_opts; - private: - IRDB_SDK::VirtualOffset_t DetectMinAddr(const ELFIO::elfio *elfiop, IRDB_SDK::FileIR_t* firp, const std::string &out_file); - IRDB_SDK::VirtualOffset_t DetectMaxAddr(const ELFIO::elfio *elfiop, IRDB_SDK::FileIR_t* firp, const std::string &out_file); - - void CreatePagemap(const ELFIO::elfio *elfiop, IRDB_SDK::FileIR_t* firp, const std::string &out_file); - void CreateSegmap(const ELFIO::elfio *elfiop, IRDB_SDK::FileIR_t* firp, const std::string &out_file); - void SortSegmap(); - - - - }; - -// - - template <class T_Elf_Ehdr, class T_Elf_Phdr, class T_Elf_Addr, class T_Elf_Shdr, 
class T_Elf_Sym, class T_Elf_Rel, class T_Elf_Rela, class T_Elf_Dyn> class ElfWriterImpl : public ElfWriter { diff --git a/include/exewrite.h b/include/exewrite.h new file mode 100644 index 0000000000000000000000000000000000000000..d54f36f693893e9298aa2322cff95962845aca04 --- /dev/null +++ b/include/exewrite.h @@ -0,0 +1,110 @@ + +#ifndef ExeWriter_h +#define ExeWriter_h + +#include <vector> +#include <map> + + +class ExeWriter /* base class for the executable writers (ElfWriter and PeWriter derive from it): holds the page map and load-segment list and declares the address/segment helpers */ +{ + protected: + + + class PageData_t + { + + public: + PageData_t() : m_perms(0), is_relro(false), data(PAGE_SIZE), inuse(PAGE_SIZE) { } + + void union_permissions(int p_perms) { m_perms|=p_perms; } + + bool is_zero_initialized() const + { + for(unsigned int i=0;i<data.size();i++) + { + if(data.at(i)!=0) + return false; + } + return true; + } + + int m_perms; + bool is_relro; + + std::vector<unsigned char> data; + std::vector<bool> inuse; + }; + class LoadSegment_t + { + public: + LoadSegment_t() :filesz(0), memsz(0), filepos(0), start_page(0), m_perms(0) { } + + LoadSegment_t( unsigned int p_filesz, unsigned int p_memsz, unsigned int p_filepos, unsigned int p_start_page, unsigned int p_m_perms) + : + filesz(p_filesz), + memsz(p_memsz), + filepos(p_filepos), + start_page(p_start_page), + m_perms(p_m_perms) + { + + } + + + unsigned int filesz; + unsigned int memsz; + unsigned int filepos; + unsigned int start_page; + unsigned int m_perms; + + }; + using LoadSegmentVector_t = std::vector<LoadSegment_t*>; + using PageMap_t = std::map<IRDB_SDK::VirtualOffset_t, PageData_t>; + + public: + ExeWriter(IRDB_SDK::FileIR_t* firp, bool write_sections, bool bss_opts) : m_firp(firp), m_write_sections(write_sections), m_bss_opts(bss_opts) { } + virtual ~ExeWriter() {} + virtual void Write(const EXEIO::exeio *exeio, const std::string &out_file, const std::string &infile) = 0; + + + protected: + + IRDB_SDK::FileIR_t* m_firp; + bool m_write_sections; + bool m_bss_opts; + PageMap_t pagemap; + LoadSegmentVector_t segvec; + +#ifndef PAGE_SIZE + static constexpr int 
PAGE_SIZE=4096; /* static: read by the static page_* helpers and by nested PageData_t, which have no ExeWriter object to read a per-instance member from */ +#endif + template <class T> + static T page_align(const T& in) + { + + return in&~(PAGE_SIZE-1); + } + template <class T> + static inline T page_round_down(const T& x) + { + return x & (~(PAGE_SIZE-1)); + } + template <class T> + static inline T page_round_up(const T& x) + { + return ( (((uintptr_t)(x)) + PAGE_SIZE-1) & (~(PAGE_SIZE-1)) ); + } + + IRDB_SDK::VirtualOffset_t DetectMinAddr(); + IRDB_SDK::VirtualOffset_t DetectMaxAddr(); + + void CreatePagemap(); + void CreateSegmap(); + void SortSegmap(); + +}; + + +#endif + diff --git a/include/pewrite.h b/include/pewrite.h index 92b551d3d44806289ec951fa703974a7afdb801f..bf98b2d1b1c7e2e8fbe2dd29fedcce7f7ad3ae4a 100644 --- a/include/pewrite.h +++ b/include/pewrite.h @@ -10,22 +10,18 @@ #endif -class PeWriter +class PeWriter : public ExeWriter /* public base, matching ElfWriter, so a PeWriter can be used through ExeWriter* */ { public: - PeWriter(IRDB_SDK::FileIR_t* firp, bool write_sections, bool bss_opts) : m_firp(firp), m_write_sections(write_sections), m_bss_opts(bss_opts) { } + PeWriter(IRDB_SDK::FileIR_t* firp, bool write_sections, bool bss_opts) + : + ExeWriter(firp,write_sections,bss_opts) + { + } virtual ~PeWriter() {} void Write(const EXEIO::exeio *exeiop, const std::string &out_file, const std::string &infile); protected: - - FileIR_t* m_firp=nullptr; - bool m_write_sections=false; - bool m_bss_opts=false; - - - - }; diff --git a/include/zipr_all.h b/include/zipr_all.h index 63831e4fb0cbe71bc56405596993f66eee3f1594..7fcf39e746c14301f51d28cb1ad2af463abac72b 100644 --- a/include/zipr_all.h +++ b/include/zipr_all.h @@ -79,6 +79,7 @@ namespace zipr #include <zipr_impl.h> #include <zipr_optimizations.h> #include <zipr_stats.h> +#include <exewrite.h> #include <elfwrite.h> #include <pewrite.h> #include <ehwrite.h> diff --git a/src/elfwrite.cpp b/src/elfwrite.cpp index 7651deca1e2842e17fcd0e10b552e5f62b8635b9..af747e0e5f5dfc589dc151b4298e850d166e157f 100644 --- a/src/elfwrite.cpp +++ b/src/elfwrite.cpp @@ -34,18 +34,17 @@ static inline uintptr_t page_round_up(uintptr_t x) -void 
ElfWriter::Write(const ELFIO::elfio *elfiop, FileIR_t* firp, const string &out_file, const string &infile) +void ElfWriter::Write(const EXEIO::exeio *exeiop, const string &out_file, const string &infile) { - - FILE* fin=fopen(infile.c_str(), "r"); - FILE* fout=fopen(out_file.c_str(), "w"); + auto fin=fopen(infile.c_str(), "rb"); + auto fout=fopen(out_file.c_str(), "wb"); /* binary mode: executable images must not go through text-mode newline translation */ assert(fin && fout); - CreatePagemap(elfiop, firp, out_file); - CreateSegmap(elfiop, firp, out_file); + CreatePagemap(); + CreateSegmap(); //SortSegmap(); - VirtualOffset_t min_addr=DetectMinAddr(elfiop, firp, out_file); - VirtualOffset_t max_addr=DetectMaxAddr(elfiop, firp, out_file); + VirtualOffset_t min_addr=DetectMinAddr(); + VirtualOffset_t max_addr=DetectMaxAddr(); LoadEhdr(fin); LoadPhdrs(fin); @@ -59,8 +58,9 @@ void ElfWriter::Write(const ELFIO::elfio *elfiop, FileIR_t* firp, const string & } -VirtualOffset_t ElfWriter::DetectMinAddr(const ELFIO::elfio *elfiop, FileIR_t* firp, const string &out_file) +VirtualOffset_t ExeWriter::DetectMinAddr() { + auto firp=m_firp; VirtualOffset_t min_addr=(*(firp->getDataScoops().begin()))->getStart()->getVirtualOffset(); for(DataScoopSet_t::iterator it=firp->getDataScoops().begin(); it!=firp->getDataScoops().end(); ++it) { @@ -74,8 +74,9 @@ VirtualOffset_t ElfWriter::DetectMinAddr(const ELFIO::elfio *elfiop, FileIR_t* f } -VirtualOffset_t ElfWriter::DetectMaxAddr(const ELFIO::elfio *elfiop, FileIR_t* firp, const string &out_file) +VirtualOffset_t ExeWriter::DetectMaxAddr() { + auto firp=m_firp; VirtualOffset_t max_addr=(*(firp->getDataScoops().begin()))->getEnd()->getVirtualOffset(); for(DataScoopSet_t::iterator it=firp->getDataScoops().begin(); it!=firp->getDataScoops().end(); ++it) { @@ -89,8 +90,9 @@ VirtualOffset_t ElfWriter::DetectMaxAddr(const ELFIO::elfio *elfiop, FileIR_t* f } -void ElfWriter::CreatePagemap(const ELFIO::elfio *elfiop, FileIR_t* firp, const string &out_file) +void ExeWriter::CreatePagemap() { + auto firp=m_firp; // 
for(DataScoopSet_t::iterator it=firp->getDataScoops().begin(); it!=firp->getDataScoops().end(); ++it) // DataScoop_t* scoop=*it; @@ -144,7 +146,7 @@ void ElfWriter::CreatePagemap(const ELFIO::elfio *elfiop, FileIR_t* firp, const } } -void ElfWriter::SortSegmap() +void ExeWriter::SortSegmap() { // do one interation of a bubble sort to move the segement with the largest bss last. for (unsigned int i=0; i<segvec.size()-1;i++) @@ -160,7 +162,7 @@ void ElfWriter::SortSegmap() } } -void ElfWriter::CreateSegmap(const ELFIO::elfio *elfiop, FileIR_t* firp, const string &out_file) +void ExeWriter::CreateSegmap() { const auto should_bss_optimize= [&] (const PageData_t& perms) { diff --git a/src/pewrite.cpp b/src/pewrite.cpp index 5d9395e5ec8786eea6a1128d4fddeaf864d6c424..adaa0af51f469883f2df36fc2ed8d21479016a7d 100644 --- a/src/pewrite.cpp +++ b/src/pewrite.cpp @@ -30,10 +30,84 @@ static inline uintptr_t page_round_up(uintptr_t x) return ( (((uintptr_t)(x)) + PAGE_SIZE-1) & (~(PAGE_SIZE-1)) ); } +#if 0 +/* +Note: all multi-byte values are stored LSB first. One block is 512 bytes, one paragraph is 16 bytes. See also the entry in Ralf Brown's Interrupt List + +Offset (hex) +Meaning +00-01 0x4d, 0x5a. This is the "magic number" of an EXE file. The first byte of the file is 0x4d and the second is 0x5a. +02-03 The number of bytes in the last block of the program that are actually used. If this value is zero, that means the entire last block is used (i.e. the effective value is 512). +04-05 Number of blocks in the file that are part of the EXE file. If [02-03] is non-zero, only that much of the last block is used. +06-07 Number of relocation entries stored after the header. May be zero. +08-09 Number of paragraphs in the header. The program's data begins just after the header, and this field can be used to calculate the appropriate file offset. The header includes the relocation entries. Note that some OSs and/or programs may fail if the header is not a multiple of 512 bytes. 
+0A-0B Number of paragraphs of additional memory that the program will need. This is the equivalent of the BSS size in a Unix program. The program can't be loaded if there isn't at least this much memory available to it. +0C-0D Maximum number of paragraphs of additional memory. Normally, the OS reserves all the remaining conventional memory for your program, but you can limit it with this field. +0E-0F Relative value of the stack segment. This value is added to the segment the program was loaded at, and the result is used to initialize the SS register. +10-11 Initial value of the SP register. +12-13 Word checksum. If set properly, the 16-bit sum of all words in the file should be zero. Usually, this isn't filled in. +14-15 Initial value of the IP register. +16-17 Initial value of the CS register, relative to the segment the program was loaded at. +18-19 Offset of the first relocation item in the file. +1A-1B Overlay number. Normally zero, meaning that it's the main program. +Here is a structure that can be used to represent the EXE header and relocation entries, assuming a 16-bit LSB machine: +*/ +struct EXE { /* mirrors the 0x00-0x1B header fields tabulated above; compiled out by the enclosing #if 0 */ + unsigned short signature; /* == 0x5a4D */ + unsigned short bytes_in_last_block; + unsigned short blocks_in_file; + unsigned short num_relocs; + unsigned short header_paragraphs; + unsigned short min_extra_paragraphs; + unsigned short max_extra_paragraphs; + unsigned short ss; + unsigned short sp; + unsigned short checksum; + unsigned short ip; + unsigned short cs; + unsigned short reloc_table_offset; + unsigned short overlay_number; +}; + +struct EXE_RELOC { + unsigned short offset; + unsigned short segment; +}; +/* +The offset of the beginning of the EXE data is computed like this: + +exe_data_start = exe.header_paragraphs * 16L; +The offset of the byte just after the EXE data (in DJGPP, the size of the stub and the start of the COFF image) is computed like this: + +extra_data_start = exe.blocks_in_file * 512L; +if (exe.bytes_in_last_block) + 
extra_data_start -= (512 - exe.bytes_in_last_block); + */ +#endif +uint8_t dos_header[]= /* canned 128-byte MS-DOS stub ending in the "This program cannot be run in DOS mode" message; the dword at 0x3c is 0x80, i.e. the offset just past this array. Not referenced yet: PeWriter::Write below is still a stub. */ + { + /* 00000000 */ 0x4d, 0x5a, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, // |MZ..............| + /* 00000010 */ 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // |........@.......| + /* 00000020 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // |................| + /* 00000030 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, // |................| + /* 00000040 */ 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68, // |........!..L.!Th| + /* 00000050 */ 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, // |is program canno| + /* 00000060 */ 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, // |t be run in DOS | + /* 00000070 */ 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x0d, 0x0d, 0x0a, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // |mode....$.......| + }; +/* + * notes: + * 0) A PE32+ file starts with a dos header (explained above). This is mostly ignored, except the word at byte 0x3c. + * 1) The 4-bytes at location 0x3c specify where the PE file header starts, this is the real meat of the heading. + * 2) the PE file header comes next. + */ + + void PeWriter::Write(const EXEIO::exeio *exeiop, const string &out_file, const string &infile) { +// const auto time_since_epoch=time(nullptr); assert(0); // to do diff --git a/src/zipr.cpp b/src/zipr.cpp index 3f658f1334f3923fe636823363425c919eec76cf..904fb6c67f9e01154b742c4229cc3a11fd8d0841 100644 --- a/src/zipr.cpp +++ b/src/zipr.cpp @@ -1916,14 +1916,14 @@ void ZiprImpl_t::OutputBinaryFile(const string &name) // create the output file in a totally different way using elfwriter. 
later we may // use this instead of the old way. - auto elfiop=reinterpret_cast<ELFIO::elfio*>(exeiop->get_elfio()); + //auto elfiop=reinterpret_cast<ELFIO::elfio*>(exeiop->get_elfio()); auto ew=unique_ptr<ElfWriter>(); ew.reset( bit_width == 64 ? (ElfWriter*)new ElfWriter64(m_firp, *m_add_sections, *m_bss_opts) : bit_width == 32 ? (ElfWriter*)new ElfWriter32(m_firp, *m_add_sections, *m_bss_opts) : throw invalid_argument("Unknown machine width") ); - ew->Write(elfiop,m_firp, output_filename, "a.ncexe"); + ew->Write(exeiop,output_filename, "a.ncexe"); ew.reset(nullptr); // explicitly free ew as we're done with it } else if (is_pe)