From 4f4bef374ccc3a50f7a14ee894a35e091383bb45 Mon Sep 17 00:00:00 2001 From: wghost Date: Mon, 23 Mar 2015 12:56:51 +0400 Subject: [PATCH] Pseudo-code decompiler and new [@] command --- HexToPseudoCode.cpp | 85 ++++ ModScript.cpp | 88 +++- ModScript.h | 1 + UENativeTablesReader.cpp | 69 +++ UPKInfo.h | 6 +- UPKUtils.cbp | 76 ++- UPKUtils.cpp | 1 + UToken.cpp | 705 +++++++++++++++++++++++++++ UToken.h | 920 +++++++++++++++++++++++++++++++++++ UTokenFactory.cpp | 361 ++++++++++++++ UTokenFactory.h | 14 + build/CMakeLists.txt | 4 + doc/PatchUPK_Mod_Example.txt | 54 +- doc/PatchUPK_Readme.txt | 222 ++++----- doc/UPKUtils_Readme.txt | 80 ++- 15 files changed, 2500 insertions(+), 186 deletions(-) create mode 100644 HexToPseudoCode.cpp create mode 100644 UENativeTablesReader.cpp create mode 100644 UToken.cpp create mode 100644 UToken.h create mode 100644 UTokenFactory.cpp create mode 100644 UTokenFactory.h diff --git a/HexToPseudoCode.cpp b/HexToPseudoCode.cpp new file mode 100644 index 0000000..552bd0b --- /dev/null +++ b/HexToPseudoCode.cpp @@ -0,0 +1,85 @@ +#include +#include + +#include "UPKUtils.h" +#include "UToken.h" + +using namespace std; + +string GetFilename(string str) +{ + unsigned found = str.find_last_of("/\\"); + return str.substr(found + 1); +} + +int main(int argN, char* argV[]) +{ + //cout << "HexToPseudoCode" << endl; + + if (argN < 3 || argN > 4) + { + cerr << "Usage: HexToPseudoCode UnpackedResourceFile.upk ObjectName [/d]" << endl; + return 1; + } + + UPKUtils package(argV[1]); + + UPKReadErrors err = package.GetError(); + + if (err != UPKReadErrors::NoErrors) + { + cerr << "Error reading package:\n" << FormatReadErrors(err); + if (package.IsCompressed()) + cerr << "Compression flags:\n" << FormatCompressionFlags(package.GetCompressionFlags()); + return 1; + } + + string NameToFind = argV[2]; + + //cout << "Object to find: " << NameToFind << endl; + + UObjectReference ObjRef = package.FindObject(NameToFind, false); + + if (ObjRef == 0) + { + cerr << "Unable to find object entry by name " << NameToFind << endl; + return 1; + } + if (ObjRef > 0 && argN == 4 && string(argV[3]) == "/d") + { + package.SaveExportData((uint32_t)ObjRef); + } + if (ObjRef > 0) + { + //cout << "Found Export Object:\n" << package.FormatExport(ObjRef, true); + } + else + { + //cout << "Found Import Object:\n" << package.FormatImport(-ObjRef, true); + } + + if (ObjRef <= 0 || (package.GetExportEntry(ObjRef).Type != "Function" && package.GetExportEntry(ObjRef).Type != "State")) + { + cerr << "Object is not a Function nor a State, can not convert to pseudo-code!\n"; + return 1; + } + + //cout << "Attempting deserialization:\n"; + + vector ObjData = package.GetExportData(ObjRef); + stringstream stream; + stream.write(ObjData.data(), ObjData.size()); + size_t ScrPos = package.GetScriptRelOffset(ObjRef); + stream.seekg(ScrPos); + + UScriptCode ScrCode; + string PseudoCode = ScrCode.Deserialize(stream, package); + cout << "//This script was generated by HexToPseudoCode decompiler for use with PatchUPK/PatcherGUI tool\n" + << "UPK_FILE = " << GetFilename(argV[1]) << "\n" + << "OBJECT = " << NameToFind << " : AUTO\n" + << "[REPLACEMENT_CODE]\n" + << PseudoCode; + + return 0; +} + diff --git a/ModScript.cpp b/ModScript.cpp index 79bb2de..9545ee1 100644 --- a/ModScript.cpp +++ b/ModScript.cpp @@ -3,6 +3,7 @@ #include #include #include +#include void ModScript::SetExecutors() { @@ -1856,6 +1857,11 @@ std::string GetWord(std::istream& in) } return word; } + /// extract marker + if (ch == '(' || ch == ')') + { + return word; + } /// extract HEX if (isxdigit(ch)) { @@ -1900,6 +1906,7 @@ std::string ModScript::ParseScript(std::string ScriptData, unsigned* ScriptMemSi std::ostringstream ScriptHEX; std::istringstream WorkingData(ScriptData); std::map Labels; + std::stack MarkerLabels; unsigned ScriptMemSize = 0, MemSize = 0; bool needSecondPass = false; unsigned numPasses = 0; @@ -1912,6 +1919,12 @@ std::string ModScript::ParseScript(std::string ScriptData, unsigned* ScriptMemSi WorkingData.clear(); ScriptHEX.str(""); ScriptHEX.clear(); + if (MarkerLabels.size() != 0) + { + *ErrorMessages << "Unresolved marker(s) found!" << std::endl; + SetBad(); + return std::string(""); + } } while (!WorkingData.eof()) { @@ -1951,15 +1964,41 @@ std::string ModScript::ParseScript(std::string ScriptData, unsigned* ScriptMemSi Command = EatWhite(Command); /// remove white-spaces if (Command[0] == '@') /// label reference { - if (Labels.count(Command.substr(1)) != 0) /// found reference + if (Command.length() == 1) /// [@] - auto-calculate memory size { - uint16_t LabelPos = Labels[Command.substr(1)]; - ScriptHEX << MakeTextBlock(reinterpret_cast(&LabelPos), 2); + if (numPasses == 0) /// first pass - create label + { + std::string autoLabel = "__automemsize__" + FormatHEX(ScriptMemSize); + if (Labels.count(autoLabel) != 0) + { + *ErrorMessages << "Internal error! Duplicated auto-label: " << autoLabel << std::endl; + SetBad(); + return std::string(""); + } + Labels[autoLabel] = 0; /// init new label + MarkerLabels.push(autoLabel); /// keep track of unresolved labels + needSecondPass = true; /// request second pass + ScriptHEX << "[@" << autoLabel << "] "; /// put auto-generated label back into the stream + } + else + { + *ErrorMessages << "Internal error! Unresolved command: " << NextWord << std::endl; + SetBad(); + return std::string(""); + } } - else + else /// [@label_name] - resolve named labels { - ScriptHEX << NextWord << " "; - needSecondPass = true; + if (Labels.count(Command.substr(1)) != 0) /// found reference + { + uint16_t LabelPos = Labels[Command.substr(1)]; + ScriptHEX << MakeTextBlock(reinterpret_cast(&LabelPos), 2); + } + else + { + ScriptHEX << NextWord << " "; + needSecondPass = true; + } } if (numPasses == 0) ScriptMemSize += 2; @@ -1976,6 +2015,36 @@ std::string ModScript::ParseScript(std::string ScriptData, unsigned* ScriptMemSi Labels[Command.substr(1)] = ScriptMemSize; } } + else if(IsMarker(NextWord)) /// resolve memory size markers '(' and ')' + { + if (numPasses != 0) /// should not be here at the second pass + { + *ErrorMessages << "Internal error! Unresolved marker: " << NextWord << std::endl; + SetBad(); + return std::string(""); + } + if (NextWord == "(") /// start position + { + if (MarkerLabels.size() == 0 || Labels[MarkerLabels.top()] != 0) + { + *ErrorMessages << "Bad marker: " << NextWord << std::endl; + SetBad(); + return std::string(""); + } + Labels[MarkerLabels.top()] = ScriptMemSize; + } + else /// end position + { + if (MarkerLabels.size() == 0 || Labels[MarkerLabels.top()] == 0) + { + *ErrorMessages << "Bad marker: " << NextWord << std::endl; + SetBad(); + return std::string(""); + } + Labels[MarkerLabels.top()] = ScriptMemSize - Labels[MarkerLabels.top()]; + MarkerLabels.pop(); /// close the current marker + } + } else { *ErrorMessages << "Bad token: " << NextWord << std::endl; @@ -2013,6 +2082,13 @@ bool ModScript::IsCommand(std::string word) return (word.front() == '[' && word.back() == ']'); } +bool ModScript::IsMarker(std::string word) +{ + if (word.length() != 1) /// one symbol '(' or ')' + return false; + return (word.front() == '(' || word.back() == ')'); +} + std::string ModScript::TokenToHEX(std::string Token, unsigned* MemSizeRef) { std::string Code = Token.substr(1, Token.length()-2); /// remove <> diff --git a/ModScript.h b/ModScript.h index df7246e..2f92632 100644 --- a/ModScript.h +++ b/ModScript.h @@ -145,6 +145,7 @@ class ModScript bool IsHEX(std::string word); bool IsToken(std::string word); bool IsCommand(std::string word); + bool IsMarker(std::string word); std::string TokenToHEX(std::string Token, unsigned* MemSizeRef = nullptr); }; diff --git a/UENativeTablesReader.cpp b/UENativeTablesReader.cpp new file mode 100644 index 0000000..2e2244f --- /dev/null +++ b/UENativeTablesReader.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +using namespace std; + +#define MAGIC (uint32_t)747443441 + +int main(int argN, char* argV[]) +{ + cout << "UENativeTablesReader" << endl; + + if (argN != 2) + { + cerr << "Usage: UENativeTablesReader NativeTable.NTL" << endl; + return 1; + } + + ifstream table(argV[1], ios::binary); + if (!table.is_open()) + { + cerr << "Can't open " << argV[1] << endl; + return 1; + } + + uint32_t magic; + table.read(reinterpret_cast(&magic), sizeof(magic)); + if (magic != MAGIC) + { + cerr << "Input file is not a NTL file!\n"; + return 1; + } + + uint32_t num; + table.read(reinterpret_cast(&num), sizeof(num)); + if (num < 0 || num > 0xFFF) + { + cerr << "Input file is not a NTL file!\n"; + return 1; + } + + cout << "Num = " << num << endl; + + cout << "HEX\tName\t\t\t\tOpPrec\tType\tToken\n"; + + for (unsigned i = 0; i < num; ++i) + { + uint8_t NameLen; + string Name; + uint8_t OperPrecedence; + uint8_t Type; + uint32_t ByteToken; + char ch[255]; + + table.read(reinterpret_cast(&NameLen), sizeof(NameLen)); + table.read(ch, NameLen); + Name = string(ch, NameLen); + table.read(reinterpret_cast(&OperPrecedence), sizeof(OperPrecedence)); + table.read(reinterpret_cast(&Type), sizeof(Type)); + table.read(reinterpret_cast(&ByteToken), sizeof(ByteToken)); + + cout << "0x" << setfill('0') << setw(2) << hex << ByteToken << "\t" << Name + << "\t\t\t\t" + << dec << (int)OperPrecedence << "\t" << (int)Type << "\t" << ByteToken << endl; + } + + return 0; +} diff --git a/UPKInfo.h b/UPKInfo.h index df5fc87..6322a3c 100644 --- a/UPKInfo.h +++ b/UPKInfo.h @@ -160,7 +160,7 @@ class UPKInfo { public: /// constructors - UPKInfo(): Summary(), NoneIdx(0), ReadError(UPKReadErrors::NoErrors), Compressed(false), CompressedChunk(false) {}; + UPKInfo(): Summary(), NoneIdx(0), ReadError(UPKReadErrors::NoErrors), Compressed(false), CompressedChunk(false), LastAccessedExportObjIdx(0) {}; UPKInfo(std::istream& stream); /// destructor ~UPKInfo() {}; @@ -187,7 +187,8 @@ class UPKInfo bool IsCompressed() { return Compressed; } bool IsFullyCompressed() { return (Compressed && CompressedChunk); } UPKReadErrors GetError() { return ReadError; } - uint32_t GetCompressionFlags() { return Summary.CompressionFlags; } + uint32_t GetCompressionFlags() { return Summary.CompressionFlags; } + UObjectReference GetLastAccessedExportObjIdx() { return LastAccessedExportObjIdx; } /// format header to text string std::string FormatCompressedHeader(); std::string FormatSummary(); @@ -208,6 +209,7 @@ class UPKInfo bool Compressed; bool CompressedChunk; FCompressedChunkHeader CompressedHeader; + UObjectReference LastAccessedExportObjIdx; }; /// helper functions diff --git a/UPKUtils.cbp b/UPKUtils.cbp index 76ad228..c60bd9c 100644 --- a/UPKUtils.cbp +++ b/UPKUtils.cbp @@ -76,6 +76,22 @@