#include #include #include #include #include #include #include #include #include #include #include #define ROMSIZE 32768 std::regex_constants::syntax_option_type regex_flags = std::regex::ECMAScript | std::regex::icase; unsigned parse_arg(const std::string& arg); struct location { public: location() { loc = ""; } location(const std::string& filename) { loc = filename; } location(const std::string& filename, uint64_t linenum) { std::ostringstream x; x << filename << ":" << linenum; loc = x.str(); } operator std::string() const { return loc; } private: std::string loc; }; struct assembly_error : public std::runtime_error { public: assembly_error(const location& loc, const std::string& msg) : std::runtime_error(tostring(loc, msg)) {} private: static std::string tostring(const location& loc, const std::string& msg) { std::ostringstream x; x << (std::string)loc << ": " << msg; return x.str(); } }; struct regex { private: bool something; std::regex r; size_t hash_start; size_t hash_end; unsigned hash; static unsigned hash_range(const std::string& str, size_t start, size_t end) { //This is FNV-1a. unsigned hash = 2166136261; for(size_t i = start; i < end; i++) { hash ^= ((unsigned char)str[i] & 0xdf); //Mask bit for ignore case. hash *= 16777619; } return hash; } size_t pick_hash_end(const std::string& str) { size_t i; for(i = 0; i < str.length(); i++) if(strchr("^$\\.*+?()[]{}| ", (unsigned char)str[i])) break; return i; } public: struct match { public: match() : m(false) {} match(std::list sub) : m(true), ms(sub.begin(), sub.end()) {} operator bool() const { return m; } bool operator!() const { return !m; } size_t size() const { return ms.size(); } const std::string& operator[](size_t i) const { return ms[i]; } private: bool m; std::vector ms; }; regex() : something(false), hash_start(0), hash_end(0), hash(0) { } regex(const std::string& rgx, std::regex_constants::syntax_option_type flags) : something(true), r(rgx, flags), hash_start(0), hash_end(pick_hash_end(rgx)), hash(hash_range(rgx, hash_start, hash_end)) { } match operator()(const std::string& input) { std::smatch matches; if(!something || hash_end > input.length()) return match(); if(hash_range(input, hash_start, hash_end) != hash) return match(); if(!std::regex_match(input, matches, r)) return match(); return match(std::list(matches.begin(), matches.end())); } }; template struct regex_map_match { public: regex_map_match(regex::match _m, const T& _a) : m(_m), a(_a) {} operator bool() const { return m; } bool operator!() const { return !m; } const regex::match& get_match() const { return m; } const T& get_arg() const { return a; } private: regex::match m; T a; }; template struct regex_map { public: void add(const regex& r, const T& arg) { exlist.push_back(std::make_pair(r, arg)); } regex_map_match operator()(const std::string& line) { regex::match m; for(auto& i : exlist) { if((m = i.first(line))) return regex_map_match(m, i.second); } return regex_map_match(regex::match(), T()); } private: std::list> exlist; }; struct instruction_data { enum attr { BLANK = -1, NONE = 0, BYTE = 1, WORD = 2, REL = 3, LABEL = 4, MULTINOP = 5, RAWBYTES = 6, }; instruction_data() : opcode(0), attribute(BLANK) {} instruction_data(attr _attr) : opcode(0), attribute(_attr) {} instruction_data(unsigned _opc, attr _attr) : opcode(_opc), attribute(_attr) {} unsigned opcode; attr attribute; static attr pattern_attribute(const char* pattern) { attr a = NONE; for(const char* i = pattern; *i; i++) { if(*i == '%') { switch(*(++i)) { case 'b': a = combine_attr(a, BYTE); break; case 'B': a = combine_attr(a, BYTE); break; case 'w': a = combine_attr(a, WORD); break; case 'r': a = combine_attr(a, REL); break; case 's': a = combine_attr(a, BYTE); break; case 'S': a = combine_attr(a, BYTE); break; } } } return a; } static regex pattern_regex(const char* pattern) { std::ostringstream x; for(const char* i = pattern; *i; i++) { int ch = (unsigned char)*i; if(ch == '%') { int ch2 = (unsigned char)*(++i); switch(ch2) { case 'b': x << "([[:xdigit:]]{2})"; break; case 'B': x << "([[:xdigit:]]{2}|/\\w+(\\.\\w+)?|/\\.\\w+)"; break; case 'w': x << "([[:xdigit:]]{4}|/\\w+(\\.\\w+)?|/\\.\\w+)"; break; case 'r': x << "(/\\w+(\\.\\w+)?|/\\.\\w+)"; break; case 's': x << "([+-][0-7][[:xdigit:]]|-80)"; break; case 'S': x << "([+-]?[0-7][[:xdigit:]]|-80)"; break; case '%': x << "%"; break; default: throw std::logic_error("Unknown % replacement"); } } else if(strchr("^$\\.*+?()[]{}|", ch)) x << "\\" << (char)ch; else if(ch == ',') x << " ?, ?"; else x << (char)ch; } return regex(x.str(), regex_flags); } private: static bool is_dummy_attr(attr a) { return (a == BLANK || a == NONE); } static attr combine_attr(attr old, attr _new) { if(!is_dummy_attr(old) && !is_dummy_attr(_new)) throw std::runtime_error("Conflicting attributes"); return is_dummy_attr(_new) ? old : _new; } }; struct label { public: label() {} label(const std::string& _name, int _offset, location loc) : name(_name), offset(_offset), locator(loc) {} std::string get_name() { return name; } int get_offset() { return offset; } location get_location() { return locator; } void qualify(const std::string& _block) { if(name != "" && name[0] == '.') name = _block + name; } private: std::string name; int offset; location locator; }; struct region_map { region_map(unsigned _total) { ranges.insert(std::make_pair(0, _total)); used = 0; } void reserve(unsigned addr, unsigned size, const std::string& name) { if(!size) return; unsigned aaddr = 0, aend = 0; for(auto i : ranges) { if(i.first <= addr && i.second > addr) { aaddr = i.first; aend = i.second; break; } } if(aend < addr + size) throw std::runtime_error("Trying to reserve already reserved region"); ranges.erase(std::make_pair(aaddr, aend)); if(aaddr < addr) ranges.insert(std::make_pair(aaddr, addr)); if(addr + size < aend) ranges.insert(std::make_pair(addr + size, aend)); if(getenv("GBASM_SHOW_LAYOUT")) { std::ostringstream x; x << "Reserved " << std::hex << addr << "-" << (addr + size - 1) << " for " << name << std::endl; std::cout << x.str(); } used = std::max(used, addr + size); } unsigned alloc(unsigned size, const std::string& name) { if(!size) return 0; unsigned aaddr = 0, aend = 0; for(auto i : ranges) { if(i.second - i.first >= size) { aaddr = i.first; aend = i.second; break; } } if(aend - aaddr < size) throw std::runtime_error("Out of ROM space"); ranges.erase(std::make_pair(aaddr, aend)); if(aaddr + size < aend) ranges.insert(std::make_pair(aaddr + size, aend)); used = std::max(used, aaddr + size); if(getenv("GBASM_SHOW_LAYOUT")) { std::ostringstream x; x << "Allocated " << std::hex << aaddr << "-" << (aaddr + size - 1) << " for " << name << std::endl; std::cout << x.str(); } return aaddr; } size_t get_used() { return used; } private: std::set> ranges; unsigned used; }; struct block { enum reloc { REL = 0, ABS = 1, FFABS = 2, }; block() { assigned_addr = -1; } block(const std::string& _name, const location& _locator) { assigned_addr = -1; name = _name; locator = _locator; } const std::string& get_name() { return name; } const location& get_location() { return locator; } bool has_assigned_addr() { return (assigned_addr >= 0); } unsigned get_assigned_addr() { return assigned_addr; } void assign_address(unsigned addr, std::map& lmap) { assigned_addr = addr; for(auto& i : labels) lmap[i.get_name()] = assigned_addr + i.get_offset(); } unsigned get_size() { return bytes.size(); } void qualify_labels() { for(auto& j : relocations) j.first.qualify(name); for(auto& j : labels) j.qualify(name); } void add_label(const std::string& _name, int _offset, location loc) { labels.push_back(label(_name, _offset, loc)); } void parse_line(regex_map& ptable, const std::string& line, const location& locator) { if(line == "") return; auto idata = ptable(line); if(idata) { auto& idatam = idata.get_match(); auto& idataa = idata.get_arg(); std::string arg = (idatam.size() > 1) ? idatam[1] : ""; if(idataa.attribute == instruction_data::LABEL) { add_label(arg, bytes.size(), locator); } else if(idataa.attribute == instruction_data::RAWBYTES) { add_bytes(arg); } else { add_instruction(idataa.opcode, idataa.attribute, arg, locator); } return; } throw std::runtime_error("Unrecognized instruction"); } void write(unsigned char* rom, std::map& labels) { if(!has_assigned_addr() || get_assigned_addr() + get_size() > 32768) throw assembly_error(locator, "Invalid block assigned address."); unsigned addrbase = get_assigned_addr(); int offset; for(size_t j = 0; j < bytes.size(); j++) rom[addrbase + j] = bytes[j]; for(auto& i : relocations) { unsigned roff = addrbase + i.first.get_offset(); location errloc = i.first.get_location(); if(!labels.count(i.first.get_name())) throw assembly_error(errloc, "Undefined reference to '" + i.first.get_name() + "'"); unsigned loff = labels[i.first.get_name()]; switch(i.second) { case block::REL: offset = loff - (roff + 1); if(offset < -128 || offset > 127) throw assembly_error(errloc, "Jump out of range"); rom[roff] = offset & 0xFF; break; case block::ABS: rom[roff] = loff & 0xFF; rom[roff + 1] = loff >> 8; break; case block::FFABS: if((loff >> 8) != 0xFF) throw assembly_error(errloc, "Label in ldh does not point to FFxx"); rom[roff] = loff & 0xFF; break; } } } void check_duplicate_labels(std::map& labels_seen) { for(auto& j : labels) { if(labels_seen.count(j.get_name())) throw assembly_error(j.get_location(), "Duplicate label'" + j.get_name() + "' (previously seen at " + (std::string)labels_seen[j.get_name()] + ")"); labels_seen[j.get_name()] = j.get_location(); } } static void make_regex_table(regex_map& table, const char** patterns) { for(unsigned i = 0; i < 512; i++) { if(!patterns[i]) continue; instruction_data::attr a = instruction_data::pattern_attribute(patterns[i]); auto rgx = instruction_data::pattern_regex(patterns[i]); table.add(rgx, instruction_data(i, a)); } for(unsigned i = 0; i < 256; i++) { char buf[6] = {'x', 'x', 'x', 0, 0, 0}; buf[4] = "0123456789abcdef"[i / 16]; buf[5] = "0123456789abcdef"[i % 16]; table.add(regex(buf, regex_flags), instruction_data(i, instruction_data::NONE)); } //The label does not allow component before '.'. table.add(regex("(.?\\w+):", regex_flags), instruction_data(instruction_data::LABEL)); table.add(regex("nops ([[:xdigit:]]+|\\$\\S+)", regex_flags), instruction_data(instruction_data::MULTINOP)); table.add(regex("data ([[:xdigit:]]{2}( [[:xdigit:]]{2})*)", regex_flags), instruction_data(instruction_data::RAWBYTES)); } private: std::list