From bfa3a3bf404f5c86971a2f5a83520fb83fc2b9bd Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Wed, 14 Aug 2024 12:48:38 +0100 Subject: [PATCH] int2ptr support --- ir/globals.cpp | 1 + ir/globals.h | 1 + ir/instr.cpp | 1 + ir/memory.cpp | 174 ++------ ir/memory.h | 12 +- ir/pointer.cpp | 408 ++++++++++++++---- ir/pointer.h | 25 +- smt/exprs.cpp | 5 + smt/exprs.h | 10 + tests/alive-tv/asm/gep_mem.srctgt.ll | 16 + tests/alive-tv/int2ptr/gep_cmp.srctgt.ll | 11 + tests/alive-tv/int2ptr/intro.srctgt.ll | 15 + .../removal.srctgt.ll} | 0 tools/transform.cpp | 11 +- 14 files changed, 448 insertions(+), 242 deletions(-) create mode 100644 tests/alive-tv/asm/gep_mem.srctgt.ll create mode 100644 tests/alive-tv/int2ptr/gep_cmp.srctgt.ll create mode 100644 tests/alive-tv/int2ptr/intro.srctgt.ll rename tests/alive-tv/{int2ptr.srctgt.ll => int2ptr/removal.srctgt.ll} (100%) diff --git a/ir/globals.cpp b/ir/globals.cpp index cce50a491..f3b2647ba 100644 --- a/ir/globals.cpp +++ b/ir/globals.cpp @@ -31,6 +31,7 @@ unsigned strlen_unroll_cnt = 8; unsigned memcmp_unroll_cnt = 8; bool little_endian = true; bool observes_addresses = true; +bool has_int2ptr = true; bool has_alloca = true; bool has_fncall = true; bool has_write_fncall = true; diff --git a/ir/globals.h b/ir/globals.h index 95e98ea55..8fc3d7e37 100644 --- a/ir/globals.h +++ b/ir/globals.h @@ -62,6 +62,7 @@ extern bool little_endian; /// Whether pointer addresses are observed extern bool observes_addresses; +extern bool has_int2ptr; /// Whether there is an alloca extern bool has_alloca; diff --git a/ir/instr.cpp b/ir/instr.cpp index 623d6e84d..3fa147b97 100644 --- a/ir/instr.cpp +++ b/ir/instr.cpp @@ -3803,6 +3803,7 @@ StateValue GEP::toSMT(State &s) const { AndExpr inbounds_np; AndExpr idx_all_zeros; + // FIXME: not implemented for physical pointers if (inbounds) inbounds_np.add(ptr.inbounds()); diff --git a/ir/memory.cpp b/ir/memory.cpp index 007be9442..983540b1a 100644 --- a/ir/memory.cpp +++ b/ir/memory.cpp @@ -441,7 +441,7 @@ expr Byte::refined(const Byte &other) const { if (asm_mode) { extra = !is_ptr2 && other.boolNonptrNonpoison() && - castPtrToInt() == v2; + castPtrToInt() == other.nonptrValue(); } ptr_cnstr = ptrNonpoison().implies( (other.ptrNonpoison() && @@ -869,11 +869,7 @@ weak_ordering Memory::MemBlock::operator<=>(const MemBlock &rhs) const { static set all_leaf_ptrs(const Memory &m, const expr &ptr) { set ptrs; for (auto &ptr_val : ptr.leafs()) { - Pointer p(m, ptr_val); - auto offset = p.getOffset(); - for (auto &bid : p.getBid().leafs()) { - ptrs.emplace(m, bid, offset); - } + ptrs.emplace(m, ptr_val); } return ptrs; } @@ -881,7 +877,7 @@ static set all_leaf_ptrs(const Memory &m, const expr &ptr) { static set extract_possible_local_bids(Memory &m, const expr &eptr) { set ret; for (auto &ptr : all_leaf_ptrs(m, eptr)) { - if (!ptr.isLocal().isFalse()) + if (!ptr.isLocal().isFalse() && !ptr.isLogical().isFalse()) ret.emplace(ptr.getShortBid()); } return ret; @@ -906,13 +902,17 @@ unsigned Memory::numNonlocals() const { } expr Memory::isBlockAlive(const expr &bid, bool local) const { + uint64_t bid_n; + if (!local && bid.isUInt(bid_n) && always_alive(bid_n)) + return true; + return load_bv(local ? local_block_liveness : non_local_block_liveness, bid) && (!local && has_null_block && !null_is_dereferenceable ? bid != 0 : true); } bool Memory::mayalias(bool local, unsigned bid0, const expr &offset0, - unsigned bytes, uint64_t align, bool write) const { + const expr &bytes, uint64_t align, bool write) const { if (local && bid0 >= next_local_bid) return false; @@ -969,19 +969,14 @@ bool Memory::mayalias(bool local, unsigned bid0, const expr &offset0, } else if (local) // allocated in another branch return false; - if (local || !always_alive(bid0)) { - if ((local ? local_block_liveness : non_local_block_liveness) - .extract(bid0, bid0).isZero()) - return false; - } + if (isBlockAlive(bid, local).isFalse()) + return false; return true; } -Memory::AliasSet Memory::computeAliasing(const Pointer &ptr, unsigned bytes, +Memory::AliasSet Memory::computeAliasing(const Pointer &ptr, const expr &bytes, uint64_t align, bool write) const { - assert(bytes % (bits_byte/8) == 0); - AliasSet aliasing(*this); auto sz_local = next_local_bid; auto sz_nonlocal = aliasing.size(false); @@ -1004,7 +999,7 @@ Memory::AliasSet Memory::computeAliasing(const Pointer &ptr, unsigned bytes, AliasSet this_alias = aliasing; auto is_local = p.isLocal(); auto shortbid = p.getShortBid(); - expr offset = p.getOffset(); + expr offset = p.getOffset(); uint64_t bid; if (shortbid.isUInt(bid) && (!isAsmMode() || p.isInbounds(true).isTrue())) { if (!is_local.isFalse() && bid < sz_local) @@ -1054,9 +1049,10 @@ Memory::AliasSet Memory::computeAliasing(const Pointer &ptr, unsigned bytes, return aliasing; } -void Memory::access(const Pointer &ptr, unsigned bytes, uint64_t align, +void Memory::access(const Pointer &ptr, const expr &bytes, uint64_t align, bool write, const function &fn) { + assert(!ptr.isLogical().isFalse()); auto aliasing = computeAliasing(ptr, bytes, align, write); unsigned has_local = aliasing.numMayAlias(true); unsigned has_nonlocal = aliasing.numMayAlias(false); @@ -1072,25 +1068,17 @@ void Memory::access(const Pointer &ptr, unsigned bytes, uint64_t align, auto sz_local = aliasing.size(true); auto sz_nonlocal = aliasing.size(false); +#define call_fn(block, local, cond_log) \ + Pointer this_ptr(*this, i, local); \ + fn(block, this_ptr + offset, is_singleton ? expr(true) : (cond_log)); + for (unsigned i = 0; i < sz_local; ++i) { if (!aliasing.mayAlias(true, i)) continue; - Pointer this_ptr(*this, i, true); - expr cond_eq; - - if (isAsmMode() && !ptr.isInbounds(true).isTrue()) { - // in asm mode, all pointers have full provenance - cond_eq = ptr.isInboundsOf(this_ptr, bytes); - this_ptr += addr - this_ptr.getAddress(); - } else { - auto n = expr::mkUInt(i, Pointer::bitsShortBid()); - cond_eq - = has_local == 1 ? is_local : (bid == (has_both ? one.concat(n) : n)); - this_ptr += offset; - } - - fn(local_block_val[i], ptr, is_singleton ? expr(true) : std::move(cond_eq)); + auto n = expr::mkUInt(i, Pointer::bitsShortBid()); + call_fn(local_block_val[i], true, + has_local == 1 ? is_local : (bid == (has_both ? one.concat(n) : n))) } for (unsigned i = 0; i < sz_nonlocal; ++i) { @@ -1102,20 +1090,9 @@ void Memory::access(const Pointer &ptr, unsigned bytes, uint64_t align, // If aliasing info says it can, either imprecise analysis or incorrect // block id encoding is happening. assert(!is_fncall_mem(i)); - Pointer this_ptr(*this, i, false); - expr cond_eq; - if (isAsmMode() && !ptr.isInbounds(true).isTrue()) { - // in asm mode, all pointers have full provenance - cond_eq = ptr.isInboundsOf(this_ptr, bytes); - this_ptr += addr - this_ptr.getAddress(); - } else { - cond_eq = has_nonlocal == 1 ? !is_local : bid == i; - this_ptr += offset; - } - - fn(non_local_block_val[i], this_ptr, - is_singleton ? expr(true) : std::move(cond_eq)); + call_fn(non_local_block_val[i], false, + has_nonlocal == 1 ? !is_local : bid == i) } } @@ -1169,7 +1146,7 @@ vector Memory::load(const Pointer &ptr, unsigned bytes, set &undef, } }; - access(ptr, bytes, align, false, fn); + access(ptr, expr::mkUInt(bytes, bits_size_t), align, false, fn); vector ret; for (auto &disj : loaded) { @@ -1261,7 +1238,8 @@ void Memory::store(const Pointer &ptr, blk.undef.insert(undef.begin(), undef.end()); }; - access(ptr, bytes, align, !state->isInitializationPhase(), fn); + access(ptr, expr::mkUInt(bytes, bits_size_t), align, + !state->isInitializationPhase(), fn); } void Memory::storeLambda(const Pointer &ptr, const expr &offset, @@ -1309,10 +1287,7 @@ void Memory::storeLambda(const Pointer &ptr, const expr &offset, blk.undef.insert(undef.begin(), undef.end()); }; - uint64_t size = bits_byte / 8; - bytes.isUInt(size); - - access(ptr, size, align, true, fn); + access(ptr, bytes.zextOrTrunc(bits_size_t), align, true, fn); } static bool memory_unused() { @@ -1640,6 +1615,9 @@ expr Memory::mkInput(const char *name, const ParamAttrs &attrs0) { unsigned max_bid = has_null_block + num_globals_src + next_ptr_input++; assert(max_bid < num_nonlocals_src); + // FIXME: doesn't consider physical ptrs + // consider how to do POR? + auto attrs = attrs0; attrs.set(ParamAttrs::IsArg); Pointer p(*this, name, attrs); @@ -2395,7 +2373,7 @@ void Memory::copy(const Pointer &src, const Pointer &dst) { dst_blk.type |= blk.type; has_bv_val |= 1u << blk.val.isBV(); }; - access(src, bits_byte/8, bits_byte/8, false, fn); + access(src, expr::mkUInt(bits_byte/8, bits_size_t), bits_byte/8, false, fn); // if we have mixed array/non-array blocks, switch them all to array if (has_bv_val == 3) { @@ -2415,93 +2393,18 @@ void Memory::fillPoison(const expr &bid) { std::move(blksz), bits_byte / 8, {}, false); } -expr Memory::ptr2int(const expr &ptr) const { +expr Memory::ptr2int(const expr &ptr) { assert(!memory_unused() && observesAddresses()); Pointer p(*this, ptr); + observesAddr(p); state->addUB(!p.isNocapture()); return p.getAddress(); } -Pointer Memory::searchPointer(const expr &val0) const { - DisjointExpr ret; - expr val = val0.zextOrTrunc(bits_ptr_address); - - auto add = [&](unsigned limit, bool local) { - for (unsigned i = 0; i != limit; ++i) { - Pointer p(*this, i, local); - Pointer p_end = p + p.blockSize(); - ret.add(p + (val - p.getAddress()), - !local && i == 0 && has_null_block - ? val == 0 - : val.uge(p.getAddress()) && val.ult(p_end.getAddress())); - } - }; - add(numLocals(), true); - add(numNonlocals(), false); - return *std::move(ret)(); -} - -expr Memory::int2ptr(const expr &val0) const { +expr Memory::int2ptr(const expr &val) const { assert(!memory_unused() && observesAddresses()); - if (isAsmMode()) { - DisjointExpr ret(Pointer::mkNullPointer(*this).release()); - expr val = val0; - OrExpr domain; - bool processed_all = true; - - // Try to optimize the conversion - // Note that the result of int2ptr is always used to dereference the ptr - // Hence we can throw away null & OOB pointers - // Also, these pointers must have originated from ptr->int type punning - // so they must have a (blk_addr bid) expression in them (+ some offset) - for (auto [e, cond] : DisjointExpr(val, 5)) { - auto blks = e.get_apps_of("blk_addr", "local_addr!"); - if (blks.empty()) { - expr subst = false; - if (cond.isNot(cond)) - subst = true; - val = val.subst(cond, subst); - continue; - } - // There's only one possible bid in this expression - if (blks.size() == 1) { - auto &fn = *blks.begin(); - expr bid; - if (fn.fn_name().starts_with("local_addr!")) { - for (auto &[bid0, addr] : local_blk_addr) { - auto blks = addr.get_apps_of("blk_addr", "local_addr!"); - assert(blks.size() == 1); - if (blks.begin()->eq(fn)) { - bid = Pointer::mkLongBid(bid0, true); - break; - } - } - } else { - // non-local block - assert(fn.fn_name() == "blk_addr"); - bid = Pointer::mkLongBid(fn.getFnArg(0), false); - } - assert(bid.isValid()); - Pointer base(*this, bid, expr::mkUInt(0, bits_for_offset)); - expr offset = (e - base.getAddress()).sextOrTrunc(bits_for_offset); - ret.add(Pointer(*this, bid, offset).release(), cond); - } else { - processed_all = false; - } - domain.add(std::move(cond)); - } - state->addUB(std::move(domain)()); - - if (processed_all) - return std::move(ret)()->simplify(); - - return searchPointer(val.simplify()).release(); - } - - expr null = Pointer::mkNullPointer(*this).release(); - expr fn = expr::mkUF("int2ptr", { val0 }, null); - state->doesApproximation("inttoptr", fn); - return expr::mkIf(val0 == 0, null, fn); + return + Pointer::mkPhysical(*this, val.zextOrTrunc(bits_ptr_address)).release(); } expr Memory::blockValRefined(const Memory &other, unsigned bid, bool local, @@ -2599,12 +2502,15 @@ Memory::refined(const Memory &other, bool fncall, AliasSet block_alias(*this, other); auto min_read_sz = bits_byte / 8; + expr min_read_sz_expr = expr::mkUInt(min_read_sz, bits_size_t); + auto sets = { make_pair(this, set_ptrs), make_pair(&other, set_ptrs2) }; for (const auto &[mem, set] : sets) { if (set) { for (auto &it: *set_ptrs) { block_alias.unionWith(computeAliasing(Pointer(*mem, it.val.value), - min_read_sz, min_read_sz, false)); + min_read_sz_expr, min_read_sz, + false)); } } else { if (mem->next_nonlocal_bid > 0) diff --git a/ir/memory.h b/ir/memory.h index 70e1739eb..47d221bed 100644 --- a/ir/memory.h +++ b/ir/memory.h @@ -191,12 +191,13 @@ class Memory { void mkNonlocalValAxioms(bool skip_consts) const; bool mayalias(bool local, unsigned bid, const smt::expr &offset, - unsigned bytes, uint64_t align, bool write) const; + const smt::expr &bytes, uint64_t align, bool write) const; - AliasSet computeAliasing(const Pointer &ptr, unsigned bytes, uint64_t align, - bool write) const; + AliasSet computeAliasing(const Pointer &ptr, const smt::expr &bytes, + uint64_t align, bool write) const; - void access(const Pointer &ptr, unsigned btyes, uint64_t align, bool write, + void access(const Pointer &ptr, const smt::expr &bytes, uint64_t align, + bool write, const std::function &fn); @@ -353,9 +354,8 @@ class Memory { void fillPoison(const smt::expr &bid); - smt::expr ptr2int(const smt::expr &ptr) const; + smt::expr ptr2int(const smt::expr &ptr); smt::expr int2ptr(const smt::expr &val) const; - Pointer searchPointer(const smt::expr &val) const; std::tuple> refined(const Memory &other, bool fncall, diff --git a/ir/pointer.cpp b/ir/pointer.cpp index 0065d28af..c8158619d 100644 --- a/ir/pointer.cpp +++ b/ir/pointer.cpp @@ -12,6 +12,30 @@ using namespace smt; using namespace std; using namespace util; +static bool hasLogicalBit() { + return has_int2ptr; +} + +static unsigned total_bits_logical() { + return hasLogicalBit() + bits_for_ptrattrs + bits_for_bid + bits_for_offset; +} + +static unsigned total_bits_physical() { + return hasLogicalBit() * (1 + bits_for_ptrattrs + bits_ptr_address); +} + +static unsigned padding_logical() { + auto l = total_bits_logical(); + auto p = total_bits_physical(); + return l > p ? 0 : p - l; +} + +static unsigned padding_physical() { + auto l = total_bits_logical(); + auto p = total_bits_physical(); + return p > l ? 0 : l - p; +} + static expr prepend_if(const expr &pre, expr &&e, bool prepend) { return prepend ? pre.concat(e) : std::move(e); } @@ -45,7 +69,9 @@ static expr attr_to_bitvec(const ParamAttrs &attrs) { namespace IR { Pointer::Pointer(const Memory &m, const expr &bid, const expr &offset, - const expr &attr) : m(m), p(bid.concat(offset)) { + const expr &attr) : m(m), + p(prepend_if(expr::mkUInt(0, 1 + padding_logical()), + bid.concat(offset), hasLogicalBit())) { if (bits_for_ptrattrs) p = p.concat(attr); assert(!bid.isValid() || !offset.isValid() || p.bits() == totalBits()); @@ -54,8 +80,8 @@ Pointer::Pointer(const Memory &m, const expr &bid, const expr &offset, Pointer::Pointer(const Memory &m, const char *var_name, const ParamAttrs &attr) : m(m) { unsigned bits = bitsShortBid() + bits_for_offset; - p = prepend_if(expr::mkUInt(0, 1), - expr::mkVar(var_name, bits, false), hasLocalBit()); + p = expr::mkVar(var_name, bits, false) + .zext(hasLocalBit() + (1 + padding_logical()) * hasLogicalBit()); if (bits_for_ptrattrs) p = p.concat(attr_to_bitvec(attr)); assert(p.bits() == totalBits()); @@ -65,12 +91,15 @@ Pointer::Pointer(const Memory &m, expr repr) : m(m), p(std::move(repr)) { assert(!p.isValid() || p.bits() == totalBits()); } -Pointer::Pointer(const Memory &m, unsigned bid, bool local) +Pointer::Pointer(const Memory &m, unsigned bid, bool local, expr attr) : m(m), p( - prepend_if(expr::mkUInt(local, 1), - expr::mkUInt(bid, bitsShortBid()) - .concat_zeros(bits_for_offset + bits_for_ptrattrs), - hasLocalBit())) { + prepend_if(expr::mkUInt(0, 1 + padding_logical()), + prepend_if(expr::mkUInt(local, 1), + expr::mkUInt(bid, bitsShortBid()) + .concat_zeros(bits_for_offset), + hasLocalBit()), + hasLogicalBit())) { + p = attr.isValid() ? p.concat(attr) : p.concat_zeros(bits_for_ptrattrs); assert((local && bid < m.numLocals()) || (!local && bid < num_nonlocals)); assert(p.bits() == totalBits()); } @@ -79,6 +108,22 @@ Pointer::Pointer(const Memory &m, const expr &bid, const expr &offset, const ParamAttrs &attr) : Pointer(m, bid, offset, attr_to_bitvec(attr)) {} +Pointer Pointer::mkPhysical(const Memory &m, const expr &addr) { + return mkPhysical(m, addr, expr::mkUInt(0, bits_for_ptrattrs)); +} + +Pointer Pointer::mkPhysical(const Memory &m, const expr &addr, + const expr &attr) { + assert(hasLogicalBit()); + assert(addr.bits() == bits_ptr_address); + auto p = expr::mkUInt(1, 1) + .concat_zeros(padding_physical()) + .concat(addr); + if (bits_for_ptrattrs) + p = p.concat(attr); + return { m, std::move(p) }; +} + expr Pointer::mkLongBid(const expr &short_bid, bool local) { assert((local && (num_locals_src || num_locals_tgt)) || (!local && num_nonlocals)); @@ -96,19 +141,33 @@ expr Pointer::mkUndef(State &s) { force_local = !force_nonlocal && m.numNonlocals() == 0; } - unsigned var_bits = bits_for_bid + bits_for_offset - - (force_local | force_nonlocal); + unsigned bits_phy = hasLogicalBit() * bits_ptr_address; + unsigned bits_log + = bits_for_bid + bits_for_offset - (force_local | force_nonlocal); + unsigned var_bits = hasLogicalBit() + max(bits_log, bits_phy); + expr var = expr::mkFreshVar("undef", expr::mkUInt(0, var_bits)); - s.addUndefVar(expr(var)); - if (force_local || force_nonlocal) - var = mkLongBid(var, force_local); + expr ptr; + if (bits_log >= bits_phy) { + ptr = var.trunc(bits_log); + if (force_local || force_nonlocal) + ptr = mkLongBid(ptr, force_local); + if (hasLogicalBit()) + ptr = var.sign().concat(ptr); + } else { + ptr = var; + } + + ptr = ptr.concat_zeros(bits_for_ptrattrs); - return var.concat_zeros(bits_for_ptrattrs); + s.addUndefVar(std::move(var)); + assert(ptr.bits() == totalBits()); + return ptr; } unsigned Pointer::totalBits() { - return bits_for_ptrattrs + bits_for_bid + bits_for_offset; + return max(total_bits_logical(), total_bits_physical()); } unsigned Pointer::bitsShortBid() { @@ -128,13 +187,17 @@ bool Pointer::hasLocalBit() { return (num_locals_src || num_locals_tgt) && num_nonlocals; } +expr Pointer::isLogical() const { + return hasLogicalBit() ? p.sign() == 0 : true; +} + expr Pointer::isLocal(bool simplify) const { if (m.numLocals() == 0) return false; if (m.numNonlocals() == 0) return true; - auto bit = totalBits() - 1; + auto bit = bits_for_bid - 1 + bits_for_offset + bits_for_ptrattrs; expr local = p.extract(bit, bit); if (simplify) { @@ -174,12 +237,13 @@ expr Pointer::isWritableGlobal() const { } expr Pointer::getBid() const { - return p.extract(totalBits() - 1, bits_for_offset + bits_for_ptrattrs); + auto start = bits_for_offset + bits_for_ptrattrs; + return p.extract(start + bits_for_bid - 1, start); } expr Pointer::getShortBid() const { - return p.extract(totalBits() - 1 - hasLocalBit(), - bits_for_offset + bits_for_ptrattrs); + auto start = bits_for_offset + bits_for_ptrattrs; + return p.extract(start + bits_for_bid - 1 - hasLocalBit(), start); } expr Pointer::getOffset() const { @@ -242,9 +306,17 @@ expr Pointer::getBlockBaseAddress(bool simplify) const { return non_local; } +expr Pointer::getLogAddress(bool simplify) const { + return getBlockBaseAddress(simplify) + + getOffset().sextOrTrunc(bits_ptr_address); +} + expr Pointer::getAddress(bool simplify) const { - return - getBlockBaseAddress(simplify) + getOffset().sextOrTrunc(bits_ptr_address); + return mkIf_fold(isLogical(), getLogAddress(simplify), getPhysicalAddress()); +} + +expr Pointer::getPhysicalAddress() const { + return p.extract(bits_for_ptrattrs + bits_ptr_address - 1, bits_for_ptrattrs); } expr Pointer::blockSize() const { @@ -266,8 +338,12 @@ Pointer Pointer::mkPointerFromNoAttrs(const Memory &m, const expr &e) { } Pointer Pointer::operator+(const expr &bytes) const { - return { m, getBid(), getOffset() + bytes.zextOrTrunc(bits_for_offset), - getAttrs() }; + return mkIf_fold(isLogical(), + (Pointer{m, getBid(), + getOffset() + bytes.zextOrTrunc(bits_for_offset), getAttrs()}), + mkPhysical(m, getPhysicalAddress() + bytes.zextOrTrunc(bits_ptr_address), + getAttrs()) + ); } Pointer Pointer::operator+(unsigned bytes) const { @@ -275,14 +351,18 @@ Pointer Pointer::operator+(unsigned bytes) const { } void Pointer::operator+=(const expr &bytes) { - p = (*this + bytes).p; + *this = *this + bytes; } Pointer Pointer::maskOffset(const expr &mask) const { - return { m, getBid(), - ((getAddress() & mask.zextOrTrunc(bits_ptr_address)) - - getBlockBaseAddress()).zextOrTrunc(bits_for_offset), - getAttrs() }; + return mkIf_fold(isLogical(), + (Pointer{ m, getBid(), + ((getLogAddress() & mask.zextOrTrunc(bits_ptr_address)) + - getBlockBaseAddress()).zextOrTrunc(bits_for_offset), + getAttrs() }), + mkPhysical(m, getPhysicalAddress() & mask.zextOrTrunc(bits_ptr_address), + getAttrs()) + ); } expr Pointer::addNoUSOverflow(const expr &offset, bool offset_only) const { @@ -305,11 +385,28 @@ expr Pointer::operator!=(const Pointer &rhs) const { return !operator==(rhs); } -expr Pointer::isInboundsOf(const Pointer &block, const expr &bytes0) const { +expr Pointer::isOfBlock(const Pointer &block, const expr &bytes, + bool is_phy) const { + assert(block.getOffset().isZero()); + expr addr = is_phy ? getPhysicalAddress() : getAddress(); + expr block_addr = block.getLogAddress(); + expr block_size = block.blockSize(); + + if (bytes.eq(block_size)) + return addr == block_addr; + if (bytes.ugt(block_size).isTrue()) + return false; + + return addr.uge(block_addr) && + addr.ult(block_addr + block_size.zextOrTrunc(bits_ptr_address)); +} + +expr Pointer::isInboundsOf(const Pointer &block, const expr &bytes0, + bool is_phy) const { assert(block.getOffset().isZero()); expr bytes = bytes0.zextOrTrunc(bits_ptr_address); - expr addr = getAddress(); - expr block_addr = block.getAddress(); + expr addr = is_phy ? getPhysicalAddress() : getAddress(); + expr block_addr = block.getLogAddress(); expr block_size = block.blockSize().zextOrTrunc(bits_ptr_address); if (bytes.eq(block_size)) @@ -322,10 +419,6 @@ expr Pointer::isInboundsOf(const Pointer &block, const expr &bytes0) const { (addr + bytes).ule(block_addr + block_size); } -expr Pointer::isInboundsOf(const Pointer &block, unsigned bytes) const { - return isInboundsOf(block, expr::mkUInt(bytes, bits_ptr_address)); -} - expr Pointer::isInbounds(bool strict) const { auto offset = getOffsetSizet(); auto size = blockSizeOffsetT(); @@ -381,7 +474,7 @@ expr Pointer::isAligned(uint64_t align) { // FIXME: this is not sound; the function may be called only when a global // has a better alignment at run time - if (blk_align.isConst() && offset.isConst()) { + if (blk_align.isConst() && offset.isConst() && isLogical().isTrue()) { // This is stricter than checking getAddress(), but as addresses are not // observed, program shouldn't be able to distinguish this from checking // getAddress() @@ -414,6 +507,7 @@ pair Pointer::isDereferenceable(const expr &bytes0, uint64_t align, bool iswrite, bool ignore_accessability, bool round_size_to_align) { + bool is_asm = m.state->isAsmMode(); expr bytes = bytes0.zextOrTrunc(bits_size_t); if (round_size_to_align) bytes = bytes.round_up(expr::mkUInt(align, bytes)); @@ -428,29 +522,15 @@ Pointer::isDereferenceable(const expr &bytes0, uint64_t align, return ret; }; - auto is_dereferenceable = [&](Pointer &p) { + auto log_ptr = [&](Pointer &p) { expr block_sz = p.blockSizeOffsetT(); expr offset = p.getOffset(); - expr cond; - if (isUndef(offset) || isUndef(p.getBid())) { - cond = false; - } else if (m.state->isAsmMode()) { - // Pointers have full provenance in ASM mode - auto check = [&](unsigned limit, bool local) { - for (unsigned i = 0; i != limit; ++i) { - Pointer this_ptr(m, i, local); - cond |= p.isInboundsOf(this_ptr, bytes) && - block_constraints(this_ptr); - } - }; - cond = false; - check(m.numLocals(), true); - check(m.numNonlocals(), false); - - // try some constant folding - } else if (bytes.ugt(p.blockSize()).isTrue() || - p.getOffsetSizet().uge(block_sz).isTrue()) { + expr cond; + if (isUndef(offset) || + isUndef(p.getBid()) || + bytes.ugt(p.blockSize()).isTrue() || + p.getOffsetSizet().uge(block_sz).isTrue()) { cond = false; } else { // check that the offset is within bounds and that arith doesn't overflow @@ -461,21 +541,110 @@ Pointer::isDereferenceable(const expr &bytes0, uint64_t align, cond &= block_constraints(p); } - return make_pair(std::move(cond), p.isAligned(align)); + return cond; + }; + + bool observes_local = m.observed_addrs.numMayAlias(true) > 0; + + auto phy_ptr = [&](Pointer &p, bool is_phy) -> pair { + DisjointExpr bids, addrs; + expr ub = false; + bool all_same_size = true; + + auto add = [&](unsigned limit, bool local) { + for (unsigned i = 0; i != limit; ++i) { + // address not observed; can't alias with that + if (local && !m.observed_addrs.mayAlias(true, i)) + continue; + + Pointer this_ptr(m, i, local, p.getAttrs()); + + expr cond = p.isInboundsOf(this_ptr, bytes, is_phy) && + block_constraints(this_ptr); + if (cond.isFalse()) + continue; + + ub |= cond; + + cond = p.isOfBlock(this_ptr, bytes, is_phy); + + all_same_size &= bytes.eq(this_ptr.blockSize()); + + bids.add( + observes_local ? this_ptr.getBid() : this_ptr.getShortBid(), cond); + addrs.add(this_ptr.getLogAddress(), std::move(cond)); + } + }; + add(m.numLocals(), true); + add(m.numNonlocals(), false); + + expr bid = *std::move(bids)(); + if (!observes_local) + bid = mkLongBid(bid, false); + + expr addr = is_phy ? p.getPhysicalAddress() : p.getAddress(); + + return { std::move(ub), + Pointer(m, std::move(bid), + all_same_size + ? expr::mkUInt(0, bits_for_offset) + : addr - *std::move(addrs)()) }; }; DisjointExpr UB(expr(false)), is_aligned(expr(false)), all_ptrs; for (auto &[ptr_expr, domain] : DisjointExpr(p, 3)) { Pointer ptr(m, ptr_expr); - auto [ub, aligned] = is_dereferenceable(ptr); + expr is_log = ptr.isLogical(); + expr inbounds = is_asm ? ptr.isInbounds(true) : expr(true); + + optional log_expr, phy_expr; + optional new_log_ptr; + + // non-local ptrs are always escaped in ASM, so no need to add them here + if (!is_log.isFalse() && + (inbounds.isTrue() || !ptr.isLocal().isFalse())) { + log_expr = log_ptr(ptr); + new_log_ptr = ptr; + } + + bool maybe_phy = !is_log.isTrue(); + bool maybe_oob = !inbounds.isTrue(); + + if (maybe_phy || maybe_oob) { + auto [cond, phylogptr] = phy_ptr(ptr, !maybe_oob); + phy_expr = std::move(cond); + + // Local ptrs may not have escaped, so we need to consider the logical + // expr as well. If all we have are non-local, then the phy expr covers + // all cases. + new_log_ptr + = new_log_ptr && (!maybe_oob || !ptr.isLocal().isFalse()) + ? Pointer::mkIf(is_log && inbounds, *new_log_ptr, phylogptr) + : std::move(phylogptr); + } + + expr cond; + if (phy_expr && log_expr) { + if (maybe_oob) + cond = (is_log && *log_expr) || *phy_expr; + else + cond = expr::mkIf(is_log, *log_expr, *phy_expr); + } else if (phy_expr) { + cond = *std::move(phy_expr); + } else { + cond = *std::move(log_expr); + } + + expr aligned = ptr.isAligned(align); // record pointer if not definitely unfeasible - if (!ub.isFalse() && !aligned.isFalse() && !ptr.blockSize().isZero()) - all_ptrs.add(std::move(ptr).release(), domain); + if (!cond.isFalse() && !aligned.isFalse() && + (!is_log.isTrue() || !ptr.blockSize().isZero())) + all_ptrs.add((*std::move(new_log_ptr)).release(), domain); - UB.add(std::move(ub), domain); - is_aligned.add(std::move(aligned), domain); + UB.add(std::move(cond), domain); + is_aligned.add(std::move(aligned), std::move(domain)); } AndExpr exprs; @@ -546,12 +715,15 @@ expr Pointer::isHeapAllocated() const { expr Pointer::refined(const Pointer &other) const { bool is_asm = other.m.isAsmMode(); + auto [p1l, d1] = toLogical(); + auto [p2l, d2] = other.toLogical(); // This refers to a block that was malloc'ed within the function - expr local = other.isLocal(); - local &= getAllocType() == other.getAllocType(); - local &= blockSize() == other.blockSize(); - local &= getOffset() == other.getOffset(); + expr local = d2 && p2l.isLocal(); + local &= p1l.getAllocType() == p2l.getAllocType(); + local &= p1l.blockSize() == p2l.blockSize(); + local &= p1l.getOffset() == p2l.getOffset(); + local &= p1l.isBlockAlive().implies(p2l.isBlockAlive()); // Attributes are ignored at refinement. // TODO: this induces an infinite loop @@ -559,31 +731,28 @@ expr Pointer::refined(const Pointer &other) const { expr nonlocal = is_asm ? getAddress() == other.getAddress() : *this == other; - Pointer other_deref - = is_asm ? other.m.searchPointer(other.getAddress()) : other; - return expr::mkIf(isNull(), other.isNull(), - expr::mkIf(isLocal(), std::move(local), nonlocal) && - // FIXME: this should be disabled just for phy pointers - (is_asm ? expr(true) - : isBlockAlive().implies(other_deref.isBlockAlive()))); + (is_asm ? expr(true) : isLogical() == other.isLogical()) && + expr::mkIf(d1 && p1l.isLocal(), local, nonlocal)); } expr Pointer::fninputRefined(const Pointer &other, set &undef, const expr &byval_bytes) const { bool is_asm = other.m.isAsmMode(); - expr size = blockSizeOffsetT(); - expr off = getOffsetSizet(); - expr size2 = other.blockSizeOffsetT(); - expr off2 = other.getOffsetSizet(); + auto [p1l, d1] = toLogical(); + auto [p2l, d2] = other.toLogical(); + expr size = p1l.blockSizeOffsetT(); + expr off = p1l.getOffsetSizet(); + expr size2 = p2l.blockSizeOffsetT(); + expr off2 = p2l.getOffsetSizet(); // TODO: check block value for byval_bytes if (!byval_bytes.isZero()) return true; expr local - = expr::mkIf(isHeapAllocated(), - getAllocType() == other.getAllocType() && + = expr::mkIf(p1l.isHeapAllocated(), + p1l.getAllocType() == p2l.getAllocType() && off == off2 && size == size2, expr::mkIf(off.sge(0), @@ -595,7 +764,7 @@ expr Pointer::fninputRefined(const Pointer &other, set &undef, size2.uge(size)), // maintains same dereferenceability before/after off == off2 && size2.uge(size))); - local = (other.isLocal() || other.isByval()) && local; + local = d2 && (p2l.isLocal() || p2l.isByval()) && local; // TODO: this induces an infinite loop // block_refined(other); @@ -603,10 +772,8 @@ expr Pointer::fninputRefined(const Pointer &other, set &undef, expr nonlocal = is_asm ? getAddress() == other.getAddress() : *this == other; return expr::mkIf(isNull(), other.isNull(), - expr::mkIf(isLocal(), local, nonlocal) && - // FIXME: this should be disabled just for phy pointers - (is_asm ? expr(true) - : isBlockAlive().implies(other.isBlockAlive()))); + (is_asm ? expr(true) : isLogical() == other.isLogical()) && + expr::mkIf(d1 && p1l.isLocal(), local, nonlocal)); } expr Pointer::isWritable() const { @@ -699,6 +866,50 @@ bool Pointer::isBlkSingleByte() const { return blockSize().isUInt(blk_size) && blk_size == bits_byte/8; } +pair Pointer::findLogicalPointer(const expr &addr) const { + DisjointExpr ret; + + auto add = [&](unsigned limit, bool local) { + for (unsigned i = 0; i != limit; ++i) { + // address not observed; can't alias with that + if (local && !m.observed_addrs.mayAlias(true, i)) + continue; + + Pointer p(m, i, local); + expr blk_addr = p.getLogAddress(); + + ret.add(p + (addr - blk_addr), + !local && i == 0 && has_null_block + ? addr == 0 + : addr.uge(blk_addr) && + addr.ult((p + p.blockSize()).getLogAddress())); + } + }; + add(m.numLocals(), true); + add(m.numNonlocals(), false); + return { *std::move(ret)(), ret.domain() }; +} + +pair Pointer::toLogical() const { + if (isLogical().isTrue()) + return { *this, true }; + + DisjointExpr leftover; + + for (auto [e, cond] : DisjointExpr(p, 5)) { + Pointer p(m, e); + if (!p.isLogical().isTrue()) + leftover.add(std::move(e), std::move(cond)); + } + + if (leftover.empty()) + return { *this, true }; + + expr addr = Pointer(m, *std::move(leftover)()).getPhysicalAddress(); + auto [ptr, domain] = findLogicalPointer(addr); + return {Pointer::mkIf(isLogical(), *this, ptr), leftover.domain() && domain}; +} + Pointer Pointer::mkIf(const expr &cond, const Pointer &then, const Pointer &els) { assert(&then.m == &els.m); @@ -706,8 +917,10 @@ Pointer::mkIf(const expr &cond, const Pointer &then, const Pointer &els) { } ostream& operator<<(ostream &os, const Pointer &p) { + auto logical = p.isLogical(); + if (p.isNull().isTrue()) - return os << "null"; + return os << (logical.isFalse() ? "0x0" : "null"); #define P(field, fn) \ if (field.isConst()) \ @@ -715,16 +928,21 @@ ostream& operator<<(ostream &os, const Pointer &p) { else \ os << field - os << "pointer("; - if (p.isLocal().isConst()) - os << (p.isLocal().isTrue() ? "local" : "non-local"); - else - os << "local=" << p.isLocal(); - os << ", block_id="; - P(p.getShortBid(), printUnsigned); - - os << ", offset="; - P(p.getOffset(), printSigned); + if (logical.isFalse()) { + os << "phy-ptr(addr="; + P(p.getPhysicalAddress(), printUnsigned); + } else { + os << "pointer("; + if (p.isLocal().isConst()) + os << (p.isLocal().isTrue() ? "local" : "non-local"); + else + os << "local=" << p.isLocal(); + os << ", block_id="; + P(p.getShortBid(), printUnsigned); + + os << ", offset="; + P(p.getOffset(), printSigned); + } if (bits_for_ptrattrs && !p.getAttrs().isZero()) { os << ", attrs="; diff --git a/ir/pointer.h b/ir/pointer.h index 5bf8290aa..b634134fa 100644 --- a/ir/pointer.h +++ b/ir/pointer.h @@ -17,7 +17,8 @@ class Memory; class Pointer { const Memory &m; - // [bid, offset, attributes (1 bit for each)] + // [0, padding, bid, offset, attributes (1 bit for each)] -- logical pointer + // [1, padding, address, attributes] -- physical pointer // The top bit of bid is 1 if the block is local, 0 otherwise. // A local memory block is a memory block that is // allocated by an instruction during the current function call. This does @@ -40,14 +41,22 @@ class Pointer { const smt::expr &attr); Pointer(const Memory &m, const char *var_name, const ParamAttrs &attr); Pointer(const Memory &m, smt::expr p); - Pointer(const Memory &m, unsigned bid, bool local); + Pointer(const Memory &m, unsigned bid, bool local, smt::expr attr = {}); Pointer(const Memory &m, const smt::expr &bid, const smt::expr &offset, const ParamAttrs &attr = {}); + static Pointer mkPhysical(const Memory &m, const smt::expr &addr); + static Pointer mkPhysical(const Memory &m, const smt::expr &addr, + const smt::expr &attr); + Pointer(const Pointer &other) noexcept = default; Pointer(Pointer &&other) noexcept = default; void operator=(Pointer &&rhs) noexcept { p = std::move(rhs.p); } + // returns (log-ptr, domain of inboundness) + std::pair findLogicalPointer(const smt::expr &addr) const; + std::pair toLogical() const; + static smt::expr mkLongBid(const smt::expr &short_bid, bool local); static smt::expr mkUndef(State &s); @@ -57,10 +66,15 @@ class Pointer { static unsigned zeroBitsShortOffset(); static bool hasLocalBit(); + smt::expr isLogical() const; + smt::expr isLocal(bool simplify = true) const; smt::expr isConstGlobal() const; smt::expr isWritableGlobal() const; + // these assume the pointer is logical + smt::expr getLogAddress(bool simplify = true) const; + smt::expr getBid() const; smt::expr getShortBid() const; // same as getBid but ignoring is_local bit smt::expr getOffset() const; @@ -69,6 +83,7 @@ class Pointer { smt::expr getAttrs() const; smt::expr getBlockBaseAddress(bool simplify = true) const; smt::expr getAddress(bool simplify = true) const; + smt::expr getPhysicalAddress() const; smt::expr blockSize() const; smt::expr blockSizeOffsetT() const; // to compare with offsets @@ -92,8 +107,10 @@ class Pointer { smt::expr operator==(const Pointer &rhs) const; smt::expr operator!=(const Pointer &rhs) const; - smt::expr isInboundsOf(const Pointer &block, const smt::expr &bytes) const; - smt::expr isInboundsOf(const Pointer &block, unsigned bytes) const; + smt::expr isOfBlock(const Pointer &block, const smt::expr &bytes, + bool is_phy) const; + smt::expr isInboundsOf(const Pointer &block, const smt::expr &bytes, + bool is_phy) const; smt::expr isInbounds(bool strict) const; smt::expr inbounds(bool simplify_ptr = false); diff --git a/smt/exprs.cpp b/smt/exprs.cpp index fb2cef1f4..2c0a4d48c 100644 --- a/smt/exprs.cpp +++ b/smt/exprs.cpp @@ -66,6 +66,11 @@ ostream &operator<<(ostream &os, const AndExpr &e) { } +void OrExpr::add(const expr &e) { + if (!e.isFalse()) + exprs.emplace(e); +} + void OrExpr::add(expr &&e) { if (!e.isFalse()) exprs.insert(std::move(e)); diff --git a/smt/exprs.h b/smt/exprs.h index 5ac924d00..546cd2dc3 100644 --- a/smt/exprs.h +++ b/smt/exprs.h @@ -42,6 +42,7 @@ class OrExpr { std::set exprs; public: + void add(const expr &e); void add(expr &&e); void add(const OrExpr &other); expr operator()() const; @@ -120,6 +121,14 @@ class DisjointExpr { std::optional operator()() && { return std::move(*this).mk({}); } + expr domain() const { + OrExpr ret; + for (auto &[val, domain] : vals) { + ret.add(domain); + } + return std::move(ret)(); + } + std::optional lookup(const expr &domain) const { for (auto &[v, d] : vals) { if (d.eq(domain)) @@ -131,6 +140,7 @@ class DisjointExpr { auto begin() const { return vals.begin(); } auto end() const { return vals.end(); } auto size() const { return vals.size(); } + bool empty() const { return vals.empty() && !default_val; } }; diff --git a/tests/alive-tv/asm/gep_mem.srctgt.ll b/tests/alive-tv/asm/gep_mem.srctgt.ll new file mode 100644 index 000000000..41cb3d5ff --- /dev/null +++ b/tests/alive-tv/asm/gep_mem.srctgt.ll @@ -0,0 +1,16 @@ +; TEST-ARGS: -tgt-is-asm +@G = external global ptr, align 8 + +define void @src(ptr %0) { + %LGV = load ptr, ptr %0, align 8 + %G1 = getelementptr i8, ptr %LGV, i64 8 + store ptr %G1, ptr @G, align 8 + ret void +} + +define void @tgt(ptr %0) { + %a3_1 = load i64, ptr %0, align 8 + %a6_1 = add i64 %a3_1, 8 + store i64 %a6_1, ptr @G, align 8 + ret void +} diff --git a/tests/alive-tv/int2ptr/gep_cmp.srctgt.ll b/tests/alive-tv/int2ptr/gep_cmp.srctgt.ll new file mode 100644 index 000000000..c948f3327 --- /dev/null +++ b/tests/alive-tv/int2ptr/gep_cmp.srctgt.ll @@ -0,0 +1,11 @@ +define i1 @src(ptr %0) { + %2 = getelementptr i8, ptr %0, i64 1 + %3 = icmp ult ptr %2, %0 + ret i1 %3 +} + +define i1 @tgt(ptr %0) { + %ptr = inttoptr i64 -1 to ptr + %a4_5.not = icmp eq ptr %0, %ptr + ret i1 %a4_5.not +} diff --git a/tests/alive-tv/int2ptr/intro.srctgt.ll b/tests/alive-tv/int2ptr/intro.srctgt.ll new file mode 100644 index 000000000..4d0086a74 --- /dev/null +++ b/tests/alive-tv/int2ptr/intro.srctgt.ll @@ -0,0 +1,15 @@ +; ERROR: Value mismatch + +define ptr @src(ptr %0, i64 %1) { + %3 = icmp eq i64 %1, 0 + %4 = select i1 %3, ptr null, ptr %0 + ret ptr %4 +} + +define ptr @tgt(ptr %0, i64 %1) { + %3 = ptrtoint ptr %0 to i64 + %a2_5 = icmp eq i64 %1, 0 + %a3_6 = select i1 %a2_5, i64 0, i64 %3 + %4 = inttoptr i64 %a3_6 to ptr + ret ptr %4 +} diff --git a/tests/alive-tv/int2ptr.srctgt.ll b/tests/alive-tv/int2ptr/removal.srctgt.ll similarity index 100% rename from tests/alive-tv/int2ptr.srctgt.ll rename to tests/alive-tv/int2ptr/removal.srctgt.ll diff --git a/tools/transform.cpp b/tools/transform.cpp index b6c44a43a..80f8fe648 100644 --- a/tools/transform.cpp +++ b/tools/transform.cpp @@ -196,7 +196,8 @@ static bool error(Errors &errs, State &src_state, State &tgt_state, // this *may* be a pointer if (bw == Pointer::totalBits()) { Pointer p(src_state.returnMemory(), var); - reduce(p.getOffset()); + if (try_reduce(p.isLogical())) + reduce(p.getOffset()); reduce(p.getAddress()); } } @@ -1003,7 +1004,7 @@ static void calculateAndInitConstants(Transform &t) { uint64_t min_global_size = UINT64_MAX; bool has_null_pointer = false; - bool has_int2ptr = false; + has_int2ptr = false; bool has_ptr2int = false; has_alloca = false; has_fncall = false; @@ -1250,6 +1251,10 @@ static void calculateAndInitConstants(Transform &t) { bits_for_offset = min(bits_for_offset, config::max_offset_bits); bits_for_offset = min(bits_for_offset, bits_program_pointer); + // we may have an implicit ptr2int through memory. Ensure we have enough bits + if (has_ptr_load && does_int_mem_access && t.tgt.has(FnAttrs::Asm)) + bits_for_offset = bits_program_pointer; + // ASSUMPTION: programs can only allocate up to half of address space // so the first bit of size is always zero. // We need this assumption to support negative offsets. @@ -1271,7 +1276,7 @@ static void calculateAndInitConstants(Transform &t) { bits_ptr_address) + has_local_bit, bits_program_pointer); - if (config::tgt_is_asm) + if (t.tgt.has(FnAttrs::Asm)) bits_ptr_address = bits_program_pointer; // TODO: this is only needed if some program pointer is observable