/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef A64_ASSEMBLER_A64_H_
#define A64_ASSEMBLER_A64_H_
#include <iterator>
#include "jit/arm64/vixl/Assembler-vixl.h"
#include "jit/CompactBuffer.h"
#include "jit/shared/Disassembler-shared.h"
#include "wasm/WasmTypeDecls.h"
namespace js {
namespace jit {
// VIXL imports.
typedef vixl::Register ARMRegister;
typedef vixl::FPRegister ARMFPRegister;
using vixl::ARMBuffer;
using vixl::Instruction;
using LabelDoc = DisassemblerSpew::LabelDoc;
using LiteralDoc = DisassemblerSpew::LiteralDoc;
static const uint32_t AlignmentAtPrologue = 0;
static const uint32_t AlignmentMidPrologue = 8;
static const Scale ScalePointer = TimesEight;
// The MacroAssembler uses scratch registers extensively and unexpectedly.
// For safety, scratch registers should always be acquired using
// vixl::UseScratchRegisterScope.
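// For example, a scratch register can be borrowed for the duration of a scope
// like this (a sketch; the `masm` variable and the load are illustrative):
//
//   {
//     vixl::UseScratchRegisterScope temps(&masm);
//     const ARMRegister scratch64 = temps.AcquireX();
//     masm.Ldr(scratch64, vixl::MemOperand(FramePointer64, 0));
//   }  // scratch64 is released again when the scope ends.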
static constexpr Register ScratchReg{Registers::ip0};
static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};
static constexpr Register ScratchReg2{Registers::ip1};
static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};
static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
FloatRegisters::Double};
static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
FloatRegisters::Double};
struct ScratchDoubleScope : public AutoFloatRegisterScope {
explicit ScratchDoubleScope(MacroAssembler& masm)
: AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
};
static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
FloatRegisters::Single};
static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
FloatRegisters::Single};
struct ScratchFloat32Scope : public AutoFloatRegisterScope {
explicit ScratchFloat32Scope(MacroAssembler& masm)
: AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
};
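// Typical use of these scopes (a sketch; `masm`, `src` and `dest` are
// illustrative):
//
//   {
//     ScratchDoubleScope fscratch(masm);
//     masm.loadDouble(src, fscratch);
//     masm.addDouble(fscratch, dest);
//   }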
#ifdef ENABLE_WASM_SIMD
static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
FloatRegisters::Simd128};
static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
FloatRegisters::Simd128};
struct ScratchSimd128Scope : public AutoFloatRegisterScope {
explicit ScratchSimd128Scope(MacroAssembler& masm)
: AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
};
#else
struct ScratchSimd128Scope : public AutoFloatRegisterScope {
explicit ScratchSimd128Scope(MacroAssembler& masm)
: AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
MOZ_CRASH("SIMD not enabled");
}
};
#endif
static constexpr Register InvalidReg{Registers::Invalid};
static constexpr FloatRegister InvalidFloatReg = {};
static constexpr Register OsrFrameReg{Registers::x3};
static constexpr Register CallTempReg0{Registers::x9};
static constexpr Register CallTempReg1{Registers::x10};
static constexpr Register CallTempReg2{Registers::x11};
static constexpr Register CallTempReg3{Registers::x12};
static constexpr Register CallTempReg4{Registers::x13};
static constexpr Register CallTempReg5{Registers::x14};
static constexpr Register PreBarrierReg{Registers::x1};
static constexpr Register InterpreterPCReg{Registers::x9};
static constexpr Register ReturnReg{Registers::x0};
static constexpr Register64 ReturnReg64(ReturnReg);
static constexpr Register JSReturnReg{Registers::x2};
static constexpr Register FramePointer{Registers::fp};
static constexpr ARMRegister FramePointer64{FramePointer, 64};
static constexpr Register ZeroRegister{Registers::sp};
static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};
// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
//
// ================
//
// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
//
// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
// is used as the base pointer for a memory access. (SP+offset need not be
// 16-byte aligned, but the SP value itself must be.) The SP register may
// take on unaligned values but may not be used for a memory access while it
// is unaligned.
//
// Stack-alignment checking can be enabled or disabled by a control register;
// however that register cannot be modified by user space. We have to assume
// stack alignment checking is enabled, and that does usually appear to be the
// case. See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
// checking", for further details.
//
// A second constraint is forced upon us by the ARM64 ABI. This requires that
// all accesses to the stack must be at or above SP. Accesses below SP are
// strictly forbidden, presumably because the kernel might use that area of
// memory for its own purposes -- in particular, signal delivery -- and hence
// it may get trashed at any time.
//
// Note this doesn't mean that accesses to the stack must be based off
// register SP. Only that the effective addresses must be >= SP, regardless
// of how the address is formed.
//
// In order to allow word-wise pushes and pops, some of our ARM64 jits
// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to
// be used as a PseudoStackPointer (PSP).
//
// Initially the PSP will have the same value as the SP. Code can, if it
// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP,
// then storing the value at PSP+0. Given other constraints on the alignment
// of the SP at function call boundaries, this works out OK, at the cost of
// the two extra instructions per push / pop.
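// As a concrete sketch, pushing the 64-bit register x0 in that style
// corresponds to something like the following (vixl-level emitters; the
// helpers actually used may differ):
//
//   Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
//   Mov(vixl::sp, PseudoStackPointer64);             // SP := PSP
//   Str(vixl::x0, MemOperand(PseudoStackPointer64)); // store at PSP+0
//
// i.e. "sub x28, x28, #8 ; mov sp, x28 ; str x0, [x28]".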
//
// This is all a bit messy, and is probably not robustly adhered to. However,
// the following appear to be the intended, and mostly implemented, current
// invariants:
//
// (1) PSP is "primary", SP is "secondary". Most stack refs are
// PSP-relative. SP-relative is rare and (obviously) only done when we
// know that SP is aligned.
//
// (2) At all times, the relationship SP <= PSP is maintained. The fact that
// SP may validly be less than PSP means that pushes on the stack force
// the two values to become equal, by copying PSP into SP. However, pops
// behave differently: PSP moves back up and SP stays the same, since that
// doesn't break the SP <= PSP invariant.
//
// (3) However, immediately before a call instruction, SP and PSP must be the
// same. To enforce this, PSP is copied into SP by the arm64-specific
// MacroAssembler::call routines.
//
// (4) Also, after a function has returned, it is expected that SP holds the
// "primary" value. How exactly this is implemented remains not entirely
// clear and merits further investigation. The following points are
// believed to be relevant:
//
// - For calls to functions observing the system AArch64 ABI, PSP (x28) is
// callee-saved. That, combined with (3) above, implies SP == PSP
// immediately after the call returns.
//
// - JIT-generated routines return using MacroAssemblerCompat::retn, and
// that copies PSP into SP (bizarrely; this would make more sense if it
// copied SP into PSP); but in any case, the point is that they are the
// same at the point that the return instruction executes.
//
// - MacroAssembler::callWithABIPost copies PSP into SP after the return
// of a call requiring dynamic alignment.
//
// Given the above, it is unclear exactly where in the return sequence it
// is expected that SP == PSP, and also whether it is the callee or caller
// that is expected to enforce it.
//
// In general it would be nice to be able to move (at some time in the future,
// not now) to a world where *every* assignment to PSP or SP is followed
// immediately by a copy into the other register. That would make all
// required correctness proofs trivial in the sense that it would require only
// local inspection of code immediately following (dominated by) any such
// assignment. For the moment, however, this is a guideline, not a hard
// requirement.
//
// ================
//
// Mechanics of keeping the stack pointers in sync:
//
// The following two methods require that the masm's SP has been set to the PSP
// with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be
// no-ops. The setup is performed manually by the jits after creating the masm.
//
// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has
// been updated, so SP needs to move too. This is used pretty liberally
// throughout the code base.
//
// * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used
// after calls to non-ABI compliant code; it's not used much.
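// For example (a sketch): after manually adjusting the PSP, the real SP must
// be resynchronized before the stack is accessed SP-relative or a call is
// made:
//
//   masm.subFromStackPtr(Imm32(16));  // PSP := PSP - 16 (masm's SP is the PSP)
//   masm.syncStackPtr();              // SP := PSP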
//
// In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that
// recognizes the instruction emitted by syncStackPtr(). This is used to skip
// over that instruction, should it be present, in a few places in the JS JIT
// where code is generated to deal with toggled calls.
//
// In various places there are calls to MacroAssembler::syncStackPtr() which
// appear to be redundant. Investigation shows that they often are redundant,
// but not always. Finding and removing such redundancies would be quite some
// work, so we live for now with the occasional redundant update. Perusal of
// the Cortex-A55 and -A72 optimization guides shows no evidence that such
// assignments are any more expensive than assignments between vanilla integer
// registers, so the costs of such redundant updates are assumed to be small.
//
// Invariants on the PSP at function call boundaries:
//
// It *appears* that the following invariants exist:
//
// * On entry to JIT code, PSP == SP, i.e. the stack pointer is transmitted via
// both registers.
//
// * On entry to C++ code, PSP == SP. Certainly it appears that all calls
// created by the MacroAssembler::call(..) routines perform 'syncStackPtr'
// immediately before the call, and all ABI calls are routed through the
// MacroAssembler::call layer.
//
// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the
// active stack pointer and that PSP is dead.
//
// * The PSP is non-volatile (callee-saved). Along a normal return path from
// JIT code, simply having PSP == SP on exit is correct, since the exit SP is
// the same as the entry SP by the JIT ABI.
//
// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
// used), and will not need to restore the PSP on return because x28 is
// non-volatile in the ARM64 ABI.
//
// ================
//
// Future cleanups to the SP-vs-PSP machinery:
//
// Currently we have somewhat unclear invariants, which are not obviously
// always enforced, and which may require complex non-local reasoning.
// Auditing the code to ensure that the invariants always hold, whilst not
// generating duplicate syncs, is close to impossible. A future rework to
// tidy this might be as follows. (This suggestion pertains to the entire
// JIT complex: all of the JS compilers, wasm compilers, stub generators,
// regexp compilers, etc).
//
// Currently we have that, in JIT-generated code, PSP is "primary" and SP is
// "secondary", meaning that PSP has the "real" stack pointer value and SP is
// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP.
// An exception to this scheme is the stubs code generated by WasmStubs.cpp,
// which assumes that SP is "primary" and PSP is dead.
//
// It might give us an easier incremental path to eventually removing PSP
// entirely if we switched to having SP always be the primary. That is:
//
// (1) SP is primary, PSP is secondary
// (2) After any assignment to SP, it is copied into PSP
// (3) All (non-frame-pointer-based) stack accesses are PSP-relative
// (as at present)
//
// This would have the effect that:
//
// * It would reinstate the invariant that on all targets, the "real" SP value
// is in the ABI-and-or-hardware-mandated stack pointer register.
//
// * It would give us a simple story about calls and returns:
// - for calls to non-JIT generated code (viz, C++ etc), we need no extra
// copies, because PSP (x28) is callee-saved
// - for calls to JIT-generated code, we need no extra copies, because of (2)
// above
//
// * We could incrementally migrate those parts of the code generator where we
// know that SP is 16-aligned, to use SP- rather than PSP-relative accesses
//
// * The consistent use of (2) would remove the requirement to have to perform
// path-dependent reasoning (for paths in the generated code, not in the
// compiler) when reading/understanding the code.
//
// * x28 would become free for use by stubs and the baseline compiler without
// having to worry about interoperating with code that expects x28 to hold a
// valid PSP.
//
// One might ask what mechanical checks we can add to ensure correctness, rather
// than having to verify these invariants by hand indefinitely. Maybe some
// combination of:
//
// * In debug builds, compiling-in assert(SP == PSP) at critical places. This
// can be done using the existing `assertStackPtrsSynced` function.
//
// * In debug builds, scanning sections of generated code to ensure no
// SP-relative stack accesses have been created -- or, for some sections at
// least, that every assignment to SP is immediately followed by a copy to
// x28. This would also facilitate detection of duplicate syncs.
//
// ================
//
// Other investigative notes, for the code base at present:
//
// * Some disassembly dumps suggest that we sync the stack pointer too often.
// This could be the result of various pieces of code working at cross
// purposes when syncing the stack pointer, or of not paying attention to the
// precise invariants.
//
// * As documented in RegExpNativeMacroAssembler.cpp, function
// SMRegExpMacroAssembler::createStackFrame:
//
// // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
// // addressing. The register we use for PSP may however also be used by
// // calling code, and it is nonvolatile, so save it. Do this as a special
// // case first because the generic save/restore code needs the PSP to be
// // initialized already.
//
// and also in function SMRegExpMacroAssembler::exitHandler:
//
// // Restore the saved value of the PSP register, this value is whatever the
// // caller had saved in it, not any actual SP value, and it must not be
// // overwritten subsequently.
//
// The original source for these comments was a patch for bug 1445907.
//
// * MacroAssembler-arm64.h has an interesting comment in the retn()
// function:
//
// syncStackPtr(); // SP is always used to transmit the stack between calls.
//
// Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp,
// at callWithABIPre and callWithABIPost.
//
// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find
//
// // SP is used to transfer stack across call boundaries.
// masm.initPseudoStackPtr();
//
// after the return point of a callWithVMWrapper. The only reasonable
// conclusion from all those (assuming they are right) is that SP == PSP.
//
// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP
// and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP
// before a call to wasm code.
//
// ================
// StackPointer is intentionally undefined on ARM64 to prevent misuse: using
// sp as a base register is only valid if sp % 16 == 0.
static constexpr Register RealStackPointer{Registers::sp};
static constexpr Register PseudoStackPointer{Registers::x28};
static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};
static constexpr Register IntArgReg0{Registers::x0};
static constexpr Register IntArgReg1{Registers::x1};
static constexpr Register IntArgReg2{Registers::x2};
static constexpr Register IntArgReg3{Registers::x3};
static constexpr Register IntArgReg4{Registers::x4};
static constexpr Register IntArgReg5{Registers::x5};
static constexpr Register IntArgReg6{Registers::x6};
static constexpr Register IntArgReg7{Registers::x7};
static constexpr Register HeapReg{Registers::x21};
// Define unsized Registers.
#define DEFINE_UNSIZED_REGISTERS(N) \
static constexpr Register r##N{Registers::x##N};
REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
#undef DEFINE_UNSIZED_REGISTERS
static constexpr Register ip0{Registers::x16};
static constexpr Register ip1{Registers::x17};
static constexpr Register fp{Registers::x29};
static constexpr Register lr{Registers::x30};
static constexpr Register rzr{Registers::xzr};
// Import VIXL registers into the js::jit namespace.
#define IMPORT_VIXL_REGISTERS(N) \
static constexpr ARMRegister w##N = vixl::w##N; \
static constexpr ARMRegister x##N = vixl::x##N;
REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
#undef IMPORT_VIXL_REGISTERS
static constexpr ARMRegister wzr = vixl::wzr;
static constexpr ARMRegister xzr = vixl::xzr;
static constexpr ARMRegister wsp = vixl::wsp;
static constexpr ARMRegister sp = vixl::sp;
// Import VIXL VRegisters into the js::jit namespace.
#define IMPORT_VIXL_VREGISTERS(N) \
static constexpr ARMFPRegister s##N = vixl::s##N; \
static constexpr ARMFPRegister d##N = vixl::d##N;
REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
#undef IMPORT_VIXL_VREGISTERS
static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);
// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
// JSReturnOperand).
static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
static constexpr Register RegExpMatcherStringReg = CallTempReg1;
static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
// Registers used by RegExpExecTest stub (do not use ReturnReg).
static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
static constexpr Register RegExpExecTestStringReg = CallTempReg1;
// Registers used by RegExpSearcher stub (do not use ReturnReg).
static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
static constexpr Register RegExpSearcherStringReg = CallTempReg1;
static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
static constexpr Register JSReturnReg_Type = r3;
static constexpr Register JSReturnReg_Data = r2;
static constexpr FloatRegister NANReg = {FloatRegisters::d14,
FloatRegisters::Single};
// N.B. r8 isn't listed as an AAPCS temp register, but we can use it as such
// because we never use return-structs.
static constexpr Register CallTempNonArgRegs[] = {r8, r9, r10, r11,
r12, r13, r14, r15};
static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
static constexpr uint32_t JitStackAlignment = 16;
static constexpr uint32_t JitStackValueAlignment =
JitStackAlignment / sizeof(Value);
static_assert(JitStackAlignment % sizeof(Value) == 0 &&
JitStackValueAlignment >= 1,
"Stack alignment should be a non-zero multiple of sizeof(Value)");
static constexpr uint32_t SimdMemoryAlignment = 16;
static_assert(CodeAlignment % SimdMemoryAlignment == 0,
"Code alignment should be larger than any of the alignments "
"which are used for "
"the constant sections of the code buffer. Thus it should be "
"larger than the "
"alignment for SIMD constants.");
static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;
// See comments in wasm::GenerateFunctionPrologue. The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
class Assembler : public vixl::Assembler {
public:
Assembler() : vixl::Assembler() {}
typedef vixl::Condition Condition;
void finish();
bool appendRawCode(const uint8_t* code, size_t numBytes);
bool reserve(size_t size);
bool swapBuffer(wasm::Bytes& bytes);
// Emit the jump table, returning the BufferOffset to the first entry in the
// table.
BufferOffset emitExtendedJumpTable();
BufferOffset ExtendedJumpTable_;
void executableCopy(uint8_t* buffer);
BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
const LiteralDoc& doc,
ARMBuffer::PoolEntry* pe = nullptr);
BufferOffset immPool64(ARMRegister dest, uint64_t value,
ARMBuffer::PoolEntry* pe = nullptr);
BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
vixl::LoadLiteralOp op, const LiteralDoc& doc);
BufferOffset fImmPool64(ARMFPRegister dest, double value);
BufferOffset fImmPool32(ARMFPRegister dest, float value);
uint32_t currentOffset() const { return nextOffset().getOffset(); }
void bind(Label* label) { bind(label, nextOffset()); }
void bind(Label* label, BufferOffset boff);
void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }
void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
bool oom() const {
return AssemblerShared::oom() || armbuffer_.oom() ||
jumpRelocations_.oom() || dataRelocations_.oom();
}
void copyJumpRelocationTable(uint8_t* dest) const {
if (jumpRelocations_.length()) {
memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
}
}
void copyDataRelocationTable(uint8_t* dest) const {
if (dataRelocations_.length()) {
memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
}
}
size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
size_t bytesNeeded() const {
return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
dataRelocationTableBytes();
}
void processCodeLabels(uint8_t* rawCode) {
for (const CodeLabel& label : codeLabels_) {
Bind(rawCode, label);
}
}
static void UpdateLoad64Value(Instruction* inst0, uint64_t value);
static void Bind(uint8_t* rawCode, const CodeLabel& label) {
auto mode = label.linkMode();
size_t patchAtOffset = label.patchAt().offset();
size_t targetOffset = label.target().offset();
if (mode == CodeLabel::MoveImmediate) {
Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
} else {
*reinterpret_cast<const void**>(rawCode + patchAtOffset) =
rawCode + targetOffset;
}
}
void retarget(Label* cur, Label* next);
// The buffer is about to be linked. Ensure that any constant pools or excess
// bookkeeping have been flushed to the instruction stream.
void flush() { armbuffer_.flushPool(); }
void comment(const char* msg) {
#ifdef JS_DISASM_ARM64
spew_.spew("; %s", msg);
#endif
}
void setPrinter(Sprinter* sp) {
#ifdef JS_DISASM_ARM64
spew_.setPrinter(sp);
#endif
}
static bool SupportsFloatingPoint() { return true; }
static bool SupportsUnalignedAccesses() { return true; }
static bool SupportsFastUnalignedFPAccesses() { return true; }
static bool SupportsWasmSimd() { return true; }
static bool HasRoundInstruction(RoundingMode mode) {
switch (mode) {
case RoundingMode::Up:
case RoundingMode::Down:
case RoundingMode::NearestTiesToEven:
case RoundingMode::TowardsZero:
return true;
}
MOZ_CRASH("unexpected mode");
}
protected:
// Add a jump whose target is unknown until finalization.
// The jump may not be patched at runtime.
void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);
public:
static uint32_t PatchWrite_NearCallSize() { return 4; }
static uint32_t NopSize() { return 4; }
static void PatchWrite_NearCall(CodeLocationLabel start,
CodeLocationLabel toCall);
static void PatchDataWithValueCheck(CodeLocationLabel label,
PatchedImmPtr newValue,
PatchedImmPtr expected);
static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
ImmPtr expected);
static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
// Raw is going to be the return address.
uint32_t* raw = (uint32_t*)label.raw();
// Overwrite the 4 bytes before the return address, which will end up being
// the call instruction.
*(raw - 1) = imm.value;
}
static uint32_t AlignDoubleArg(uint32_t offset) {
MOZ_CRASH("AlignDoubleArg()");
}
static uintptr_t GetPointer(uint8_t* ptr) {
Instruction* i = reinterpret_cast<Instruction*>(ptr);
uint64_t ret = i->Literal64();
return ret;
}
// Toggle a jmp or cmp emitted by toggledJump().
static void ToggleToJmp(CodeLocationLabel inst_);
static void ToggleToCmp(CodeLocationLabel inst_);
static void ToggleCall(CodeLocationLabel inst_, bool enabled);
static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
CompactBufferReader& reader);
static void TraceDataRelocations(JSTracer* trc, JitCode* code,
CompactBufferReader& reader);
void assertNoGCThings() const {
#ifdef DEBUG
MOZ_ASSERT(dataRelocations_.length() == 0);
for (auto& j : pendingJumps_) {
MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
}
#endif
}
public:
// A jump table entry consists of 2 instructions followed by 8 bytes of raw data.
static const size_t SizeOfJumpTableEntry = 16;
struct JumpTableEntry {
uint32_t ldr;
uint32_t br;
void* data;
Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
};
// Offset of the patchable target for the given entry.
static const size_t OffsetOfJumpTableEntryPointer = 8;
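// The intended entry layout is thus (a sketch):
//   +0  ldr   - loads the 8-byte target stored at +8 into a scratch register
//   +4  br    - branches to the loaded target
//   +8  data  - the 8-byte target address, patched at finalization
// which is why OffsetOfJumpTableEntryPointer is 8 and SizeOfJumpTableEntry
// is 16.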
public:
void writeCodePointer(CodeLabel* label) {
armbuffer_.assertNoPoolAndNoNops();
uintptr_t x = uintptr_t(-1);
BufferOffset off = EmitData(&x, sizeof(uintptr_t));
label->patchAt()->bind(off.getOffset());
}
void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
const Disassembler::HeapAccess& heapAccess) {
MOZ_CRASH("verifyHeapAccessDisassembly");
}
protected:
// Structure for fixing up pc-relative loads/jumps when the machine
// code gets moved (executable copy, gc, etc.).
struct RelativePatch {
BufferOffset offset;
void* target;
RelocationKind kind;
RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
: offset(offset), target(target), kind(kind) {}
};
// List of jumps for which the target is either unknown until finalization,
// or cannot be known due to GC. Each entry here requires a unique entry
// in the extended jump table, and is patched at finalization.
js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;
// Final output formatters.
CompactBufferWriter jumpRelocations_;
CompactBufferWriter dataRelocations_;
};
static const uint32_t NumIntArgRegs = 8;
static const uint32_t NumFloatArgRegs = 8;
class ABIArgGenerator {
public:
ABIArgGenerator()
: intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {}
ABIArg next(MIRType argType);
ABIArg& current() { return current_; }
uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }
protected:
unsigned intRegIndex_;
unsigned floatRegIndex_;
uint32_t stackOffset_;
ABIArg current_;
};
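// Typical use of ABIArgGenerator (a sketch; the MIRTypes are illustrative):
//
//   ABIArgGenerator abi;
//   ABIArg a0 = abi.next(MIRType::Pointer);  // first integer arg, in x0
//   ABIArg a1 = abi.next(MIRType::Double);   // first FP arg, in d0
//   uint32_t stackBytes = abi.stackBytesConsumedSoFar();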
// These registers may be volatile or nonvolatile.
static constexpr Register ABINonArgReg0 = r8;
static constexpr Register ABINonArgReg1 = r9;
static constexpr Register ABINonArgReg2 = r10;
static constexpr Register ABINonArgReg3 = r11;
// This register may be volatile or nonvolatile. Avoid d31 which is the
// ScratchDoubleReg_.
static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
FloatRegisters::Single};
// These registers may be volatile or nonvolatile.
// Note: these three registers are all guaranteed to be different.
static constexpr Register ABINonArgReturnReg0 = r8;
static constexpr Register ABINonArgReturnReg1 = r9;
static constexpr Register ABINonVolatileReg{Registers::x19};
// This register is guaranteed to be clobberable during the prologue and
// epilogue of an ABI call which must preserve the ABI argument, return, and
// non-volatile registers.
static constexpr Register ABINonArgReturnVolatileReg = lr;
// Instance pointer argument register for WebAssembly functions. This must not
// alias any other register used for passing function arguments or return
// values. Preserved by WebAssembly functions. Must be nonvolatile.
static constexpr Register InstanceReg{Registers::x23};
// Registers used for wasm table calls. These registers must be disjoint
// from the ABI argument registers, InstanceReg and each other.
static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
// Registers used for ref calls.
static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmCallRefReg = ABINonArgReg3;
// Registers used for wasm tail call operations.
static constexpr Register WasmTailCallInstanceScratchReg = ABINonArgReg1;
static constexpr Register WasmTailCallRAScratchReg = lr;
static constexpr Register WasmTailCallFPScratchReg = ABINonArgReg3;
// Register used as a scratch along the return path in the fast js -> wasm stub
// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
// It must be a volatile register.
static constexpr Register WasmJitEntryReturnScratch = r9;
static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
Register* out) {
if (usedIntArgs >= NumIntArgRegs) {
return false;
}
*out = Register::FromCode(usedIntArgs);
return true;
}
static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
FloatRegister* out) {
if (usedFloatArgs >= NumFloatArgRegs) {
return false;
}
*out = FloatRegister::FromCode(usedFloatArgs);
return true;
}
// Get a register in which we plan to put a quantity that will be used as an
// integer argument. This differs from GetIntArgReg in that if we have no more
// actual argument registers to use we will fall back on using whatever
// CallTempReg* don't overlap the argument registers, and only fail once those
// run out too.
static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
uint32_t usedFloatArgs, Register* out) {
if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
return true;
}
// Unfortunately, we have to assume things about the point at which
// GetIntArgReg returns false, because we need to know how many registers it
// can allocate.
usedIntArgs -= NumIntArgRegs;
if (usedIntArgs >= NumCallTempNonArgRegs) {
return false;
}
*out = CallTempNonArgRegs[usedIntArgs];
return true;
}
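// A sketch of how this is meant to be used (the argument counts are
// illustrative):
//
//   Register temp;
//   if (!GetTempRegForIntArg(/* usedIntArgs = */ 9, /* usedFloatArgs = */ 0,
//                            &temp)) {
//     // Argument registers and CallTempNonArgRegs are both exhausted; the
//     // value has to be passed on the stack instead.
//   }
//   // With 9 int args already used, this yields CallTempNonArgRegs[1], i.e. r9.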
inline Imm32 Imm64::firstHalf() const { return low(); }
inline Imm32 Imm64::secondHalf() const { return hi(); }
// Forbids nop filling for testing purposes. Nestable, but nested calls have
// no effect on the no-nops status; it is only the top level one that counts.
class AutoForbidNops {
protected:
Assembler* asm_;
public:
explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); }
~AutoForbidNops() { asm_->leaveNoNops(); }
};
// Forbids pool generation during a specified interval. Nestable, but nested
// calls must imply a no-pool area of the assembler buffer that is completely
// contained within the area implied by the outermost level call.
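// A sketch of the intended use (the emitted instructions are illustrative;
// `someLabel` is hypothetical): when a fixed-length, patchable sequence must
// not be broken up by constant-pool dumps or nop filling, wrap its emission in
// a scope sized to the number of instructions:
//
//   {
//     AutoForbidPoolsAndNops afp(&masm, /* maxInst = */ 2);
//     masm.Adr(ScratchReg64, &someLabel);
//     masm.Br(ScratchReg64);
//   }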
class AutoForbidPoolsAndNops : public AutoForbidNops {
public:
AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst)
: AutoForbidNops(asm_) {
asm_->enterNoPool(maxInst);
}
~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); }
};
} // namespace jit
} // namespace js
#endif // A64_ASSEMBLER_A64_H_