diff options
Diffstat (limited to 'sys/contrib/ia64/libuwx/src/uwx_self_context.s')
-rw-r--r-- | sys/contrib/ia64/libuwx/src/uwx_self_context.s | 273 |
1 files changed, 243 insertions, 30 deletions
diff --git a/sys/contrib/ia64/libuwx/src/uwx_self_context.s b/sys/contrib/ia64/libuwx/src/uwx_self_context.s index e373c4a893b3..e2986eb4139b 100644 --- a/sys/contrib/ia64/libuwx/src/uwx_self_context.s +++ b/sys/contrib/ia64/libuwx/src/uwx_self_context.s @@ -1,4 +1,4 @@ -// Copyright (c) 2003 Hewlett-Packard Development Company, L.P. +// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without @@ -47,23 +47,40 @@ rTMP2 = r29 rTMP3 = r30 rTMP4 = r31 rTMP5 = r8 +rMYPFS = r9 +rPSP = r10 -VALID_IP = 1 -VALID_SP = 1 << 1 -VALID_BSP = 1 << 2 -VALID_CFM = 1 << 3 -VALID_PREDS = 1 << 7 -VALID_RNAT = 1 << 10 -VALID_UNAT = 1 << 11 -VALID_FPSR = 1 << 12 -VALID_LC = 1 << 13 -VALID_GRS = 0xf << 16 -VALID_BRS = 0x1f << 20 -VALID_BASIC4 = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM -VALID_SPEC = VALID_PREDS | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC -VALID_BITS = (VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS) << 32 +VALID_IP = 1 +VALID_SP = 1 << 1 +VALID_BSP = 1 << 2 +VALID_CFM = 1 << 3 +VALID_PREDS = 1 << 7 +VALID_PRIUNAT = 1 << 8 +VALID_RNAT = 1 << 10 +VALID_UNAT = 1 << 11 +VALID_FPSR = 1 << 12 +VALID_LC = 1 << 13 +VALID_GRS = 0xf << 16 +VALID_BRS = 0x1f << 20 +VALID_BASIC4 = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM +VALID_SPEC = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC +VALID_REGS = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS +VALID_FRS = 0xfffff +// valid_regs and valid_frs are separate unsigned int fields. +// In order to store them with a single st8, we need to know +// the endianness. 
+#ifdef __LITTLE_ENDIAN__ +VALID_BITS = (VALID_FRS << 32) | VALID_REGS +#else +VALID_BITS = (VALID_REGS << 32) | VALID_FRS +#endif .text + +// int uwx_self_init_context(struct uwx_env *env); +// +// Stores a snapshot of the caller's context in the uwx_env structure. + .proc uwx_self_init_context .global uwx_self_init_context uwx_self_init_context: @@ -107,9 +124,9 @@ uwx_self_init_context: ;; mov rRNAT = ar.rnat // get copy of ar.rnat movl rTMP1 = VALID_BITS // valid_regs: ip, sp, bsp, cfm, - // preds, rnat, unat, fpsr, + // preds, priunat, rnat, unat, fpsr, // lc, grs, brs - // = 0x1ff3c8f00000000 + // valid_regs = 0x1ff3d8f; VALID_BITS also packs VALID_FRS (0xfffff) in the other 32-bit half, chosen by __LITTLE_ENDIAN__ ;; mov ar.rsc = rRSC // restore ar.rsc mov rBSP = ar.bsp @@ -119,25 +136,24 @@ uwx_self_init_context: nop extr.u rTMP3 = rTMP3, 3, 6 // bitpos = spill_loc{8:3} ;; - or rNATP = rBSP, rNATP // natp = bsp | 0x1f8 + and rBIAS = rBSP, rNATP // bias = (bsp & 0x1f8) ... sub rTMP4 = 64, rTMP3 // (64 - bitpos) shr rTMP5 = rTMP2, rTMP3 // (unat >> bitpos) ;; - sub rBIAS = rNATP, rBSP // bias = (natp - bsp) ... nop + extr.u rBIAS = rBIAS, 3, 6 // ... div 8 shl rTMP2 = rTMP2, rTMP4 // (unat << (64 - bitpos)) ;; or rTMP2 = rTMP2, rTMP5 // rotate_right(unat, bitpos) - extr.u rBIAS = rBIAS, 3, 6 // ... div 8 + nop mov rTMP4 = pr ;; st8 [rENV0] = rTMP1, 16 // env+0: valid_regs mask st8 [rENV1] = rRP, 24 // env+8: ip (my rp) - add rBIAS = rNSLOT, rBIAS // bias += nslots + sub rBIAS = rNSLOT, rBIAS // bias = nslots - bias ;; - cmp.lt p6, p0 = 63, rBIAS // if (63 < bias) ... - cmp.lt p7, p0 = 126, rBIAS // if (126 < bias) ... - nop + cmp.lt p6, p0 = 0, rBIAS // if (0 < bias) ... + cmp.lt p7, p0 = 63, rBIAS // if (63 < bias) ... ;; st8 [rENV0] = r12, 48 // env+16: sp st8 [rENV1] = rPFS, 40 // env+32: cfm (my pfs) @@ -147,24 +163,221 @@ uwx_self_init_context: st8 [rENV1] = rTMP2, 24 // env+72: priunat (p7) add rNSLOT = 1, rNSLOT // ... 
nslots++ ;; - st8 [rENV0] = rRNAT, -64 // env+88: rnat - st8 [rENV1] = rUNAT, 8 // env+96: unat + st8 [rENV0] = rRNAT, -64 // env+88: ar.rnat + st8 [rENV1] = rUNAT, 8 // env+96: ar.unat dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3) ;; sub rPBSP = rBSP, rTMP3 // prev_bsp = bsp - (nslots << 3) mov rTMP3 = ar.fpsr mov rTMP1 = ar.lc ;; - st8 [rENV0] = rPBSP // env+24: bsp (my prev bsp) - st8 [rENV1] = rTMP3, 8 // env+104: fpsr + st8 [rENV0] = rPBSP, 184 // env+24: bsp (my prev bsp) + st8 [rENV1] = rTMP3, 8 // env+104: ar.fpsr add rENV2 = 320, rENV2 // rENV2 = &env->context.rstate ;; - st8 [rENV1] = rTMP1 // env+112: lc + st8 [rENV1] = rTMP1, 112 // env+112: ar.lc STPTR [rENV2] = r0 // env+528: env->rstate = 0 nop ;; + // THIS CODE NEEDS TO BE SCHEDULED!!! + stf.spill [rENV0] = f2, 32 // env+208: f2 + stf.spill [rENV1] = f3, 32 // env+224: f3 + ;; + stf.spill [rENV0] = f4, 32 // env+240: f4 + stf.spill [rENV1] = f5, 32 // env+256: f5 + ;; + stf.spill [rENV0] = f16, 32 // env+272: f16 + stf.spill [rENV1] = f17, 32 // env+288: f17 + ;; + stf.spill [rENV0] = f18, 32 // env+304: f18 + stf.spill [rENV1] = f19, 32 // env+320: f19 + ;; + stf.spill [rENV0] = f20, 32 // env+336: f20 + stf.spill [rENV1] = f21, 32 // env+352: f21 + ;; + stf.spill [rENV0] = f22, 32 // env+368: f22 + stf.spill [rENV1] = f23, 32 // env+384: f23 + ;; + stf.spill [rENV0] = f24, 32 // env+400: f24 + stf.spill [rENV1] = f25, 32 // env+416: f25 + ;; + stf.spill [rENV0] = f26, 32 // env+432: f26 + stf.spill [rENV1] = f27, 32 // env+448: f27 + ;; + stf.spill [rENV0] = f28, 32 // env+464: f28 + stf.spill [rENV1] = f29, 32 // env+480: f29 + ;; + stf.spill [rENV0] = f30, 32 // env+496: f30 + stf.spill [rENV1] = f31, 32 // env+512: f31 + ;; mov ar.unat = rUNAT mov ret0 = r0 // return UWX_OK - br.ret.sptk b0 + br.ret.sptk b0 .endp +// uwx_self_install_context( +// struct uwx_env *env, +// uint64_t r15, +// uint64_t r16, +// uint64_t r17, +// uint64_t r18, +// uint64_t ret +// ); +// +// Installs the given context, 
and sets the landing pad binding +// registers r15-r18 to the values given. +// Returns the value "ret" to the new context (for testing -- +// when transferring to a landing pad, the new context won't +// care about the return value). + + .proc uwx_self_install_context + .global uwx_self_install_context +uwx_self_install_context: + .prologue + alloc rMYPFS = ar.pfs, 6, 0, 0, 0 + .body + SWIZZLE rENV0 = r0, r32 // rENV0 = &env + ;; + + // THIS CODE NEEDS TO BE SCHEDULED!!! + + // Restore GR 4-7 and ar.unat + add rENV1 = 136, rENV0 // &env->context.gr[0] + add rENV2 = 72, rENV0 // &env->context.priunat + ;; + ld8 rTMP2 = [rENV2], 24 // env+72: priunat + extr.u rTMP3 = rENV1, 3, 6 // bitpos = spill_loc{8:3} + ;; + ld8 rUNAT = [rENV2], 48 // env+96: ar.unat + sub rTMP4 = 64, rTMP3 // (64 - bitpos) + shl rTMP5 = rTMP2, rTMP3 // (unat << bitpos) + ;; + shr rTMP2 = rTMP2, rTMP4 // (unat >> (64 - bitpos)) -- NOTE(review): plain shr is presumably the signed form on IA-64 (shr.u is unsigned); confirm sign fill cannot leak into the NaT bits used for r4-r7 + ;; + or rTMP2 = rTMP2, rTMP5 // rotate_left(unat, bitpos) + ;; + mov ar.unat = rTMP2 // put priunat in place + ;; + ld8.fill r4 = [rENV1], 16 // env+136: r4 + ld8.fill r5 = [rENV2], 16 // env+144: r5 + ;; + ld8.fill r6 = [rENV1], 16 // env+152: r6 + ld8.fill r7 = [rENV2], 16 // env+160: r7 + ;; + mov ar.unat = rUNAT // restore real ar.unat + + // Restore BR 1-5 + ld8 rTMP1 = [rENV1], 16 // env+168: b1 + ld8 rTMP2 = [rENV2], 16 // env+176: b2 + ;; + ld8 rTMP3 = [rENV1], 16 // env+184: b3 + ld8 rTMP4 = [rENV2], -168 // env+192: b4 + mov b1 = rTMP1 + ;; + ld8 rTMP1 = [rENV1], -168 // env+200: b5 + mov b2 = rTMP2 + mov b3 = rTMP3 + mov b4 = rTMP4 + ;; + mov b5 = rTMP1 + + // Restore ar.bsp, ar.pfs, and ar.rnat + ld8 rPFS = [rENV1], 56 // env+32: cfm (+saved ar.ec) + mov rRSC = ar.rsc + adds rBIAS = 0x1f8, r0 + ;; + flushrs + ld8 rRNAT = [rENV1], -24 // env+88: ar.rnat + ld8 rPBSP = [rENV2], 88 // env+24: prev_bsp + and rRSC0 = -4, rRSC // 
clear ar.rsc.mode + ;; + mov ar.rsc = rRSC0 // enforced lazy mode + extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol + ;; + invala + and 
rBIAS = rPBSP, rBIAS // bias = prev_bsp & 0x1f8 ... + ;; + extr.u rBIAS = rBIAS, 3, 6 // ... div 8 + ;; + add rBIAS = rNSLOT, rBIAS // bias += nslots + ;; + cmp.lt p6, p0 = 63, rBIAS // if (63 < bias) ... + cmp.lt p7, p0 = 126, rBIAS // if (126 < bias) ... + ;; +(p6) add rNSLOT = 1, rNSLOT // ... nslots++ + ;; +(p7) add rNSLOT = 1, rNSLOT // ... nslots++ + ;; + dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3) + ;; + add rBSP = rPBSP, rTMP3 // bsp = prev_bsp + (nslots << 3) + ;; + mov ar.bspstore = rBSP // restore ar.bsp + ;; + mov ar.rnat = rRNAT // restore ar.rnat + mov ar.pfs = rPFS // restore ar.pfs + ;; + mov ar.rsc = rRSC // restore ar.rsc + + // Restore preds and ar.lc + ld8 rTMP1 = [rENV1], -56 // env+64: preds + ld8 rTMP2 = [rENV2], -96 // env+112: ar.lc + ;; + mov pr = rTMP1 + mov ar.lc = rTMP2 + + // Get previous sp and ip + ld8 rRP = [rENV1], 96 // env+8: ip (my rp) + ld8 rPSP = [rENV2], 112 // env+16: sp + ;; + + // Restore ar.fpsr and gp + ld8 rTMP1 = [rENV1], 104 // env+104: ar.fpsr + ld8 r1 = [rENV2], 96 // env+128: gp + ;; + mov ar.fpsr = rTMP1 // restore ar.fpsr + + // Restore FR 2-5 and 16-31 + ldf.fill f2 = [rENV1], 32 // env+208: f2 + ldf.fill f3 = [rENV2], 32 // env+224: f3 + ;; + ldf.fill f4 = [rENV1], 32 // env+240: f4 + ldf.fill f5 = [rENV2], 32 // env+256: f5 + ;; + ldf.fill f16 = [rENV1], 32 // env+272: f16 + ldf.fill f17 = [rENV2], 32 // env+288: f17 + ;; + ldf.fill f18 = [rENV1], 32 // env+304: f18 + ldf.fill f19 = [rENV2], 32 // env+320: f19 + ;; + ldf.fill f20 = [rENV1], 32 // env+336: f20 + ldf.fill f21 = [rENV2], 32 // env+352: f21 + ;; + ldf.fill f22 = [rENV1], 32 // env+368: f22 + ldf.fill f23 = [rENV2], 32 // env+384: f23 + ;; + ldf.fill f24 = [rENV1], 32 // env+400: f24 + ldf.fill f25 = [rENV2], 32 // env+416: f25 + ;; + ldf.fill f26 = [rENV1], 32 // env+432: f26 + ldf.fill f27 = [rENV2], 32 // env+448: f27 + ;; + ldf.fill f28 = [rENV1], 32 // env+464: f28 + ldf.fill f29 = [rENV2], 32 // env+480: f29 + ;; + ldf.fill f30 = [rENV1], 32 
// env+496: f30 + ldf.fill f31 = [rENV2], 32 // env+512: f31 + + // Set landing pad parameter registers + mov r15 = r33 + mov r16 = r34 + mov r17 = r35 + mov r18 = r36 + + // Restore previous sp and Return + mov ret0 = r37 + mov sp = rPSP + mov b0 = rRP + br.ret.sptk b0 + + .endp |