aboutsummaryrefslogtreecommitdiff
path: root/lib/builtins/hexagon/dffma.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/builtins/hexagon/dffma.S')
-rw-r--r--lib/builtins/hexagon/dffma.S103
1 files changed, 47 insertions, 56 deletions
diff --git a/lib/builtins/hexagon/dffma.S b/lib/builtins/hexagon/dffma.S
index 97b885a3bf27..c201d3d8be5e 100644
--- a/lib/builtins/hexagon/dffma.S
+++ b/lib/builtins/hexagon/dffma.S
@@ -1,16 +1,15 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
-/* Double Precision Multiply */
+// Double Precision Multiply
#define A r1:0
@@ -76,33 +75,29 @@
#define SR_ROUND_OFF 22
#endif
- /*
- * First, classify for normal values, and abort if abnormal
- *
- * Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
- *
- * Since we know that the 2 MSBs of the H registers is zero, we should never carry
- * the partial products that involve the H registers
- *
- * Try to buy X slots, at the expense of latency if needed
- *
- * We will have PP_HH with the upper bits of the product, PP_LL with the lower
- * PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
- * PP_HH can have a minimum of 0x0100_0000_0000_0000
- *
- * 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
- *
- * We need to align CTMP.
- * If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
- * If CTMP << PP align CTMP and add 128 bits. Then compute sticky
- * If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
- *
- * Convert partial product and CTMP to 2's complement prior to addition
- *
- * After we add, we need to normalize into upper 64 bits, then compute sticky.
- *
- *
- */
+ // First, classify for normal values, and abort if abnormal
+ //
+ // Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
+ //
+ // Since we know that the 2 MSBs of the H registers are zero, we should never carry
+ // the partial products that involve the H registers
+ //
+ // Try to buy X slots, at the expense of latency if needed
+ //
+ // We will have PP_HH with the upper bits of the product, PP_LL with the lower
+ // PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
+ // PP_HH can have a minimum of 0x0100_0000_0000_0000
+ //
+ // 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
+ //
+ // We need to align CTMP.
+ // If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
+ // If CTMP << PP align CTMP and add 128 bits. Then compute sticky
+ // If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
+ //
+ // Convert partial product and CTMP to 2's complement prior to addition
+ //
+ // After we add, we need to normalize into upper 64 bits, then compute sticky.
.text
.global __hexagon_fmadf4
@@ -182,14 +177,12 @@ fma:
#define EXPCA r19:18
EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
- /* PP_HH:PP_LL now has product */
- /* CTMP is negated */
- /* EXPA,B,C are extracted */
- /*
- * We need to negate PP
- * Since we will be adding with carry later, if we need to negate,
- * just invert all bits now, which we can do conditionally and in parallel
- */
+ // PP_HH:PP_LL now has product
+ // CTMP is negated
+ // EXPA,B,C are extracted
+ // We need to negate PP
+ // Since we will be adding with carry later, if we need to negate,
+ // just invert all bits now, which we can do conditionally and in parallel
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
{
@@ -274,18 +267,16 @@ fma:
PP_HH = add(CTMP,PP_HH,P_CARRY):carry
TMP = #62
}
- /*
- * PP_HH:PP_LL now holds the sum
- * We may need to normalize left, up to ??? bits.
- *
- * I think that if we have massive cancellation, the range we normalize by
- * is still limited
- */
+ // PP_HH:PP_LL now holds the sum
+ // We may need to normalize left, up to ??? bits.
+ //
+ // I think that if we have massive cancellation, the range we normalize by
+ // is still limited
{
LEFTSHIFT = add(clb(PP_HH),#-2)
if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
}
- /* We had all sign bits, shift left by 62. */
+ // We had all sign bits, shift left by 62.
{
CTMP = extractu(PP_LL,#62,#2)
PP_LL = asl(PP_LL,#62)
@@ -330,7 +321,7 @@ fma:
if (!P_TMP) dealloc_return // not zero, return
}
.Ladd_yields_zero:
- /* We had full cancellation. Return +/- zero (-0 when round-down) */
+ // We had full cancellation. Return +/- zero (-0 when round-down)
{
TMP = USR
A = #0
@@ -408,9 +399,9 @@ fma:
EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
p3 = cmp.gt(CTMPH,#-1)
}
- /* Underflow */
- /* We know that the infinte range exponent should be EXPA */
- /* CTMP is 2's complement, ATMP is abs(CTMP) */
+ // Underflow
+ // We know that the infinite range exponent should be EXPA
+ // CTMP is 2's complement, ATMP is abs(CTMP)
{
EXPA = add(EXPA,EXPB) // how much to shift back right
ATMP = asl(ATMP,EXPB) // shift left
@@ -593,7 +584,7 @@ fma:
p1 = dfclass(C,#0x08)
if (p1.new) jump:nt .Lfma_inf_plus_inf
}
- /* A*B is +/- inf, C is finite. Return A */
+ // A*B is +/- inf, C is finite. Return A
{
jumpr r31
}
@@ -649,7 +640,7 @@ fma:
if (!p0) A = C // If C is not zero, return C
if (!p0) jumpr r31
}
- /* B has correctly signed zero, C is also zero */
+ // B has correctly signed zero, C is also zero
.Lzero_plus_zero:
{
p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
@@ -674,8 +665,8 @@ fma:
#define CTMP r11:10
.falign
.Lfma_abnormal_c:
- /* We know that AB is normal * normal */
- /* C is not normal: zero, subnormal, inf, or NaN. */
+ // We know that AB is normal * normal
+ // C is not normal: zero, subnormal, inf, or NaN.
{
p0 = dfclass(C,#0x10) // is C NaN?
if (p0.new) jump:nt .Lnan