diff options
Diffstat (limited to 'tools/test/xregs_sig/c2x2c_amd64.S')
-rw-r--r-- | tools/test/xregs_sig/c2x2c_amd64.S | 140 |
1 files changed, 140 insertions, 0 deletions
/*
 * This file is in public domain.
 * Written by Dmitry Chagin <dchagin@FreeBSD.org>
 */

/*
 * tools/test/xregs_sig/c2x2c_amd64.S
 *
 * GNU as (AT&T syntax), preprocessed by cpp.  Every routine below takes
 * its only argument in %rdi and is a leaf function.
 *
 * Helpers that copy the SSE (%xmm0-%xmm15) and AVX (%ymm0-%ymm15)
 * register files to and from a caller-supplied memory buffer, plus a
 * probe that reports whether the AVX registers may be used.
 */

#if defined(__FreeBSD__)
#include <machine/specialreg.h>
#else
#define	CPUID2_OSXSAVE		0x08000000
#define	CPUID2_AVX		0x10000000
#define	XFEATURE_ENABLED_X87	0x00000001
#define	XFEATURE_ENABLED_SSE	0x00000002
#define	XFEATURE_ENABLED_AVX	0x00000004
#define	XFEATURE_AVX \
	(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#endif

	.text

/*
 * xregs_banks_max
 *
 * Out:   %eax = 1 when CPUID leaf 1 reports both AVX and OSXSAVE and
 *               XCR0 has the x87, SSE and AVX state bits all enabled;
 *               %eax = 0 otherwise.
 * Clobb: %rcx, %rdx, flags (%rbx is saved and restored).
 *
 * NOTE(review): judging by the name, callers presumably treat the result
 * as the index of the highest usable register bank (0 = XMM, 1 = YMM);
 * confirm against the C side of the test.
 */
	.globl	xregs_banks_max
	.type	xregs_banks_max, @function
xregs_banks_max:
	pushq	%rbx			/* cpuid overwrites callee-saved %rbx */
	movl	$1, %eax
	cpuid
	xorl	%eax, %eax		/* default answer: 0 */
	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	jne	.Ldone			/* AVX or OSXSAVE missing */
	xorl	%ecx, %ecx		/* select XCR0 */
	xgetbv				/* %edx:%eax = XCR0 */
	andl	$XFEATURE_AVX, %eax
	cmpl	$XFEATURE_AVX, %eax
	sete	%al			/* 1 iff x87, SSE and AVX are enabled */
	movzbl	%al, %eax
.Ldone:
	popq	%rbx
	retq

	.size	xregs_banks_max, . - xregs_banks_max


/*
 * cpu_to_xmm
 *
 * In:    %rdi = destination buffer, 16 * 16 bytes
 * Stores %xmm0..%xmm15 to consecutive 16-byte slots of the buffer.
 * Unaligned moves are used, so the buffer needs no particular alignment.
 */
	.globl	cpu_to_xmm
	.type	cpu_to_xmm, @function
cpu_to_xmm:
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	movdqu	%xmm\idx, \idx * 16(%rdi)
	.endr
	retq

	.size	cpu_to_xmm, . - cpu_to_xmm


/*
 * xmm_to_cpu
 *
 * In:    %rdi = source buffer, 16 * 16 bytes
 * Loads %xmm0..%xmm15 from consecutive 16-byte slots of the buffer.
 * All sixteen XMM registers are overwritten on return.
 */
	.globl	xmm_to_cpu
	.type	xmm_to_cpu, @function
xmm_to_cpu:
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	movdqu	\idx * 16(%rdi), %xmm\idx
	.endr
	retq

	.size	xmm_to_cpu, . - xmm_to_cpu


/*
 * cpu_to_avx
 *
 * In:    %rdi = destination buffer, 16 * 32 bytes
 * Stores %ymm0..%ymm15 to consecutive 32-byte slots of the buffer.
 * Uses AVX instructions; this routine itself performs no CPU feature
 * check.
 */
	.globl	cpu_to_avx
	.type	cpu_to_avx, @function
cpu_to_avx:
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	vmovdqu	%ymm\idx, \idx * 32(%rdi)
	.endr
	retq

	.size	cpu_to_avx, . - cpu_to_avx


/*
 * avx_to_cpu
 *
 * In:    %rdi = source buffer, 16 * 32 bytes
 * Loads %ymm0..%ymm15 from consecutive 32-byte slots of the buffer.
 * All sixteen YMM registers are overwritten on return; no vzeroupper is
 * issued, so the upper halves keep the loaded values.
 */
	.globl	avx_to_cpu
	.type	avx_to_cpu, @function
avx_to_cpu:
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	vmovdqu	\idx * 32(%rdi), %ymm\idx
	.endr
	retq

	.size	avx_to_cpu, . - avx_to_cpu

	.section	.note.GNU-stack,"",@progbits