aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/syscons
diff options
context:
space:
mode:
authorBruce Evans <bde@FreeBSD.org>2017-04-14 12:03:34 +0000
committerBruce Evans <bde@FreeBSD.org>2017-04-14 12:03:34 +0000
commitddac85e5a041fd8f41f4874de9808152c9979f22 (patch)
treed9e5fca74400126ca21e6cb92aac75cfb84bfa15 /sys/dev/syscons
parentda62ffd9cdf23773d526f4e3a6fe6d26c9f97774 (diff)
downloadsrc-ddac85e5a041fd8f41f4874de9808152c9979f22.tar.gz
src-ddac85e5a041fd8f41f4874de9808152c9979f22.zip
Further unobfuscate the method of drawing the mouse cursor in vga planar
mode. Don't manually unroll the 2 inner loops. On Haswell, doing so gave a speedup of about 0.5% (about 4 cycles per iteration out of 1400), but hard-coded a limit of width 9 and made better optimizations harder to see. gcc-4.2.1 -O does the unrolling anyway, unless tricked with a volatile hack. gcc's unrolling is not very good and gives a speedup of about half as much (about 2 cycles per iteration). (All timing on i386.) Manual unrolling was only feasible because the inner loop only iterates once or twice. Usually twice, but a dynamic check is needed to decide, and was not moved from the second-innermost loop manually or by gcc. This commit basically adds another dynamic check in the inner loop. Cursor widths of 10-17 require 3 iterations in the inner loop and this is not so easy to unroll -- even gcc stops at 2.
Notes
Notes: svn path=/head/; revision=316827
Diffstat (limited to 'sys/dev/syscons')
-rw-r--r--sys/dev/syscons/scvgarndr.c49
1 files changed, 17 insertions, 32 deletions
diff --git a/sys/dev/syscons/scvgarndr.c b/sys/dev/syscons/scvgarndr.c
index 2ec06a514b6c..8450deb74c9c 100644
--- a/sys/dev/syscons/scvgarndr.c
+++ b/sys/dev/syscons/scvgarndr.c
@@ -1031,7 +1031,7 @@ draw_pxlmouse_planar(scr_stat *scp, int x, int y)
int xoff, yoff;
int ymax;
u_short m;
- int i, j;
+ int i, j, k;
line_width = scp->sc->adp->va_line_width;
xoff = (x - scp->xoff*8)%8;
@@ -1043,42 +1043,27 @@ draw_pxlmouse_planar(scr_stat *scp, int x, int y)
outw(GDCIDX, 0xff08); /* bit mask */
outw(GDCIDX, 0x0803); /* data rotate/function select (and) */
p = scp->sc->adp->va_window + line_width*y + x/8;
- if (x < scp->xpixel - 8) {
- for (i = y, j = 0; i < ymax; ++i, ++j) {
- m = ~((mouse_and_mask[j] & ~mouse_or_mask[j]) >> xoff);
- readb(p);
- writeb(p, m >> 8);
- readb(p + 1);
- writeb(p + 1, m);
- p += line_width;
- }
- } else {
- xoff += 8;
- for (i = y, j = 0; i < ymax; ++i, ++j) {
- m = ~((mouse_and_mask[j] & ~mouse_or_mask[j]) >> xoff);
- readb(p);
- writeb(p, m);
- p += line_width;
+ for (i = y, j = 0; i < ymax; ++i, ++j) {
+ m = ~((mouse_and_mask[j] & ~mouse_or_mask[j]) >> xoff);
+ for (k = 0; k < 2; ++k) {
+ if (x + 8 * k < scp->xpixel) {
+ readb(p + k);
+ writeb(p + k, m >> (8 * (1 - k)));
+ }
}
+ p += line_width;
}
outw(GDCIDX, 0x1003); /* data rotate/function select (or) */
p = scp->sc->adp->va_window + line_width*y + x/8;
- if (x < scp->xpixel - 8) {
- for (i = y, j = 0; i < ymax; ++i, ++j) {
- m = mouse_or_mask[j] >> xoff;
- readb(p);
- writeb(p, m >> 8);
- readb(p + 1);
- writeb(p + 1, m);
- p += line_width;
- }
- } else {
- for (i = y, j = 0; i < ymax; ++i, ++j) {
- m = mouse_or_mask[j] >> xoff;
- readb(p);
- writeb(p, m);
- p += line_width;
+ for (i = y, j = 0; i < ymax; ++i, ++j) {
+ m = mouse_or_mask[j] >> xoff;
+ for (k = 0; k < 2; ++k) {
+ if (x + 8 * k < scp->xpixel) {
+ readb(p + k);
+ writeb(p + k, m >> (8 * (1 - k)));
+ }
}
+ p += line_width;
}
outw(GDCIDX, 0x0003); /* data rotate/function select */
}