aboutsummaryrefslogtreecommitdiff
path: root/sys/cam/ctl/ctl_tpc.c
diff options
context:
space:
mode:
authorAlexander Motin <mav@FreeBSD.org>2015-08-05 13:46:15 +0000
committerAlexander Motin <mav@FreeBSD.org>2015-08-05 13:46:15 +0000
commit73942c5ce0c95dec6d2c02a2c8558f648243c03c (patch)
tree2a4097c22bb6ed2ae36752098bd28be779b3debe /sys/cam/ctl/ctl_tpc.c
parent2412ae2b8e3af7ec2e08b9081d14b9c57bfac857 (diff)
downloadsrc-73942c5ce0c95dec6d2c02a2c8558f648243c03c.tar.gz
src-73942c5ce0c95dec6d2c02a2c8558f648243c03c.zip
Issue all reads of single XCOPY segment simultaneously.
During vMotion and Clone operations VMware by default runs multiple sequential 4MB XCOPY requests at the same time. If CTL issues reads sequentially in 1MB chunks for each XCOPY command, reads from different commands are not detected as sequential by the serseq option code and are allowed to execute simultaneously. Such a read pattern confused the ZFS prefetcher, causing suboptimal disk access. Issuing all reads at the same time makes the serseq code work properly, serializing reads both within each XCOPY command and between them. My tests with a ZFS pool of 14 disks in RAID10 show prefetcher efficiency improved from 37% to 99.7%, copying speed improved by 10-60%, and average read latency reduced by half on the HDD layer and by five times on the zvol layer. MFC after: 2 weeks Sponsored by: iXsystems, Inc.
Notes
Notes: svn path=/head/; revision=286320
Diffstat (limited to 'sys/cam/ctl/ctl_tpc.c')
-rw-r--r--sys/cam/ctl/ctl_tpc.c9
1 file changed, 4 insertions, 5 deletions
diff --git a/sys/cam/ctl/ctl_tpc.c b/sys/cam/ctl/ctl_tpc.c
index 490cddd510b7..662ee3d064a0 100644
--- a/sys/cam/ctl/ctl_tpc.c
+++ b/sys/cam/ctl/ctl_tpc.c
@@ -817,7 +817,7 @@ tpc_process_b2b(struct tpc_list *list)
struct scsi_ec_segment_b2b *seg;
struct scsi_ec_cscd_dtsp *sdstp, *ddstp;
struct tpc_io *tior, *tiow;
- struct runl run, *prun;
+ struct runl run;
uint64_t sl, dl;
off_t srclba, dstlba, numbytes, donebytes, roundbytes;
int numlba;
@@ -889,8 +889,7 @@ tpc_process_b2b(struct tpc_list *list)
list->segsectors = numbytes / dstblock;
donebytes = 0;
TAILQ_INIT(&run);
- prun = &run;
- list->tbdio = 1;
+ list->tbdio = 0;
while (donebytes < numbytes) {
roundbytes = numbytes - donebytes;
if (roundbytes > TPC_MAX_IO_SIZE) {
@@ -942,8 +941,8 @@ tpc_process_b2b(struct tpc_list *list)
tiow->io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = tiow;
TAILQ_INSERT_TAIL(&tior->run, tiow, rlinks);
- TAILQ_INSERT_TAIL(prun, tior, rlinks);
- prun = &tior->run;
+ TAILQ_INSERT_TAIL(&run, tior, rlinks);
+ list->tbdio++;
donebytes += roundbytes;
srclba += roundbytes / srcblock;
dstlba += roundbytes / dstblock;