aboutsummaryrefslogtreecommitdiff
path: root/bin/ls
diff options
context:
space:
mode:
author Tim J. Robbins <tjr@FreeBSD.org> 2004-05-02 11:25:37 +0000
committer Tim J. Robbins <tjr@FreeBSD.org> 2004-05-02 11:25:37 +0000
commit 107409f46e47bfa3d0d5769956e70abab8a82daa (patch)
tree 33ababbec3452462cff8a64581a3fc9f628a2913 /bin/ls
parent 30d3088041a74c4d74a878104ad2b4855d5a01c2 (diff)
download src-107409f46e47bfa3d0d5769956e70abab8a82daa.tar.gz
src-107409f46e47bfa3d0d5769956e70abab8a82daa.zip
Treat filenames as multibyte character strings (according to the current
LC_CTYPE setting) when determining which characters are printable. This is
an often-requested feature.

Use wcwidth() to determine the number of column positions a character takes
up, although there are still a few places left where we assume 1 byte =
1 column position, e.g. line-wrapping when handling the -m option.

The error handling here is somewhat more complicated than usual: we do our
best to show what we can of a filename in the presence of conversion errors,
instead of simply aborting.
Notes
Notes: svn path=/head/; revision=128823
Diffstat (limited to 'bin/ls')
-rw-r--r-- bin/ls/extern.h | 1
-rw-r--r-- bin/ls/ls.1 | 6
-rw-r--r-- bin/ls/print.c | 3
-rw-r--r-- bin/ls/util.c | 187
4 files changed, 132 insertions, 65 deletions
diff --git a/bin/ls/extern.h b/bin/ls/extern.h
index 9482dad1be4f..52b6a857c3ed 100644
--- a/bin/ls/extern.h
+++ b/bin/ls/extern.h
@@ -45,6 +45,7 @@ int printname(const char *);
void printscol(const DISPLAY *);
void printstream(const DISPLAY *);
void usage(void);
+int prn_normal(const char *);
size_t len_octal(const char *, int);
int prn_octal(const char *);
int prn_printable(const char *);
diff --git a/bin/ls/ls.1 b/bin/ls/ls.1
index eb84484e14be..8554f54e543b 100644
--- a/bin/ls/ls.1
+++ b/bin/ls/ls.1
@@ -31,7 +31,7 @@
.\" @(#)ls.1 8.7 (Berkeley) 7/29/94
.\" $FreeBSD$
.\"
-.Dd March 21, 2004
+.Dd May 2, 2004
.Dt LS 1
.Os
.Sh NAME
@@ -673,7 +673,3 @@ command appeared in
.Sh BUGS
To maintain backward compatibility, the relationships between the many
options are quite complex.
-.Pp
-The
-.Nm
-utility does not recognize multibyte characters in filenames.
diff --git a/bin/ls/print.c b/bin/ls/print.c
index 1d6bec9614c4..eb0258e281f7 100644
--- a/bin/ls/print.c
+++ b/bin/ls/print.c
@@ -147,7 +147,7 @@ printname(const char *name)
else if (f_nonprint)
return prn_printable(name);
else
- return printf("%s", name);
+ return prn_normal(name);
}
void
@@ -239,6 +239,7 @@ printstream(const DISPLAY *dp)
for (p = dp->list, chcnt = 0; p; p = p->fts_link) {
if (p->fts_number == NO_PRINT)
continue;
+ /* XXX strlen does not take octal escapes into account. */
if (strlen(p->fts_name) + chcnt +
(p->fts_link ? 2 : 0) >= (unsigned)termwidth) {
putchar('\n');
diff --git a/bin/ls/util.c b/bin/ls/util.c
index ea305c1b44f7..a7b82080e287 100644
--- a/bin/ls/util.c
+++ b/bin/ls/util.c
@@ -44,25 +44,81 @@ __FBSDID("$FreeBSD$");
#include <ctype.h>
#include <err.h>
#include <fts.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
#include "ls.h"
#include "extern.h"
int
+prn_normal(const char *s)
+{
+ mbstate_t mbs;
+ wchar_t wc;
+ int i, n;
+ size_t clen;
+
+ memset(&mbs, 0, sizeof(mbs));
+ n = 0;
+ while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) {
+ if (clen == (size_t)-2) {
+ n += printf("%s", s);
+ break;
+ }
+ if (clen == (size_t)-1) {
+ memset(&mbs, 0, sizeof(mbs));
+ putchar((unsigned char)*s);
+ s++;
+ n++;
+ continue;
+ }
+ for (i = 0; i < (int)clen; i++)
+ putchar((unsigned char)s[i]);
+ s += clen;
+ n += wcwidth(wc);
+ }
+ return (n);
+}
+
+int
prn_printable(const char *s)
{
- char c;
- int n;
+ mbstate_t mbs;
+ wchar_t wc;
+ int i, n;
+ size_t clen;
- for (n = 0; (c = *s) != '\0'; ++s, ++n)
- if (isprint((unsigned char)c))
- putchar(c);
- else
+ memset(&mbs, 0, sizeof(mbs));
+ n = 0;
+ while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) {
+ if (clen == (size_t)-1) {
+ putchar('?');
+ s++;
+ n++;
+ memset(&mbs, 0, sizeof(mbs));
+ continue;
+ }
+ if (clen == (size_t)-2) {
putchar('?');
- return n;
+ n++;
+ break;
+ }
+ if (!iswprint(wc)) {
+ putchar('?');
+ s += clen;
+ n++;
+ continue;
+ }
+ for (i = 0; i < (int)clen; i++)
+ putchar((unsigned char)s[i]);
+ s += clen;
+ n += wcwidth(wc);
+ }
+ return (n);
}
/*
@@ -81,70 +137,83 @@ prn_printable(const char *s)
size_t
len_octal(const char *s, int len)
{
- size_t r = 0;
+ mbstate_t mbs;
+ wchar_t wc;
+ size_t clen, r;
- while (len--)
- if (isprint((unsigned const char)*s++)) r++; else r += 4;
- return r;
+ memset(&mbs, 0, sizeof(mbs));
+ r = 0;
+ while (len != 0 && (clen = mbrtowc(&wc, s, len, &mbs)) != 0) {
+ if (clen == (size_t)-1) {
+ r += 4;
+ s++;
+ len--;
+ memset(&mbs, 0, sizeof(mbs));
+ continue;
+ }
+ if (clen == (size_t)-2) {
+ r += 4 * len;
+ break;
+ }
+ if (iswprint(wc))
+ r++;
+ else
+ r += 4 * clen;
+ s += clen;
+ }
+ return (r);
}
int
prn_octal(const char *s)
{
- unsigned char ch;
- int len = 0;
+ static const char esc[] = "\\\\\"\"\aa\bb\ff\nn\rr\tt\vv";
+ const char *p;
+ mbstate_t mbs;
+ wchar_t wc;
+ size_t clen;
+ unsigned char ch;
+ int goodchar, i, len, prtlen;
- while ((ch = (unsigned char)*s++)) {
- if (isprint(ch) && (ch != '\"') && (ch != '\\'))
- putchar(ch), len++;
- else if (f_octal_escape) {
- putchar('\\');
- switch (ch) {
- case '\\':
- putchar('\\');
- break;
- case '\"':
- putchar('"');
- break;
- case '\a':
- putchar('a');
- break;
- case '\b':
- putchar('b');
- break;
- case '\f':
- putchar('f');
- break;
- case '\n':
- putchar('n');
- break;
- case '\r':
- putchar('r');
- break;
- case '\t':
- putchar('t');
- break;
- case '\v':
- putchar('v');
- break;
- default:
+ memset(&mbs, 0, sizeof(mbs));
+ len = 0;
+ while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) {
+ goodchar = clen != (size_t)-1 && clen != (size_t)-2;
+ if (goodchar && iswprint(wc) && wc != L'\"' && wc != L'\\') {
+ for (i = 0; i < (int)clen; i++)
+ putchar((unsigned char)s[i]);
+ len += wcwidth(wc);
+ } else if (goodchar && f_octal_escape && wc >= 0 &&
+ wc <= (wchar_t)UCHAR_MAX &&
+ (p = strchr(esc, (char)wc)) != NULL) {
+ putchar('\\');
+ putchar(p[1]);
+ len += 2;
+ } else {
+ if (goodchar)
+ prtlen = clen;
+ else if (clen == (size_t)-1)
+ prtlen = 1;
+ else
+ prtlen = strlen(s);
+ for (i = 0; i < prtlen; i++) {
+ ch = (unsigned char)s[i];
+ putchar('\\');
putchar('0' + (ch >> 6));
putchar('0' + ((ch >> 3) & 7));
putchar('0' + (ch & 7));
- len += 2;
- break;
- }
- len += 2;
- }
- else {
- putchar('\\');
- putchar('0' + (ch >> 6));
- putchar('0' + ((ch >> 3) & 7));
- putchar('0' + (ch & 7));
- len += 4;
+ len += 4;
+ }
}
+ if (clen == (size_t)-2)
+ break;
+ if (clen == (size_t)-1) {
+ memset(&mbs, 0, sizeof(mbs));
+ s++;
+ } else
+ s += clen;
}
- return len;
+ return (len);
}
void