about summary refs log tree commit diff
path: root/contrib/file/ascmagic.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/file/ascmagic.c')
-rw-r--r-- contrib/file/ascmagic.c 119
1 files changed, 71 insertions, 48 deletions
diff --git a/contrib/file/ascmagic.c b/contrib/file/ascmagic.c
index dc3ebd38010e..43d467cd3e22 100644
--- a/contrib/file/ascmagic.c
+++ b/contrib/file/ascmagic.c
@@ -49,7 +49,7 @@
#include "names.h"
#ifndef lint
-FILE_RCSID("@(#)$Id: ascmagic.c,v 1.41 2004/09/11 19:15:57 christos Exp $")
+FILE_RCSID("@(#)$Id: ascmagic.c,v 1.45 2006/03/12 22:09:33 christos Exp $")
#endif /* lint */
typedef unsigned long unichar;
@@ -71,10 +71,11 @@ protected int
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
{
size_t i;
- unsigned char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */
- unichar ubuf[HOWMANY+1]; /* one extra for terminating '\0' */
+ unsigned char *nbuf = NULL;
+ unichar *ubuf = NULL;
size_t ulen;
struct names *p;
+ int rv = -1;
const char *code = NULL;
const char *code_mime = NULL;
@@ -84,6 +85,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
int has_escapes = 0;
int has_backspace = 0;
+ int seen_cr = 0;
int n_crlf = 0;
int n_lf = 0;
@@ -97,13 +99,13 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
* Undo the NUL-termination kindly provided by process()
* but leave at least one byte to look at
*/
-
while (nbytes > 1 && buf[nbytes - 1] == '\0')
nbytes--;
- /* nbuf and ubuf relies on this */
- if (nbytes > HOWMANY)
- nbytes = HOWMANY;
+ if ((nbuf = malloc((nbytes + 1) * sizeof(nbuf[0]))) == NULL)
+ goto done;
+ if ((ubuf = malloc((nbytes + 1) * sizeof(ubuf[0]))) == NULL)
+ goto done;
/*
* Then try to determine whether it's any character code we can
@@ -147,10 +149,16 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
type = "character data";
code_mime = "ebcdic";
} else {
- return 0; /* doesn't look like text at all */
+ rv = 0;
+ goto done; /* doesn't look like text at all */
}
}
+ if (nbytes <= 1) {
+ rv = 0;
+ goto done;
+ }
+
/*
* for troff, look for . + letter + letter or .\";
* this must be done to disambiguate tar archives' ./file
@@ -224,66 +232,75 @@ subtype_identified:
* Now try to discover other details about the file.
*/
for (i = 0; i < ulen; i++) {
- if (i > last_line_end + MAXLINELEN)
- has_long_lines = 1;
-
- if (ubuf[i] == '\033')
- has_escapes = 1;
- if (ubuf[i] == '\b')
- has_backspace = 1;
-
- if (ubuf[i] == '\r' && (i + 1 < ulen && ubuf[i + 1] == '\n')) {
- n_crlf++;
+ if (ubuf[i] == '\n') {
+ if (seen_cr)
+ n_crlf++;
+ else
+ n_lf++;
last_line_end = i;
- }
- if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) {
+ } else if (seen_cr)
n_cr++;
+
+ seen_cr = (ubuf[i] == '\r');
+ if (seen_cr)
last_line_end = i;
- }
- if (ubuf[i] == '\n' && ((int)i - 1 < 0 || ubuf[i - 1] != '\r')){
- n_lf++;
- last_line_end = i;
- }
+
if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
n_nel++;
last_line_end = i;
}
+
+ /* If this line is _longer_ than MAXLINELEN, remember it. */
+ if (i > last_line_end + MAXLINELEN)
+ has_long_lines = 1;
+
+ if (ubuf[i] == '\033')
+ has_escapes = 1;
+ if (ubuf[i] == '\b')
+ has_backspace = 1;
}
+ /* Beware, if the data has been truncated, the final CR could have
+ been followed by a LF. If we have HOWMANY bytes, it indicates
+ that the data might have been truncated, probably even before
+ this function was called. */
+ if (seen_cr && nbytes < HOWMANY)
+ n_cr++;
+
if ((ms->flags & MAGIC_MIME)) {
if (subtype_mime) {
if (file_printf(ms, subtype_mime) == -1)
- return -1;
+ goto done;
} else {
if (file_printf(ms, "text/plain") == -1)
- return -1;
+ goto done;
}
if (code_mime) {
if (file_printf(ms, "; charset=") == -1)
- return -1;
+ goto done;
if (file_printf(ms, code_mime) == -1)
- return -1;
+ goto done;
}
} else {
if (file_printf(ms, code) == -1)
- return -1;
+ goto done;
if (subtype) {
if (file_printf(ms, " ") == -1)
- return -1;
+ goto done;
if (file_printf(ms, subtype) == -1)
- return -1;
+ goto done;
}
if (file_printf(ms, " ") == -1)
- return -1;
+ goto done;
if (file_printf(ms, type) == -1)
- return -1;
+ goto done;
if (has_long_lines)
if (file_printf(ms, ", with very long lines") == -1)
- return -1;
+ goto done;
/*
* Only report line terminators if we find one other than LF,
@@ -292,51 +309,57 @@ subtype_identified:
if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
(n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
if (file_printf(ms, ", with") == -1)
- return -1;
+ goto done;
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
if (file_printf(ms, " no") == -1)
- return -1;
+ goto done;
} else {
if (n_crlf) {
if (file_printf(ms, " CRLF") == -1)
- return -1;
+ goto done;
if (n_cr || n_lf || n_nel)
if (file_printf(ms, ",") == -1)
- return -1;
+ goto done;
}
if (n_cr) {
if (file_printf(ms, " CR") == -1)
- return -1;
+ goto done;
if (n_lf || n_nel)
if (file_printf(ms, ",") == -1)
- return -1;
+ goto done;
}
if (n_lf) {
if (file_printf(ms, " LF") == -1)
- return -1;
+ goto done;
if (n_nel)
if (file_printf(ms, ",") == -1)
- return -1;
+ goto done;
}
if (n_nel)
if (file_printf(ms, " NEL") == -1)
- return -1;
+ goto done;
}
if (file_printf(ms, " line terminators") == -1)
- return -1;
+ goto done;
}
if (has_escapes)
if (file_printf(ms, ", with escape sequences") == -1)
- return -1;
+ goto done;
if (has_backspace)
if (file_printf(ms, ", with overstriking") == -1)
- return -1;
+ goto done;
}
+ rv = 1;
+done:
+ if (nbuf)
+ free(nbuf);
+ if (ubuf)
+ free(ubuf);
- return 1;
+ return rv;
}
private int