/*
 * Repository page header (kept from the source listing):
 * Files
 * Arquivotheca.Solaris-2.5/cmd/sort/sort.c
 * seta75D 7c4988eac0 Init
 * 2021-10-11 19:38:01 -03:00
 *
 * 1937 lines
 * 38 KiB
 * C
 * Executable File
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T */
/* The copyright notice above does not evidence any */
/* actual or intended publication of such source code. */
/* Portions Copyright (c) 1988, 1991 - 1994, Sun Microsystems, Inc */
/* All Rights Reserved. */
#ident "@(#)sort.c 1.21 95/02/27 SMI" /* SVr4.0 1.22 */
#include <stdio.h>
#include <ctype.h>
#include <signal.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <values.h>
#include <stdlib.h>
#include <limits.h>
#include <locale.h>
#include <widec.h>
#include <wctype.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <ulimit.h>
#include <errno.h>
/*
 * Tuning constants.
 * N  - maximum merge order: initial value of nway, size of tfile[] in
 *      merge(), and the number of descriptors closed at startup.
 * C  - not referenced anywhere in the visible code.
 * NF - number of key slots allocated initially and added on each growth
 *      of the fields array.
 */
#define N 16
#define C 20
#define NF 10
#define MTHRESH 8 /* threshold for doing median of 3 qksort selection */
#define TREEZ 32 /* no less than N and best if power of 2 */
/*
* Memory administration
*
* Using a lot of memory is great when sorting a lot of data.
* Using a megabyte to sort the output of `who' loses big.
* MAXMEM, MINMEM and DEFMEM define the absolute maximum,
* minimum and default memory requirements. Administrators
* can override any or all of these via defines at compile time.
* Users can override the amount allocated (within the limits
* of MAXMEM and MINMEM) on the command line.
*
* For PDP-11s, memory is limited by the maximum unsigned number, 2^16-1.
* Administrators can override this too.
* Arguments to core getting routines must be unsigned.
* Unsigned long not supported on 11s.
*/
/*
 * Compile-time overridable bounds (in bytes) for the sort work area;
 * grow_core() clamps every request to [MINMEM, MAXMEM] and main() starts
 * with tryfor = DEFMEM.
 */
#ifndef MAXMEM
#define MAXMEM 1048576 /* Megabyte maximum */
#endif
#ifndef MINMEM
#define MINMEM 16384 /* 16K minimum */
#endif
#ifndef DEFMEM
#define DEFMEM 32768 /* Same as old sort */
#endif
/* Values of struct field.fcmp: how a key is compared. */
#define ASC 0 /* character comparison (default) */
#define NUM 1 /* numeric comparison, set by 'n' */
#define MON 2 /* month-name comparison, set by 'M' */
/* True for any wide white-space character except newline. */
#define blank(c) (iswspace(c) && ((c) != L'\n'))
/* #define blank(c) ((c)==' ' || (c)=='\t') */
static FILE *os; /* current output stream, set by newfile()/oldfile() */
static char *dirtry = NULL; /* -T directory handed to tempnam() */
static char *file1; /* temporary file name buffer (base name + 2-letter suffix) */
static char *file; /* name returned by setfil() for temporary files */
static char *filep; /* points at the 2-letter suffix inside file1 */
static int nfiles; /* input files plus temporary files created so far */
static int *lspace; /* break value at startup: base of the work area */
static int *maxbrk; /* value returned by ulimit(3, 0L) in main() */
static unsigned tryfor; /* number of bytes of work area to ask for */
static unsigned alloc; /* bytes of work area currently obtained above lspace */
static char bufin[BUFSIZ], bufout[BUFSIZ];
/*
* Use setbuf's to avoid malloc calls.
* malloc seems to get heartburn
* when brk returns storage.
*/
static int maxrec; /* longest record seen so far, in wchar_t units */
static int mflg; /* set by 'm': merge only */
static int nway; /* merge order chosen in main() */
static int cflg; /* set by 'c': check only */
static int uflg; /* set by 'u': suppress records that compare equal */
static char *outfil; /* -o output file name, or 0 for stdout */
static int unsafeout; /* kludge to assure -m -o works */
static wchar_t tabchar; /* -t field separator, 0 when fields are blank-separated */
static int eargc; /* number of input file arguments */
static char **eargv; /* input file arguments (aliases argv, compacted in main()) */
/*
 * One entry per run being merged. treep[] holds pointers to these entries
 * and is kept ordered by insert(); tree[] is the backing storage.
 */
static struct btree {
wchar_t *rp; /* current record of this run */
int rn; /* run index: block number in msort(), tfile[] index in merge() */
int recsz; /* capacity of rp in wchar_t units (used by xrline()) */
int allocflag; /* non-zero when rp was obtained from malloc/realloc */
} tree[TREEZ], *treep[TREEZ];
/* Counters used by insert() to decide whether to try its "already first" shortcut. */
static long wasfirst = 0, notfirst = 0;
static int bonus; /* weight added to wasfirst; computed by cmpsave() */
static wchar_t *save; /* copy of the last record written, used for -u in merge() */
static wchar_t *lines[2]; /* the two record buffers alternated by checksort() */
static int save_alloc; /* non-zero once save has been replaced by a malloc'ed buffer */
/*
 * Description of one sort key. fields[0] carries the global options;
 * fields[1..nfields] are the keys given with +pos/-pos or -k.
 * Index 0 of the two-element arrays describes the key start, index 1 the
 * key end.
 */
static struct field {
wchar_t (*code)(wchar_t); /* WAS: unsigned char *code; */
int (*ignore)(wchar_t); /* WAS: unsigned char *ignore; */
int fcmp; /* ASC, NUM or MON */
int rflg; /* 1 normally, -1 when 'r' (reverse) was given */
int bflg[2]; /* non-zero: skip leading blanks at start/end position */
int m[2]; /* number of fields to skip; negative means end of line */
int n[2]; /* number of characters to skip after the fields */
} *fields;
/* Forward declarations of the file-local functions defined below. */
static wchar_t nofold(wchar_t);
static void sort(void);
static void msort(wchar_t **, wchar_t **);
static void insert(struct btree **, int);
static void merge(int, int);
static void cline(wchar_t *, wchar_t *);
static int xrline(FILE *, struct btree *);
static int yrline(FILE *, int);
static void wline(wchar_t *);
static void checksort(void);
static void disorder(char *, wchar_t *);
static void newfile(void);
static char *setfil(int);
static void oldfile(void);
static void safeoutfil(void);
static void cant(char *);
static void diag1(const char *, int);
static void diag2(const char *f, const char *, int);
static void term(void);
static int getsign(wchar_t *, wchar_t *);
static int cmp(wchar_t *, wchar_t *);
static int cmpa(wchar_t *, wchar_t *);
static wchar_t *skip(wchar_t *, struct field *, int);
static wchar_t *eol(wchar_t *);
static void initree(void);
static int cmpsave(int);
static int field(char *, int, int);
static int number(char **);
static void qksort(wchar_t **, wchar_t **);
static void month_init(void);
static int month(wchar_t *);
static void rderror(char *);
static void wterror(char *);
static int grow_core(unsigned, unsigned);
static int nonprint(wchar_t);
static int dict(wchar_t);
static wchar_t fold(wchar_t);
static void initdecpnt(void);
static void warning(void);
static void usage(void);
static int zero(wchar_t);
static char *get_subopt(int, char **, char);
/* Record comparison in use: whole-line cmpa() until field() selects cmp(). */
static int (*compare)(wchar_t *, wchar_t *) = cmpa;
/* Default key description copied into every new fields[] slot. */
static struct field proto = {
nofold, /* code: characters compared unchanged */
zero, /* ignore: no character is ignored */
ASC, /* fcmp */
1, /* rflg: not reversed */
0, /* bflg[0] */
0, /* bflg[1] */
0, /* m[0] */
-1, /* m[1]: negative makes skip() return the end of the line */
0, /* n[0] */
0 /* n[1] */
};
static int nfields = 0; /* index of the last key in fields[] (0 = no explicit keys) */
static int error = 2; /* exit status used by term(); cleared on success */
static int not_c; /* flag showing if LC_COLLATE is not C locale */
static int modflg = 0; /* if -d, -i or -f is set */
static int collsize = 0; /* size of each collation buffer while sorting, in wchar_t */
static wchar_t *collb1, *collb2; /* scratch buffers for transformed keys in cmp() */
static wchar_t *months[12]; /* month abbreviations filled in by month_init() */
static wchar_t decpnt; /* decimal point */
static wchar_t mon_decpnt; /* decimal point for monetary */
static wchar_t thousands_sep; /* thousands separator */
static wchar_t mon_thousands_sep; /* thousands separator for monetary */
/*
 * Time structure handed to the formatter in month_init(), which overwrites
 * tm_mon before each use.
 * NOTE(review): the initializer is positional and depends on the member
 * order of struct tm on the target platform - confirm what 1 and 86 land on.
 */
static struct tm ct = {
0, 0, 1, 0, 86, 0, 0};
/*
 * Program entry point: parse options and key specifications, reserve the
 * work area above the initial break, then either check one file (-c),
 * merge the inputs (-m), or sort and then merge the resulting runs.
 * Setup failures return 2; the sort/merge path leaves through term(),
 * which removes temporary files and exits with `error'.
 */
int
main(argc, argv)
char **argv;
{
int a;
int i;
int nf; /* number of slots currently allocated in fields[] */
int oldmaxrec;
char *arg;
char *tabarg;
struct field *p;
struct field *q;
unsigned int maxalloc;
unsigned int newalloc;
/*
* close any file descriptors that may have been
* left open -- we may need them all
*
* the above comment is from the original sort.
*
* Because sort depends on libmapmalloc which opens
* a file descriptor, #3, for /dev/zero,
* "for" statement is moved to the position before setlocale()
* so that it won't close /dev/zero.
*
* Perhaps it is not necessary to close these file descriptors.
*/
for (i = 4; i < 4 + N; i++)
(void) close(i);
(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
#endif
(void) textdomain(TEXT_DOMAIN);
not_c = strcmp("C", setlocale(LC_COLLATE, NULL));
/* NOTE(review): the result of this malloc is used without a NULL check. */
fields = (struct field *)malloc(NF*sizeof (struct field));
nf = NF;
fields[nfields] = proto;
initree();
/* File operands are compacted into the front of argv as they are found. */
eargv = argv;
tryfor = DEFMEM;
initdecpnt();
while (--argc > 0) {
if (**++argv == '-') {
arg = *argv;
switch (*++arg) {
case '\0':
/* a lone "-" names standard input */
if (arg[-1] == '-')
eargv[eargc++] = "-";
break;
case 'o':
/* output file: attached to the option or in the next argument */
if (*(arg+1) != '\0')
outfil = arg + 1;
else {
outfil = get_subopt(argc, argv, 'o');
argc--;
argv++;
}
break;
case 'k':
/* new key: grow fields[] by NF slots when it is full */
if (++nfields >= nf) {
if ((fields = (struct field *)
realloc(fields, (nf + NF) *
sizeof (struct field))) == NULL) {
(void) fprintf(stderr, gettext(
"sort: too many keys\n"));
return (2);
}
nf += NF;
}
fields[nfields] = proto;
/* a key rejected by field() is dropped again */
if (field(get_subopt(argc, argv, 'k'), 0, 1)
< 0)
fields[nfields--] = proto;
argc--;
argv++;
break;
case 'T':
/* directory for temporary files */
if (--argc > 0)
dirtry = *++argv;
break;
case 't':
/* field separator: separate argument (single byte only) or attached */
if (*(arg+1) == '\0') {
tabarg = get_subopt(argc, argv, 't');
if (tabarg[1] != '\0')
usage();
(void) mbtowc(&tabchar, tabarg,
MB_CUR_MAX);
argc--;
argv++;
} else
(void) mbtowc(&tabchar, arg+1,
MB_CUR_MAX);
break;
default:
/* option letters, or a -pos2 ending the current key */
if (field(++*argv, nfields > 0, 0) < 0)
fields[nfields--] = proto;
break;
}
} else if (**argv == '+') {
/* +pos1 starts a new key in the obsolescent syntax */
if (++nfields >= nf) {
if ((fields = (struct field *)
realloc(fields, (nf + NF) *
sizeof (struct field))) == NULL) {
(void) fprintf(stderr,
gettext("sort: too many keys\n"));
return (2);
}
nf += NF;
}
fields[nfields] = proto;
(void) field(++*argv, 0, 0);
} else
eargv[eargc++] = *argv;
}
/*
 * Keys that still carry the default ordering options inherit the
 * global options recorded in fields[0].
 */
q = &fields[0];
for (a = 1; a <= nfields; a++) {
p = &fields[a];
if (p->code != proto.code) continue;
if (p->ignore != proto.ignore) continue;
if (p->fcmp != proto.fcmp) continue;
if (p->rflg != proto.rflg) continue;
if (p->bflg[0] != proto.bflg[0]) continue;
if (p->bflg[1] != proto.bflg[1]) continue;
p->code = q->code;
p->ignore = q->ignore;
p->fcmp = q->fcmp;
p->rflg = q->rflg;
p->bflg[0] = p->bflg[1] = q->bflg[0];
}
/* no file operands: read standard input */
if (eargc == 0)
eargv[eargc++] = "-";
if (cflg && eargc > 1) {
(void) fprintf(stderr,
gettext("sort: can check only 1 file\n"));
return (2);
}
safeoutfil();
/*
 * The work area starts at the current break.
 * NOTE(review): ulimit(3, ...) presumably returns the highest permitted
 * break address - confirm against the platform's ulimit(2).
 */
lspace = (int *) sbrk(0);
maxbrk = (int *) ulimit(3, 0L);
if (!mflg && !cflg)
if ((alloc = grow_core(tryfor, (unsigned) 0)) == 0) {
(void) fprintf(stderr, gettext(
"sort: allocation error before sort\n"));
return (2);
}
a = -1;
if ((filep = tempnam(dirtry, "stm")) == NULL) {
(void) fprintf(stderr, gettext(
"sort: allocation error on temp name\n"));
return (2);
}
/* add the suffix "aa", used to keep count of files */
/* NOTE(review): the result of this malloc is used without a NULL check. */
file1 = (char *) malloc(strlen(filep) + 3); /* 3 = strlen("aa") + 1 */
(void) strcpy(file1, filep);
(void) strcat(file1, "aa");
free(filep);
/* set filep to point to beginning of suffix */
filep = file1;
while (*filep)
filep++;
filep -= 2;
file = file1;
/* verify that a temporary file can be created, then remove it again */
a = creat(file, 0600);
if (a < 0) {
diag1(gettext("sort: can't locate temp: "), errno);
return (2);
}
(void) close(a);
(void) unlink(file);
/* clean up on signals, but leave already-ignored signals ignored */
if (sigset(SIGHUP, SIG_IGN) != SIG_IGN)
(void) sigset(SIGHUP, (void (*)(int))term);
if (sigset(SIGINT, SIG_IGN) != SIG_IGN)
(void) sigset(SIGINT, (void (*)(int))term);
(void) sigset(SIGPIPE, (void (*)(int))term);
if (sigset(SIGTERM, SIG_IGN) != SIG_IGN)
(void) sigset(SIGTERM, (void (*)(int))term);
nfiles = eargc;
if (cflg) {
checksort();
return (0);
}
/* only executed when -c is not used */
maxrec = 0;
if (!mflg) {
if (not_c && modflg) {
#define INIT_COLL_LEN 128
collsize = INIT_COLL_LEN;
collb1 = (wchar_t *)
malloc(collsize * sizeof (wchar_t) * 2);
}
sort();
if (ferror(stdin))
rderror(NULL);
(void) fclose(stdin);
}
if (maxrec == 0) { /* sorting phase is skipped */
#define INIT_MAXREC 256
maxrec = INIT_MAXREC;
if (not_c && modflg) {
/* collbuf is not allocated because sorting phase is skipped */
/* otherwise collbuf is allocated */
/* If LC_COLLATE == C, no collation buffers are needed */
collb1 = (wchar_t *) malloc(maxrec *
sizeof (wchar_t) * 2);
collb2 = collb1 + maxrec;
}
}
/*
 * Merge work area: N stdio buffers plus N + 1 record buffers. The merge
 * order is lowered until the area fits below the break limit.
 */
alloc = (N + 1) * (maxrec * sizeof (wchar_t)) + N * BUFSIZ;
maxalloc = (maxbrk - lspace) * sizeof (int *);
for (nway = N; nway >= 2; --nway) {
if (alloc < maxalloc)
break;
alloc -= maxrec * sizeof (wchar_t) + BUFSIZ;
}
if (nway < 2 || brk((char *)lspace + alloc) != 0) {
(void) fprintf(stderr, gettext(
"sort: allocation error before merge\n"));
term();
}
wasfirst = notfirst = 0;
oldmaxrec = maxrec;
/* with -m the inputs themselves are merged; otherwise only the temporary runs */
a = mflg ? 0 : eargc;
if ((i = nfiles - a) > nway) { /* Do leftovers early */
if ((i %= (nway - 1)) == 0)
i = nway - 1;
if (i != 1) {
newfile();
setbuf(os, bufout);
merge(a, a+i);
a += i;
}
}
/* intermediate passes: merge nway files at a time into new temporary files */
for (; a+nway < nfiles || unsafeout && (a < eargc); a = i) {
i = a+nway;
if (i >= nfiles)
i = nfiles;
newfile();
setbuf(os, bufout);
/* maxrec grew during an earlier merge: enlarge the work area if allowed */
if (oldmaxrec < maxrec) {
newalloc = (nway + 1) * maxrec * sizeof (wchar_t);
if (newalloc <= maxalloc) {
alloc = newalloc;
(void) brk((char *) lspace + alloc);
}
oldmaxrec = maxrec;
}
merge(a, i);
}
/* final pass: merge what is left into the real output */
if (a != nfiles) {
oldfile();
setbuf(os, bufout);
if (oldmaxrec < maxrec) {
newalloc = (nway + 1) * maxrec * sizeof (wchar_t);
if (newalloc <= maxalloc) {
alloc = newalloc;
(void) brk((char *) lspace + alloc);
}
oldmaxrec = maxrec;
}
merge(a, nfiles);
}
error = 0;
term();
/*NOTREACHED*/
}
/*
 * Sorting phase: read the input files one coreload at a time into the work
 * area starting at lspace, sort each coreload with msort() and write it
 * either to a new temporary file or, when everything fitted into a single
 * coreload and no temporary file exists yet, to the final output.
 * Updates maxrec (and the collation buffers) as longer records are seen.
 */
static void
sort()
{
register wchar_t *cp; /* next free position in the record area */
register wchar_t **lp; /* next free slot in the pointer area (grows downward) */
FILE *iop;
wchar_t *keep; /* start of a partially read record, or 0 */
wchar_t *ekeep = 0; /* keep lint quiet */
wchar_t **mp;
wchar_t **lmp;
wchar_t **ep; /* one past the top of the work area */
int n;
int done, i, first;
char *f;
/*
** Records are read in from the front of the buffer area.
** Pointers to the records are allocated from the back of the buffer.
** If a partially read record exhausts the buffer, it is saved and
** then copied to the start of the buffer for processing with the
** next coreload.
*/
first = 1;
done = 0;
keep = 0;
i = 0;
ep = (wchar_t **) (((char *)lspace) + alloc);
if ((f = setfil(i++)) == NULL) /* open first file */
iop = stdin;
else if ((iop = fopen(f, "r")) == NULL)
cant(f);
setbuf(iop, bufin);
do {
lp = ep - 1;
cp = (wchar_t *) lspace;
*lp-- = cp;
if (keep != 0) /* move record from previous coreload */
for (; keep < ekeep; *cp++ = *keep++);
while ((wchar_t *)lp - cp > 1) {
if (fgetws(cp, (wchar_t *)lp - cp, iop) == NULL)
n = 0;
else
n = wslen(cp);
if (n == 0) {
/* nothing read: error or end of the current input file */
if (ferror(iop))
rderror(f);
if (keep == 0)
if (i < eargc) {
/* advance to the next input file */
(void) fclose(iop);
if ((f = setfil(i++)) == NULL)
iop = stdin;
else if ((iop = fopen(f, "r"))
== NULL)
cant(f);
setbuf(iop, bufin);
continue;
} else {
done++;
break;
}
}
cp += n - 1;
if (*cp == L'\n') {
/* complete record: step over the newline and the terminator */
cp += 2;
if (cp - *(lp+1) > maxrec) {
maxrec = cp - *(lp+1);
if (collsize != 0 &&
collsize < maxrec) {
free(collb1);
/* the special malloc from libmapmalloc.so always succeed */
collsize = maxrec +
INIT_COLL_LEN;
collb1 = (wchar_t *)
malloc(collsize *
sizeof (wchar_t) * 2);
}
}
*lp-- = cp;
keep = 0;
} else if (cp + 2 < (wchar_t *) lp) {
/* the last record of the input */
/* file is missing a NEWLINE */
if (f == NULL)
warning();
else
(void) fprintf(stderr, gettext(
"sort: warning: missing NEWLINE added at end of input file %s\n"), f);
*++cp = L'\n';
*++cp = 0;
*lp-- = ++cp;
keep = 0;
} else { /* the buffer is full */
keep = *(lp+1);
ekeep = ++cp;
}
/* only during the first coreload: try to enlarge the work area */
if ((wchar_t *)lp - cp <= 2 && first == 1) {
/* full buffer */
tryfor = alloc;
tryfor = grow_core(tryfor, alloc);
if (tryfor == 0)
/* could not grow */
first = 0;
else { /* move pointers */
lmp = ep +
(tryfor/sizeof (wchar_t **) - 1);
mp = ep - 1;
while (mp > lp)
*lmp-- = *mp--;
ep += tryfor/sizeof (wchar_t **);
lp += tryfor/sizeof (wchar_t **);
alloc += tryfor;
}
}
}
/* a partial record that already starts at the bottom of the area cannot fit */
if (keep != 0 && *(lp+1) == (wchar_t *) lspace) {
(void) fprintf(stderr, gettext(
"sort: fatal: record too large\n"));
term();
}
first = 0;
/* step back to the lowest slot holding the start of a complete record */
lp += 2;
if (done == 0 || nfiles != eargc)
newfile();
else
oldfile();
setbuf(os, bufout);
collb2 = collb1 + collsize;
msort(lp, ep);
if (ferror(os))
wterror(gettext("sort: write error while sorting: "));
(void) fclose(os);
} while (done == 0);
}
/*
 * Sort the record pointers in [a, b) and write the records to `os'.
 * The range is split into at most TREEZ blocks, each block is sorted with
 * qksort(), and the blocks are then merged through the treep[] array
 * maintained by insert(). With -u, records that compare equal to the one
 * just written are skipped.
 */
static void
msort(wchar_t **a, wchar_t **b)
{
register struct btree **tp;
register int i, j, n;
wchar_t *save; /* last record written (local; shadows the file-scope save) */
int blkcnt[TREEZ]; /* records remaining in each block */
wchar_t **blkcur[TREEZ]; /* one past the next record to take from each block */
i = (b - a);
if (i < 1)
return;
else if (i == 1) {
wline(*a);
return;
} else if (i >= TREEZ)
n = TREEZ; /* number of blocks of records */
else n = i;
/* break into n sorted subgroups of approximately equal size */
tp = &(treep[0]);
j = 0;
do {
(*tp++)->rn = j;
b = a + (blkcnt[j] = i / n);
qksort(a, b);
blkcur[j] = a = b;
i -= blkcnt[j++];
} while (--n > 0);
n = j;
/* make a sorted binary tree using the first record in each group */
i = 0;
while (i < n) {
/* blocks are consumed from their high end downward */
(*--tp)->rp = *(--blkcur[--j]);
insert(tp, ++i);
}
wasfirst = notfirst = 0;
bonus = cmpsave(n);
j = uflg;
tp = &(treep[0]);
while (n > 0) {
wline((*tp)->rp);
if (j) save = (*tp)->rp;
/* Get another record and insert. Bypass repeats if uflg */
do {
i = (*tp)->rn;
if (j)
/* skip records whose first character qksort() zeroed as duplicates */
while ((blkcnt[i] > 1) &&
(**(blkcur[i]-1) == '\0')) {
--blkcnt[i];
--blkcur[i];
}
if (--blkcnt[i] > 0) {
(*tp)->rp = *(--blkcur[i]);
insert(tp, n);
} else {
/* block exhausted: drop it from the front of the array */
if (--n <= 0) break;
bonus = cmpsave(n);
tp++;
}
} while (j && (*compare)((*tp)->rp, save) == 0);
}
}
/* Insert the element at tp[0] into its proper place in the array of size n */
/* Pretty much Algorithm B from 6.2.1 of Knuth, Sorting and Searching */
/* Special case for data that appears to be in correct order */
/*
 * tp[1..n-1] is assumed to be ordered already. The counters wasfirst and
 * notfirst record how often the new element did or did not stay in front;
 * while wasfirst is ahead, a single comparison against tp[1] is tried
 * before falling back to the binary search.
 */
static void
insert(tp, n)
struct btree **tp;
int n;
{
register struct btree **lop, **hip, **midp;
register int c;
struct btree *hold;
midp = lop = tp;
hip = lop++ + (n - 1);
/* shortcut: the new element does not sort after its neighbour */
if ((wasfirst > notfirst) && (n > 2) &&
((*compare)((*tp)->rp, (*lop)->rp) >= 0)) {
wasfirst += bonus;
return;
}
/* binary search over tp[1..n-1] */
while ((c = hip - lop) >= 0) {
/* leave midp at the one tp is in front of */
midp = lop + c / 2;
if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0)
break; /* match */
if (c < 0)
lop = ++midp; /* c < 0 => tp > midp */
else
hip = midp - 1; /* c > 0 => tp < midp */
}
c = midp - tp;
if (--c > 0) { /* number of moves to get tp just before midp */
/* shift the intervening entries down one slot and drop tp[0] into the gap */
hip = tp;
lop = hip++;
hold = *lop;
do
*lop++ = *hip++;
while (--c > 0);
*lop = hold;
notfirst++;
} else
wasfirst += bonus;
}
/*
 * Merge the files numbered a .. b-1 (as named by setfil()) onto `os'.
 * Each file gets a stdio buffer and a record buffer inside the work area
 * (or a malloc'ed record buffer when the area is too small). The current
 * record of every file is kept ordered in treep[] by insert(). With -u,
 * records equal to the one just written are dropped. Temporary input
 * files are unlinked afterwards and `os' is closed.
 */
static void
merge(a, b)
{
FILE *tfile[N];
wchar_t *buffer;
register int nf; /* number of merge files */
register struct btree **tp;
register int i, j;
char *f;
char *iobuf;
struct btree *bptr;
/*
* Memory allocation policy:
*
* lspace == buffer ----> +START OF HEAP------------------+
* tfile[0] (FILE *) | IO buffer for file 0 |
* tfile[1] (FILE *) | IO buffer for file 1 |
* ... . .
* tfile[i] (FILE *) uses->| IO buffer (BUFSIZE bytes) |
* ... . .
* tfile[N-1] (FILE *) | |
* save -->|-------------------------------|
* |<maxrec> characters for temp? |
* |-------------------------------|
* |<maxrec> characters for file 0 |
* |<maxrec> characters for file 1 |
* treep[0] . .
* treep[1] . .
* ... /------->|<maxrec> characters for file i |
* treep[i].rp --/ . .
* .rn == i (?) . .
* ... . .
* treep[N-1] |-------------------------------|
*
*/
iobuf = (char *) lspace;
save = (wchar_t *) ((char *)lspace + nway * BUFSIZ);
save_alloc = 0;
buffer = save + maxrec;
tp = &(treep[0]);
for (nf = 0, i = a; i < b; i++) {
f = setfil(i);
if (f == 0)
tfile[nf] = stdin;
else if ((tfile[nf] = fopen(f, "r")) == NULL)
cant(f);
bptr = *tp;
bptr->rn = nf;
bptr->recsz = maxrec;
/* record buffer would run past the work area: fall back to malloc */
if ((char *) (buffer + maxrec) > ((char *) lspace + alloc)) {
if (bptr->allocflag)
free(bptr->rp);
bptr->rp = (wchar_t *) malloc
(maxrec * sizeof (wchar_t));
bptr->allocflag = 1;
} else {
if (bptr->allocflag)
free(bptr->rp);
bptr->rp = buffer;
bptr->allocflag = 0;
}
buffer += maxrec;
setbuf(tfile[nf], iobuf);
iobuf += BUFSIZ;
/* keep the file only if it supplied a first record */
if (xrline(tfile[nf], (*tp)) == 0) {
nf++;
tp++;
} else {
if (ferror(tfile[nf]))
rderror(f);
(void) fclose(tfile[nf]);
}
}
/* make a sorted btree from the first record of each file */
--tp;
i = 1;
while (i++ < nf)
insert(--tp, i);
bonus = cmpsave(nf);
tp = &(treep[0]);
j = uflg;
while (nf > 0) {
wline((*tp)->rp);
if (j) cline(save, (*tp)->rp);
/* Get another record and insert. Bypass repeats if uflg */
do {
i = (*tp)->rn;
if (xrline(tfile[i], (*tp))) {
/* this file is finished: drop it from the front of the array */
if (ferror(tfile[i]))
rderror(setfil(i+a));
(void) fclose(tfile[i]);
if (--nf <= 0) break;
++tp;
bonus = cmpsave(nf);
} else insert(tp, nf);
} while (j && (*compare)((*tp)->rp, save) == 0);
}
/* remove the temporary files that were merged (never the user's inputs) */
for (i = a; i < b; i++) {
if (i >= eargc)
(void) unlink(setfil(i));
}
if (ferror(os))
wterror(gettext("sort: write error while merging: "));
(void) fclose(os);
}
/*
 * Copy the NUL-terminated wide string at fp into tp, terminator included.
 * The destination must be large enough; no bounds are checked.
 */
static void
cline(wchar_t *tp, wchar_t *fp)
{
	wchar_t ch;

	do {
		ch = *fp++;
		*tp++ = ch;
	} while (ch != L'\0');
}
/*
 * Read the next record of a merge input into btp->rp.
 * Returns 0 when a record (terminated by newline) is available and 1 when
 * nothing could be read. A final record lacking a newline gets one added,
 * with a warning. A record that does not fit in btp->recsz characters is
 * moved to / continued in a heap buffer that grows in INC_MAXREC steps;
 * maxrec, the collation buffers and (with -u) `save' are enlarged to match.
 */
static int
xrline(FILE *iop, struct btree *btp)
{
register int n;
int sz = btp->recsz;
wchar_t *s = btp->rp;
int y; /* return value of the long-record path */
if (fgetws(s, sz, iop) == NULL)
n = 0;
else
n = wslen(s);
if (n == 0)
return (1);
if (*(s+n-1) == L'\n')
return (0);
else if (n < sz - 1) {
/* short read without newline: last record of the file */
warning();
s += n - 1;
*++s = L'\n';
*++s = L'\0';
return (0);
} else {
/* the buffer filled up before a newline was seen */
#define INC_MAXREC 128
sz += INC_MAXREC;
if (btp->allocflag) {
btp->rp = (wchar_t *)
realloc(btp->rp, sz * sizeof (wchar_t));
} else {
/* first overflow: move the record out of the work area onto the heap */
btp->rp = (wchar_t *) malloc(sz * sizeof (wchar_t));
(void) wscpy(btp->rp, s);
btp->allocflag = 1;
}
s = btp->rp + n;
for (;;) {
if (fgetws(s, INC_MAXREC + 1, iop) == NULL)
n = 0;
else
n = wslen(s);
if (n == 0) {
y = 1;
break;
}
if (*(s + n - 1) == L'\n') {
y = 0;
break;
} else if (n < INC_MAXREC) {
warning();
s += n - 1;
*++s = L'\n';
*++s = L'\0';
y = 0;
break;
} else {
sz += INC_MAXREC;
btp->rp = (wchar_t *)
realloc(btp->rp, sz * sizeof (wchar_t));
/* NOTE(review): s is derived from btp->rp before the NULL check below. */
s = btp->rp + sz - INC_MAXREC - 1;
if (btp->rp == NULL) {
/*
* it is impossible in this special malloc
* which is from libmapmalloc
*/
(void) fprintf(stderr, gettext(
"out of memory\n"));
term();
}
}
}
/* record the new maximum and resize everything that depends on it */
if (maxrec < (sz - (INC_MAXREC - n))) {
maxrec = sz - (INC_MAXREC - n);
if (not_c && modflg) {
free(collb1);
collb1 = (wchar_t *) malloc
(maxrec * sizeof (wchar_t) * 2);
collb2 = collb1 + maxrec;
}
if (uflg) { /* expand save */
s = save;
save = (wchar_t *)
malloc(maxrec * sizeof (wchar_t));
(void) wscpy(save, s);
/* only a previously malloc'ed save may be freed */
if (save_alloc != 0)
free(s);
save_alloc = 1;
}
}
btp->recsz = sz;
return (y);
}
}
/*
 * Read the next record for checksort() into lines[i].
 * Returns 0 when a record is available and 1 when nothing could be read.
 * A final record without newline gets one added, with a warning. When the
 * record does not fit in maxrec characters, the record is moved to the
 * bottom of the work area, the area is extended with brk() in INC_MAXREC
 * steps while reading continues, and the other line is re-placed above
 * it; the collation buffers are reallocated for the new maxrec.
 */
static int
yrline(FILE *iop, int i)
{
register int n;
wchar_t *s, *t;
int sz;
int y; /* return value of the long-record path */
s = lines[i];
if (fgetws(s, maxrec, iop) == NULL)
n = 0;
else
n = wslen(s);
if (n == 0)
return (1);
if (*(s + n -1) == L'\n')
return (0);
else if (n < maxrec - 1) {
warning();
s += n - 1;
*++s = L'\n';
*++s = L'\0';
return (0);
} else {
/* NOTE(review): the result of this malloc is used without a NULL check. */
t = (wchar_t *) malloc(maxrec * sizeof (wchar_t));
(void) wscpy(t, lines[1-i]); /* save the other line */
if (lines[i] != (wchar_t *) lspace) { /* move around lines[i] */
(void) wscpy((wchar_t *) lspace, lines[i]);
s = lines[i] = (wchar_t *) lspace;
}
sz = INC_MAXREC + 1;
s += n;
for (;;) {
/* room for INC_MAXREC more characters in each of the two lines */
maxrec += INC_MAXREC;
alloc += INC_MAXREC * sizeof (wchar_t) * 2;
if (brk((char *) lspace + alloc) != 0) {
(void) fprintf(stderr, gettext(
"sort: fatal: line too long\n"));
term();
}
if (fgetws(s, sz, iop) == NULL)
n = 0;
else
n = wslen(s);
if (n == 0) {
y = 1;
break;
}
s += n - 1;
if (*s == L'\n') {
y = 0;
break;
} else if (n < sz - 1) {
warning();
*++s = L'\n';
*++s = '\0';
y = 0;
break;
} else {
s++;
}
}
lines[1-i] = (wchar_t *) lspace + maxrec;
(void) wscpy(lines[1-i], t); /* restore */
free(t);
if (not_c && modflg) {
free(collb1);
collb1 = (wchar_t *)
malloc(maxrec * 2 * sizeof (wchar_t));
collb2 = collb1 + maxrec;
}
return (y);
}
}
/*
 * Write record s to the current output stream `os'; a stream error is
 * reported through wterror().
 */
static void
wline(wchar_t *s)
{
	(void) fputws(s, os);
	if (!ferror(os))
		return;
	wterror(gettext("sort: write error while sorting: "));
}
/*
 * Implementation of -c: read the single input file record by record and
 * verify that each record does not sort before its predecessor (and, with
 * -u, is not equal to it). The first violation is reported through
 * disorder(). An empty input terminates the program with status 0;
 * otherwise the function returns normally when the input is in order.
 */
static void
checksort()
{
char *f; /* Temp file name. */
register int i, j, r;
register FILE *iop;
f = setfil(0);
if (f == 0)
iop = stdin;
else if ((iop = fopen(f, "r")) == NULL)
cant(f);
setbuf(iop, bufin);
/* two record buffers of maxrec characters each, placed at lspace */
maxrec = INIT_MAXREC;
alloc = maxrec * 2 * sizeof (wchar_t);
if (alloc > (maxbrk - lspace) * sizeof (int *)) {
maxrec = (maxbrk - lspace) / 2;
alloc = maxrec * 2 * sizeof (int *);
}
/* NOTE(review): the result of brk() is not checked here. */
(void) brk((char *) lspace + alloc);
lines[0] = (wchar_t *) lspace;
lines[1] = (wchar_t *) lspace + maxrec;
if (not_c && modflg) {
collb1 = (wchar_t *) malloc(maxrec * 2 * sizeof (wchar_t));
collb2 = collb1 + maxrec;
}
if (yrline(iop, 0)) {
if (ferror(iop)) {
rderror(f);
}
(void) fclose(iop);
exit(0);
}
/* i indexes the previous record, j the one being read */
i = 0; j = 1;
while (!yrline(iop, j)) {
r = (*compare)(lines[i], lines[j]);
if (r < 0)
disorder(gettext("sort: disorder: %ws\n"),
lines[j]);
if (r == 0 && uflg)
disorder(gettext("sort: non-unique: %ws\n"),
lines[j]);
r = i; i = j; j = r;
}
if (ferror(iop))
rderror(f);
(void) fclose(iop);
}
/*
 * Report the record t that violates the ordering and terminate with exit
 * status 1. The record's newline is overwritten with a terminator before
 * it is printed; the message is suppressed when built with XPG4 defined.
 */
static void
disorder(char *format, wchar_t *t)
{
	wchar_t *nl = t;

	while (*nl != L'\n')
		nl++;
	*nl = 0;
#ifndef XPG4
	(void) fprintf(stderr, format, t);
#endif
	error = 1;
	term();
}
/*
 * Open the next temporary file for writing as `os' and count it in
 * nfiles. Failure to create the file is fatal.
 */
static void
newfile()
{
	char *name = setfil(nfiles);

	os = fopen(name, "w");
	if (os == NULL) {
		int err = errno;

		diag2(gettext("sort: can't create %s: "), name, err);
		term();
	}
	nfiles++;
}
/*
 * Map file number i to a file name.
 * Numbers below eargc are the input operands: the operand itself, or 0
 * when the operand is "-" (standard input). Higher numbers are temporary
 * files; their two-letter suffix is rewritten in place in the shared name
 * buffer, so the returned pointer is only valid until the next call.
 */
static char *
setfil(i)
register int i;
{
	int seq;

	if (i < eargc) {
		char *operand = eargv[i];

		if (operand[0] == '-' && operand[1] == '\0')
			return (0);
		return (operand);
	}
	seq = i - eargc;
	filep[0] = seq / 26 + 'a';
	filep[1] = seq % 26 + 'a';
	return (file);
}
/*
 * Select the final output as `os': the -o file when one was given,
 * standard output otherwise. Failure to create the -o file is fatal.
 */
static void
oldfile()
{
	if (!outfil) {
		os = stdout;
		return;
	}
	os = fopen(outfil, "w");
	if (os == NULL) {
		int err = errno;

		diag2(gettext("sort: can't create %s: "), outfil, err);
		term();
	}
}
/*
 * For -m with -o: count in unsafeout how many of the last N input
 * operands are the same file (same device and inode) as the output file.
 * Does nothing when not merging, when no output file was named, or when
 * the output file cannot be stat'ed.
 */
static void
safeoutfil(void)
{
	struct stat out_st, in_st;
	int idx;

	if (!mflg || outfil == 0)
		return;
	if (stat(outfil, &out_st) == -1)
		return;
	idx = eargc - N;	/* -N is suff., not nec. */
	if (idx < 0)
		idx = 0;
	while (idx < eargc) {
		if (stat(eargv[idx], &in_st) != -1 &&
		    out_st.st_dev == in_st.st_dev &&
		    out_st.st_ino == in_st.st_ino)
			unsafeout++;
		idx++;
	}
}
/* Report that file f could not be opened (with the errno text) and terminate. */
static void
cant(char *f)
{
	int err = errno;

	diag2(gettext("sort: can't open %s: "), f, err);
	term();
}
/*
 * Print a diagnostic prefix followed by the system error text.
 *
 * format  - message prefix, already translated; printed verbatim. It is
 *           NOT interpreted as a printf format: no arguments accompany it,
 *           so it is written with fputs() to keep a stray '%' in a message
 *           or translation from being treated as a conversion.
 * errcode - errno value to describe; "Error <n>" is printed when
 *           strerror() has no text for it.
 */
static void
diag1(const char *format, int errcode)
{
	char *s = strerror(errcode);

	(void) fputs(format, stderr);
	if (s == NULL)
		(void) fprintf(stderr, gettext("Error %d\n"), errcode);
	else
		(void) fprintf(stderr, "%s\n", s);
}
/*
 * Print a diagnostic that names a file, followed by the system error text.
 *
 * format  - printf format taking one string argument (the file name)
 * file    - file name substituted into format
 * errcode - errno value to describe; "Error <n>" is printed when
 *           strerror() has no text for it
 */
static void
diag2(const char *format, const char *file, int errcode)
{
	const char *reason = strerror(errcode);

	(void) fprintf(stderr, format, file);
	if (reason != NULL) {
		(void) fprintf(stderr, "%s\n", reason);
		return;
	}
	(void) fprintf(stderr, gettext("Error %d\n"), errcode);
}
/*
 * Remove every temporary file that may exist and exit with the status in
 * `error'. Also installed as the signal handler in main().
 */
static void
term()
{
	int idx;

	if (nfiles == eargc)
		nfiles++;
	/* <= in case of interrupt with nfiles not updated */
	idx = eargc;
	while (idx <= nfiles) {
		(void) unlink(setfil(idx));
		idx++;
	}
	exit(error);
}
/*
 * Determine the sign of the number in [pa, la).
 * Returns 0 when the text is empty or contains no non-zero digit in its
 * integer part or in the digits following the decimal point; otherwise
 * returns -1 when the text starts with '-' and 1 when it does not.
 */
static int
getsign(wchar_t *pa, wchar_t *la)
{
	int sign = 1;

	if (pa == la)
		return (0);
	if (*pa == L'-') {
		sign = -1;
		pa++;
	}
	/* integer part */
	for (; pa < la && iswdigit(*pa); pa++) {
		if (*pa != L'0')
			return (sign);
	}
	/* without a decimal point there is nothing more to inspect */
	if (*pa != decpnt)
		return (0);
	/* fractional part */
	for (pa++; pa < la && iswdigit(*pa); pa++) {
		if (*pa != L'0')
			return (sign);
	}
	return (0);
}
/*
 * Compare records i and j key by key.
 * Returns a positive value when i orders before j, a negative value when
 * i orders after j (both inverted for keys with the reverse flag), and
 * moves on to the next key when a key compares equal. When every key is
 * equal the result is 0 under -u, otherwise the whole-line comparison of
 * cmpa(). Keys start at fields[1] when any key was specified, else the
 * single global description fields[0] is applied to the whole line.
 */
static int
cmp(wchar_t *i, wchar_t *j)
{
wchar_t *pa, *pb; /* current position in the key of i / j */
int (*ignore)(wchar_t);
int sa;
int sb;
wchar_t (*code)(wchar_t);
int a, b;
int k;
wchar_t *la, *lb; /* end of the key of i / j */
wchar_t *ipa, *ipb, *jpa, *jpb;
struct field *fp;
wchar_t *p1, wa, wb;
int ret;
for (k = nfields > 0; k <= nfields; k++) {
fp = &fields[k];
pa = i;
pb = j;
if (k) {
la = skip(pa, fp, 1);
pa = skip(pa, fp, 0);
lb = skip(pb, fp, 1);
pb = skip(pb, fp, 0);
} else {
la = eol(pa);
lb = eol(pb);
}
if (fp->fcmp == NUM) {
/* numeric key: sa/sb hold the sign of each number times the reverse flag */
sa = sb = fp->rflg;
while (iswspace(*pa) && pa < la)
pa++;
while (iswspace(*pb) && pb < lb)
pb++;
if (pa == la) { /* i is 0 */
if (b = getsign(pb, lb))
return (sb * b);
continue;
} else if (pb == lb) { /* j is 0 */
if (a = getsign(pa, la))
return ((-sa) * a);
continue;
}
if (*pa == '-') {
pa++;
sa = -sa;
}
if (*pb == '-') {
pb++;
sb = -sb;
}
/* find the end of each integer part (digits and thousands separators) */
for (ipa = pa; ipa < la; ipa++) {
if (!(iswdigit(*ipa) ||
(*ipa == thousands_sep) ||
(*ipa == mon_thousands_sep)))
break;
}
for (ipb = pb; ipb < lb; ipb++) {
if (!(iswdigit(*ipb) ||
(*ipb == thousands_sep) ||
(*ipb == mon_thousands_sep)))
break;
}
jpa = ipa;
jpb = ipb;
a = 0;
/* same sign: compare integer digits right to left; the leftmost difference wins */
if (sa == sb)
while (ipa > pa && ipb > pb) {
ipa--;
ipb--;
while ((ipa > pa) &&
((*ipa == thousands_sep) ||
(*ipa == mon_thousands_sep)))
ipa--;
while ((ipb > pb) &&
((*ipb == thousands_sep) ||
(*ipb == mon_thousands_sep)))
ipb--;
if ((b = *ipb - *ipa) != 0)
a = b;
}
/* a non-zero digit in the surplus leading part of either number decides */
while (ipa > pa)
if ((*--ipa != L'0') &&
(*ipa != thousands_sep) &&
(*ipa != mon_thousands_sep))
return (-sa);
while (ipb > pb)
if ((*--ipb != L'0') &&
(*ipb != thousands_sep) &&
(*ipb != mon_thousands_sep))
return (sb);
if (a)
return (a*sa);
/* integer parts are equal: continue with the fractional digits */
if ((*(pa = jpa) == decpnt) ||
(*(pa = jpa) == mon_decpnt))
pa++;
if ((*(pb = jpb) == decpnt) ||
(*(pb = jpb) == mon_decpnt))
pb++;
if (sa == sb)
while (pa < la && iswdigit(*pa) &&
pb < lb && iswdigit(*pb))
if ((a = *pb++ - *pa++) != 0)
return (a*sa);
while (pa < la && iswdigit(*pa))
if (*pa++ != L'0')
return (-sa);
while (pb < lb && iswdigit(*pb))
if (*pb++ != L'0')
return (sb);
continue;
} else if (fp->fcmp == MON) {
/* month key: compare the month indexes returned by month() */
sa = fp->rflg*(month(pb)-month(pa));
if (sa)
return (sa);
else
continue;
}
code = fp->code;
ignore = fp->ignore;
if (!not_c)
goto loop;
if (modflg) {
/* copy the filtered, transformed keys into collb1/collb2 and collate those */
for (p1 = collb1; pa < la && *pa != L'\n'; pa++) {
if ((*ignore)(*pa))
continue;
*p1++ = (*code)(*pa);
}
*p1 = L'\0';
for (p1 = collb2; pb < lb && *pb != L'\n'; pb++) {
if ((*ignore)(*pb))
continue;
*p1++ = (*code)(*pb);
}
*p1 = L'\0';
ret = wscoll(collb1, collb2);
} else { /* no transformation is needed */
if (pa >= la || *pa == L'\n')
if (pb < lb && *pb != L'\n')
return (fp->rflg);
else continue;
if (pb >= lb || *pb == L'\n')
return (-fp->rflg);
/* terminate both keys in place for wscoll(), then restore the characters */
wa = *la;
*la = L'\0';
wb = *lb;
*lb = L'\0';
ret = wscoll(pa, pb);
*la = wa;
*lb = wb;
}
if (ret > 0)
return (- fp->rflg);
else if (ret < 0)
return (fp->rflg);
else
continue;
loop: /* executed only when LC_COLLATE == C */
while ((*ignore)(*pa) && *pa)
pa++;
while ((*ignore)(*pb) && *pb)
pb++;
/* an exhausted key orders before a longer one */
if (pa >= la || *pa == L'\n')
if (pb < lb && *pb != L'\n')
return (fp->rflg);
else continue;
if (pb >= lb || *pb == L'\n')
return (-fp->rflg);
if ((sa = (*code)(*pb++)-(*code)(*pa++)) == 0)
goto loop;
return (sa*fp->rflg);
}
if (uflg)
return (0);
return (cmpa(i, j));
}
/*
 * Whole-line comparison using the locale's collation (wscoll).
 * Returns 0 for equal lines, fields[0].rflg when pa collates before pb
 * and -fields[0].rflg when pa collates after pb.
 */
static int
cmpa(wchar_t *pa, wchar_t *pb)
{
	int order = wscoll(pa, pb);

	if (order > 0)
		return (-fields[0].rflg);
	if (order < 0)
		return (fields[0].rflg);
	return (0);
}
/*
 * Locate one end of a key inside record p.
 * j == 0 selects the key start and j == 1 the key end of field
 * description fp. Skips fp->m[j] fields (delimited by tabchar when one is
 * set, by runs of blanks otherwise), optionally skips blanks, then skips
 * fp->n[j] characters. Scanning never moves past the record's newline.
 * A negative fp->m[j] means "end of line".
 */
static wchar_t *
skip(wchar_t *p, struct field *fp, int j)
{
register i;
wchar_t tbc;
if ((i = fp->m[j]) < 0)
return (eol(p));
if ((tbc = tabchar) != 0)
while (--i >= 0) {
while (*p != tbc)
if (*p != L'\n')
p++;
else goto ret;
/* step over the separator, except after the last field of a key end */
if (i > 0 || j == 0)
p++;
} else
while (--i >= 0) {
/* a field is a run of blanks followed by a run of non-blanks */
while (blank(*p))
p++;
while (!blank(*p))
if (*p != L'\n')
p++;
else goto ret;
}
if (fp->bflg[j]) {
if (j == 1 && fp->m[j] > 0)
p++;
while (blank(*p))
p++;
}
i = fp->n[j];
while ((i-- > 0) && (*p != L'\n'))
p++;
ret:
return (p);
}
/*
 * Return a pointer to the first newline at or after p. The caller must
 * guarantee that a newline is present; no terminator check is made.
 */
static wchar_t *
eol(wchar_t *p)
{
	for (; *p != L'\n'; p++)
		continue;
	return (p);
}
static void
initree()
{
register struct btree **tpp, *tp;
register int i;
tp = &(tree[0]);
tpp = &(treep[0]);
i = TREEZ;
while (--i >= 0)
*tpp++ = tp++;
}
/*
 * Compute the `bonus' weight used by insert() for an array of n entries:
 * 0 when n < 2, otherwise the number of times n + 1 can be halved before
 * reaching zero, minus the final step (i.e. floor(log2(n + 1))).
 */
int
cmpsave(int n)
{
	int bits = 0;

	if (n < 2)
		return (0);
	n++;
	while ((n >>= 1) > 0)
		bits++;
	return (bits);
}
static int
field(char *s, int k, int kflag)
/*
* Fill field[nfields] for the current command argument s.
* k selects which end of the key is being filled in: 0 for the start
* position, non-zero for the end position (main() passes nfields > 0 for
* a -pos2 argument; a ',' inside a -k argument switches to the end).
* kflag is 1 if command line is XCU4 sort key field style:
* -k field_start[type][,field_end[type]]
* kflag is 0 if command line is obsolescent sort key style:
* [+pos1 [-pos2]]
* where pos1 and pos2 are of the form:
* field0_number[.first0_character][type]
*
* NOTE: the fields and characters in pos1 and pos2 are
* numbered from 0; XCU4 fields and characters (-k) are
* numbered from 1. The relation as specified in XCU4.2 is:
*
* The fully specified +pos1 -pos2 form with type
* modifiers T and U:
* +w.xT -y.zU
* is equivalent to:
* undefined (z == 0 & U contains b & -t is present)
* -k w+1.x+1T,y.0U (z == 0 otherwise)
* -k w+1.x+1T,y+1.zU (z > 0)
*
* Returns 0 on success and -1 when the key's end lies before its start,
* in which case `compare' is restored to its value on entry. Option
* letters that are not key modifiers (c, m, r, t, u, y) set global state
* and do not by themselves switch `compare' to cmp().
*/
{
int (*save_compare)(wchar_t *, wchar_t *) = compare;
struct field *p;
int d; /* 0 to address m[], or the offset from m[] to n[] after a '.' */
int i;
p = &fields[nfields];
for (; *s != 0; s++) {
d = 0;
switch (*s) {
case '\0':
return (0);
case ',':
k = (nfields > 0);
break;
case 'b':
p->bflg[k]++;
break;
case 'd':
p->ignore = dict;
modflg = 1;
break;
case 'f':
p->code = fold;
modflg = 1;
break;
case 'i':
p->ignore = nonprint;
modflg = 1;
break;
case 'c':
cflg = 1;
continue;
case 'm':
mflg = 1;
continue;
case 'M':
month_init();
p->fcmp = MON;
p->bflg[0]++;
break;
case 'n':
p->fcmp = NUM;
p->bflg[0]++;
break;
case 't':
/* separator attached to the option letter; skip its bytes */
i = mbtowc(&tabchar, s+1, MB_CUR_MAX);
if (i > 0)
s += i;
continue;
case 'r':
p->rflg = -1;
continue;
case 'u':
uflg = 1;
continue;
case 'y':
/* -y[kmem]: work area size in kilobytes, or MAXMEM when omitted */
if (*++s) {
if (isdigit(*s))
tryfor = number(&s) * 1024;
else
usage();
} else {
--s;
tryfor = MAXMEM;
}
continue;
case 'z': /* deprecated by use of libmapmalloc */
#if 0
/* we don't want NOISE */
(void) fprintf(stderr, gettext(
"sort: warning: -z is no longer supported. "
"sort automatically allocates buffers large enough "
"to hold the longest lines.\n"));
#endif
return (0);
case '.':
if (p->m[k] == -1) /* -m.n with m missing */
p->m[k] = 0;
/* d turns the m[] index below into the matching n[] element */
d = &fields[0].n[0]-&fields[0].m[0];
if (*++s == 0) {
--s;
p->m[k+d] = 0;
continue;
}
/* falls through to parse the number after the '.' */
default:
if (isdigit(*s)) {
p->m[k+d] = number(&s);
} else
usage();
}
/* a key modifier or position was seen: compare by key from now on */
compare = cmp;
}
if (kflag == 1) {
/* convert the 1-based -k numbering to the internal 0-based form */
if (p->m[0] != 0)
p->m[0]--;
if (p->n[0] != 0)
p->n[0]--;
if (p->n[1] != 0) /* this is not a bug */
p->m[1]--; /* decrement m[1] if n[1] != 0 */
} /* see comments above */
if (k) {
/* reject keys whose end position precedes the start position */
if ((p->m[1] != 0) && (p->m[0] > p->m[1])) {
compare = save_compare;
return (-1);
}
if ((p->m[0] == p->m[1]) &&
(p->n[0] != 0) &&
(p->n[0] > p->n[1])) {
compare = save_compare;
return (-1);
}
}
return (0);
}
/*
 * Parse the run of decimal digits at *ppa and return its value.
 *
 * On return *ppa points at the LAST digit consumed (not past it): the
 * callers sit inside a loop that advances the cursor by one afterwards.
 * When **ppa is not a digit, 0 is returned and *ppa is left unchanged.
 *
 * Characters are converted to unsigned char before being classified, as
 * the <ctype.h> functions require, and values too large for an int
 * saturate at INT_MAX instead of overflowing.
 */
static int
number(char **ppa)
{
	int n = 0;
	char *pa = *ppa;

	while (isdigit((unsigned char)*pa)) {
		int digit = *pa - '0';

		if (n > (INT_MAX - digit) / 10)
			n = INT_MAX;
		else
			n = n * 10 + digit;
		*ppa = pa++;
	}
	return (n);
}
/*
 * Exchange helpers for qksort(); both rely on a local `t' of the element
 * type and expand to several statements, so they are only usable as a
 * statement inside braces.
 * qsexc swaps *p and *q; qstexc rotates three elements (*p <- *r <- *q <- *p).
 */
#define qsexc(p, q) t = *p; *p = *q; *q = t
#define qstexc(p, q, r) t = *p; *p = *r; *r = *q; *q = t
/*
 * Sort the record pointers in [a, l) with the current `compare' function.
 * The pivot is the middle element, adjusted by looking at its two
 * neighbours when the half-length is at least MTHRESH. Elements equal to
 * the pivot are gathered in the middle range lp..hp. The function then
 * calls itself on the shorter remaining side and loops on the longer one.
 * With -u, the first character of every record in the equal range except
 * the last is set to 0 so that msort() can skip it.
 */
static void
qksort(wchar_t **a, wchar_t **l)
{
register wchar_t **i, **j;
register wchar_t **lp, **hp;
wchar_t **k;
int c, delta;
wchar_t *t;
unsigned n;
start:
if ((n = l-a) <= 1)
return;
n /= 2;
if (n >= MTHRESH) {
/* compare the middle element with both neighbours to pick the pivot */
lp = a + n;
i = lp - 1;
j = lp + 1;
delta = 0;
c = (*compare)(*lp, *i);
if (c < 0) --delta;
else if (c > 0) ++delta;
c = (*compare)(*lp, *j);
if (c < 0) --delta;
else if (c > 0) ++delta;
if ((delta /= 2) && (c = (*compare)(*i, *j)))
if (c > 0)
n -= delta;
else
n += delta;
}
hp = lp = a+n;
i = a;
j = l-1;
for (;;) {
/* advance i from the left; equal elements are moved next to the pivot range */
if (i < lp) {
if ((c = (*compare)(*i, *lp)) == 0) {
--lp;
qsexc(i, lp);
continue;
}
if (c < 0) {
++i;
continue;
}
}
loop:
/* advance j from the right in the same manner */
if (j > hp) {
if ((c = (*compare)(*hp, *j)) == 0) {
++hp;
qsexc(hp, j);
goto loop;
}
if (c > 0) {
if (i == lp) {
++hp;
qstexc(i, hp, j);
i = ++lp;
goto loop;
}
qsexc(i, j);
--j;
++i;
continue;
}
--j;
goto loop;
}
if (i == lp) {
/* partition complete: lp..hp holds the elements equal to the pivot */
if (uflg) {
k = lp;
while (k < hp)
**k++ = 0;
}
if (lp-a >= l-hp) {
qksort(hp+1, l);
l = lp;
} else {
qksort(a, lp);
a = hp+1;
}
goto start;
}
--lp;
qstexc(j, lp, i);
j = --hp;
}
}
/*
 * Fill months[] with the locale's twelve abbreviated month names ("%b")
 * as wide strings, for use by month().
 *
 * May be called more than once (once per 'M' key modifier); names from an
 * earlier call are released first. The formatter is given the size of the
 * buffer, the multibyte conversion is checked, and the wide name is
 * always terminated. A name that cannot be produced becomes the empty
 * string. Running out of memory is fatal.
 */
static void
month_init()
{
#define MAX_MON_LEN 20 /* Max. # of chars of month names. */
	char time_buf[MAX_MON_LEN*MB_LEN_MAX];
	wchar_t time_wbuf[MAX_MON_LEN];
	int i;

	for (i = 0; i < 12; i++) {
		ct.tm_mon = i;
		/* strftime returns 0 when the name does not fit */
		if (strftime(time_buf, sizeof (time_buf), "%b", &ct) == 0)
			time_buf[0] = '\0';
		/* leave room for the terminator; (size_t)-1 flags a bad sequence */
		if (mbstowcs(time_wbuf, time_buf, MAX_MON_LEN - 1) ==
		    (size_t)-1)
			time_wbuf[0] = L'\0';
		time_wbuf[MAX_MON_LEN - 1] = L'\0';
		free(months[i]);	/* NULL on the first call */
		months[i] = wsdup(time_wbuf);
		if (months[i] == NULL) {
			(void) fprintf(stderr, gettext(
			    "out of memory\n"));
			term();
		}
	}
}
/*
 * Return the index (0-11) of the first month abbreviation in months[]
 * that is a case-insensitive prefix of the text at s, or -1 when none
 * matches.
 */
static int
month(s)
wchar_t *s;
{
	int idx;

	for (idx = 0; idx < 12; idx++) {
		wchar_t *text = s;
		wchar_t *name = months[idx];

		while (fold(*text) == fold(*name)) {
			text++;
			name++;
			if (*name == 0)
				return (idx);
		}
	}
	return (-1);
}
/*
 * Report a read error (with the errno text) on file s, or on standard
 * input when s is 0, and terminate.
 */
static void
rderror(s)
char *s;
{
	int err = errno;

	if (s != 0)
		diag2(gettext("sort: read error on %s: "), s, err);
	else
		diag1(gettext("sort: read error on stdin: "), err);
	term();
}
/*
 * Report a write error and terminate. `format' is the message prefix;
 * the caller has already passed it through gettext.
 */
static void
wterror(format)
char *format;
{
	int err = errno;

	diag1(format, err);
	term();
}
static int
grow_core(size, cursize)
unsigned size, cursize;
{
unsigned newsize;
/*
* The variable below and its associated code was written so
* this would work on pdp11s. It works on the vax & 3b20 also.
*/
u_long longnewsize;
longnewsize = (u_long) size + (u_long) cursize;
if (longnewsize < MINMEM)
longnewsize = MINMEM;
else if (longnewsize > MAXMEM)
longnewsize = MAXMEM;
newsize = (unsigned) longnewsize;
for (; ((char *)lspace+newsize) <= (char *)lspace; newsize >>= 1);
if (longnewsize > (u_long) (maxbrk - lspace) * (u_long) sizeof (int *))
newsize = (maxbrk - lspace) * sizeof (int *);
if (newsize <= cursize)
return (0);
if (brk((char *) lspace + newsize) != 0)
return (0);
return (newsize - cursize);
}
/* One of the three functions is used as an "ignore" function. */
/* Default "ignore" function: no character is ever ignored. */
static int
/*LINTED*/
zero(wchar_t w)
{
	(void) w;
	return (0);
}
/* "ignore" function for -i: 1 for non-printable characters, else 0. */
static int
nonprint(wchar_t w)
{
	if (iswprint(w))
		return (0);
	return (1);
}
/*
 * "ignore" function for -d (dictionary order): 1 for characters that are
 * neither alphanumeric nor white space, else 0.
 */
static int
dict(wchar_t w)
{
	return (!iswalnum(w) && !iswspace(w));
}
/* Either function is used as "code" function. */
/* "code" function for -f: map lower-case letters to upper case. */
static wchar_t
fold(wchar_t w)
{
	if (iswlower(w))
		return (towupper(w));
	return (w);
}
/* Default "code" function: the character is used unchanged. */
static wchar_t
nofold(wchar_t w)
{
	wchar_t same = w;

	return (same);
}
static void
initdecpnt()
/* Load the decimal points and thousands separators for this locale. */
{
struct lconv *l = localeconv();
(void) mbtowc(&decpnt, l->decimal_point, MB_CUR_MAX);
(void) mbtowc(&mon_decpnt, l->mon_decimal_point, MB_CUR_MAX);
(void) mbtowc(&thousands_sep, l->thousands_sep, MB_CUR_MAX);
(void) mbtowc(&mon_thousands_sep, l->mon_thousands_sep, MB_CUR_MAX);
}
/*
 * Tell the user on stderr that the input ended without a newline and that
 * one has been supplied.
 */
static void
warning(void)
{
	FILE *errs = stderr;

	(void) fprintf(errs, gettext(
	    "sort: warning: missing NEWLINE added at EOF\n"));
}
/* Print the command synopsis on stderr and exit with status 2. */
static void
usage(void)
{
	FILE *errs = stderr;

	(void) fprintf(errs, gettext(
	    "sort [-bcdfiMmnru] [-o output] [-T directory] [-ykmem] [-t char]\n"
	    " [+pos1 [-pos2]] [-k field_start[type][,field_end[type]] "
	    "[file...]\n"));
	exit(2);
}
/*
 * Return the argument belonging to option letter `option'.
 * argc/argv describe the remaining command line with argv[0] being the
 * option itself. When no further argument exists, or the next argument
 * starts with '-', a diagnostic is printed and usage() is called.
 * The caller is responsible for stepping over the consumed argument.
 */
static char *
get_subopt(int argc, char **argv, char option)
{
	char **cursor = argv;
	int ok = 0;

	if (argc - 1 > 0) {
		cursor++;
		ok = (**cursor != '-');
	}
	if (!ok) {
		(void) fprintf(stderr, gettext(
		    "sort: option requires an argument -- %c\n"), option);
		usage();
	}
	return (*cursor);
}