Files
seta75D d6fe8fe829 Init
2021-10-11 22:19:34 -03:00

2681 lines
66 KiB
C

static char sccsid[] = "@(#)98 1.59 src/bos/usr/bin/sort/sort.c, cmdfiles, bos412, 9446C 11/14/94 16:46:54";
/*
* COMPONENT_NAME: (CMDFILES) commands that manipulate files
*
* FUNCTIONS: sort
*
* ORIGINS: 3, 18, 27
*
* This module contains IBM CONFIDENTIAL code. -- (IBM
* Confidential Restricted when combined with the aggregated
* modules for this product)
* SOURCE MATERIALS
* (C) COPYRIGHT International Business Machines Corp. 1985, 1994
* All Rights Reserved
*
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*
* (c) Copyright 1990, OPEN SOFTWARE FOUNDATION, INC.
* ALL RIGHTS RESERVED
*
* OSF/1 Release 1.0
*
* static char rcsid[] = "RCSfile: sort.c,v Revision: 2.8.2.2 (OSF) Date: 90/10/12 15:46:37 ";
*
*/
/* fixed for peachtree enhancement, MAXMEM, TREEZ and cmpa() */
/* please refer to H.Stuettgen and A.Schuur id: HJS-S */
/* This version of the sort for OSF/1 (from AIX 3.1) uses strxfrm and strcoll.
* To increase performance in the NLS case, the sort is not done
* in wchar_t's, but in char format. For a 'plain' sort, i.e.
* without any keys or flags, the compare is done using strcoll.
* If keys are specified, or folding/dictionary/printable flags,
* keys are extracted and converted to strxfrm format and prepended
* to the record. String compares are then done using strcmp.
* On final pass, the keys are stripped before records are written
* to output file.
* Max. record size is set to 20K. For keyed operations, the max.
* size is 3 times this (60K incl. strxfrm format keys).
* In checksort and merge operations, keys are not prepended but
* compared "in situ"; actual record max length is 20K.
* The "-A" option uses old-fashioned AT&T algorithms; the performance
* is about 10 times the collating sort. Use it when you can!
*/
/*
* NAME: sort
* FUNCTION: Sorts or merges files
* FLAGS
* -A Sorts on a byte-by-byte basis using ASCII character values.
* -b Ignores leading blanks, spaces, and tabs
* -c Checks that the input is sorted according to the ordering rules
* -d Sorts in dictionary order.
* -f Merges uppercase and lowercase letters.
* -i Sorts only by character in the ASCII range octal 040 - 0176
* -m Merges only, the input is already sorted
* -n Sorts any initial numeric strings
* -o fl Directs output to fl instead of stdout
* -r reverses the order of the specified sort
* -t ch Sets field separator character to char.
* -u Suppresses all but one in each set of equal lines.
* -T dir Places all the temporary files that are created in the directory "dir"
* -y KB Start up using KB kilobytes of storage.
* -z rsz Use rsz size records for reading in lines.
*/
#define _ILS_MACROS
#include <stdio.h>
#include <ctype.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <values.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <locale.h>
#include "sort_msg.h"
static nl_catd catd;
#define MSGSTR(Num,Str) catgets(catd,SORTMSG2,Num,Str)
#define N 16
#define C 20
#define NF 10
#define MTHRESH 8 /* threshhold for doing median of 3 qksort selection */
#define TREEZ 512 /* no less than N and best if power of 2 HJS-S-C */
#define LINE 1024
#define SORT_LINE_MAX 20480
static int mb_cur_max; /* max number of bytes per character in current locale */
/*
* Memory administration
*
* Using a lot of memory is great when sorting a lot of data.
* Using a megabyte to sort the output of `who' loses big.
* MAXMEM, MINMEM and DEFMEM define the absolute maximum,
* minimum and default memory requirements. Administrators
* can override any or all of these via defines at compile time.
* Users can override the amount allocated (within the limits
* of MAXMEM and MINMEM) on the command line.
*/
#ifndef MAXMEM
#define MAXMEM 4194304 /* 4 Megabyte maximum */ /* HJS-S-C */
#endif
#ifndef MINMEM
#define MINMEM 16384 /* 16K minimum */
#endif
#ifndef DEFMEM
#define DEFMEM 262144 /* start 256kb HJS-S-C */
#endif
#define ASC 0
#define NUM 1
#define XSTR 2
#define XNUM 3
#define sorting 0
#define merging 1
/* For <blank> detection: define macros for isblank() for SBCS and
* iswblank() for wchar with MBCS. POSIX defines function of -b, -d, and
* -n options and default field separators in terms of <blank>
* character class. Standard bindings do not provide isblank() or
* iswblank(), so sort must provide its own.
*/
static wctype_t blankhandle;
#define iswblank(wc) (is_wctype((wc),blankhandle))
#define isblank(c) (is_wctype((wchar_t)(c),blankhandle))
/* Variables for determining collation requirements */
static unsigned char lc_collate[NL_LANGMAX+1];
static int POSIXlc_collate;
static int Aflag; /* used to signal the -A option ... i.e. byte compare */
static int posflag; /* used to signal the +pos option */
static FILE *os;
/* Candidate directories for temporary files, tried in order; the list is
 * NULL-terminated.  The -T option overwrites the first entry.
 */
static char *dirtry[] = {"/var/tmp", "/usr/tmp", "/tmp", NULL};
static char **dirs;
static char file1[100];
static char *file = file1;
static char *filep;
#define NAMEOHD 12 /* sizeof("/stm00000aa") */
static int nfiles;
static int *lspace;
/* Layout of lspace:
* lspace = address of low end of temporary sort area
* cp = next address at or above lspace usable for chars of records
* lp = next address below ep usable for address of next record
* (next record from fgetrec will be stored starting at cp++,
* and its address will be stored at lp--.)
* ep = next address above high end of temporary sort area
*/
static int *newlspace; /* For conditional realloc(lspace,...) */
static unsigned tryfor;
static unsigned alloc,oldalloc;
static char bufin[BUFSIZ], bufout[BUFSIZ]; /* Use setbuf's to avoid malloc calls.
*/
static char tbuf[SORT_LINE_MAX]; /* buffers for strxfrm use */
static char ebuf[SORT_LINE_MAX];
static char xbuf[SORT_LINE_MAX*3+1]; /* assume strxfrm < 3x data */
static char *te = tbuf + SORT_LINE_MAX-1;
static char *xe = xbuf + (SORT_LINE_MAX*3);
static int maxrec;
static int mflg;
static int nway;
static int cflg;
static int uflg; /* -u option */
static int uflgactive; /* Suppress final entire-record compare if otherwise equal */
static int outflag = 0; /* 1 = writing output to customer output */
/* 0 = writing output (if any) to intermediate files for */
/* later merge because internal area overflows MAXMEM.*/
static char *outfil;
static int unsafeout; /*kludge to assure -m -o works*/
static int eargc;
static char **eargv;
static struct btree {
char *rp;
int rn;
} tree[TREEZ], *treep[TREEZ];
static int blkcnt[TREEZ];
static long wasfirst = 0, notfirst = 0;
static int bonus;
static char **blkcur[TREEZ];
static wchar_t tabchar;
static struct lconv *loc;
static wchar_t dec;
static wchar_t decmon;
static wchar_t thsep;
static wchar_t thsepmon;
static char zero[256];
static wchar_t wcoptarg[_POSIX2_LINE_MAX*2]; /* optarg converted to widechar */
static int mbcodeset; /* 0=current locale SBCS, 1=current locale MBCS */
/* The following tables are not used by the
* normal sort. However, the "-A" option uses
* them...
*/
static char fold[256] = { /* table folds ASCII lowers to uppers */
0000,0001,0002,0003,0004,0005,0006,0007,
0010,0011,0012,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0024,0025,0026,0027,
0030,0031,0032,0033,0034,0035,0036,0037,
0040,0041,0042,0043,0044,0045,0046,0047,
0050,0051,0052,0053,0054,0055,0056,0057,
0060,0061,0062,0063,0064,0065,0066,0067,
0070,0071,0072,0073,0074,0075,0076,0077,
0100,0101,0102,0103,0104,0105,0106,0107,
0110,0111,0112,0113,0114,0115,0116,0117,
0120,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0132,0133,0134,0135,0136,0137,
0140,0101,0102,0103,0104,0105,0106,0107,
0110,0111,0112,0113,0114,0115,0116,0117,
0120,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0132,0173,0174,0175,0176,0177,
0200,0201,0202,0203,0204,0205,0206,0207,
0210,0211,0212,0213,0214,0215,0216,0217,
0220,0221,0222,0223,0224,0225,0226,0227,
0230,0231,0232,0233,0234,0235,0236,0237,
0240,0241,0242,0243,0244,0245,0246,0247,
0250,0251,0252,0253,0254,0255,0256,0257,
0260,0261,0262,0263,0264,0265,0266,0267,
0270,0271,0272,0273,0274,0275,0276,0277,
0300,0301,0302,0303,0304,0305,0306,0307,
0310,0311,0312,0313,0314,0315,0316,0317,
0320,0321,0322,0323,0324,0325,0326,0327,
0330,0331,0332,0333,0334,0335,0336,0337,
0340,0341,0342,0343,0344,0345,0346,0347,
0350,0351,0352,0353,0354,0355,0356,0357,
0360,0361,0362,0363,0364,0365,0366,0367,
0370,0371,0372,0373,0374,0375,0376,0377
};
static char nofold[256] = {
0000,0001,0002,0003,0004,0005,0006,0007,
0010,0011,0012,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0024,0025,0026,0027,
0030,0031,0032,0033,0034,0035,0036,0037,
0040,0041,0042,0043,0044,0045,0046,0047,
0050,0051,0052,0053,0054,0055,0056,0057,
0060,0061,0062,0063,0064,0065,0066,0067,
0070,0071,0072,0073,0074,0075,0076,0077,
0100,0101,0102,0103,0104,0105,0106,0107,
0110,0111,0112,0113,0114,0115,0116,0117,
0120,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0132,0133,0134,0135,0136,0137,
0140,0141,0142,0143,0144,0145,0146,0147,
0150,0151,0152,0153,0154,0155,0156,0157,
0160,0161,0162,0163,0164,0165,0166,0167,
0170,0171,0172,0173,0174,0175,0176,0177,
0200,0201,0202,0203,0204,0205,0206,0207,
0210,0211,0212,0213,0214,0215,0216,0217,
0220,0221,0222,0223,0224,0225,0226,0227,
0230,0231,0232,0233,0234,0235,0236,0237,
0240,0241,0242,0243,0244,0245,0246,0247,
0250,0251,0252,0253,0254,0255,0256,0257,
0260,0261,0262,0263,0264,0265,0266,0267,
0270,0271,0272,0273,0274,0275,0276,0277,
0300,0301,0302,0303,0304,0305,0306,0307,
0310,0311,0312,0313,0314,0315,0316,0317,
0320,0321,0322,0323,0324,0325,0326,0327,
0330,0331,0332,0333,0334,0335,0336,0337,
0340,0341,0342,0343,0344,0345,0346,0347,
0350,0351,0352,0353,0354,0355,0356,0357,
0360,0361,0362,0363,0364,0365,0366,0367,
0370,0371,0372,0373,0374,0375,0376,0377
};
static char nonprint[256] = {
1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
};
static char dict[256] = {
1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
/* Per-key sort attributes.  fields[0] holds the global attributes set by
 * options given before any key; main() copies them into any later key
 * whose attributes still equal the prototype.
 */
static struct field {
char *code; /* translation table: nofold by default, fold for -f */
char *ignore; /* ignore table: zero by default, dict for -d, nonprint for -i */
int fcmp; /* comparison type: ASC by default, NUM for -n */
int rflg; /* 1 by default, -1 for -r */
int bflg[2]; /* -b counters; main() bumps [0] or [1] depending on pos1flag */
int m[2]; /* NOTE(review): presumably field numbers of key start/end - set outside this view */
int n[2]; /* NOTE(review): presumably character offsets of key start/end - set outside this view */
} fields[NF];
/* Prototype key: the attribute values of a key to which no modifier has
 * been applied.
 */
static struct field proto = {
nofold, /* code */
zero, /* ignore */
ASC, /* fcmp */
1, /* rflg */
0,0, /* bflg[0], bflg[1] */
0,-1, /* m[0], m[1] */
0,0 /* n[0], n[1] */
};
static int nfields;
static int error = 2;
static int exit_status = 0;
static int cmpset = 0;
static int pos1flag; /* pos1flag: nonzero iff a "+pos1" option has been
* seen but no corresponding "-pos2" option.
*/
static void sort(void);
static void msort(char **a, char **b);
static void insert(struct btree **tp, int n);
static void merge(int a, int b);
static void cline(register char *tp, register char *fp);
static int rline(register FILE *iop, register char *s);
static void wline(char *s);
static void checksort(void);
static void disorder(char *s, char *t);
static void newfile(void);
static char *setfil(register int i);
static void oldfile(void);
static void safeoutfil(void);
static void cant(char *f);
static void diag(char *s, char *t);
static void newdiag(char *s, char *t);
static void term(void);
static int cmp(char *a, char *b);
static int cmpa(register char *pa, register char *pb);
static int Acmp(char *i, char *j);
static int Acmpa(register char *pa, register char *pb);
static char *skip( register char *p, struct field *fp, int j);
static char *eol(register char *p);
static void copyproto(void);
static void initree(void);
static int cmpsave(register int n);
static void newoptfield(char *s,int spos,int ink, struct field *p);
static void convoptarg(char *s);
static void Usage(void);
static void qksort(char **a, char **l);
static void rderror(char *s);
static void wterror(int x);
static int fgetrec(char *s, int n, FILE *stream, char *f, int *partial);
static void fldtowa(char *p, char *l, char *b, char *ignore, char *code);
static unsigned grow_core(unsigned size, unsigned cursize);
static int (*compare)(char *a, char *b) = cmpa;
/*
 * NAME: main
 * FUNCTION: Parse the command line, allocate the work area, locate a
 *	directory for temporary files, then sort, check or merge the input
 *	files.  The process is ended through term() (or exit()).
 *
 * Option parsing accepts POSIX getopt() syntax, the obsolescent
 * "+pos1 [-pos2]" key syntax, "-y" with its argument omitted, and an
 * obsolescent "-o" placed among the file operands.
 */
int
main(int argc, char *argv[])
{
    register int a;
    char *arg = NULL;
    char *curloc;           /* current LC_COLLATE locale name */
    char pidbuf[24];        /* process id formatted for the temp file name */
    struct field *p, *q;
    int i;
    int c;                  /* option character */
    int kopt;               /* -k option has been seen.*/
    int badopt;             /* at least one command line syntax error */
    wchar_t *wcoptargend;
    char *optargend;
    char **oargv;           /* oargv,oargc,oargi: for checking obsolete -o location */
    int oargc,oargi,skipargs;
    int signedalloc;
#ifdef DEBUG_OPTS
    int Di;
    char Dig,Dco;
#endif

    setlocale(LC_ALL,"");
    /* Determine whether to use ascii or locale-dependent collation. */
    /* This lets English fall through into locale-dependent collation.*/
    /* The name is copied with a bound: lc_collate is a fixed-size buffer
     * and setlocale() may return NULL or a long composite name.
     */
    curloc = setlocale(LC_COLLATE,NULL);
    if (curloc == NULL)
        curloc = "C";
    strncpy((char *)lc_collate, curloc, sizeof(lc_collate) - 1);
    lc_collate[sizeof(lc_collate) - 1] = '\0';
    POSIXlc_collate = ((strcmp((char *)lc_collate,"C")==0) ||
                       (strcmp((char *)lc_collate,"POSIX")==0));
    if (POSIXlc_collate) {
        Aflag = 1;
        compare = Acmpa;
    }
    blankhandle = get_wctype("blank");
    /* close any file descriptors that may have been */
    /* left open -- we may need them all */
    for (i = 3; i < 3 + N; i++)
        (void) close(i);
    catd = catopen(MF_SORT, NL_CAT_LOCALE);
    loc = localeconv();
    dec = loc->decimal_point[0];
    decmon = loc->mon_decimal_point[0];
    thsep = (wchar_t)loc->thousands_sep[0];
    /* mon_thousands_sep is a string: take its first character, as is
     * done for the three separators above.
     */
    thsepmon = (wchar_t)loc->mon_thousands_sep[0];
    mb_cur_max = MB_CUR_MAX;
    mbcodeset = (mb_cur_max > 1);
    copyproto();
    initree();
    eargv = argv;
    tryfor = DEFMEM;
    nfields = 0;
    pos1flag = 0;
    kopt = 0;
    badopt = 0;
    uflg = 0;
    uflgactive = 0;
    maxrec = 0;
    outfil = (char *)NULL;
    /* Command parsing: follow POSIX guidelines; allow
     * widely-used POSIX "obsolescent" sort key options;
     * and allow -y option with optional argument.
     */
    do {
        p = &fields[nfields];
        /* Four-stage processing of each potential option character:
         * 1,2. If it is the first character of an obsolescent option
         *      that does not follow getopt() conventions, process it
         *      manually and update getopt() pointers to next possible
         *      option character.
         * 3.   If it is -y and is either (the last command line
         *      argument) or (followed in the next command line
         *      argument by something that does not begin with a digit)
         *      then (treat it as non-POSIX -y with omitted argument).
         * 4.   Otherwise process it through getopt() for normal
         *      option processing.
         */
        if (optind >= argc)
            c = EOF;        /* argv[argc] is NULL: nothing left to inspect */
        else {
            arg = argv[optind];
            c = arg[0];
        }
        if (c == '+') {
            /* Part 1 of 4: Obsolescent +pos1 option */
            newoptfield(arg+1,0,0,p);
            optarg = argv[optind++];
        } else if (c == '-' && strlen(arg) > 1
                   && (iswdigit((wchar_t)arg[1]) || arg[1]=='.')) {
            /* Part 2 of 4: Obsolescent -pos2 option */
            newoptfield(arg+1,1,0,p);
            optarg = argv[optind++];
        } else if (c == '-' && strlen(arg) == 2 && arg[1] == 'y'
                   && (optind + 1 >= argc
                       || !isdigit((unsigned char)argv[optind+1][0]))) {
            /* Part 3 of 4: -y with optional Kilobytes argument omitted */
            tryfor = MAXMEM;
            optarg = argv[optind++];
            /* Report it as 'y' so that, like "-y KB", it does not select
             * the keyed comparison routine below.
             */
            c = 'y';
        } else {
            /* Part 4 of 4: Normal POSIX command syntax option */
            c = getopt(argc,argv,"bcdfik:mno:rt:uy:z:AT:");
            switch(c){
            /* Operation modification options */
            case 'c':
                cflg = 1;
                break;
            case 'm':
                mflg = 1;
                cmpset = 0;
                break;
            case 'o':
                outfil = optarg;
                break;
            case 't':
                if (mbcodeset) {
                    convoptarg(optarg);
                    if (wcslen(wcoptarg) == 1)
                        tabchar = wcoptarg[0];
                    else badopt++;
                } else {
                    if (strlen(optarg) == 1)
                        tabchar = optarg[0];
                    else badopt++;
                }
                break;
            case 'u':
                uflg = 1;
                break;
            case 'y':
                /* -y with omitted argument handled separately above */
                /* Check for -y argument = valid integer */
                if (mbcodeset) {
                    convoptarg(optarg);
                    wcoptargend = wcoptarg;
                    tryfor = (unsigned int) wcstol(wcoptarg,&wcoptargend,10);
                    if ((tryfor == 0 && wcoptargend == wcoptarg)
                        || (wcoptargend == NULL) || (*wcoptargend != '\0'))
                        badopt++;
                    else
                        tryfor *= 1024;
                } else {
                    optargend = optarg;
                    tryfor = (unsigned int) strtol(optarg,&optargend,10);
                    if ((tryfor == 0 && optargend == optarg)
                        || (optargend == NULL) || (*optargend != '\0'))
                        badopt++;
                    else
                        tryfor *= 1024;
                }
                /* Limit -y request to range [MINMEM,MAXMEM].  Because the
                 * MINMEM test comes first, a request of 0 yields MINMEM;
                 * the DEFMEM arm is never taken.
                 */
                tryfor = (tryfor<MINMEM ? MINMEM :
                          (tryfor > MAXMEM ? MAXMEM :
                           (tryfor==0 ? DEFMEM : tryfor)));
                break;
            case 'z':
                if (mbcodeset) {
                    convoptarg(optarg);
                    maxrec = (int) wcstol(wcoptarg,&wcoptargend,10);
                    if (*wcoptargend != L'\0')
                        badopt++;
                } else {
                    maxrec = (int) strtol(optarg,&optargend,10);
                    if (*optargend != '\0')
                        badopt++;
                }
                break;
            case 'A':
                Aflag = 1;
                setlocale(LC_ALL, "C");
                compare = Acmpa;
                break;
            case 'T':
                if (optarg[0] != '\0') {
                    if ((strlen(optarg) + NAMEOHD) > sizeof(file1)) {
                        newdiag(MSGSTR(PATH2, "path name too long: %s\n")
                                , optarg);
                        exit(2);
                    }
                    else dirtry[0] = optarg;
                }
                break;
            /* Field modification options, must precede field spec option if preceded by '-' */
            case 'b':
            case 'd':
            case 'f':
            case 'i':
            case 'n':
            case 'r':
                /* Per POSIX 1003.2/D11(4.58.3,11630f): options -b,-d,-f,-i,-n, and -r
                 * must precede option -k although b,d,f,i,n, and r may appear within a
                 * -k option as a type in a keydef. See (4.58.10,11855-11861).
                 */
                if (kopt) badopt++;
                else {
                    switch(c) {
                    case 'd':
                        p->ignore = dict;
                        break;
                    case 'f':
                        p->code = fold;
                        break;
                    case 'i':
                        p->ignore = nonprint;
                        break;
                    case 'n':
                        p->fcmp = NUM;
                        break;
                    case 'b':
                        if (nfields==0) p->bflg[0]++;
                        else p->bflg[1-pos1flag]++;
                        break;
                    case 'r':
                        p->rflg = -1;
                        break;
                    default:
                        break;
                    }
                }
                break;
            /* Sort key options */
            case 'k':
                kopt++;
                newoptfield(optarg,0,1,p);
                break;
            case EOF:
                break;
            default:
                badopt++;
                break;
            } /*end switch(c)*/
        } /* end POSIX syntax option */
        /* Update choice of comparison routine: if the option qualifies
         * sort, use a sort routine that allows for field processing and
         * processed sort keys prepended to the record. (All options
         * except -A, -T, -o, -u, -y, and -z qualify sort somehow.)
         */
        if ((strchr("ATouyz",(int)c) == NULL) && c!=EOF) {
            if (Aflag)
                compare = Acmp;
            else
                compare = cmp;
            if (!mflg)
                cmpset = 1;
        }
    } /* end do c = ... */
    while (c != EOF);
    /* Set up input file names for pre-getopt() parameter processing:
     * eargc = number of input Files; eargv[0..eargc-1] = array of Filenames.
     */
    eargv = &argv[optind];
    eargc = argc - optind;
    /* Check for -o option in the file operands (POSIX obsolescent requirement) */
    oargv = eargv;
    oargc = eargc;
    while (oargc > 0) {
        if (strncmp(*oargv,"-o",2)==0) {
            /* Found option beginning with -o,
             * get -o argument as output file name.
             */
            if (strlen(*oargv) == 2 && oargc > 1) {
                /* -o with Filename in following argv[] string */
                skipargs = 2;
                outfil = *(oargv+1);
            } else if (strlen(*oargv) > 2) {
                /* -oFilename all in one argv[] string */
                skipargs = 1;
                outfil = (*oargv)+2;
            } else
                skipargs = 0;   /* No file specified, take -o as last input file. */
            if (skipargs > 0) {
                /* Delete -o and argument from file names list */
                eargc -= skipargs;
                oargc -= skipargs;
                for (oargi=0;oargi<oargc;oargi++)
                    oargv[oargi]=oargv[oargi+skipargs];
                /* The operand that slid into this position has not been
                 * examined yet, so do not advance past it.
                 */
                continue;
            }
        }
        oargv++;
        oargc--;
    }
    if (badopt) {
        Usage();
    }
#ifdef DEBUG_OPTS
    /* Dump field structures to check option processing. */
    fputs("Sort files: ",stdout);
    for(Di=0;Di<eargc;Di++)
        printf(" eargv[%d]=%s ",Di,eargv[Di]);
    printf(" Outfile=%s\n",outfil);
    printf("Sort flags: posflag=%d Aflag=%d cflg=%d mflg=%d uflg=%d cmpset=%d\n",
           posflag,Aflag,cflg,mflg,uflg,cmpset);
    printf("Sort: compare = %s()\n",(compare==cmpa?"cmpa":
                                     (compare==Acmpa?"Acmpa":
                                      (compare==Acmp?"Acmp":
                                       (compare==cmp?"cmp":"?")))));
    fputs("Sort fields: code ign fcmp rflg b[0] b[1] m[0] m[1] n[0] n[1]\n",stdout);
    for(Di=0;Di<=nfields;Di++){
        p=&fields[Di];
        Dco=(p->code == fold?'f':(p->code == nofold?'n':'?'));
        Dig=(p->ignore == zero?'z':(p->ignore == dict?'d':(p->ignore == nonprint?'n':'?')));
        printf(" fields[%2d]= (%2c%4c%5d%5d%6d%5d%6d%5d%6d%5d )\n",
               Di,Dco,Dig,p->fcmp,p->rflg, p->bflg[0],p->bflg[1], p->m[0],p->m[1], p->n[0],p->n[1]);
    }
#endif
    /* Keys whose attributes still equal the prototype inherit the
     * global attributes held in fields[0].
     */
    q = &fields[0];
    for(a=1; a<=nfields; a++) {
        p = &fields[a];
        if(p->code != proto.code) continue;
        if(p->ignore != proto.ignore) continue;
        if(p->fcmp != proto.fcmp) continue;
        if(p->rflg != proto.rflg) continue;
        if(p->bflg[0] != proto.bflg[0]) continue;
        if(p->bflg[1] != proto.bflg[1]) continue;
        p->code = q->code;
        p->ignore = q->ignore;
        p->fcmp = q->fcmp;
        p->rflg = q->rflg;
        p->bflg[0] = p->bflg[1] = q->bflg[0];
    }
    if(eargc == 0)
        eargv[eargc++] = "-";       /* no file operands: read stdin */
    if(cflg && eargc>1) {
        diag(MSGSTR(CHECK2,"can check only 1 file\n"), "");
        exit(2);
    }
    safeoutfil();
    lspace = (int *)NULL;
    if (!mflg && !cflg) {
        if ((alloc = grow_core(tryfor,0)) == 0) {
            diag(MSGSTR(ALLOC2,"allocation error before sort\n"), "");
            exit(2);
        }
    } else {
        if ((alloc = grow_core(MAXMEM,0)) == 0) {
            diag(MSGSTR(MALLOC2,"allocation error before merge\n"), "");
            exit(2);
        }
    }
    /* Find the first candidate directory in which a temp file can be
     * created.  filep is left pointing at the two trailing letters of
     * the name, which setfil() rewrites for each temp file.
     */
    a = -1;
    (void) sprintf(pidbuf, "%.5u", (unsigned)getpid());
    for(dirs=dirtry; *dirs; dirs++) {
        /* "<dir>/stm<pid>aa" plus the terminating NUL must fit in file1;
         * a process id of more than five digits makes the name longer
         * than NAMEOHD allows for.
         */
        if (strlen(*dirs) + strlen("/stm") + strlen(pidbuf) + 2 + 1
            > sizeof(file1))
            continue;
        (void) sprintf(filep=file1, "%s/stm%saa", *dirs, pidbuf);
        while (*filep)
            filep++;
        filep -= 2;
        if ((a=creat(file, 0600)) >=0)
            break;
    }
    if(a < 0) {
        diag(MSGSTR(LOCATE2,"can't locate temp\n"), "");
        exit(2);
    }
    (void) close(a);
    (void) unlink(file);
    /* Install the cleanup handler, but leave ignored signals ignored. */
    if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
        (void) signal(SIGHUP, (void (*)(int))term);
    if (signal(SIGINT, SIG_IGN) != SIG_IGN)
        (void) signal(SIGINT, (void (*)(int))term);
    (void) signal(SIGPIPE, (void (*)(int))term);
    if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
        (void) signal(SIGTERM, (void (*)(int))term);
    nfiles = eargc;
    if(!mflg && !cflg) {
        sort();
        if (ferror(stdin))
            rderror(NULL);
        (void) fclose(stdin);
    }
    if (maxrec == 0) maxrec = SORT_LINE_MAX;
    /* Pick the widest merge order (at most N, at least 2) whose record
     * slots and stdio buffers fit in the area already allocated.
     */
    oldalloc = alloc;
    alloc = (N + 1) * maxrec + N * BUFSIZ;
    for (nway = N; nway >= 2; --nway) {
        if (alloc < oldalloc)
            break;
        signedalloc = alloc - (maxrec + BUFSIZ);
        alloc = (signedalloc<0 ? 0 : (unsigned)signedalloc);
    }
    if (nway < 2 || alloc == 0) {
        diag(MSGSTR(MALLOC2,"allocation error before merge\n"), "");
        term();
    }
    if (cflg) checksort();
    wasfirst = notfirst = 0;
    /* After a sort only the temp files (indices >= eargc) remain to be
     * merged; for -m the input files themselves are merged.
     */
    a = mflg || cflg ? 0 : eargc;
    if ((i = nfiles - a) > nway) {  /* Do leftovers early */
        if ((i %= (nway - 1)) == 0)
            i = nway - 1;
        if (i != 1) {
            newfile();
            setbuf(os, bufout);
            merge(a, a+i);
            a += i;
        }
    }
    for(; (a+nway<nfiles) || (unsafeout && a<eargc); a=i) {
        i = a+nway;
        if(i>=nfiles)
            i = nfiles;
        newfile();
        setbuf(os, bufout);
        merge(a, i);
    }
    if(a != nfiles) {
        oldfile();
        setbuf(os, bufout);
        merge(a, nfiles);
    }
    error = exit_status;
    term();
    return(error);  /* only reached if term() returns */
}
/*
* NAME: sort
* FUNCTION: setup the buffers for the sort and call msort routine
*/
static void
sort(void)
{
register char *cp;
register char **lp, **ep;
char *keep, *ekeep, **mp, **lmp;
char **oldep, **oldlp;
int *oldlspace, oldlspace2lp, oldlspace2cp;
int lspacemove;
FILE *iop;
int n;
int done, i, first;
int partial;
char *f;
/*
** Records are read in from the front of the buffer area.
** Pointers to the records are allocated from the back of the buffer.
** If a partially read record exhausts the buffer, it is saved and
** then copied to the start of the buffer for processing with the
** next coreload.
*/
first = 1;
done = 0;
keep = NULL;
ekeep = NULL;
i = 0;
ep = (char **) (((char *) lspace) + alloc);
do {
if ((f=setfil(i++)) == NULL) /* open first file */
iop = stdin;
else if ((iop=fopen(f,"r")) == NULL)
cant(f);
} while ((i < eargc) && (iop == NULL));
if (iop==NULL)
term();
setbuf(iop,bufin);
do {
lp = ep - 1;
cp = (char *) lspace;
*lp-- = cp; /* move record from previous coreload */
if (keep && ekeep)
for(; keep < ekeep; *cp++ = *keep++);
while ((char *)lp - cp > 1) {
n = fgetrec(cp,(char *) lp - cp, iop, f, &partial);
if (n == 0) {
if (ferror(iop))
rderror(f);
if (keep != 0 )
/* The kept record was at
the EOF. Let the code
below handle it. */;
else
if (i < eargc) {
do {
if ((f=setfil(i++)) == NULL)
iop = stdin;
else if ((iop=fopen(f,"r")) == NULL )
cant(f);
} while ((i < eargc) && (iop == NULL));
if (iop==NULL) {
done++;
break;
}
setbuf(iop,bufin);
continue;
}
else {
done++;
break;
}
}
cp += n-1;
if ( !partial ) {
cp += 2;
if ( cp - *(lp+1) > maxrec )
maxrec = cp - *(lp+1);
*lp-- = cp;
keep = 0;
}
else
if ( cp + 2 < (char *) lp ) {
/* Input record does not end with \n . Append '\n' and
* '\0' to end of input record. We do not attempt to force
* the end of the record to a character boundary.
*/
/* the last record of the input */
/* file is missing a NEWLINE */
if(f == NULL) newdiag(MSGSTR(NEWLINE4,
"warning: missing NEWLINE added at EOF\n"), "");
else newdiag(MSGSTR(NEWLINE5,
"warning: missing NEWLINE added at end of input file %s\n")
, f);
*++cp = '\n';
*++cp = '\0';
*lp-- = ++cp;
keep = 0;
}
else { /* the buffer is full */
keep = *(lp+1);
ekeep = ++cp;
}
if ((char *)lp - cp <= 2 && first == 1) {
/* full buffer */
tryfor = alloc;
oldlspace = lspace;
oldlspace2cp = (int)cp - (int)lspace;
oldlspace2lp = (int)lp - (int)lspace;
tryfor = grow_core(tryfor,alloc);
if (tryfor == 0)
/* could not grow */
first = 0;
else { /* move pointers */
oldep = (char **)((int)lspace + alloc);
oldlp = (char **)((int)lspace + oldlspace2lp);
alloc += tryfor;
lspacemove = (int)lspace - (int)oldlspace;
cp = (char *)((int)lspace + oldlspace2cp);
ep = (char **)((int)lspace + alloc);
lp = (char **)((int)ep - (int)oldep + (int)oldlp);
for ( mp = oldep-1, lmp = ep-1;
mp > oldlp; ) {
*lmp-- = (char *)((int)(*mp--) + lspacemove);
}
}
}
}
if (keep != 0 && *(lp+1) == (char *) lspace) {
fprintf(stderr,MSGSTR(TOOLONG2,"fatal: record too large %d\n"),LINE);
term();
}
first = 0;
lp += 2;
if(done == 0 || nfiles != eargc)
newfile();
else
oldfile();
setbuf(os, bufout);
msort(lp, ep);
if (ferror(os))
wterror(sorting);
(void) fclose(os);
} while(done == 0);
}
/*
 * NAME: msort
 * FUNCTION: sort the records addressed by a[0] .. b[-1] and write them
 *	with wline().  The pointer array is cut into at most TREEZ groups,
 *	each group is sorted by qksort(), and the groups are then merged
 *	through the treep[] array, which insert() keeps in order.
 */
static void
msort(char **a, char **b)
{
    register struct btree **node;
    register int cnt, grp, live;
    int uniq;
    char *save = NULL;

    cnt = (b - a);
    if (cnt < 1)
        return;
    if (cnt == 1) {
        wline(*a);
        return;
    }
    /* number of groups of records */
    live = (cnt >= TREEZ) ? TREEZ : cnt;

    /* break into sorted subgroups of approximately equal size */
    node = treep;
    for (grp = 0; live > 0; grp++, live--) {
        (*node)->rn = grp;
        node++;
        blkcnt[grp] = cnt / live;
        b = a + blkcnt[grp];
        qksort(a, b);
        a = b;
        blkcur[grp] = b;        /* one past the group's last pointer */
        cnt -= blkcnt[grp];
    }
    live = grp;

    /* seed the ordered array with one record from each group, working
     * back from the last group to the first
     */
    for (cnt = 1; cnt <= live; cnt++) {
        --node;
        --grp;
        --blkcur[grp];
        (*node)->rp = *blkcur[grp];
        insert(node, cnt);
    }
    wasfirst = notfirst = 0;
    bonus = cmpsave(live);
    uniq = uflg;
    /* If -u option specified, suppress character-code comparison of records
     * with equal collation values based on sort options.
     */
    if (uflg)
        uflgactive++;

    node = treep;
    while (live > 0) {
        wline((*node)->rp);
        if (uniq)
            save = (*node)->rp;
        /* Get another record and insert. Bypass repeats if uflg */
        do {
            /* group the record just written came from */
            cnt = (*node)->rn;
            if (--blkcnt[cnt] > 0) {
                /* group not empty: take its next record and re-insert */
                --blkcur[cnt];
                (*node)->rp = *blkcur[cnt];
                insert(node, live);
            } else {
                /* group exhausted: step to the next entry */
                if (--live <= 0)
                    break;
                bonus = cmpsave(live);
                node++;
            }
        } while (uniq && (*compare)((*node)->rp, save) == 0);
    }
}
/* Insert the element at tp[0] into its proper place in the array of size n */
/* Pretty much Algorithm B from 6.2.1 of Knuth, Sorting and Searching */
/* Special case for data that appears to be in correct order */
/*
 * tp[1] .. tp[n-1] are already in order.  A binary search using
 * (*compare)() finds the slot for tp[0]; the entries in front of that
 * slot are shifted down one place and tp[0] is stored behind them.
 * wasfirst/notfirst count how often the element did or did not have to
 * move; while "did not move" is ahead, one comparison against tp[1] is
 * tried first as a shortcut.
 */
static void
insert(struct btree **tp, int n)
{
    register struct btree **lop, **hip, **midp;
    register int c;
    struct btree *hold;

    midp = lop = tp;
    hip = lop++ + (n - 1);      /* hip = tp + n - 1, lop = tp + 1 */
    if ((wasfirst > notfirst) && (n > 2) &&
        ((*compare)((*tp)->rp, (*lop)->rp) >= 0)) {
        wasfirst += bonus;
        return;
    }
    while ((c = hip - lop) >= 0) {  /* leave midp at the one tp is in front of */
        midp = lop + c / 2;
        if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0) {
            if (Aflag)
                break;          /* match */
        }
        if (c <= 0)
            lop = ++midp;       /* c < 0 => tp > midp */
        else
            hip = midp - 1;     /* c > 0 => tp < midp */
    }
    c = midp - tp;
    if (--c > 0) {  /* number of moves to get tp just before midp */
        hold = *tp;
        /* source (tp+1) and destination (tp) overlap, so memmove is
         * required here
         */
        memmove(tp, tp + 1, (size_t)c * sizeof(*tp));
        tp[c] = hold;
        notfirst++;
    } else
        wasfirst += bonus;
}
/*
 * NAME: merge
 * FUNCTION: merge sorted files together
 *
 * Merges the files with indices a .. b-1 (as numbered by setfil()) onto
 * the output stream os, then closes os.  Files in that range whose index
 * is at or beyond eargc are temporary files and are unlinked afterwards.
 *
 * Work area layout (lspace): one maxrec-byte record slot per input,
 * then, at offset nway*maxrec, a maxrec-byte slot (save) holding the
 * last line written when -u is active, then one BUFSIZ stdio buffer per
 * input.
 */
static void
merge(int a, int b)
{
    FILE *tfile[N];
    int tfidx[N];       /* setfil() index of the file read through each slot */
    char *buffer = (char *) lspace;
    char *save;
    char *iobuf;
    register int nf;    /* number of merge files */
    register struct btree **tp;
    register int i, j;
    char *f;

    save = (char *) lspace + (nway * maxrec);
    iobuf = save + maxrec;
    tp = &(treep[0]);
    for (nf=0, i=a; i < b; i++) {
        f = setfil(i);
        if (f == 0)
            tfile[nf] = stdin;
        else if ((tfile[nf] = fopen(f, "r")) == NULL) {
            cant(f);
            continue;
        }
        /* Slot numbers and file indices drift apart as soon as one
         * input is skipped, so remember which file owns the slot.
         */
        tfidx[nf] = i;
        (*tp)->rp = buffer + (nf * maxrec);
        (*tp)->rn = nf;
        setbuf(tfile[nf], iobuf);
        iobuf += BUFSIZ;
        if (rline(tfile[nf], (*tp)->rp)==0) {
            nf++;
            tp++;
        } else {
            /* no first record: the slot is reused for the next file */
            if (ferror(tfile[nf]))
                rderror(f);
            (void) fclose(tfile[nf]);
        }
    }
    /* make a sorted btree from the first record of each file */
    --tp;
    for (i = 2; i <= nf; i++)
        insert(--tp, i);
    bonus = cmpsave(nf);
    tp = &(treep[0]);
    j = uflg;
    /* If -u option specified, suppress character-code comparison of
     * records with equal collation values based on sort options.
     */
    if (uflg) uflgactive++;
    while (nf > 0) {
        wline((*tp)->rp);
        if (j) cline(save, (*tp)->rp);
        /* Get another record and insert. Bypass repeats if uflg */
        do {
            i = (*tp)->rn;
            if (rline(tfile[i], (*tp)->rp)) {
                /* input exhausted (or unreadable): drop it */
                if (ferror(tfile[i]))
                    rderror(setfil(tfidx[i]));
                (void) fclose(tfile[i]);
                if (--nf <= 0) break;
                ++tp;
                bonus = cmpsave(nf);
            } else
                insert(tp, nf);
        } while (j && (*compare)((*tp)->rp, save) == 0);
    }
    for (i=a; i < b; i++) {
        if (i >= eargc)
            (void) unlink(setfil(i));
    }
    if (ferror(os))
        wterror(merging);
    (void) fclose(os);
}
/*
 * NAME: cline
 * FUNCTION: copy one line from fp to tp, up to and including the
 *	newline.  Nothing is stored after the newline.
 */
static void
cline(register char *tp, register char *fp)
{
    register char ch;

    do {
        ch = *fp++;
        *tp++ = ch;
    } while (ch != '\n');
}
/*
 * NAME: rline
 * FUNCTION: read line
 *	Because the lines may be read from temporary work files,
 *	we must check the state of the sort. For merge and cksort
 *	operations, (and plain sorts), we read as usual. For keyed
 *	sorts, the key area (which may contain zero bytes) is first
 *	read, then the remainder (text portion) of the record.
 *
 * s must have room for maxrec bytes.
 * RETURNS: 0 if a record was read into s, 1 if there was none (end of
 *	file or read error; the caller tells them apart with ferror()).
 *	A final line without a newline gets one appended.  A line that
 *	does not fit is fatal.
 */
static int
rline(register FILE *iop, register char *s)
{
    register int n;
    int rlen, maxlen;

    maxlen = maxrec;
    /* If line is not being read as part of a (merge by sort -m) or
     * (check by sort -c) then line must be being read as part of
     * (merge intermediate output files that cumulatively exceed
     * MAXMEM). If (cmpset && !Aflag), wline() wrote prefixed keys,
     * so read those before line content.
     */
    if (!mflg && !cflg && (cmpset == 1) && !Aflag) {
        maxlen = maxrec - 2;
        /* The first two bytes hold the length of the key area, high
         * byte first, counting themselves.
         */
        if (fread(s, 1, 2, iop) < 2)
            return(1);
        /* decode as unsigned bytes: plain char may be signed */
        rlen = (((unsigned char)s[0] << 8) | (unsigned char)s[1]);
        rlen -= 2;
        if (rlen < 0)
            rlen = 0;
        if (rlen > maxlen)
            rlen = maxlen;
        s += 2;
        if (fread(s, 1, (size_t)rlen, iop) < (size_t)rlen)
            return(1);
        maxlen -= rlen;
        s += rlen;
    }
    if (fgets(s,maxlen,iop) == NULL)
        n = 0;
    else
        n = strlen(s);
    if (n == 0)
        return(1);
    s += n - 1;
    if (*s == '\n')
        return(0);
    /* No newline.  fgets() stores at most maxlen-1 characters, so a
     * shorter result means end of file was reached; a result of exactly
     * maxlen-1 characters means the line did not fit.
     */
    if (n < maxlen - 1) {
        newdiag(MSGSTR(NEWLINE4,"warning: missing NEWLINE at EOF added\n"),"");
        *++s = '\n';
        *++s = '\0';    /* wline() writes the text with fputs() */
        return(0);
    }
    fprintf(stderr,MSGSTR(TOOLONG2,"fatal: line too long %d\n"),LINE);
    term();
    return(0);
}
/*
 * NAME: wline
 * FUNCTION: write line
 *	Writes the record at s to the output stream os.  Write errors are
 *	not reported here; the callers test ferror(os).
 */
static void
wline(char *s)
{
    size_t rlen;

    /* If (cmpset and !Aflag), line has prefixed keys. If line is
     * not being written to user output file then line must be being
     * written to intermediate output file for later merge because
     * cumulative output size exceeds MAXMEM, so write those before
     * line content.
     */
    if (cmpset == 1 && !Aflag) {
        /* Length of the key area, high byte first, stored in its first
         * two bytes.  Decode as unsigned bytes: plain char may be signed.
         */
        rlen = (((size_t)(unsigned char)s[0] << 8) |
                (size_t)(unsigned char)s[1]);
        if (!outflag) {
            /* write key information to temporary file */
            (void) fwrite(s, 1, rlen, os);
        }
        /* strip key information */
        s += rlen;
    }
    (void) fputs(s,os);
}
/*
 * NAME: checksort
 * FUNCTION: check whether the (single) input file is already sorted.
 *	Adjacent records are compared with (*compare)(); a record that is
 *	out of order, or a repeated record when -u is set, is reported
 *	through disorder().  Always ends the process.
 */
static void
checksort(void)
{
    char *rec[2];       /* two record buffers used alternately */
    char *name;
    register int prev, cur, order;
    register FILE *in;

    name = setfil(0);
    if (name == 0)
        in = stdin;
    else if ((in = fopen(name, "r")) == NULL) {
        cant(name);
        term();
    }
    setbuf(in, bufin);

    rec[0] = (char *) lspace;
    rec[1] = rec[0] + maxrec;
    prev = 0;
    cur = 1;
    if (rline(in, rec[0]) == 0) {
        while (rline(in, rec[cur]) == 0) {
            order = (*compare)(rec[prev], rec[cur]);
            if (order < 0)
                disorder(MSGSTR(DISORDER2,"disorder: %s\n"), rec[cur]);
            if (order == 0 && uflg)
                disorder(MSGSTR(NUNIQUE2,"not unique: %s\n"), rec[cur]);
            /* the record just read becomes the previous one */
            order = prev;
            prev = cur;
            cur = order;
        }
    }
    if (ferror(in))
        rderror(name);
    (void) fclose(in);
    exit(exit_status);
}
/*
 * NAME: disorder
 * FUNCTION: report an out-of-order or duplicate record and terminate.
 *	The record t is cut at its NEWLINE (replaced by a NUL) so that
 *	it can be inserted into the message format s.
 */
static void
disorder(char *s, char *t)
{
	register char *nl = t;

	while (*nl != '\n')
		nl++;
	*nl = '\0';
	newdiag(s, t);
	error = 1;
	term();
}
/*
 * NAME: newfile
 * FUNCTION: open the next intermediate file for writing.
 *	The name comes from setfil(nfiles); on success nfiles is
 *	advanced and outflag is cleared to mark os as a work file.
 */
static void
newfile(void)
{
	register char *name = setfil(nfiles);

	os = fopen(name, "w");
	if (os == NULL) {
		newdiag(MSGSTR(CREATE2,"can't create %s\n"), name);
		term();
	}
	nfiles++;
	outflag = 0;
}
/*
 * NAME: setfil
 * FUNCTION: map a file index to a file name.
 *	Indexes below eargc name input arguments: "-" yields 0 (the
 *	caller uses standard input), anything else the argument itself.
 *	Higher indexes name work files: the two letters at filep are
 *	set from the index and the shared name buffer file is returned.
 */
static char *
setfil(register int i)
{
	int t;

	if (i < eargc) {
		if (eargv[i][0] != '-' || eargv[i][1] != '\0')
			return(eargv[i]);
		return(0);
	}
	t = i - eargc;
	filep[0] = t/26 + 'a';
	filep[1] = t%26 + 'a';
	return(file);
}
/*
 * NAME: oldfile
 * FUNCTION: direct os to the user's output: the -o file when outfil
 *	is set, otherwise stdout.  Sets outflag so that wline() knows
 *	it is writing final output.
 */
static void
oldfile(void)
{
	if (!outfil) {
		os = stdout;
	} else {
		os = fopen(outfil, "w");
		if (os == NULL) {
			newdiag(MSGSTR(CREATE2,"can't create %s\n"), outfil);
			term();
		}
	}
	outflag = 1;	/* set to mark output file... */
}
/*
 * NAME: safeoutfil
 * FUNCTION: for a merge (-m) with an output file, count in unsafeout
 *	how many of the last N input files are the same file (same
 *	device and inode) as the output file.
 */
static void
safeoutfil(void)
{
	struct stat ostat, istat;
	register int i;

	if (!mflg || outfil == 0)
		return;
	if (stat(outfil, &ostat) == -1)
		return;
	i = eargc - N;		/*-N is suff., not nec. */
	if (i < 0)
		i = 0;
	while (i < eargc) {
		if (stat(eargv[i], &istat) != -1
		    && ostat.st_dev == istat.st_dev
		    && ostat.st_ino == istat.st_ino)
			unsafeout++;
		i++;
	}
}
/*
 * NAME: cant
 * FUNCTION: report that file f could not be opened and record
 *	exit status 2.  Does not terminate by itself.
 */
static void
cant(char *f)
{
	char *fmt;

	fmt = MSGSTR(OPEN2,"can't open %s\n");
	newdiag(fmt, f);
	exit_status = 2;
}
/*
 * NAME: diag
 * FUNCTION: print error message.
 *	Writes "sort: ", then s, then t, then a NEWLINE to stderr.
 *	Neither s nor t is interpreted as a format.
 */
static void
diag(char *s, char *t)
{
	(void) fprintf(stderr, "sort: %s%s\n", s, t);
}
/*
 * NAME: newdiag()
 * FUNCTION: Internationalizable version of diag(): print error message.
 *	Writes "sort: " followed by the message s to stderr.  s may be
 *	a printf-style format with one %s conversion, for which t is
 *	the insert.  A NULL t is treated as the empty string.
 *
 *	A message that contains a conversion is always formatted, even
 *	when the insert is empty; otherwise the conversion text itself
 *	("%s") would appear in the output, e.g. when sort -c reports a
 *	disorder on an empty line.  Messages without any '%' are
 *	written verbatim.
 */
static void
newdiag(char *s, char *t)
{
	if (t == NULL)
		t = "";
	fputs("sort: ",stderr);
	if (strchr(s, '%') != NULL)
		fprintf(stderr,s,t);
	else
		fputs(s,stderr);
}
/*
 * NAME: term
 * FUNCTION: clean up and exit.
 *	Ignores SIGINT, SIGHUP and SIGTERM, removes every work file
 *	named by setfil() for indexes eargc..nfiles, and exits with
 *	the value of error.
 */
static void
term(void)
{
	register int i;

	(void) signal(SIGINT, SIG_IGN);
	(void) signal(SIGHUP, SIG_IGN);
	(void) signal(SIGTERM, SIG_IGN);
	if (nfiles == eargc)
		nfiles++;
	i = eargc;
	while (i <= nfiles) {		/*<= in case of interrupt*/
		(void) unlink(setfil(i));	/*with nfiles not updated*/
		i++;
	}
	exit(error);
}
/*
 * NAME: cmp
 * FUNCTION: compare two strings
 * This routine is substantially expanded and changed to be
 * able to handle the prefixed form of the data. If the compare
 * code is either XNUM or XSTR, then the field is prepended to
 * the record; otherwise, the old method for finding the key
 * and compare it holds.
 * Note that, for string compares, if the caller specified either
 * folding or dictionary sort, then the fields are moved (and
 * either compacted or folded) to work areas, where they are
 * compared. If neither folding or dictionary order is speci-
 * fied, the compare takes place in situ.
 *
 * PARAMETERS: a, b - the two records (NEWLINE terminated; with a
 *	prepended key area when the field code is XNUM or XSTR).
 * RETURNS: 0 when the records are equal on every field (and, unless
 *	uflgactive is set, on the final tie-break); otherwise a value
 *	whose sign is that of (b compared with a) multiplied by the
 *	deciding field's rflg.
 */
static int
cmp(char *a, char *b)
{
register char *pa, *pb;
char *la, *lb;
char sava, savb;
char *ipa, *ipb, *jpa, *jpb;
char *code, *ignore;
register int sa;
int sb;
int i, j;
int k;
struct field *fp;
wchar_t wcchar, *pwcchar=&wcchar;
int chrlen;
/* With key fields defined (nfields > 0) the loop visits fields[1..nfields];
 * without any it visits fields[0] only.
 */
for(k = nfields>0; k<=nfields; k++) {
fp = &fields[k];
pa = a;
pb = b;
if ( (fp->fcmp == ASC) || (fp->fcmp == NUM) ) {
/* k starts at 0 or 1, so this test is always true here */
if(k >= 0) { /* keys are in original place */
la = skip(pa, fp, 1);
pa = skip(pa, fp, 0);
lb = skip(pb, fp, 1);
pb = skip(pb, fp, 0);
} else {
la = eol(pa);
lb = eol(pb);
}
} else { /* keys are prepended to record */
/* Step over the 2-byte length at the front of the key area, then over
 * k length-prefixed entries; pa..la and pb..lb then bracket the data
 * of the entry reached.
 * NOTE(review): the length bytes are read through plain char; this
 * presumably relies on char being unsigned on the target - confirm.
 */
pa += 2;
pb += 2;
for (i = 0; i < k; i++) {
pa += ((pa[0] << 8) | (pa[1]));
pb += ((pb[0] << 8) | (pb[1]));
}
la = pa + ((pa[0] << 8) | (pa[1]));
lb = pb + ((pb[0] << 8) | (pb[1]));
pa += 2;
pb += 2;
}
if( (fp->fcmp == NUM) || (fp->fcmp == XNUM) ) {
/* sa and sb carry the sign of each number, pre-multiplied by rflg */
sa = sb = fp->rflg;
/* Skip leading <blank>s in both records.
* (POSIX Draft 11 decouples -n from implied -b . )
*/
if (mbcodeset) {
while( *pa != '\n' && (chrlen=mbtowc(pwcchar,pa,mb_cur_max))>0
&& iswblank(wcchar) )
pa += chrlen;
while( *pb != '\n' && (chrlen=mbtowc(pwcchar,pb,mb_cur_max))>0
&& iswblank(wcchar) )
pb += chrlen;
} else {
while((*pa != '\n') && isblank(*pa))
pa++;
while((*pb != '\n') && isblank(*pb))
pb++;
}
if(*pa == '-') {
pa++;
sa = -sa;
}
if(*pb == '-') {
pb++;
sb = -sb;
}
/* Find the radix position: first byte position to the
* right of the rightmost digit or thousands-separator
* from the left end of the field.
*/
for (ipa = pa; ipa<la; ipa++) {
if ( !(iswdigit((wchar_t)*ipa)
||(wchar_t)*ipa==thsep||(wchar_t)*ipa==thsepmon) )
break;
}
for (ipb = pb; ipb<lb; ipb++) {
if ( !(iswdigit((wchar_t)*ipb)
||(wchar_t)*ipb==thsep||(wchar_t)*ipb==thsepmon) )
break;
}
/* remember the radix positions for the fraction comparison below */
jpa = ipa;
jpb = ipb;
/* i ends up holding the difference found in the most significant
 * differing digit, because later (more significant) differences
 * overwrite earlier ones while scanning right to left.
 */
i = 0;
if(sa==sb)
/* If signs are the same, find the most significant
* digit position to left of the radix position in which
* the numbers differ. Skip thousands separators.
* There is no attempt here to check LC_NUMERIC grouping
* or LC_MONETARY mon_grouping sizes, or to infer
* zero-digits in omitted character positions between
* separators according to grouping or mon_grouping.
*/
while(ipa > pa && ipb > pb) {
ipa--;
ipb--;
while(ipa>pa &&
((wchar_t)*ipa==thsep || (wchar_t)*ipa==thsepmon)){
ipa--;
}
while(ipb>pb &&
((wchar_t)*ipb==thsep || (wchar_t)*ipb==thsepmon)){
ipb--;
}
if(j = *ipb - *ipa)
i = j;
}
/* If either number contains more significant digits than the other,
* or if the signs are different, the comparison is determined by
* the presence of a nonzero significant digit in either number
* and the sign of that number.
*/
while(ipa > pa)
if(*--ipa != '0'
&& (unsigned char)*ipa!=thsep
&& (unsigned char)*ipa!=thsepmon)
return(-sa);
while(ipb > pb)
if(*--ipb != '0'
&& (unsigned char)*ipb!=thsep
&& (unsigned char)*ipb!=thsepmon)
return(sb);
if(i) return(i*sa);
/* No discriminating corresponding character position was
* found to the left of the radix position. Now sort on the
* leftmost character position in which corresponding
* positions to the right of the radix position differ.
*/
if((wchar_t)*(pa=jpa) == dec || (wchar_t)*(pa) == decmon)
pa++;
if((wchar_t)*(pb=jpb) == dec || (wchar_t)*(pb) == decmon)
pb++;
if(sa==sb)
while(pa<la && iswdigit((wchar_t)*pa)
&& pb<lb && iswdigit((wchar_t)*pb))
if(i = *pb++ - *pa++)
return(i*sa);
/* Whichever number still has a nonzero fraction digit decides */
while(pa<la && iswdigit((wchar_t)*pa))
if(*pa++ != '0')
return(-sa);
while(pb<lb && iswdigit((wchar_t)*pb))
if(*pb++ != '0')
return(sb);
/* numerically equal on this field: try the next one */
continue;
}
if (fp->fcmp == XSTR) { /* 'key' is prepended */
sa = strcmp(pb, pa);
if (sa == 0)
continue;
return(sa*fp->rflg);
}
if (fp->fcmp == ASC) { /* record is not prepended */
code = fp->code;
ignore = fp->ignore;
if ( (ignore == zero) && (code == nofold) ) {
/* Compare in place: temporarily NUL-terminate both fields for
 * strcoll() and restore the saved bytes afterwards.
 */
sava = *la;
*la = '\0';
savb = *lb;
*lb = '\0';
sa = strcoll(pb, pa);
*la = sava;
*lb = savb;
} else { /* have to use work areas */
ipa = tbuf;
fldtowa(pa, la, ipa, ignore, code);
ipb = ebuf;
fldtowa(pb, lb, ipb, ignore, code);
sa = strcoll(ipb, ipa);
}
if ( sa == 0 )
continue;
return(sa*fp->rflg);
}
}
if(uflgactive)
return(0);
/* If no return yet: the records collate equally on all qualifications
* specified by sort options. To make sort results independent of the
* order of sort records, now sort on character codes in the specified
* sort fields (if any) and then character codes of the entire record.
* uflgactive suppresses this final refining sort while suppressing
* multiple records with equal sort values for -u option.
*/
for(k = nfields>0; k<=nfields; k++) {
fp = &fields[k];
if ( (fp->fcmp == XNUM) || (fp->fcmp == XSTR) ) {
/* Keys are prepended: the leading 2-byte length is the offset of the
 * record text, which is what gets compared.
 */
pa = a;
pb = b;
pa += ((pa[0] << 8) | (pa[1]));
pb += ((pb[0] << 8) | (pb[1]));
return(cmpa(pa, pb));
}
}
return(cmpa(a, b));
}
/* this routine was recoded in 370 assembler as part of the peachtree
 * activities. HJS-S
 *
 * NAME: cmpa
 * FUNCTION: compare two strings
 *	This routine is modified to use strcoll.
 *	pa and pb are NEWLINE-terminated records.  Each NEWLINE is
 *	temporarily replaced by a NUL for the comparison and restored
 *	before returning.
 * RETURNS: strcoll(pb, pa) multiplied by fields[0].rflg; when that
 *	is 0, the difference of the first differing wide characters
 *	(b minus a) within the length of the shorter string.
 */
static int
cmpa(register char *pa, register char *pb)
{
	register int alen, blen;
	register int r,i,j;
	wchar_t wcpa[BUFSIZ], wcpb[BUFSIZ];
	wchar_t *pwcpa = &wcpa[0], *pwcpb = &wcpb[0];
	int wcpalen,wcpblen;

	alen = eol(pa) - pa;
	blen = eol(pb) - pb;
	pa[alen] = '\0';
	pb[blen] = '\0';
	r = strcoll(pb, pa) * fields[0].rflg;
	/* Strings that have different collation elements with the same
	 * collation values in corresponding positions will effectively
	 * collate on their relative positions in the source file. To make
	 * such strings collate consistently independent of their positions
	 * in the file, compare their characters' wchar_t values. (P37708)
	 */
	if (r == 0) {
		/* mbstowcs() may store one wide character per input byte
		 * plus the terminator, so a record of BUFSIZ bytes or
		 * more does not fit the stack arrays; use heap space for
		 * those instead of overrunning the stack.
		 */
		if (alen >= BUFSIZ)
			pwcpa = (wchar_t *)malloc((size_t)(alen+1) * sizeof(wchar_t));
		if (blen >= BUFSIZ)
			pwcpb = (wchar_t *)malloc((size_t)(blen+1) * sizeof(wchar_t));
		wcpalen = wcpblen = -1;
		if (pwcpa != NULL && pwcpb != NULL) {
			wcpalen = (int)mbstowcs(pwcpa,pa,alen+1);
			wcpblen = (int)mbstowcs(pwcpb,pb,blen+1);
		}
		if (wcpalen < 0 || wcpblen < 0) {
			/* No memory or an invalid multibyte sequence: the
			 * wide strings are unusable, so fall back to a
			 * byte comparison, which is still deterministic.
			 */
			r = strcmp(pb, pa);
		} else {
			/* Compare characters' wide char values */
			i = (wcpalen<wcpblen?wcpalen:wcpblen);
			j=0;
			do {
				r = (int)(pwcpb[j] - pwcpa[j]);
				j++;
			} while (--i > 0 && r == 0) ;
		}
		if (pwcpa != NULL && pwcpa != &wcpa[0])
			free(pwcpa);
		if (pwcpb != NULL && pwcpb != &wcpb[0])
			free(pwcpb);
	}
	pa[alen] = '\n';
	pb[blen] = '\n';
	return (r);
}
/*
 * NAME: Acmp
 * FUNCTION: compare two strings
 * This is the oldfashioned AT&T code; assuming the
 * native collating sequence. It is activated using
 * the -A flag.
 *
 * PARAMETERS: i, j - the two NEWLINE-terminated records.
 * RETURNS: 0 when the records are equal on every field (and, unless
 *	uflgactive is set, in Acmpa()); otherwise a value whose sign
 *	is that of (j compared with i) multiplied by the deciding
 *	field's rflg.
 */
static int
Acmp(char *i, char *j)
{
register char *pa, *pb;
register char *ignore;
char *code;
char *la, *lb;
char *ipa, *ipb, *jpa, *jpb;
register int sa;
int sb;
int a, b;
int k;
struct field *fp;
wchar_t wcchar, *pwcchar=&wcchar;
int chrlen;
/* With key fields defined (nfields > 0) the loop visits fields[1..nfields];
 * without any it visits fields[0] only.
 */
for(k = nfields>0; k<=nfields; k++) {
fp = &fields[k];
pa = i;
pb = j;
/* k starts at 0 or 1, so this test is always true here */
if(k >= 0) {
la = skip(pa, fp, 1);
pa = skip(pa, fp, 0);
/* showme(pa,la); */ /* showme() routine for debugging */
lb = skip(pb, fp, 1);
pb = skip(pb, fp, 0);
/* showme(pb,lb); */ /* showme() routine for debugging */
} else {
la = eol(pa);
lb = eol(pb);
}
if(fp->fcmp==NUM) {
/* sa and sb carry the sign of each number, pre-multiplied by rflg */
sa = sb = fp->rflg;
/* Skip leading <blank>s in both records.
* (POSIX Draft 11 decouples -n from implied -b .)
*/
if (mbcodeset) {
while( *pa != '\n' && (chrlen=mbtowc(pwcchar,pa,mb_cur_max))>0
&& iswblank(wcchar) )
pa += chrlen;
while( *pb != '\n' && (chrlen=mbtowc(pwcchar,pb,mb_cur_max))>0
&& iswblank(wcchar) )
pb += chrlen;
} else {
while((*pa != '\n') && isblank(*pa))
pa++;
while((*pb != '\n') && isblank(*pb))
pb++;
}
if(*pa == '-') {
pa++;
sa = -sa;
}
if(*pb == '-') {
pb++;
sb = -sb;
}
/* See explanation of algorithm in cmp() above.*/
/* ipa/ipb stop at the radix position: the first byte that is neither a
 * digit nor a thousands separator.
 */
for(ipa = pa; ipa<la&& (isdigit(*ipa)||*ipa==thsep||*ipa==thsepmon); ipa++);
for(ipb = pb; ipb<lb&& (isdigit(*ipb)||*ipb==thsep||*ipb==thsepmon); ipb++);
jpa = ipa;
jpb = ipb;
/* a ends up holding the difference in the most significant differing
 * digit, since the scan runs right to left and later hits overwrite it.
 */
a = 0;
if(sa==sb)
while(ipa > pa && ipb > pb) {
ipa--;
ipb--;
while(ipa>pa && (*ipa==thsep || *ipa==thsepmon))
ipa--;
while(ipb>pb && (*ipb==thsep || *ipb==thsepmon))
ipb--;
if(b = *ipb - *ipa)
a = b;
}
/* NOTE(review): unlike cmp(), these two loops do not exempt thousands
 * separators from the "nonzero digit" test - confirm this is intended.
 */
while(ipa > pa)
if(*--ipa != '0')
return(-sa);
while(ipb > pb)
if(*--ipb != '0')
return(sb);
if(a) return(a*sa);
/* integer parts equal: step over the radix character and compare fractions */
if(*(pa=jpa) == dec || *(pa=jpa) == decmon)
pa++;
if(*(pb=jpb) == dec || *(pb=jpb) == decmon)
pb++;
if(sa==sb)
while(pa<la && isdigit(*pa)
&& pb<lb && isdigit(*pb))
if(a = *pb++ - *pa++)
return(a*sa);
while(pa<la && isdigit(*pa))
if(*pa++ != '0')
return(-sa);
while(pb<lb && isdigit(*pb))
if(*pb++ != '0')
return(sb);
continue;
}
/* Character comparison: ignore[] marks bytes to skip and code[] maps each
 * byte to the value that is compared.
 * NOTE(review): both tables are indexed with a plain char; this
 * presumably relies on char being unsigned on the target - confirm.
 */
code = fp->code;
ignore = fp->ignore;
loop:
while(ignore[*pa])
pa++;
while(ignore[*pb])
pb++;
/* The "else continue" below belongs to the inner if: when field a is
 * exhausted, b decides if it still has data; if both are exhausted the
 * fields are equal and the next field is tried.
 */
if(pa>=la || *pa=='\n')
if(pb<lb && *pb!='\n')
return(fp->rflg);
else continue;
if(pb>=lb || *pb=='\n')
return(-fp->rflg);
sa = code[*pb++] - code[*pa++];
if(sa == 0)
goto loop;
return(sa*fp->rflg);
}
if(uflgactive)
return(0);
/* If no return yet: the records collate equally on all qualifications
* specified by sort options. To make sort results independent of the
* order of sort records, now sort on characters of the entire record.
* uflgactive suppresses this final refining sort while suppressing
* multiple records with equal sort values for -u option.
*/
return(Acmpa(i, j));
}
/*
 * NAME: Acmpa
 * FUNCTION: compare two strings
 *	This is the oldfashioned AT&T code, assuming that
 *	collation is according to the native code order.
 *	It is activated using the -A flag.
 * RETURNS: 0 when both NEWLINE-terminated records are identical;
 *	otherwise fields[0].rflg when pa ends first or holds the
 *	smaller character at the first difference, and
 *	-fields[0].rflg in the opposite cases.
 */
static int
Acmpa(register char *pa, register char *pb)
{
	/* advance over the common prefix */
	for (;;) {
		if (*pa != *pb)
			break;
		if (*pa == '\n')
			return(0);
		pa++;
		pb++;
	}
	if (*pa == '\n')
		return(fields[0].rflg);
	if (*pb == '\n')
		return(-fields[0].rflg);
	if (*pb > *pa)
		return(fields[0].rflg);
	return(-fields[0].rflg);
}
/*
 * NAME: skip
 * FUNCTION: skip a field
 *	Returns a pointer into the record p for one boundary of the
 *	sort field fp.  Callers in this file use j == 0 for the start
 *	of the field and j == 1 for its end.  fp->m[j] is the number
 *	of fields to pass over and fp->n[j] the number of characters
 *	to advance after that; a negative fp->m[j] selects the end of
 *	the line.  The result never lies beyond the record's NEWLINE.
 */
static char *
skip(register char *p, struct field *fp, int j)
{
register i;
register wchar_t tbc;
wchar_t wcchar, *pwcchar=&wcchar;
int chrlen=1;
if( (i=fp->m[j]) < 0)
return((char *)eol(p));
/* Skip characters to the beginning of the next field.
* The next field begins at the first character following the next field separator,
* where a field separator is
* if (tabchar specified by -t tabchar option)
* then {next occurrence of tabchar at or following initial p*}
* else {next occurrence of one or more consecutive <blank>s following
* a non<blank>.}
* The sorted part of the next field begins at
* if (-b option or b field modifier applies to the field)
* then {first non-<blank> character at or past beginning of field}
* else {beginning of the field} .
*/
if (mbcodeset) { /* skip in multibyte code set */
if (tbc = tabchar) { /* skip past next tabchar */
while (--i >= 0) {
while((chrlen=mbtowc(pwcchar,p,mb_cur_max))>0 && wcchar!=tbc)
if(*p != '\n')
p += chrlen;
else return(p);
/* i >= 0 always holds inside this loop; step over the separator
 * (one byte if it could not be decoded)
 */
if (i >= 0) {
chrlen=mbtowc(pwcchar,p,mb_cur_max);
p += (chrlen<1?1:chrlen);
}
}
} else { /* skip past end of non-<blank> string following
* next <blank> string
*/
while (--i >= 0) {
while((chrlen=mbtowc(pwcchar,p,mb_cur_max))>0 && iswblank(wcchar))
p += chrlen;
while(*p != '\n'
&& (chrlen=mbtowc(pwcchar,p,mb_cur_max))>0 && !iswblank(wcchar))
p += chrlen;
if (*p == '\n')
return(p);
}
}
} else { /* skip in single byte code set */
if (tbc = tabchar) { /* skip past next tabchar */
while (--i >= 0) {
while(*p != tbc)
if(*p != '\n')
p++;
else return(p);
if (i >= 0)
p++;
}
} else { /* skip past end of non-<blank> string following
* next <blank> string
*/
while (--i >= 0) {
while(isblank(*p))
p++;
while(*p != '\n' && !isblank(*p))
p++;
if (*p == '\n')
return(p);
}
}
}
/* This is actually the last character in the field fp->m[j]-1 */
/* End position with no character offset: with a tab character, back up
 * by the length of the last character stepped over (chrlen, or one byte
 * in a single byte code set) - presumably the separator; otherwise p is
 * used as is.
 */
if ((j==1) && (fp->n[j]==0)) {
if (tabchar)
return (mbcodeset?p-chrlen:p-1);
else
return (p);
}
/* Skip leading blanks in field if -b option or b modifier applies.*/
/* Note that -b only skips <blank>s. It does not skip non-<blank>
* field separator characters if specified by -t option.
*/
if(fp->bflg[j]) {
if (mbcodeset) { /* skip in multibyte code set */
if (fp->m[j] > 0 && !tabchar) {
p += mblen(p,mb_cur_max);
}
while((chrlen=mbtowc(pwcchar,p,mb_cur_max))>0 && iswblank(wcchar))
p += chrlen;
} else { /* skip in single byte code set */
if (fp->m[j] > 0 && !tabchar)
p++;
while(isblank(*p))
p++;
}
}
/* Skip to character position specified by n of +m.n or y of -kx.y */
/* Advances by whole characters; a byte that cannot be decoded counts as
 * one character.  Stops at the NEWLINE.
 */
i = fp->n[j];
while((i-- > 0) && (*p != '\n'))
if ( (chrlen=mbtowc(pwcchar,p,mb_cur_max)) >0 )
p += chrlen;
else p++;
return(p);
}
/*
 * NAME: eol
 * FUNCTION: find the end of the line.
 *	Returns a pointer to the first NEWLINE in p.  If the string
 *	ends without one, a diagnostic is printed and the program
 *	exits with status 2.
 */
static char *
eol(register char *p)
{
	char *nl;

	nl = (char *)strchr(p, '\n');
	if (nl == NULL) {
		newdiag(MSGSTR(NULLS2,
		"cannot process data file (check for null chars)\n"),"");
		exit(2);
	}
	return(nl);
}
/*
 * NAME: copyproto
 * FUNCTION: copy the prototype for the sort fields.
 *	Initializes fields[nfields] from proto.  The copy covers as
 *	many whole ints as fit in sizeof(proto).
 */
static void
copyproto(void)
{
	(void) memcpy((void *)&fields[nfields], (void *)&proto,
	    (sizeof(proto)/sizeof(int)) * sizeof(int));
}
/*
* NAME: initree
* FUNCTION: initialize the binary search tree
*/
static void
initree(void)
{
register struct btree **tpp, *tp;
register int i;
for (tp = &(tree[0]), tpp = &(treep[0]), i = TREEZ; --i >= 0;)
*tpp++ = tp++;
}
/*
 * NAME: cmpsave
 * FUNCTION: for n >= 2 return the number of times n+1 can be halved
 *	(integer division) before reaching 1; return 0 for n < 2.
 */
static int
cmpsave(register int n)
{
	register int award = 0;
	register int v;

	if (n < 2)
		return (0);
	v = n + 1;
	while (v > 1) {
		v >>= 1;
		award++;
	}
	return (award);
}
/*
 * NAME: newoptfield
 * FUNCTION: Process field specification option string.
 *	Parses s (a "+pos1", "-pos2" or "-k" key definition) and
 *	stores the positions and modifiers in fields[nfields],
 *	starting a new field entry first when required.  Any syntax
 *	error ends the program through Usage().
 *	The p argument is not read: it is reassigned to
 *	&fields[nfields] before its first use.
 */
static void
newoptfield(char *s,int spos,int ink, struct field *p)
{
/* Entry: ink == 1 => s points to start of "-k" option value string
* ink == 0 && spos == 0 => s points to start of "+" pos1 string
* ink == 0 && spos == 1 => s points to start of "-" pos2 string
* nfields = number of prior field spec options
* pos1flag = 0 if no previous call to newoptfield(), or if previous call
* to newoptfield() was for "-m.n" or "-k...,m.n";
* = 1 if previous call to newoptfield() was for "+m.n" or "-km.n".
* Only single-byte portable character set characters are accepted as option
* values for +Pos1, -Pos2, and -kOptlist options.
*/
wchar_t *sc;
int rpart; /* 0 if processing "+m.n" part, 1 if processing "-m.n" part (obsolescent)*/
/* 0 if processing "-km.n" part, 1 if processing ",m.n" part (POSIX -k)*/
int state; /* 0 => no part of "m.n" processed */
/* 1 => "m" of "m.n" has been processed */
/* 2 => "." of "m.n" has been processed */
int numval;
if ( ink || !spos || !pos1flag ) { /* Either starting "-k", or starting "+m.n", or starting
* "-m.n" for which there was no corresponding "+m.n" .
*/
if(++nfields>=NF) {
diag(MSGSTR(KEYS2,"too many keys\n"), "");
exit(2);
}
if(!spos || (ink && *s!=','))
posflag++;
/* start the new entry from the prototype field */
copyproto();
}
p = &fields[nfields];
state = 0;
rpart = (!ink && spos);
/* convoptarg() leaves the wide character form of s in wcoptarg */
convoptarg(s);
for (sc=wcoptarg;*sc!=L'\0';sc++)
{
switch(*sc){
case L'.':
if (state++ == 0) { /* Omitted "m" of "m.n" defaults to 0 */
p->m[rpart] = 0;
state++;
}
if (state > 2) { /* Too many "." */
Usage();
}
break;
case L'-':
/* "-" may only introduce the end position of a "+pos1 -pos2" pair */
if (ink || !rpart++ ) {
Usage();
}
else
state = 0;
break;
case L',': /* found -k...,m.n */
/* "," is valid only within -k, and only once */
if (!ink || rpart++ ) {
Usage();
}
else
state = 0;
break;
case L'b':
p->bflg[rpart]++;
break;
case L'd':
p->ignore = dict;
break;
case L'f':
p->code = fold;
break;
case L'i':
p->ignore = nonprint;
break;
case L'n':
p->fcmp = NUM;
break;
case L'r':
p->rflg = -1;
break;
default:
if(iswdigit(*sc)) {
/* wcstol() leaves sc just past the digits it consumed */
numval = (int) wcstol(sc,&sc,10);
if((ink?numval-(state==2?0:1):numval)<0) {
Usage();
}
if(ink) { /* Convert -k measurements to +pos1 -pos2 measurements.
* Per POSIX 1003.2/D11(4.58.7,11785-11789),
* -ka.b,c.d = if d==0 then +(a-1).(b-1) -c.d
* else +(a-1).(b-1) -(c-1).d
*/
/* End part (rpart != 0): a nonzero character offset d lowers the
 * already stored field number c by one.  Start part: the number
 * itself is lowered by one.
 */
if(rpart)
if(state && numval>0)
p->m[rpart]--;
else;
else
numval--;
};
/* step back one so that the sc++ of the for loop lands on the first
 * character after the number
 */
sc--;
/* the first number of a part is the field count, a later one the
 * character offset
 */
if (!state++){
p->m[rpart] = numval;
} else {
p->n[rpart] = numval;
}
} else {
Usage();
}
break;
} /* switch(*sc) */
} /* for (sc=... */
pos1flag = !rpart;
}
/*
 * NAME: convoptarg
 * FUNCTION: convert multibyte string optarg to wchar_t string wcoptarg
 *	s is the multibyte option argument.  On success the global
 *	wcoptarg holds its wide character form, terminator included.
 *	If s is not a valid multibyte string, the Usage message is
 *	written and the program terminates with a nonzero exit code.
 */
static void
convoptarg(char *s)
{
	/* Globals: static wchar_t *wcoptarg, extern char *optarg */
	int converted;

	/* room for every byte of s plus the terminator */
	converted = mbstowcs(wcoptarg, s, strlen(s) + 1);
	if (converted != -1)
		return;
	Usage();
}
/*
 * NAME: Usage
 * FUNCTION: Display Usage message and exit >0
 *	The message is written to stderr and the exit status is 2.
 */
static void
Usage(void)
{
	char *text;

	text = MSGSTR(USAGE2,
"Usage: sort\t[-Abcdfimnru] [-T Directory] [-t Character] [-o File]\n\
\t\t[-y[Kilobytes]] [-z Recordsize] [-k Keydefinition]...\n\
\t\t[[+Position1][-Position2]]... [File]...\n");
	fprintf(stderr, text);
	exit(2);
}
/* qsexc: exchange *p and *q.  qstexc: rotate three entries, so that
 * *p receives *r, *r receives *q and *q receives the old *p.
 * Both use a variable t of the element type declared by the caller and
 * expand to several statements.
 */
#define qsexc(p,q) t= *p;*p= *q;*q=t
#define qstexc(p,q,r) t= *p;*p= *r;*r= *q;*q=t
/*
 * NAME: qksort
 * FUNCTION: sort the array of record pointers a[0] .. l[-1] in place,
 *	ordering them with (*compare)().
 *	Entries that compare equal to the pivot are gathered in the
 *	range lp..hp around it, so only the two outer parts remain to
 *	be sorted.  The smaller part is sorted by a recursive call and
 *	the larger one by looping back to start.
 */
static void
qksort(char **a, char **l)
{
register char **i, **j;
register char **lp, **hp;
char *t;
int c, delta;
unsigned n;
start:
/* nothing to do for fewer than two entries */
if((n=l-a) <= 1)
return;
n /= 2;
/* For larger ranges, compare the middle entry with its two neighbours
 * and, depending on the outcome, move the pivot index by one.
 */
if (n >= MTHRESH) {
lp = a + n;
i = lp - 1;
j = lp + 1;
delta = 0;
c = (*compare)(*lp, *i);
if (c < 0) --delta;
else if (c > 0) ++delta;
c = (*compare)(*lp, *j);
if (c < 0) --delta;
else if (c > 0) ++delta;
if ((delta /= 2) && (c = (*compare)(*i, *j)))
if (c > 0) n -= delta;
else n += delta;
}
/* lp..hp is the run of entries equal to the pivot; i scans upward from
 * a and j downward from l-1.
 */
hp = lp = a+n;
i = a;
j = l-1;
for(;;) {
if(i < lp) {
if((c = (*compare)(*i, *lp)) == 0) {
/* equal to the pivot: grow the equal run downward */
--lp;
qsexc(i, lp);
continue;
}
if(c < 0) {
++i;
continue;
}
}
loop:
if(j > hp) {
if((c = (*compare)(*hp, *j)) == 0) {
/* equal to the pivot: grow the equal run upward */
++hp;
qsexc(hp, j);
goto loop;
}
if(c > 0) {
if(i == lp) {
/* nothing left to exchange with on the low side: shift the equal
 * run up by one and put *j below it
 */
++hp;
qstexc(i, hp, j);
i = ++lp;
goto loop;
}
qsexc(i, j);
--j;
++i;
continue;
}
--j;
goto loop;
}
if(i == lp) {
/* Both scans are finished.  Recurse on the smaller outer part and
 * continue at start with the larger one.
 */
if(lp-a >= l-hp) {
qksort(hp+1, l);
l = lp;
} else {
qksort(a, lp);
a = hp+1;
}
goto start;
}
/* the high side is finished but *i still belongs above the equal run:
 * shift the run down by one and put *i above it
 */
--lp;
qstexc(j, lp, i);
j = --hp;
}
}
/*
* NAME: rderror
* FUNCTION: print read error
*/
static void
rderror(char *s)
{
newdiag(MSGSTR(EREAD2,"read error on %s\n"), s == NULL ? MSGSTR(STDIN2,"stdin") : s);
term();
}
/*
* NAME: wterror
* FUNCTION: print write error
*/
static void
wterror(int x)
{
if (x == sorting)
newdiag(MSGSTR(EWRITE2,"Write error while sorting.\n"),"");
else newdiag(MSGSTR(EWRITE3,"Write error while merging.\n"),"");
term();
}
/*
 * NAME: grow_core
 * FUNCTION: Increase the size of temporary sorting area lspace .
 * ENTRY: 1. IF lspace!=NULL
 *	     THEN lspace points to an available block of size cursize
 * EXIT:  1. The requested total (size + cursize) is first limited to
 *	     the range MINMEM..MAXMEM.  If a block of that size could
 *	     be obtained, lspace points to it and the return value is
 *	     its size minus cursize; otherwise lspace is unchanged and
 *	     the return value is 0.
 *	  2. ANY POINTER INTO lspace MAY NOW HAVE AN INVALID VALUE.
 * INFORMALLY: increase the size of lspace from cursize by the largest
 *	possible amount not larger than size.
 */
static unsigned
grow_core(unsigned size, unsigned cursize)
{
	unsigned long want;
	size_t newsize;

	want = (unsigned long) size + (unsigned long) cursize;
	if (want < MINMEM) {
		want = MINMEM;
	} else if (want > MAXMEM) {
		want = MAXMEM;
	}
	newsize = (size_t) want;
	/* first call allocates, later calls resize the existing block */
	if (lspace != (int *)NULL)
		newlspace = (int *)realloc(lspace,newsize);
	else
		newlspace = (int *)malloc(newsize);
	if (newlspace != (int *)NULL) {
		lspace = newlspace;
		return(newsize-cursize);
	}
	return(0);
}
/*
 * NAME: fgetrec
 * FUNCTION: get a char string from the stream and, if field sorting or
 * other special functions (e.g., folding) is needed, retrieve
 * sort fields, converting string fields to strxfrm format, and
 * prepend sort key(s) to the record. If possible, the record
 * is built in the sort's work area; if that is not possible
 * (not enough space) it is built in a work area and moved.
 * The key area is structured as follows:
 * Bytes 0-1: key length (offset to data)
 * Bytes 2-3: length flag field 0
 * Bytes 4-i: key field 0
 * Bytes i+1-i+2: length flag field 1
 * Bytes i-3... key field 1 ...
 * Subsequent fields follow in same format.
 *
 * PARAMETERS: s - destination, n - space available at s,
 *	stream - input, f - input file name used in diagnostics
 *	(NULL selects the message without a name), partialp - set to
 *	1 when only part of a record was delivered, else to 0.
 * RETURNS: 0 at end of input, otherwise the number of bytes
 *	delivered at s.
 * NOTE: the remainder of a record that did not fit is kept in the
 *	statics xbb/recl and handed out by the following call(s).
 */
static int
fgetrec(char *s, int n, FILE *stream, char *f, int *partialp)
{
char *p;
char *l;
char *ignore;
char *code;
char *tb;
char *eb;
char *yb;
char *xb;
static char *xbb;
static int recl = 0;
int k;
struct field *fp;
char savechar;
int i, j, rc;
*partialp = 0; /* default is full record */
/* No keys to prepend: read the line straight into s */
if ( (cmpset == 0) || (Aflag == 1) ) {
recl = 0;
if (fgets(s, n, stream) == NULL)
return (0);
if ((rc = strlen(s)) == 0)
return (0);
else {
*partialp = (s[rc-1] != '\n'); /* trailing '\n' */
return (rc); /* means complete read */
}
}
/* The following code will prepend the record
with an encoded version of the sort keys.
*/
/* recl != 0: part of the previous record is still waiting at xbb */
if (recl != 0) {
if (recl < n) {
memcpy(s, xbb, recl+1);
j = --recl;
recl = 0;
return(j);
} else {
memcpy(s, xbb, n);
xbb += n;
recl -= n;
*partialp = 1; /* indicate partial read */
return(n);
}
}
/* Read the text of the next record into the work buffer tbuf */
tb = tbuf;
if (fgets(tb, SORT_LINE_MAX-1, stream) == NULL) {
rc = 0;
}
else rc = strlen(tb);
if (rc == 0)
return (rc);
te = &(tb[strlen(tb)-1]);
/* From here on rc counts the text including its terminating NUL */
if (*te == '\n') {
te += 2;
rc++;
}
else {
/* NOTE(review): fgets() above stores at most SORT_LINE_MAX-2 bytes, so
 * this test looks always true and the "line too long" branch below
 * unreachable - confirm.
 */
if (rc < (SORT_LINE_MAX-1)) {
/* the last record of the input */
/* file is missing a NEWLINE */
if(f == NULL) newdiag(MSGSTR(NEWLINE4,
"warning: missing NEWLINE added at EOF\n"), "");
else newdiag(MSGSTR(NEWLINE5,
"warning: missing NEWLINE added at end of input file %s.\n")
, f);
*++te = '\n';
*++te = '\0';
rc += 2;
}
else {
fprintf(stderr,MSGSTR(TOOLONG2,
"fatal: line too long %d\n"),SORT_LINE_MAX);
term();
}
}
eb = ebuf;
/* Build directly in s when it is more than five times the text length,
 * otherwise in xbuf.  xbb marks the start, xb the write position and
 * xe the last byte of the area in use.
 */
if ((rc * 5) < n) {
xb = xbb = s;
xe = s + n - 1;
} else {
xb = xbb = xbuf;
xe = xbuf + (sizeof(xbuf) -1);
}
xb += 2; /* save space for length */
/* With key fields defined, the entry for field 0 is written as a bare
 * length of 2 (no data)
 */
if (nfields > 0) {
*xb++ = '\0';
*xb++ = '\2';
}
for(k = nfields>0; k<=nfields; k++) {
fp = &fields[k];
p = tb;
/* k starts at 0 or 1, so this test is always true here */
if(k >= 0) {
l = skip(p, fp, 1);
p = skip(p, fp, 0);
} else {
l = eol(p);
}
/* an end that lies before the start yields an empty field */
if (l < p)
l = p;
if((fp->fcmp==NUM) || (fp->fcmp==XNUM)) {
/* Numeric field: stored unchanged as length (2 bytes), field text, NUL */
fp->fcmp=XNUM;
j = l - p + 3;
*xb++ = (j >> 8);
*xb++ = (j & 0xff);
if ((l-p) > (xe - xb - 1)) {
/* no room in s: move what has been built so far to xbuf */
if (xbb == s) {
j = xb - xbb;
memcpy(xbuf, xbb, j);
xb = xbb = xbuf;
xb += j;
xe = xbuf + (sizeof(xbuf) -1);
}
else {
fprintf(stderr,MSGSTR(TOOLONG2,
"fatal: line too long %d\n"),SORT_LINE_MAX);
term();
}
}
memcpy(xb, p, (l-p));
xb += (l-p);
*xb++ = '\0';
continue;
}
/* String field: stored as length (2 bytes) followed by its strxfrm()
 * form; yb remembers where the length goes.
 */
fp->fcmp=XSTR;
eb = ebuf;
yb = xb;
xb += 2;
code = fp->code;
ignore = fp->ignore;
if ( (ignore != dict) && (ignore != nonprint) &&
(code != fold) ) {
/* Transform in place: NUL-terminate the field for strxfrm(); the saved
 * byte is put back once the key has been stored.
 */
savechar = l[0];
*l = '\0';
j = strxfrm(xb, p, (xe - xb - 1));
if (j > (xe - xb - 1)) {
/* no room in s: move the partial key area to xbuf and transform again */
if (xbb == s) {
i = xb - xbb;
memcpy(xbuf, xbb, i);
yb = xb = xbb = xbuf;
yb += i-2;
xb += i;
xe = xbuf + (sizeof(xbuf) -1);
j = strxfrm(xb, p, (xe - xb - 1));
}
}
if (j < (xe - xb - 1)) {
yb[0] = ((j+3) >> 8);
yb[1] = ((j+3) & 0xff);
xb += ++j;
l[0] = savechar;
}
else {
fprintf(stderr,MSGSTR(TOOLONG2,
"fatal: line too long %d\n"),SORT_LINE_MAX);
term();
}
} else {
/* Dictionary, printable-only or folded compare: reduce the field into
 * ebuf with fldtowa() and transform that copy.
 */
fldtowa(p, l, eb, ignore, code);
j = strxfrm(xb, ebuf, (xe - xb - 1));
if (j > (xe - xb - 1)) {
if (xbb == s) {
i = xb - xbb;
memcpy(xbuf, xbb, i);
yb = xb = xbb = xbuf;
yb += i-2;
xb += i;
xe = xbuf + (sizeof(xbuf) -1);
j = strxfrm(xb, ebuf, (xe - xb - 1));
}
}
if (j < (xe - xb - 1)) {
/* NOTE(review): this repeats the strxfrm() call made just above with the
 * same arguments; it looks redundant - confirm.
 */
j = strxfrm(xb, ebuf, (xe - xb - 1));
xb += ++j;
j += 2;
*yb++ = (j >> 8);
*yb = (j & 0xff);
} else {
fprintf(stderr,MSGSTR(TOOLONG2,
"fatal: line too long %d\n"),SORT_LINE_MAX);
term();
}
}
}
/* Key area complete: store its total length in the first two bytes */
recl = xb - xbb;
xbb[0] = (recl >> 8);
xbb[1] = (recl & 0xff);
/* Append the record text, moving to xbuf first if s is too small */
if (rc > (xe - xb - 1)) {
if (xbb == s) {
i = xb - xbb;
memcpy(xbuf, xbb, i);
xb = xbb = xbuf;
xb += i;
xe = xbuf + (sizeof(xbuf) -1);
}
}
if (rc < (xe - xb - 1))
memcpy(xb, tb, rc+1);
else {
fprintf(stderr,MSGSTR(TOOLONG2,
"fatal: line too long %d\n"),SORT_LINE_MAX);
term();
}
recl += rc;
/* recl = current record length and key length including \0 terminator
** n = the amount of allocated memory available
** if the recl is <= the amount of allocated memory available
** copy the record into allocated memory
** else copy as much as possible and save the rest until the
** next call to fgetrec(). Set recl to amount remaining
** and return the amount copied.
*/
if (recl <= n) { /* Defect 8046 */
if (s != xbb)
memcpy(s, xbb, recl);
/* the returned length does not count the terminating NUL */
j = --recl;
recl = 0;
return(j);
}
else {
/* There must be enough room for the \n and \0.
** if not, fool it into copying all except \n and
** \0. If the two are not copied together, there
** are problems in sort() when it goes to look for
** the \n if \0 was the only byte copied the second
** time around */
if (recl-1 == n) n--; /* Defect 14770 */
memcpy(s, xbb, n);
s[n] = '\0';
xbb += n;
recl -= n;
*partialp = 1;
return(n);
}
}
/*
 * NAME: fldtowa
 * FUNCTION: copies input string between (p) and (l) to work area (b),
 *	removing non-sorting characters (i.e. non-dictionary and/or
 *	non-printing) and optionally folding into uppercase.
 * PARAMETERS: p, l - first byte of the field and the byte just past
 *	it; b - work area receiving the NUL-terminated result;
 *	ignore - dict or nonprint to drop the corresponding
 *	characters, anything else to keep all; code - fold to convert
 *	lower case to UPPER case, anything else to copy unchanged.
 *	No bound is applied to b: the caller must supply a work area
 *	large enough for the field.
 */
static void
fldtowa(char *p, char *l, char *b, char *ignore, char *code)
{
	wchar_t wc;
	wchar_t *pwc = &wc;
	char *sb;
	int i;
	char up[MB_LEN_MAX];
	char *upp;
	int chrlen;

	sb = b;
	while (p < l) {
		if (mbcodeset) {	/* Copy multibyte character set field */
			/* Skip characters excluded from consideration in the field.
			 * p<l is tested first so that nothing at or beyond the end
			 * of the field is decoded.
			 */
			if (ignore == dict) {	/* skip MBCS non-(<alphanum> or <blank>) */
				while (p<l && (chrlen=mbtowc(pwc,p,mb_cur_max))>0
				    && !(iswalnum(wc) || iswblank(wc)))
					p += chrlen;
			}
			if (ignore == nonprint) {	/* skip MBCS nonprinting non-(\t , \n) */
				while (p<l && (chrlen=mbtowc(pwc,p,mb_cur_max))>0
				    && !(iswprint(wc) || wc == L'\t' || wc == L'\n'))
					p += chrlen;
			}
			if (p>=l) continue;
			/* Copy one character in the field */
			if (code == fold) {	/* copy MBCS folding lower case to UPPER */
				if ( (chrlen = mbtowc(pwc,p,mb_cur_max))>0 ) {
					p += chrlen;
					wc = towupper(wc);
					chrlen = wctomb(up,wc);
					for(i=0,upp=up;i<chrlen;i++)
						*b++ = *upp++;
				} else p++;
			} else {	/* copy MBCS directly */
				/* Copy every byte of the character in one go.  If only
				 * the first byte were copied, the next pass would examine
				 * a trailing byte as if it started a character and could
				 * drop it as a non-sorting character.  A byte that cannot
				 * be decoded is copied by itself.
				 */
				i = mblen(p,mb_cur_max);
				if (i < 1)
					i = 1;
				while (i-- > 0 && p < l)
					*b++ = *p++;
			}
		} else {	/* Copy single byte character set field */
			/* Skip characters excluded from consideration in the field.
			 * The <ctype.h> functions need an unsigned char value.
			 */
			if (ignore == dict) {	/* skip SBCS non-(<alphanum> or <blank>) */
				while (p<l && !(isalnum((unsigned char)*p)
				    || isblank((unsigned char)*p)))
					p++;
			}
			if (ignore == nonprint) {	/* skip SBCS nonprinting non-(\t , \n) */
				while (p<l && !(isprint((unsigned char)*p)
				    || *p == '\t' || *p == '\n'))
					p++;
			}
			if (p>=l) continue;
			/* Copy one character in the field */
			if (code == fold) {	/* copy SBCS folding lower case to UPPER */
				*b++ = toupper((unsigned char)*p);
				p++;
			} else {	/* copy SBCS directly */
				*b++ = *p++;
			}
		}
	}
	*b = '\0';
	/* if an input string contains only non-sorting characters, treat it
	   as a null string. */
	if (strcmp(sb, "\n") == 0)
		*sb = '\0';
}