Files
Arquivotheca.SunOS-4.1.4/lib/libc/gen/common/strxfrm.c
seta75D ff309bfe1c Init
2021-10-11 18:37:13 -03:00

436 lines
10 KiB
C

/* Copyright (c) 1988 Sun Microsystems Inc. */
/* All Rights Reserved */
#if !defined(lint) && defined(SCCSIDS)
static char *sccsid = "@(#)strxfrm.c 1.1 94/10/31 SMI";
#endif
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <locale.h>
#include <malloc.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
/* Size of each weight table: the primary table is followed MAXSTR bytes
 * later by the secondary table, and both are indexed by an unsigned char.
 */
#define MAXSTR 256
/* Boolean constants; not referenced in the code visible in this file. */
#define TRUE 1
#define FALSE 0
/* Primary weight meaning "emit no primary weight for this character";
 * strxfrm() also writes it as the separator between the primary and the
 * secondary weight strings.
 */
#define IGNORE_KEY 01 /* magic number defined by colldef */
/* Default is machine code sort order, not including non printing
* characters
*/
/* One pair of collation weights as stored in the collation table. */
struct prim_sec {
unsigned char p; /* primary weight; 0 ends a substitution list in strxfrm() */
unsigned char s; /* secondary weight; 0 means no secondary weight is emitted */
};
/* 1-to-many substitution: the input character `one` is replaced by the
 * list of weight pairs that starts at `many`.  An entry whose `one` is 0
 * terminates the table.
 */
struct one_to_m {
unsigned char one;
struct prim_sec *many;
};
/* Table of 1-to-many substitutions: allocated by strxfrm(), filled in by
 * setupcolldef().
 */
static struct one_to_m *one_to_m;
/* 2-to-1 substitution: the input character pair `one`,`two` is replaced
 * by the single weight pair `prim_sec`.  An entry whose `one` is 0
 * terminates the table.
 */
struct two_to_one {
unsigned char one;
unsigned char two;
struct prim_sec prim_sec;
};
/* Table of 2-to-1 substitutions: allocated by strxfrm(), filled in by
 * setupcolldef().
 */
static struct two_to_one *two_to_one;
/*
 * Name of the locale strxfrm() is currently working with; strxfrm()
 * compares it with _locales[LC_COLLATE-1] to decide whether
 * setupcolldef() must run.  Statics start out zeroed, so an empty name
 * means "never used".  It is sized like one row of _locales because
 * those names are strcpy()'d into it.
 */
static char saved_locale[MAXLOCALENAME + 1];
/* Raw contents of the collation file read by setupcolldef(). */
static char *str = NULL;
/* Primary weight table inside str, indexed by unsigned char. */
static char *primary_sort = NULL;
/* Secondary weight table inside str, MAXSTR bytes after primary_sort. */
static char *secondary_sort = NULL;
/* Scratch cursor used while walking lists of weight pairs. */
static struct prim_sec *prim_secp;
/* Current locale names, one row per category; maintained outside this file. */
extern char _locales[MAXLOCALE - 1][MAXLOCALENAME + 1];
/* Start of the 1-to-many table (set by strxfrm()). */
static struct one_to_m *start1tom;
/* Start of the 2-to-1 table, and the cursor strxfrm() scans it with. */
static struct two_to_one *start2to1, *two_to_onep ;
/* openlocale declared in setlocale.c */
int openlocale();
/*
 * So that strcoll works correctly, we use our own version of strcmp
 * that compares the strings as unsigned characters.
 *
 * Returns 0 when the strings are equal, otherwise the difference between
 * the first pair of bytes that differ (negative when s1 sorts first).
 *
 * The return type is spelled out: implicit int is not valid in C99 and
 * later.
 * NOTE(review): this file-local function reuses the name of the library
 * strcmp(); it is called under that name elsewhere in this file, so it
 * is not renamed here.
 */
static int
strcmp(s1, s2)
	register unsigned char *s1, *s2;
{
	/* s2 is always one byte ahead of s1 inside the loop. */
	while (*s1 == *s2++)
		if (*s1++ == '\0')
			return (0);
	/* step s2 back onto the byte that differed */
	return (*s1 - *--s2);
}
/* Strcoll calls up strxfrm to do the work */

#define XFRMFACTOR 2.3	/* Ratio of max length of transformed
			   string and the original string. */

/*
 * collxfrm() transforms s with strxfrm() into the cached buffer *bufp,
 * whose current size is *sizep, growing the buffer when needed.
 *
 * The buffer is first sized with the XFRMFACTOR heuristic.  If strxfrm()
 * reports that the result did not fit (return value >= buffer size), the
 * buffer is grown to exactly that length plus the terminator and the
 * transformation is repeated.
 *
 * Returns the buffer holding the transformed string, or NULL when memory
 * could not be obtained.  On failure *bufp and *sizep are left as they
 * were, so the previous buffer is neither leaked nor lost.
 */
static char *
collxfrm(bufp, sizep, s)
	char **bufp;
	int *sizep;
	char *s;
{
	char *grown;
	int need;
	int strxfrm();

	need = strlen(s) * XFRMFACTOR + 4;
	if (*sizep < need) {
		/* nothing allocated yet: malloc, otherwise grow in place */
		if (*bufp == NULL)
			grown = malloc(need);
		else
			grown = realloc(*bufp, need);
		if (grown == NULL)
			return (NULL);
		*bufp = grown;
		*sizep = need;
	}

	/*
	 * The parentheses matter: the length must be stored before it is
	 * compared, not the result of the comparison.
	 */
	if ((need = strxfrm(*bufp, s, (size_t) *sizep)) >= *sizep) {
		/* heuristic was too small; strxfrm told us the real length */
		need += 1;
		grown = realloc(*bufp, need);
		if (grown == NULL)
			return (NULL);
		*bufp = grown;
		*sizep = need;
		(void) strxfrm(*bufp, s, (size_t) *sizep);
	}
	return (*bufp);
}

/*
 * strcoll() compares s1 and s2 in the collation order of the current
 * locale: both strings are transformed with strxfrm() into buffers that
 * are kept between calls, and the transformed strings are compared with
 * the unsigned strcmp() of this file.
 *
 * Returns the strcmp() result for the transformed strings.  When a
 * buffer cannot be allocated a message is written to stderr and 0 is
 * returned, which a caller cannot tell apart from "equal".
 */
int
strcoll(s1, s2)
	char *s1;
	char *s2;
{
	static char *sv1 = NULL, *sv2 = NULL;
	static int sv1sz = 0;	/* size of space allocated to sv1 */
	static int sv2sz = 0;	/* size of space allocated to sv2 */

	if (collxfrm(&sv1, &sv1sz, s1) == NULL) {	/* out of memory */
		fprintf(stderr, "strcoll: string %s is too long", s1);
		return 0;
	}
	/* Now for sv2, repeat the above steps */
	if (collxfrm(&sv2, &sv2sz, s2) == NULL) {
		fprintf(stderr, "strcoll: string %s is too long", s2);
		return 0;
	}
	/* after we strxfrm s1 and s2, we can simply use strcmp */
	return strcmp(sv1, sv2);
}
/* strxfrm is driven by setlocale() and picks up tables that
 * were generated by the colldef utility.
 *
 * It transforms inputstr into a string of collation weights laid out as
 *
 *	<primary weights> IGNORE_KEY <secondary weights> '\0'
 *
 * so that two transformed strings can be compared bytewise.  In the "C"
 * locale, or when no collation table is loaded, the transformation is a
 * plain copy.
 *
 * outputstr	destination buffer, or NULL to only compute the length
 * inputstr	string to transform
 * maxlen	size of outputstr in bytes, terminator included; at most
 *		maxlen bytes are ever written
 *
 * Returns the length the whole transformed string has (terminator not
 * counted), whether or not it fitted.  A value >= maxlen means the output
 * was truncated and must not be used.  Returns 0 when inputstr is NULL
 * or when a work buffer cannot be allocated.
 *
 * NOTE(review): an ordinary character whose primary weight is IGNORE_KEY
 * is still counted although nothing is emitted for it, so the returned
 * length can be larger than the string actually produced.
 */
int strxfrm(outputstr, inputstr, maxlen)
	char *outputstr;
	char *inputstr;
	size_t maxlen;
{
	static unsigned char *sec = NULL; /* collects the secondary weights */
	static size_t secsz = 0; /* size of space allocated to sec */
	unsigned char *secondary; /* next free byte in sec */
	char *outstart;		/* outputstr as passed in */
	size_t room;		/* bytes that may precede the terminator */
	register int count;	/* length of primary weights in outputstr
				   after strxfrm */
	register int seccount;	/* like count, but for the secondary weights */
	register int i;
	unsigned char ch;
	int setupcolldef();

	if (inputstr == NULL) {
		return 0;	/* No work to do */
	}

	/* A zero sized buffer cannot even take the terminator: only measure. */
	if (maxlen == 0)
		outputstr = NULL;
	outstart = outputstr;
	room = (outputstr != NULL) ? maxlen - 1 : 0;

	/*
	 * sec needs one byte per stored secondary weight plus a terminator,
	 * i.e. never more than maxlen bytes.
	 */
	if (outputstr != NULL && secsz < maxlen) {
		if (sec != NULL)
			free(sec);
		if ((sec = (unsigned char *) malloc(maxlen)) == NULL) {
			secsz = 0;
			fprintf(stderr, "strxfrm: string %s too long\n",
			    inputstr);
			return 0;
		}
		secsz = maxlen;
	}

	if (!start2to1) {
		/*
		 * setupcolldef() stores up to MAXSUBS entries and then a
		 * terminating entry, hence the extra element.
		 */
		two_to_one = (struct two_to_one *) malloc
		    (sizeof(struct two_to_one) * (MAXSUBS + 1));
		one_to_m = (struct one_to_m *) malloc
		    (sizeof (struct one_to_m) * (MAXSUBS + 1));
		if (two_to_one == NULL || one_to_m == NULL) {
			if (two_to_one != NULL)
				free(two_to_one);
			if (one_to_m != NULL)
				free(one_to_m);
			two_to_one = NULL;
			one_to_m = NULL;
			return 0;
		}
		/* start out as empty, properly terminated tables */
		two_to_one[0].one = 0;
		one_to_m[0].one = 0;
		start1tom = one_to_m;
		start2to1 = two_to_one;
	}

	count = 0;
	seccount = 0;
	secondary = sec;

	/* If locale value has changed since last we used this
	 * function (or if it has never been called and is now not == C)
	 * then we need to load a new collation table, also we assign
	 * at runtime so we can xstr this module
	 */
	if (saved_locale[0] == 0)
		strcpy(saved_locale, "C");
	if (strcmp(_locales[LC_COLLATE-1] ,saved_locale))
		setupcolldef();

	/* If we are set to "C" or have not even called setlocale
	 * it means "machine order sort".  The same is done when no weight
	 * table is available, since there is nothing to look weights up in.
	 */
	if (strcmp(saved_locale, "C") == 0 ||
	    primary_sort == NULL || secondary_sort == NULL) {
		count = strlen(inputstr);
		if (outputstr != NULL) {
			if ((size_t) count < maxlen) {
				strcpy(outputstr, inputstr);
			} else {
				/* does not fit: copy what does, terminated */
				strncpy(outputstr, inputstr, room);
				outputstr[room] = '\0';
			}
		}
		return count;
	}

	/* step through input string */
	while ( (ch = *inputstr++) != '\0') {

		/* Search for 1-to-many mapping */
		for (i = 0; one_to_m[i].one != 0; i++) {
			if (one_to_m[i].one == ch) {
				prim_secp = one_to_m[i].many;
				while (prim_secp->p) {
					if (prim_secp->p != IGNORE_KEY) {
						if (outputstr != NULL &&
						    (size_t) count < room)
							*outputstr++ = prim_secp->p;
						count++;
					}
					/*
					 * If there is a secondary value in the
					 * substitution, then add this too
					 */
					if (prim_secp->s) {
						if (outputstr != NULL &&
						    (size_t) seccount < room)
							*secondary++ = prim_secp->s;
						seccount++;
					}
					prim_secp++;
				}
				break;
			}
		}

		/* search for first character of potential 2-to-1 mapping */
		for (two_to_onep = start2to1; two_to_onep->one != 0; two_to_onep++) {
			/*
			 * Compare as unsigned so that bytes above 0x7f can
			 * match, and never pair with the terminator, which
			 * would step inputstr past the end of the string.
			 */
			if (two_to_onep->one == ch && *inputstr != '\0' &&
			    two_to_onep->two == (unsigned char) *inputstr) {
				if (outputstr != NULL && (size_t) count < room)
					*outputstr++ = two_to_onep->prim_sec.p;
				count++;
				inputstr++;
				if (two_to_onep->prim_sec.s) {
					if (outputstr != NULL &&
					    (size_t) seccount < room)
						*secondary++ = two_to_onep->prim_sec.s;
					seccount++;
				}
				break;
			}
		}

		/* Test to see if we got a match from previous
		 * two "for" loops
		 */
		if (two_to_onep -> one != 0 || one_to_m[i].one != 0)
			continue;

		/* Now assign real primary and secondary values */
		if (outputstr != NULL && (size_t) count < room
		    && primary_sort[ch] != IGNORE_KEY) {
			*outputstr++ = primary_sort[ch] ;
		}
		count++;
		if (secondary_sort[ch]) {
			if (outputstr != NULL && (size_t) seccount < room)
				*secondary++ = secondary_sort[ch];
			seccount++;
		}
	}

	/*
	 * Put the delimiter between primary and secondary
	 * weight level strings
	 */
	if (outputstr != NULL && (size_t) count < room)
		*outputstr++ = IGNORE_KEY;
	count++;

	/* Now compose primary and secondary lists and output
	 * string as necessary
	 */
	if (outputstr != NULL) {
		/*
		 * At most room bytes were written to each buffer, so both
		 * terminators land inside their buffers.
		 */
		*outputstr = '\0';
		*secondary = '\0';
		/* append as many secondary weights as still fit */
		strncat(outputstr, (char *) sec,
		    room - (size_t) (outputstr - outstart));
	}
	/* always the full length, fitted or not: fix to bugid 1151444 */
	return (count+seccount);
}
/*
 * setupcolldef() sets up coll definitions. It is only called when the
 * current locale is different from the saved_locale by strxfrm().
 * (To force the call to setupcolldef() before setlocale() is called,
 * strxfrm() faked saved_locale to be "C" if saved_locale is empty.)
 *
 * It reads the collation file of the current LC_COLLATE locale and
 * points primary_sort, secondary_sort and the one_to_m / two_to_one
 * tables at its contents.  The tables must already be allocated by the
 * caller.
 *
 * Returns 0 when a new table was installed and -1 otherwise.  On failure
 * the previously loaded table, if any, stays in place and stays valid.
 * NOTE(review): openlocale() is handed saved_locale as its output
 * argument before any of the checks below, so saved_locale may already
 * name the new locale when -1 is returned -- confirm against
 * openlocale() in setlocale.c.
 */
int
setupcolldef()
{
	register int fd;
	register char *memp;
	char *newstr;		/* freshly read table, not yet installed */
	unsigned short i;
	struct stat buf;
	char locale[MAXLOCALENAME + 1];	/* sized like a row of _locales */

	/*
	 * If _locales[LC_COLLATE-1] is not set yet, "C" should be used as the
	 * default locale, not the environment variable LANG or LC_COLLATE.
	 *
	 * openlocale(category, cat_id, locale, newlocale) compares
	 * locale with _locales[LC_COLLATE-1] and returns 0 (fd)
	 * if they are equal. So we force _locales[LC_COLLATE-1] to be empty
	 * before calling openlocale().
	 * We copy the new locale back to _locales[LC_COLLATE-1] from
	 * saved_locale right afterwards.
	 */
	if (_locales[LC_COLLATE-1][0] == 0) {
		strcpy(locale, "C");
	} else {
		strcpy(locale, _locales[LC_COLLATE-1]);
		_locales[LC_COLLATE-1][0] = 0;
	}
	fd = openlocale("LC_COLLATE", LC_COLLATE, locale, saved_locale);
	/* saved_locale is an array, so there is nothing to test here */
	strcpy(_locales[LC_COLLATE-1], saved_locale);
	if (fd < 0)
		return (-1);

	/*
	 * In all error situations return without any change.
	 * Note that most error conditions should have been caught
	 * at setlocale() time.
	 */
	if (fstat(fd, &buf) != 0 || buf.st_size < (2*MAXSTR+2)) {
		(void) close(fd);
		return (-1);
	}
	/* two spare bytes follow the file data, see below */
	if ((newstr = malloc(buf.st_size +2 )) == NULL ) {
		(void) close(fd);
		return (-1);
	}
	if ((read(fd, newstr, buf.st_size)) != buf.st_size) {
		free(newstr);
		(void) close(fd);
		return (-1);
	}
	(void) close(fd);
	/*
	 * The parsing below can look one or two bytes past the file data
	 * for a terminator; make sure it finds one there.
	 */
	newstr[buf.st_size] = '\0';
	newstr[buf.st_size + 1] = '\0';

	/*
	 * Now we have got a valid collation table: only at this point free
	 * the previously allocated one, so that no pointer is ever left
	 * pointing into freed memory.
	 */
	if (str != NULL)
		free(str);
	str = newstr;

	/* Now step through str, casting pieces to the internal data structures */
	primary_sort = str;
	secondary_sort = str+MAXSTR;

	/*
	 * 1-to-many entries: one character followed by weight pairs up to
	 * and including an all-zero pair.
	 * NOTE(review): the terminating entry is stored at index i, which
	 * can reach MAXSUBS, so the tables need MAXSUBS + 1 elements --
	 * confirm where they are allocated.
	 */
	memp = str+(2*MAXSTR); i=0;
	while (*memp != '\0' && i < MAXSUBS) {
		one_to_m[i].one = *memp++;
		prim_secp = (struct prim_sec *) memp;
		one_to_m[i].many = prim_secp;
		while (prim_secp->p != 0 || prim_secp->s != 0)
			prim_secp++;
		memp = (char *) ++prim_secp;
		++i;
	}
	memp += sizeof (struct prim_sec) + 1; /* last entry is always empty */
	one_to_m[i].one = 0;

	/* 2-to-1 entries: four bytes each, ended by a zero first byte */
	i=0;
	while (*memp != '\0' && i < MAXSUBS) {
		two_to_one[i].one = *memp++;
		two_to_one[i].two = *memp++;
		two_to_one[i].prim_sec.p = *memp++;
		two_to_one[i].prim_sec.s = *memp++;
		++i;
	}
	two_to_one[i].one = 0;

	strcpy(saved_locale,_locales[LC_COLLATE-1]);
	return (0);
}