277 lines
7.0 KiB
C
277 lines
7.0 KiB
C
#ifndef lint
|
|
#ifdef sccs
|
|
static char sccsid[] = "@(#)string_utils.c 1.1 94/10/31 Copyr 1985 Sun Micro";
|
|
#endif
|
|
#endif
|
|
|
|
/*
|
|
* Copyright (c) 1985 by Sun Microsystems, Inc.
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
#include <sunwindow/sun.h>
|
|
#include <sunwindow/string_utils.h>
|
|
|
|
|
|
/*
|
|
* substring extracts a specified substring out of another string. It is a
|
|
* generalization of strncpy. substring copies n characters from s to dest,
|
|
* starting at position start. if start is negative, start = strlen(s) -
|
|
* start. for example, substring(s, -3, 3, dest) will store into dest the
|
|
* last three characters of s. returns True if successful, False if error,
|
|
* e.g. n < 0, there weren't n charcters in s, etc. in case of failure, dest
|
|
* will contain an empty string.
|
|
*/
|
|
Bool
|
|
substring(s, start, n, dest)
|
|
char *s;
|
|
int start, n;
|
|
char *dest;
|
|
{
|
|
int slen;
|
|
int i;
|
|
|
|
if (s == NULL)
|
|
return (False);
|
|
slen = strlen(s);
|
|
if (start < 0)
|
|
start = (slen - start); /* negative numbers mean count from
|
|
* back */
|
|
if ((start < 0) || (n < 0))
|
|
goto fail;
|
|
for (i = 0; i < n; i++)
|
|
if (s[start + i] == '\0')
|
|
goto fail;
|
|
else
|
|
dest[i] = s[start + i];
|
|
dest[i] = '\0';
|
|
return (True);
|
|
fail: dest[0] = '\0';
|
|
return (False);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* substrequal compares two substrings without having to construct them. If
|
|
* case_matters = False, 'a' will match with 'a' or 'A'.
|
|
*/
|
|
Bool
|
|
substrequal(s1, start1, s2, start2, n, case_matters)
|
|
char *s1, *s2;
|
|
int start1, start2;
|
|
Bool case_matters;
|
|
{
|
|
int i;
|
|
|
|
if ((s1 == NULL) || (s2 == NULL))
|
|
return ((n == 0 && s2 == s2) ? True : False);
|
|
for (i = 0; i < n; i++) {
|
|
char c1, c2;
|
|
c1 = s1[start1 + i];
|
|
c2 = s2[start2 + i];
|
|
if (c1 == c2) {
|
|
} else if (case_matters)
|
|
return (False);
|
|
else if (isupper(c1)) {
|
|
if (islower(c2)) {
|
|
if ((c1 - 'A') != (c2 - 'a'))
|
|
return (False);
|
|
} else
|
|
return (False);
|
|
} else if (islower(c1)) {
|
|
if (isupper(c2)) {
|
|
if ((c1 - 'a') != (c2 - 'A'))
|
|
return (False);
|
|
} else
|
|
return False;
|
|
} else
|
|
return False;
|
|
}
|
|
return (True);
|
|
}
|
|
|
|
|
|
/*
|
|
* strequal compares two strings It uses substrequal. If case_matters =
|
|
* False, 'a' will match with 'a' or 'A'. either s1 or s2 can be NULL without
|
|
* harm.
|
|
*/
|
|
Bool
|
|
string_equal(s1, s2, case_matters)
|
|
char *s1, *s2;
|
|
Bool case_matters;
|
|
{
|
|
int i;
|
|
if (s1 == s2)
|
|
return (True);
|
|
else if ((s1 == NULL) || (s2 == NULL))
|
|
return (False);
|
|
for (i = 0;; i++) {
|
|
char c1, c2;
|
|
c1 = s1[i];
|
|
c2 = s2[i];
|
|
if (c1 == c2) {
|
|
if (s1[i] == '\0')
|
|
return (True);
|
|
} else if (case_matters)
|
|
return (False);
|
|
else if (isupper(c1)) {
|
|
if (islower(c2)) {
|
|
if ((c1 - 'A') != (c2 - 'a'))
|
|
return (False);
|
|
} else
|
|
return (False);
|
|
} else if (islower(c1)) {
|
|
if (isupper(c2)) {
|
|
if ((c1 - 'a') != (c2 - 'A'))
|
|
return (False);
|
|
} else
|
|
return (False);
|
|
} else
|
|
return (False);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* string_find searches one instance of a string for another. If successful,
|
|
* returns the position in the string where the match began, otherwise -1. If
|
|
* case_matters = False, 'a' will match with 'a' or 'A'.
|
|
*/
|
|
int
|
|
string_find(s, target, case_matters)
|
|
char *s, *target;
|
|
Bool case_matters;
|
|
{
|
|
int i, n;
|
|
if (s == NULL)
|
|
return (-1);
|
|
else if (target == NULL)
|
|
return (0);
|
|
n = strlen(target);
|
|
for (i = 0;; i++)
|
|
if (s[i] == '\0')
|
|
return (-1);
|
|
else if (substrequal(s, i, target, 0, n, case_matters))
|
|
return (i);
|
|
}
|
|
|
|
|
|
/*
|
|
* string_get_token is used for tokenizing input, where more degree of
|
|
* flexibility is required than simply delimiting tokens by white spaces
|
|
* characters are divided into three classes, Break, Sepr, and Other.
|
|
* separators (Sepr) serve to delimit a token. Leading separators are
|
|
* skipped. think of separators as white space. Break characters delimit
|
|
* tokens, and are themselves tokens. Thus, if a break character is the first
|
|
* thing seen it is returned as the token. If any non-separator characters
|
|
* have been seen, then they are returned as the token, and the break
|
|
* character will be the returned as the result of the next call to
|
|
* get_token. for example, if charproc returns Sepr for space, and Break for
|
|
* '(' and ')' and Other for all alphabetic characters, then the string "now
|
|
* (is) the" will yield five tokens consisting of "now" "(" "is" ")" and
|
|
* "the"
|
|
*
|
|
* get_token stores the token that it constructs into dest, which is also
|
|
* returned as its value. index marks the current position in the string to
|
|
* "begin reading from" it is updated so that the client program does not
|
|
* have to keep track of how many characters have been read.
|
|
*
|
|
* get_token returns NULL, rather than the empty string, corresponding to the
|
|
* case where the token is empty
|
|
*/
|
|
|
|
char *
|
|
string_get_token(s, index, dest, charproc)
|
|
char *s;
|
|
int *index;
|
|
char *dest;
|
|
enum CharClass (*charproc) ();
|
|
{
|
|
char c;
|
|
int i = 0;
|
|
for (;;) {
|
|
c = s[(*index)++];
|
|
if (c == '\0')
|
|
goto backup;
|
|
switch ((*charproc) (c)) {
|
|
case Sepr:
|
|
if (i != 0) /* something seen */
|
|
goto backup;
|
|
else
|
|
continue;
|
|
case Break:
|
|
if (i == 0) {
|
|
/*
|
|
* nothing seen yet, this character is the
|
|
* token
|
|
*/
|
|
dest[i++] = c;
|
|
goto exit;
|
|
} else
|
|
goto backup;
|
|
case Other:
|
|
dest[i++] = c;
|
|
}
|
|
}
|
|
backup:
|
|
(*index)--;
|
|
exit:
|
|
dest[i] = '\0';
|
|
return (i == 0 ? NULL : dest);
|
|
}
|
|
|
|
|
|
/*
|
|
* string_get_sequence is a more primitive tokenizer than get_token. it takes
|
|
* a procedure which for each character specifies whether the character is to
|
|
* terminate the sequence, and whether or not the character is to be included
|
|
* in the sequence. (If the character terminates the sequence, but is not
|
|
* included, then it will be seen again on the next call.) For example,
|
|
* having seen a \"\, to read to the matching \"\, call get_sequence with an
|
|
* action procedure that returns {True, True} for \"\ and {False, True} for
|
|
* everything else. (If you want to detect the case where a " is preceded by
|
|
* a \\, simply save the last character and modify the procedure accordingly.
|
|
*
|
|
* Note that gettoken can be defined in terms of get_sequence by having Other
|
|
* characters return {False, True}, and also noticing whether any have been
|
|
* seen yet, having Seprs return {(seen_some_others ? True : False), False}
|
|
* and Break characters return {True, (seen_some_others ? False : True)}
|
|
*
|
|
* returns NULL for the empty sequence
|
|
*/
|
|
|
|
char *
|
|
string_get_sequence(s, index, dest, charproc)
|
|
char *s;
|
|
int *index;
|
|
char *dest;
|
|
struct CharAction (*charproc) ();
|
|
{
|
|
char c;
|
|
struct CharAction action;
|
|
int i = 0;
|
|
|
|
for (;;) {
|
|
c = s[(*index)++];
|
|
if (c == '\0')
|
|
goto backup;
|
|
action = (*charproc) (c);
|
|
if (action.include)
|
|
dest[i++] = c;
|
|
if (action.stop) {
|
|
if (!action.include)
|
|
goto backup; /* if c was not included,
|
|
* then need to back up */
|
|
else
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
backup:
|
|
(*index)--;
|
|
exit:
|
|
dest[i] = '\0';
|
|
return (i == 0 ? NULL : dest);
|
|
}
|