/* * Copyright (c) 1993 - 1994 by Sun Microsystems, Inc. */ #pragma ident "@(#)FN_string.cc 1.3 94/10/03 SMI" #include #include #include #include #include #include #include #include "FN_string_rep.hh" /* * Here, we define the FN_string class. * * If the constructor fails in some way, it may be possible for the rep ptr * to be null. We take care that this does not cause a core dump. */ /* * return length and number of bytes in a multibyte string. */ static size_t mbslen(const char *base, size_t *nbytes) { const char *cp = base; size_t ncodes; int clen; ncodes = 0; while (*cp != '\0') { clen = mblen(cp, 4); cp += clen; ++ncodes; } if (nbytes) *nbytes = cp - base; return (ncodes); } /* * %%% hairy * Here is a good example of a horrible hack to find out if the current * locale's code set requires multiple bytes to be represented. * * If this information were not available, one could assume that all * strings are multibyte. Everything should work as before, except that * the internal representation of strings will consume 4 times as much * memory and some extra multibyte <-> wchar_t conversions will have to * be performed. */ static int use_mb() { #if 1 if (_ctype[520] > 1) return (1); else return (0); #else return (1); #endif } /* * Tear down contents of object. */ void FN_string::destr() { if (rep && rep->release() == 0) { delete rep; // rep = 0; } } /* * Build up the object. If the rep * is invalid, return false and * mark object as dead (i.e. rep == 0). */ int FN_string::constr(FN_string_rep *r) { rep = 0; if (r) { if (r->valid()) { rep = r; return (1); } else delete r; } return (0); } /* * %%% locale support needs to be implemented here. 
*/ int FN_string::constr(FN_string_rep *r, const void *, size_t) { return (constr(r)); } FN_string::FN_string() { if (use_mb()) constr(new string_wchar(0, 0, 0, DEFAULT_CODESET)); else constr(new string_char(0, 0, 0, DEFAULT_CODESET)); } FN_string::~FN_string() { destr(); } FN_string::FN_string(const FN_string &s) { if (s.rep) rep = s.rep->share(); else rep = 0; } FN_string & FN_string::operator=(const FN_string &s) { if (&s != this) { destr(); if (s.rep) rep = s.rep->share(); else rep = 0; } return (*this); } FN_string::FN_string(const unsigned char *s) { int ncodes; size_t nbytes; if (use_mb()) { wchar_t *wp; ncodes = mbslen((const char *)s, &nbytes); wp = new wchar_t[ncodes]; if (wp) { if (mbstowcs(wp, (const char *)s, ncodes) != (size_t)-1) constr(new string_wchar(wp, ncodes, ncodes, DEFAULT_CODESET)); delete[] wp; } } else { ncodes = strlen((const char *)s); constr(new string_char((const char *)s, ncodes, ncodes, DEFAULT_CODESET)); } } FN_string::FN_string(const unsigned char *s, size_t maxchars) { int ncodes; if (use_mb()) { wchar_t *wp; wp = new wchar_t[maxchars]; if (wp) { if ((ncodes = mbstowcs(wp, (const char *)s, maxchars)) != (size_t)-1) constr(new string_wchar(wp, ncodes, ncodes, DEFAULT_CODESET)); delete[] wp; } } else { ncodes = strlen((const char *)s); if (ncodes > maxchars) ncodes = maxchars; constr(new string_char((const char *)s, ncodes, ncodes, DEFAULT_CODESET)); } } const unsigned char * FN_string::str(unsigned int *status) const { const unsigned char *cp; if (rep == 0) { if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return (0); } if ((cp = rep->as_str()) == 0) { if (status) *status = FN_E_INCOMPATIBLE_CODE_SETS; return (0); } if (status) *status = FN_SUCCESS; return (cp); } FN_string::FN_string( unsigned long code_set, const void *locale_info, size_t locale_bytes, size_t charcount, size_t bytecount, const void *contents, unsigned int *status) { unsigned int sts; FN_string_rep *r; switch (code_set) { case PCS_CODESET: case LATIN1_CODESET: 
r = new string_char((const char *)contents, charcount, bytecount, code_set); if (constr(r, locale_info, locale_bytes)) sts = FN_SUCCESS; else sts = FN_E_INSUFFICIENT_RESOURCES; break; default: sts = FN_E_INCOMPATIBLE_CODE_SETS; return; } if (status) *status = sts; } unsigned long FN_string::code_set(const void **locale_info, size_t *locale_bytes) const { if (rep) return (rep->code_set(locale_info, locale_bytes)); else return (0); } size_t FN_string::charcount() const { if (rep) return (rep->charcount()); else return (0); } size_t FN_string::bytecount() const { size_t nbytes; if (rep && rep->as_str(&nbytes)) return (nbytes); return (0); } const void * FN_string::contents() const { if (rep) return (rep->contents()); else return (0); } FN_string::FN_string( unsigned int *status, const FN_string *s1, const FN_string *s2, ...) { unsigned int local_status; va_list ap; const FN_string *sn; FN_string_rep *r; void *tag; if (status) *status = FN_SUCCESS; if (s1 == 0) { r = new string_char(0, 0, 0, DEFAULT_CODESET); if (!constr(r)) { if (status) *status = FN_E_INSUFFICIENT_RESOURCES; } return; } if (s2 == 0) { if (s1->rep) { rep = s1->rep->share(); } else { rep = 0; if (status) *status = FN_E_INSUFFICIENT_RESOURCES; } return; } // make sure all the instances are alive and of the same type. if (s1->rep == 0 || s2->rep == 0) { rep = 0; if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return; } tag = s1->rep->typetag(); if (s2->rep->typetag() != tag) { rep = 0; if (status) *status = FN_E_INCOMPATIBLE_CODE_SETS; return; } va_start(ap, s2); while (sn = va_arg(ap, const FN_string *)) { if (sn->rep == 0) { rep = 0; if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return; } if (sn->rep->typetag() != tag) { rep = 0; if (status) *status = FN_E_INCOMPATIBLE_CODE_SETS; return; } } va_end(ap); // calculate total charcount // total_charcount is a hint as to how many chars to allocate // to prevent O(n^2) behavior. 
int total_charcount = s1->charcount() + s2->charcount(); va_start(ap, s2); while (sn = va_arg(ap, const FN_string *)) { total_charcount += sn->charcount(); } va_end(ap); // copy data r = s1->rep->clone(0, s1->rep->charcount(), total_charcount); if (!constr(r)) { if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return; } // concat s2 local_status = rep->cat(s2->rep); if (local_status != FN_SUCCESS) { delete rep; rep = 0; if (status) *status = local_status; return; } // concat sn va_start(ap, s2); while (sn = va_arg(ap, const FN_string *)) { if ((local_status = rep->cat(sn->rep)) != FN_SUCCESS) { delete rep; rep = 0; break; } } va_end(ap); if (status) *status = local_status; } /* * Constructor returns substring between character indices first and last * (inclusive). */ FN_string::FN_string(const FN_string &orig, int first, int last) { int lasti = orig.charcount() - 1; if (lasti < 0) { rep = 0; // rep = get_rep(orig)->clone(0, 0, 0); return; } // calculate start and length if (first < 0) first = 0; if (last > lasti) last = lasti; // what is given are indices (charcount) // a 0 size indicates it needs to be calculated // copy data // %%% '\0' terminator assumptions? constr(orig.rep->clone(first, last - first + 1, last - first + 1 + 1)); } int FN_string::is_empty() const { return (charcount() == 0); } int FN_string::compare( const FN_string &s, unsigned int string_case, unsigned int *status) const { unsigned int sts; int cs; if (rep == 0 || s.rep == 0) { if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return (-1); } if (string_case == FN_STRING_CASE_INSENSITIVE) cs = rep->casecmp(0, s.rep, sts); else cs = rep->cmp(0, s.rep, sts); if (status) *status = sts; return (cs); } /* * Compare characters specified between indices from this string and given * string 's'. 
*/
/*
 * 'first' is clamped to 0 and 'last' to the final character index; the
 * characters in that range are compared with the whole of 's', ignoring
 * case when 'string_case' is FN_STRING_CASE_INSENSITIVE.
 *
 * Only the shorter of the two lengths is handed to ncmp()/ncasecmp().  If
 * that common prefix compares equal, the longer side is the greater one:
 * 1 when the substring is longer than 's', -1 when it is shorter.
 * An empty range (empty string, or first > last after clamping) equals an
 * empty 's' and is less than anything else.
 *
 * If either string is dead, returns -1 and reports
 * FN_E_INSUFFICIENT_RESOURCES through 'status' (when non-null).
 */
int
FN_string::compare_substring(
	int first,
	int last,
	const FN_string &s,
	unsigned int string_case,
	unsigned int *status) const
{
	int lasti = charcount() - 1;
	int sub_num_chars = s.charcount();
	unsigned int sts = FN_SUCCESS;

	if (rep == 0 || s.rep == 0) {
		if (status)
			*status = FN_E_INSUFFICIENT_RESOURCES;
		return (-1);
	}
	if (status)
		*status = sts;

	if (lasti < 0)
		return (sub_num_chars == 0 ? 0 : -1);

	if (first < 0)
		first = 0;
	if (last > lasti)
		last = lasti;

	int num_chars = last - first + 1;

	// An empty range is treated like an empty string; a non-positive
	// count must never be handed to the rep's comparison routines.
	if (num_chars <= 0)
		return (sub_num_chars == 0 ? 0 : -1);

	// Compare over the length both sides have in common.
	int cmp_chars = (num_chars > sub_num_chars) ?
	    sub_num_chars : num_chars;
	int ret;

	if (string_case == FN_STRING_CASE_INSENSITIVE)
		ret = rep->ncasecmp(first, s.rep, cmp_chars, sts);
	else
		ret = rep->ncmp(first, s.rep, cmp_chars, sts);
	if (status)
		*status = sts;

	// Equal prefixes: the length difference decides.
	if (ret == 0) {
		if (num_chars > sub_num_chars)
			return (1);
		if (num_chars < sub_num_chars)
			return (-1);
	}
	return (ret);
}

/*
 * Get position of where 's' occurs, starting from character position 'index',
 * in this string.
*/ int FN_string::next_substring( const FN_string &s, int index, unsigned int string_case, unsigned int *status) const { int sub_num_chars = s.charcount(); unsigned int sts = FN_SUCCESS; if (rep == 0 || s.rep == 0) { if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return (FN_STRING_INDEX_NONE); } if (status) *status = sts; if (sub_num_chars == 0) return (FN_STRING_INDEX_NONE); int lasti = charcount() - sub_num_chars; if ((lasti < 0) || (index > lasti)) return (FN_STRING_INDEX_NONE); if (index < 0) index = 0; int i; for (i = index; i <= lasti; i++) { if (string_case == FN_STRING_CASE_INSENSITIVE) { if (rep->ncasecmp(i, s.rep, sub_num_chars, sts) == 0) { if (status) *status = sts; return (i); } } else { if (rep->ncmp(i, s.rep, sub_num_chars, sts) == 0) { if (status) *status = sts; return (i); } } } if (status) *status = sts; return (FN_STRING_INDEX_NONE); } /* * Get position of where 's' occurs, starting backwards from * character position 'index' towards the front of this string. */ int FN_string::prev_substring(const FN_string &s, int index, unsigned int string_case, unsigned int *status) const { int sub_num_chars = s.charcount(); unsigned int sts = FN_SUCCESS; if (rep == 0 || s.rep == 0) { if (status) *status = FN_E_INSUFFICIENT_RESOURCES; return (-1); } if (status) *status = sts; if (sub_num_chars == 0) return (FN_STRING_INDEX_NONE); int lasti = charcount() - sub_num_chars; if ((lasti < 0) || (index < 0)) return (FN_STRING_INDEX_NONE); if (index > lasti) index = lasti; int i; for (i = index; i >= 0; i--) { if (string_case == FN_STRING_CASE_INSENSITIVE) { if (rep->ncasecmp(i, s.rep, sub_num_chars, sts) == 0) { if (status) *status = sts; return (i); } } else { if (rep->ncmp(i, s.rep, sub_num_chars, sts) == 0) { if (status) *status = sts; return (i); } } } if (status) *status = sts; return (FN_STRING_INDEX_NONE); } #ifdef DEBUG void FN_string::report(FILE *fp) { // fprintf(fp, "FN_string_rep::nnodes %d\n", FN_string_rep::nnodes); } #endif