This repository has been archived on 2024-04-08. You can view files and clone it, but cannot push or open issues or pull requests.

126 lines
3.1 KiB
C++
Raw Normal View History

2006-04-05 17:20:12 +00:00
#ifndef __CSUTILHXX__
#define __CSUTILHXX__
// First some base level utility routines
// A two-byte character, stored as two separate unsigned bytes.
// NOTE(review): l and h are presumably the low-order and high-order byte of
// a UTF-16 code unit -- confirm against the conversion routines.
typedef struct {
    unsigned char l;  // first byte in memory (presumably the low byte)
    unsigned char h;  // second byte in memory (presumably the high byte)
} w_char;
// Convert UTF-16 characters to UTF-8.
//   dest   - output buffer that receives the UTF-8 text
//   size   - presumably the capacity of dest in bytes (TODO confirm)
//   src    - UTF-16 input stored as w_char elements
//   srclen - presumably the number of w_char elements in src (TODO confirm)
// NOTE(review): the returned pointer is presumably dest; verify in the
// implementation.
char * u16_u8(char * dest, int size, const w_char * src, int srclen);
// Convert UTF-8 characters to UTF-16.
//   dest - output buffer that receives the w_char elements
//   size - presumably the capacity of dest in w_char elements (TODO confirm)
//   src  - UTF-8 input; no length is passed, so presumably null terminated
// NOTE(review): the int result is presumably the number of characters
// written; the error convention is not visible from this header.
int u8_u16(w_char * dest, int size, const char * src);
// Sort a vector of 2-byte (unsigned short) flags in place.
//   flags      - the vector to sort
//   begin, end - bounds of the range to sort
// NOTE(review): whether end is inclusive or one-past-the-end is not visible
// from this header; confirm before calling.
void flag_qsort(unsigned short flags[], int begin, int end);
// Binary search for flag in a vector of 2-byte (unsigned short) flags.
//   flags - the vector to search; presumably must already be sorted
//   flag  - the value to look for
//   right - presumably the upper bound (length or last index) of the search
//           range (TODO confirm)
// NOTE(review): the meaning of the int result (found/not found vs. index) is
// not visible from this header.
int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
// Remove the end-of-line character(s) from s. The string is modified through
// the non-const pointer, so the caller's buffer is changed.
void mychomp(char * s);
// Duplicate the string s.
// NOTE(review): the copy is presumably heap allocated and owned by the
// caller; confirm which deallocator matches.
char * mystrdup(const char * s);
// Duplicate the string s with its characters in reverse order.
// NOTE(review): ownership of the result is presumably the same as for
// mystrdup; confirm.
char * myrevstrdup(const char * s);
// Parse the string referenced by *sptr into tokens separated by the
// character delim.
// NOTE(review): presumably returns the next token and advances *sptr, in the
// manner of strsep(); confirm, including who owns the returned pointer.
char * mystrsep(char ** sptr, const char delim);
// Parse the string referenced by *sptr into tokens separated by the
// character delim.
// NOTE(review): how this differs from mystrsep is not visible from this
// header; check the implementation before choosing between them.
char * mystrsep2(char ** sptr, const char delim);
// NOTE(review): the comment previously attached here ("parse into tokens with
// char delimiter") was a copy of the mystrsep comment and does not match this
// signature. The name suggests string replacement; the parameters are unnamed,
// so their roles must be confirmed against the implementation.
char * mystrrep(char *, const char *, const char *);
// Append s to the end of every line in lines. The text is modified through
// the non-const pointer.
// NOTE(review): the result is longer than the input, so the buffer behind
// lines presumably needs spare capacity; confirm the required size.
void strlinecat(char * lines, const char * s);
// Tokenize text into lines, splitting at new line characters.
//   text  - the input text (not modified through this pointer)
//   lines - out parameter that receives an array of line strings
// NOTE(review): the int result is presumably the number of lines, and the
// array presumably has to be released by the caller; confirm both.
int line_tok(const char * text, char *** lines);
// Tokenize text into lines at new line characters and remove duplicate
// lines, in place.
// NOTE(review): the returned pointer is presumably text; confirm.
char * line_uniq(char * text);
// Replace every '\n' in text with the character c, in place.
// NOTE(review): the returned pointer is presumably text; confirm.
char * line_join(char * text, char c);
// Leave only the last {[^}]*} pattern in the string morphout.
// NOTE(review): presumably edits morphout in place and returns it; confirm.
char * delete_zeros(char * morphout);
// Reverse the word. The string is modified through the non-const pointer.
void reverseword(char *);
// Reverse the word. The string is modified through the non-const pointer.
// NOTE(review): judging by the _utf suffix this variant presumably reverses
// by UTF-8 character rather than by byte; confirm in the implementation.
void reverseword_utf(char *);
// Per-character case information for a single-byte character encoding.
// NOTE(review): tables of these are presumably indexed by byte value --
// confirm against the code that builds and reads them.
struct cs_info {
    unsigned char ccase;   // case indicator (presumably non-zero for upper case)
    unsigned char clower;  // presumably the lower-case form of the character
    unsigned char cupper;  // presumably the upper-case form of the character
};
// Case information for one 16-bit Unicode character.
struct unicode_info {
    unsigned short c;       // the character this entry describes
    unsigned short cupper;  // presumably its upper-case counterpart
    unsigned short clower;  // presumably its lower-case counterpart
};
// Second form of Unicode case information; unlike unicode_info it carries no
// character code of its own.
// NOTE(review): entries are presumably looked up by character code -- confirm
// how the table passed to mkallsmall_utf is indexed.
struct unicode_info2 {
    char cletter;           // presumably a flag marking the character as a letter
    unsigned short cupper;  // presumably the upper-case counterpart
    unsigned short clower;  // presumably the lower-case counterpart
};
// Associates an encoding name with its cs_info table.
struct enc_entry {
    const char * enc_name;      // name of the encoding
    struct cs_info * cs_table;  // cs_info table belonging to that encoding
};
// One entry of the map from a language to its default encoding.
struct lang_map {
    const char * lang;     // language identifier
    const char * def_enc;  // default encoding for that language
    int num;               // presumably the language number (see get_lang_num)
};
// Return a pointer to cs_info data selected by the string es.
// NOTE(review): es is presumably an encoding name matched against
// enc_entry::enc_name; the result for an unknown name is not visible from
// this header -- confirm.
struct cs_info * get_current_cs(const char * es);
2006-05-13 16:15:35 +00:00
// Return a pointer to unicode_info data.
// NOTE(review): presumably this is the Unicode case table whose length
// get_utf_cs_len() reports; the effect of 'what' is not visible from this
// header -- confirm. The bool default argument makes this declaration
// C++ only.
struct unicode_info * get_utf_cs(bool what = true);
2006-04-05 17:20:12 +00:00
// NOTE(review): presumably the number of unicode_info entries behind the
// pointer returned by get_utf_cs(); confirm.
int get_utf_cs_len();
// Return an encoding name for the language lang.
// NOTE(review): presumably looked up via lang_map::def_enc; the result for an
// unknown language is not visible from this header.
const char * get_default_enc(const char * lang);
// Return a number for the language lang.
// NOTE(review): presumably looked up via lang_map::num; the result for an
// unknown language is not visible from this header.
int get_lang_num(const char * lang);
// Convert the null terminated string p to all capitals using the named
// encoding; the result is written to d.
// NOTE(review): no size is passed for d, so it presumably must be at least as
// large as p including the terminator; confirm.
void enmkallcap(char * d, const char * p, const char * encoding);
// Convert the null terminated string p to all lowercase using the named
// encoding; the result is written to d.
// NOTE(review): same presumed size requirement on d as for enmkallcap.
void enmkallsmall(char * d, const char * p, const char * encoding);
// Convert the null terminated string p to have an initial capital using the
// named encoding; the result is written to d.
// NOTE(review): same presumed size requirement on d as for enmkallcap.
void enmkinitcap(char * d, const char * p, const char * encoding);
// Convert the null terminated string p to all capitals. There is no separate
// output parameter, so p is presumably converted in place using csconv.
void mkallcap(char * p, const struct cs_info * csconv);
// Convert the null terminated string p to all lowercase. There is no separate
// output parameter, so p is presumably converted in place using csconv.
void mkallsmall(char * p, const struct cs_info * csconv);
// Convert the null terminated string p to have an initial capital. There is
// no separate output parameter, so p is presumably converted in place using
// csconv.
void mkinitcap(char * p, const struct cs_info * csconv);
// Convert the first nc characters of the w_char string u to lowercase.
// The input is an array of w_char, which this header uses for UTF-16 text,
// not a UTF-8 byte string.
//   u       - characters to convert; presumably modified in place
//   nc      - number of characters to convert
//   utfconv - unicode_info2 case data; presumably a table indexed by
//             character code (TODO confirm)
void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv);
#endif