00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "precomp.h"
00030 #include "string_help.h"
00031 #include "exception.h"
00032 #ifndef WIN32
00033 #include <wchar.h>
00034 #include <wctype.h>
00035 #else
00036 #include <string.h>
00037 #include <tchar.h>
00038 #endif
00039
00041
00042
00043 std::vector<CL_String> CL_StringHelp::split_text(const CL_String &text, const CL_String &split_string)
00044 {
00045 std::vector<CL_String> result;
00046 CL_String::size_type end_pos = 0, begin_pos = 0;
00047 while (true)
00048 {
00049 end_pos = text.find(split_string, begin_pos);
00050 if (end_pos == CL_String::npos)
00051 {
00052 result.push_back(text.substr(begin_pos));
00053 break;
00054 }
00055 else
00056 {
00057 result.push_back(text.substr(begin_pos, end_pos-begin_pos));
00058 begin_pos = end_pos + split_string.length();
00059 }
00060 }
00061 return result;
00062 }
00063
00064 int CL_StringHelp::compare(const CL_StringA &a, const CL_StringA &b, bool case_insensitive)
00065 {
00066 #ifdef WIN32
00067 if (case_insensitive)
00068 return stricmp(a.c_str(), b.c_str());
00069 else
00070 return strcmp(a.c_str(), b.c_str());
00071 #else
00072 if (case_insensitive)
00073 return strcasecmp(a.c_str(), b.c_str());
00074 else
00075 return strcmp(a.c_str(), b.c_str());
00076 #endif
00077 }
00078
00079 int CL_StringHelp::compare(const CL_StringW &a, const CL_StringW &b, bool case_insensitive)
00080 {
00081 #ifdef WIN32
00082 if (case_insensitive)
00083 return wcsicmp(a.c_str(), b.c_str());
00084 else
00085 return wcscmp(a.c_str(), b.c_str());
00086 #else
00087 if (case_insensitive)
00088 return wcscasecmp(a.c_str(), b.c_str());
00089 else
00090 return wcscmp(a.c_str(), b.c_str());
00091 #endif
00092 }
00093
00094 CL_String CL_StringHelp::text_to_upper(const CL_String &s)
00095 {
00096 #ifdef UNICODE
00097 return ucs2_to_upper(s);
00098 #else
00099 return local8_to_upper(s);
00100 #endif
00101 }
00102
00103 CL_StringA CL_StringHelp::local8_to_upper(const CL_StringA &s)
00104 {
00105 CL_StringA result = s;
00106 CL_StringA::size_type index, size;
00107 size = result.length();
00108 for (index = 0; index < size; index++)
00109 {
00110 result[index] = (unsigned char) toupper((unsigned char) result[index]);
00111 }
00112 return result;
00113 }
00114
00115 CL_StringW CL_StringHelp::ucs2_to_upper(const CL_StringW &s)
00116 {
00117 CL_StringW result = s;
00118 CL_StringW::size_type index, size;
00119 size = result.length();
00120 for (index = 0; index < size; index++)
00121 {
00122 result[index] = towupper(result[index]);
00123 }
00124 return result;
00125 }
00126
00127 CL_String CL_StringHelp::text_to_lower(const CL_String &s)
00128 {
00129 #ifdef UNICODE
00130 return ucs2_to_lower(s);
00131 #else
00132 return local8_to_lower(s);
00133 #endif
00134 }
00135
00136 CL_StringA CL_StringHelp::local8_to_lower(const CL_StringA &s)
00137 {
00138 CL_StringA result = s;
00139 CL_StringA::size_type index, size;
00140 size = result.length();
00141 for (index = 0; index < size; index++)
00142 {
00143 result[index] = (unsigned char) tolower((unsigned char) result[index]);
00144 }
00145 return result;
00146 }
00147
00148 CL_StringW CL_StringHelp::ucs2_to_lower(const CL_StringW &s)
00149 {
00150 CL_StringW result = s;
00151 CL_StringW::size_type index, size;
00152 size = result.length();
00153 for (index = 0; index < size; index++)
00154 {
00155 result[index] = towlower(result[index]);
00156 }
00157 return result;
00158 }
00159
00160 CL_String CL_StringHelp::float_to_text(float value)
00161 {
00162 #ifdef UNICODE
00163 return float_to_ucs2(value);
00164 #else
00165 return float_to_local8(value);
00166 #endif
00167 }
00168
00169 CL_StringA CL_StringHelp::float_to_local8(float value)
00170 {
00171 char buf[64];
00172 memset(buf, 0, 64);
00173 #ifdef WIN32
00174 _snprintf(buf, 63, "%f", value);
00175 #else
00176 snprintf(buf, 63, "%f", value);
00177 #endif
00178 return CL_StringA(buf);
00179 }
00180
00181 CL_StringW CL_StringHelp::float_to_ucs2(float value)
00182 {
00183 #ifdef WIN32
00184 WCHAR buf[64];
00185 memset(buf, 0, 64 * sizeof(WCHAR));
00186 swprintf(buf, L"%f", value);
00187 return CL_StringW(buf);
00188 #else
00189 wchar_t buf[64];
00190 memset(buf, 0, 64 * sizeof(wchar_t));
00191 swprintf(buf, 63, L"%f", value);
00192 return CL_StringW(buf);
00193 #endif
00194 }
00195
00196 float CL_StringHelp::text_to_float(const CL_String &value)
00197 {
00198 #ifdef UNICODE
00199 return ucs2_to_float(value);
00200 #else
00201 return local8_to_float(value);
00202 #endif
00203 }
00204
00205 float CL_StringHelp::local8_to_float(const CL_StringA &value)
00206 {
00207 return 0.0f;
00208 }
00209
00210 float CL_StringHelp::ucs2_to_float(const CL_StringW &value)
00211 {
00212 return 0.0f;
00213 }
00214
00215 CL_String CL_StringHelp::double_to_text(double value)
00216 {
00217 #ifdef UNICODE
00218 return double_to_ucs2(value);
00219 #else
00220 return double_to_local8(value);
00221 #endif
00222 }
00223
00224 CL_StringA CL_StringHelp::double_to_local8(double value)
00225 {
00226 char buf[64];
00227 memset(buf, 0, 64);
00228 #ifdef WIN32
00229 _snprintf(buf, 63, "%#f", value);
00230 #else
00231 snprintf(buf, 63, "%#f", value);
00232 #endif
00233 return CL_StringA(buf);
00234 }
00235
00236 CL_StringW CL_StringHelp::double_to_ucs2(double value)
00237 {
00238 #ifdef WIN32
00239 WCHAR buf[64];
00240 memset(buf, 0, 64 * sizeof(WCHAR));
00241 swprintf(buf, L"%#f", value);
00242 return CL_StringW(buf);
00243 #else
00244 wchar_t buf[64];
00245 memset(buf, 0, 64 * sizeof(wchar_t));
00246 swprintf(buf, 63, L"%#f", value);
00247 return CL_StringW(buf);
00248 #endif
00249 }
00250
00251 double CL_StringHelp::text_to_double(const CL_String &value)
00252 {
00253 #ifdef UNICODE
00254 return ucs2_to_double(value);
00255 #else
00256 return local8_to_double(value);
00257 #endif
00258 }
00259
00260 double CL_StringHelp::local8_to_double(const CL_StringA &value)
00261 {
00262 return 0.0;
00263 }
00264
00265 double CL_StringHelp::ucs2_to_double(const CL_StringW &value)
00266 {
00267 return 0.0;
00268 }
00269
00270 CL_String CL_StringHelp::int_to_text(int value)
00271 {
00272 #ifdef UNICODE
00273 return int_to_ucs2(value);
00274 #else
00275 return int_to_local8(value);
00276 #endif
00277 }
00278
00279 CL_StringA CL_StringHelp::int_to_local8(int value)
00280 {
00281 char buf[32];
00282 memset(buf, 0, 32);
00283 #ifdef WIN32
00284 _snprintf(buf, 31, "%d", value);
00285 #else
00286 snprintf(buf, 31, "%d", value);
00287 #endif
00288 return CL_StringA(buf);
00289 }
00290
00291 CL_StringW CL_StringHelp::int_to_ucs2(int value)
00292 {
00293 #ifdef WIN32
00294 WCHAR buf[32];
00295 memset(buf, 0, 32 * sizeof(WCHAR));
00296 swprintf(buf, L"%d", value);
00297 return CL_StringW(buf);
00298 #else
00299 wchar_t buf[32];
00300 memset(buf, 0, 32 * sizeof(wchar_t));
00301 swprintf(buf, 31, L"%d", value);
00302 return CL_StringW(buf);
00303 #endif
00304 }
00305
00306 int CL_StringHelp::text_to_int(const CL_String &value)
00307 {
00308 #ifdef UNICODE
00309 return ucs2_to_int(value);
00310 #else
00311 return local8_to_int(value);
00312 #endif
00313 }
00314
00315 int CL_StringHelp::local8_to_int(const CL_StringA &value)
00316 {
00317 return atoi(value.c_str());
00318 }
00319
00320 int CL_StringHelp::ucs2_to_int(const CL_StringW &value)
00321 {
00322 #ifdef WIN32
00323 return _wtoi(value.c_str());
00324 #else
00325 throw CL_Exception(TEXT("ucs2_to_int not implemented on unix yet"));
00326 return 0;
00327 #endif
00328 }
00329
00330 CL_StringA CL_StringHelp::text_to_local8(const CL_String &text)
00331 {
00332 #ifdef UNICODE
00333 return ucs2_to_local8(text);
00334 #else
00335 return text;
00336 #endif
00337 }
00338
00339 CL_StringA CL_StringHelp::text_to_utf8(const CL_String &text)
00340 {
00341 #ifdef UNICODE
00342 return ucs2_to_utf8(text);
00343 #else
00344 CL_StringW ucs2 = local8_to_ucs2(text);
00345 return ucs2_to_utf8(ucs2);
00346 #endif
00347 }
00348
00349 CL_StringA CL_StringHelp::ucs2_to_latin1(const CL_StringW &ucs2)
00350 {
00351 CL_StringA::size_type i, length = ucs2.length();
00352 CL_StringA latin1(length, ' ');
00353 for (i=0; i<length; i++)
00354 latin1[i] = (char) ucs2[i];
00355 return latin1;
00356 }
00357
00358 CL_StringA CL_StringHelp::ucs2_to_latin9(const CL_StringW &ucs2)
00359 {
00360 CL_StringA::size_type i, length = ucs2.length();
00361 CL_StringA latin1(length, ' ');
00362 for (i=0; i<length; i++)
00363 latin1[i] = (ucs2[i] != 0x20ac) ? ucs2[i] : 0xa4;
00364 return latin1;
00365 }
00366
00367 CL_StringA CL_StringHelp::ucs2_to_local8(const CL_StringW &ucs2)
00368 {
00369 return ucs2_to_latin9(ucs2);
00370 }
00371
00372 CL_StringA CL_StringHelp::ucs2_to_utf8(const CL_StringW &ucs2)
00373 {
00374
00375
00376 CL_StringW::size_type length_ucs2 = ucs2.length();
00377 CL_StringA::size_type length_utf8 = 0;
00378 CL_StringW::size_type pos;
00379 for (pos = 0; pos < length_ucs2; pos++)
00380 {
00381 if (ucs2[pos] < 0x0080)
00382 length_utf8++;
00383 else if (ucs2[pos] < 0x0800)
00384 length_utf8 += 2;
00385 else
00386 length_utf8 += 3;
00387 }
00388
00389
00390
00391 CL_StringA utf8(length_utf8, ' ');
00392 CL_StringA::size_type pos_utf8 = 0;
00393 for (pos = 0; pos < length_ucs2; pos++)
00394 {
00395 if (ucs2[pos] < 0x0080)
00396 {
00397 utf8[pos_utf8++] = (char) ucs2[pos];
00398 }
00399 else if (ucs2[pos] < 0x0800)
00400 {
00401 utf8[pos_utf8++] = 0xc0 + (ucs2[pos] >> 6);
00402 utf8[pos_utf8++] = (ucs2[pos] & 0x3f);
00403 }
00404 else
00405 {
00406 utf8[pos_utf8++] = 0xe0 + (ucs2[pos] >> 12);
00407 utf8[pos_utf8++] = ((ucs2[pos] >> 6) & 0x3f);
00408 utf8[pos_utf8++] = (ucs2[pos] & 0x3f);
00409 }
00410 }
00411
00412 return utf8;
00413 }
00414
00415 CL_StringW CL_StringHelp::latin1_to_ucs2(const CL_StringA &latin1)
00416 {
00417 CL_StringW::size_type i, length = latin1.length();
00418 CL_StringW ucs2(length, ' ');
00419 for (i=0; i<length; i++)
00420 ucs2[i] = latin1[i];
00421 return ucs2;
00422 }
00423
00424 CL_StringW CL_StringHelp::latin9_to_ucs2(const CL_StringA &latin9)
00425 {
00426 CL_StringW::size_type i, length = latin9.length();
00427 CL_StringW ucs2(length, ' ');
00428 for (i=0; i<length; i++)
00429 ucs2[i] = ((unsigned char) latin9[i] != 0xa4) ? latin9[i] : 0x20ac;
00430 return ucs2;
00431 }
00432
00433 CL_String CL_StringHelp::local8_to_text(const CL_StringA &local8)
00434 {
00435 #ifdef UNICODE
00436 return local8_to_ucs2(local8);
00437 #else
00438 return local8;
00439 #endif
00440 }
00441
00442 CL_String CL_StringHelp::ucs2_to_text(const CL_StringW &ucs2)
00443 {
00444 #ifdef UNICODE
00445 return ucs2;
00446 #else
00447 return ucs2_to_local8(ucs2);
00448 #endif
00449 }
00450
00451 CL_StringW CL_StringHelp::local8_to_ucs2(const CL_StringA &local8)
00452 {
00453 return latin9_to_ucs2(local8);
00454 }
00455
00456 CL_StringW CL_StringHelp::utf8_to_ucs2(const CL_StringA &utf8)
00457 {
00458
00459
00460 CL_StringW::size_type length_ucs2 = 0;
00461 CL_StringA::size_type length_utf8 = utf8.length();
00462 CL_StringA::size_type pos = 0;
00463 while (pos < length_utf8)
00464 {
00465 unsigned char c = utf8[pos++];
00466 int trailing_bytes = trailing_bytes_for_utf8[c];
00467 length_ucs2++;
00468 pos += trailing_bytes;
00469 }
00470 if (pos != length_utf8)
00471 {
00472
00473 length_ucs2--;
00474 }
00475 if (pos <= 0)
00476 return CL_StringW();
00477
00478
00479
00480 CL_StringW ucs2(length_ucs2, L'?');
00481 pos = 0;
00482 CL_StringW::size_type ucs2_pos = 0;
00483 while (pos < length_utf8 && ucs2_pos < length_ucs2)
00484 {
00485 unsigned char c = utf8[pos++];
00486 int trailing_bytes = trailing_bytes_for_utf8[c];
00487 if (trailing_bytes == 0)
00488 {
00489 ucs2[ucs2_pos] = c;
00490 }
00491 else
00492 {
00493 unsigned int ucs4 = (c & bitmask_leadbyte_for_utf8[trailing_bytes]);
00494 for (int i=1; i<=trailing_bytes; i++)
00495 {
00496 c = (unsigned char) utf8[pos+i];
00497 if (c < 0xc0)
00498 {
00499 ucs4 = (ucs4 << 6) + (c & 0x3f);
00500 }
00501 else
00502 {
00503
00504 ucs4 = 0;
00505 break;
00506 }
00507 }
00508 if (ucs4 > 0 && ucs4 <= 0xffff)
00509 ucs2[ucs2_pos] = ucs4;
00510 else
00511 ucs2[ucs2_pos] = L'?';
00512 }
00513 ucs2_pos++;
00514 pos += trailing_bytes;
00515 }
00516 return ucs2;
00517 }
00518
00519 CL_String CL_StringHelp::utf8_to_text(const CL_StringA &utf8)
00520 {
00521 #ifdef UNICODE
00522 return utf8_to_ucs2(utf8);
00523 #else
00524 CL_StringW ucs2 = utf8_to_ucs2(utf8);
00525 return ucs2_to_local8(ucs2);
00526 #endif
00527 }
00528
00530
00531
00532 const char CL_StringHelp::trailing_bytes_for_utf8[256] =
00533 {
00534 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00535 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00536 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00537 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00538 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00539 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00540 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00541 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
00542 };
00543
00544 const unsigned char CL_StringHelp::bitmask_leadbyte_for_utf8[6] =
00545 {
00546 0x7f,
00547 0x1f,
00548 0x0f,
00549 0x07,
00550 0x03,
00551 0x01
00552 };