Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

string_help.cpp

Go to the documentation of this file.
00001 /*
00002 **  ClanLib SDK
00003 **  Copyright (c) 1997-2005 The ClanLib Team
00004 **
00005 **  This software is provided 'as-is', without any express or implied
00006 **  warranty.  In no event will the authors be held liable for any damages
00007 **  arising from the use of this software.
00008 **
00009 **  Permission is granted to anyone to use this software for any purpose,
00010 **  including commercial applications, and to alter it and redistribute it
00011 **  freely, subject to the following restrictions:
00012 **
00013 **  1. The origin of this software must not be misrepresented; you must not
00014 **     claim that you wrote the original software. If you use this software
00015 **     in a product, an acknowledgment in the product documentation would be
00016 **     appreciated but is not required.
00017 **  2. Altered source versions must be plainly marked as such, and must not be
00018 **     misrepresented as being the original software.
00019 **  3. This notice may not be removed or altered from any source distribution.
00020 **
00021 **  Note: Some of the libraries ClanLib link to may have additional
00022 **  requirements or restrictions.
00023 **
00024 **  File Author(s):
00025 **
00026 **    Magnus Norddahl
00027 */
00028 
#include "precomp.h"
#include "string_help.h"
#include "exception.h"
#include <stdlib.h>
#ifndef WIN32
#include <wchar.h>
#include <wctype.h>
#else
#include <string.h>
#include <tchar.h>
#endif
00039 
00041 // CL_StringHelp Operations:
00042 
00043 std::vector<CL_String> CL_StringHelp::split_text(const CL_String &text, const CL_String &split_string)
00044 {
00045         std::vector<CL_String> result;
00046         CL_String::size_type end_pos = 0, begin_pos = 0;
00047         while (true)
00048         {
00049                 end_pos = text.find(split_string, begin_pos);
00050                 if (end_pos == CL_String::npos)
00051                 {
00052                         result.push_back(text.substr(begin_pos));
00053                         break;
00054                 }
00055                 else
00056                 {
00057                         result.push_back(text.substr(begin_pos, end_pos-begin_pos));
00058                         begin_pos = end_pos + split_string.length();
00059                 }
00060         }
00061         return result;
00062 }
00063 
00064 int CL_StringHelp::compare(const CL_StringA &a, const CL_StringA &b, bool case_insensitive)
00065 {
00066 #ifdef WIN32
00067         if (case_insensitive)
00068                 return stricmp(a.c_str(), b.c_str());
00069         else
00070                 return strcmp(a.c_str(), b.c_str());
00071 #else
00072         if (case_insensitive)
00073                 return strcasecmp(a.c_str(), b.c_str());
00074         else
00075                 return strcmp(a.c_str(), b.c_str());
00076 #endif
00077 }
00078 
00079 int CL_StringHelp::compare(const CL_StringW &a, const CL_StringW &b, bool case_insensitive)
00080 {
00081 #ifdef WIN32
00082         if (case_insensitive)
00083                 return wcsicmp(a.c_str(), b.c_str());
00084         else
00085                 return wcscmp(a.c_str(), b.c_str());
00086 #else
00087         if (case_insensitive)
00088                 return wcscasecmp(a.c_str(), b.c_str());
00089         else
00090                 return wcscmp(a.c_str(), b.c_str());
00091 #endif
00092 }
00093 
00094 CL_String CL_StringHelp::text_to_upper(const CL_String &s)
00095 {
00096 #ifdef UNICODE
00097         return ucs2_to_upper(s);
00098 #else
00099         return local8_to_upper(s);
00100 #endif
00101 }
00102         
00103 CL_StringA CL_StringHelp::local8_to_upper(const CL_StringA &s)
00104 {
00105         CL_StringA result = s;
00106         CL_StringA::size_type index, size;
00107         size = result.length();
00108         for (index = 0; index < size; index++)
00109         {
00110                 result[index] = (unsigned char) toupper((unsigned char) result[index]);
00111         }
00112         return result;
00113 }
00114 
00115 CL_StringW CL_StringHelp::ucs2_to_upper(const CL_StringW &s)
00116 {
00117         CL_StringW result = s;
00118         CL_StringW::size_type index, size;
00119         size = result.length();
00120         for (index = 0; index < size; index++)
00121         {
00122                 result[index] = towupper(result[index]);
00123         }
00124         return result;
00125 }
00126         
00127 CL_String CL_StringHelp::text_to_lower(const CL_String &s)
00128 {
00129 #ifdef UNICODE
00130         return ucs2_to_lower(s);
00131 #else
00132         return local8_to_lower(s);
00133 #endif
00134 }
00135         
00136 CL_StringA CL_StringHelp::local8_to_lower(const CL_StringA &s)
00137 {
00138         CL_StringA result = s;
00139         CL_StringA::size_type index, size;
00140         size = result.length();
00141         for (index = 0; index < size; index++)
00142         {
00143                 result[index] = (unsigned char) tolower((unsigned char) result[index]);
00144         }
00145         return result;
00146 }
00147         
00148 CL_StringW CL_StringHelp::ucs2_to_lower(const CL_StringW &s)
00149 {
00150         CL_StringW result = s;
00151         CL_StringW::size_type index, size;
00152         size = result.length();
00153         for (index = 0; index < size; index++)
00154         {
00155                 result[index] = towlower(result[index]);
00156         }
00157         return result;
00158 }
00159 
00160 CL_String CL_StringHelp::float_to_text(float value)
00161 {
00162 #ifdef UNICODE
00163         return float_to_ucs2(value);
00164 #else
00165         return float_to_local8(value);
00166 #endif
00167 }
00168 
00169 CL_StringA CL_StringHelp::float_to_local8(float value)
00170 {
00171         char buf[64];
00172         memset(buf, 0, 64);
00173 #ifdef WIN32
00174         _snprintf(buf, 63, "%f", value);
00175 #else
00176         snprintf(buf, 63, "%f", value);
00177 #endif
00178         return CL_StringA(buf);
00179 }
00180         
00181 CL_StringW CL_StringHelp::float_to_ucs2(float value)
00182 {
00183 #ifdef WIN32
00184         WCHAR buf[64];
00185         memset(buf, 0, 64 * sizeof(WCHAR));
00186         swprintf(buf, L"%f", value);
00187         return CL_StringW(buf);
00188 #else
00189         wchar_t buf[64];
00190         memset(buf, 0, 64 * sizeof(wchar_t));
00191         swprintf(buf, 63, L"%f", value);
00192         return CL_StringW(buf);
00193 #endif
00194 }
00195 
00196 float CL_StringHelp::text_to_float(const CL_String &value)
00197 {
00198 #ifdef UNICODE
00199         return ucs2_to_float(value);
00200 #else
00201         return local8_to_float(value);
00202 #endif
00203 }
00204         
00205 float CL_StringHelp::local8_to_float(const CL_StringA &value)
00206 {
00207         return 0.0f;
00208 }
00209         
00210 float CL_StringHelp::ucs2_to_float(const CL_StringW &value)
00211 {
00212         return 0.0f;
00213 }
00214 
00215 CL_String CL_StringHelp::double_to_text(double value)
00216 {
00217 #ifdef UNICODE
00218         return double_to_ucs2(value);
00219 #else
00220         return double_to_local8(value);
00221 #endif
00222 }
00223 
00224 CL_StringA CL_StringHelp::double_to_local8(double value)
00225 {
00226         char buf[64];
00227         memset(buf, 0, 64);
00228 #ifdef WIN32
00229         _snprintf(buf, 63, "%#f", value);
00230 #else
00231         snprintf(buf, 63, "%#f", value);
00232 #endif
00233         return CL_StringA(buf);
00234 }
00235         
00236 CL_StringW CL_StringHelp::double_to_ucs2(double value)
00237 {
00238 #ifdef WIN32
00239         WCHAR buf[64];
00240         memset(buf, 0, 64 * sizeof(WCHAR));
00241         swprintf(buf, L"%#f", value);
00242         return CL_StringW(buf);
00243 #else
00244         wchar_t buf[64];
00245         memset(buf, 0, 64 * sizeof(wchar_t));
00246         swprintf(buf, 63, L"%#f", value);
00247         return CL_StringW(buf);
00248 #endif
00249 }
00250         
00251 double CL_StringHelp::text_to_double(const CL_String &value)
00252 {
00253 #ifdef UNICODE
00254         return ucs2_to_double(value);
00255 #else
00256         return local8_to_double(value);
00257 #endif
00258 }
00259         
00260 double CL_StringHelp::local8_to_double(const CL_StringA &value)
00261 {
00262         return 0.0;
00263 }
00264         
00265 double CL_StringHelp::ucs2_to_double(const CL_StringW &value)
00266 {
00267         return 0.0;
00268 }
00269 
00270 CL_String CL_StringHelp::int_to_text(int value)
00271 {
00272 #ifdef UNICODE
00273         return int_to_ucs2(value);
00274 #else
00275         return int_to_local8(value);
00276 #endif
00277 }
00278 
00279 CL_StringA CL_StringHelp::int_to_local8(int value)
00280 {
00281         char buf[32];
00282         memset(buf, 0, 32);
00283 #ifdef WIN32
00284         _snprintf(buf, 31, "%d", value);
00285 #else
00286         snprintf(buf, 31, "%d", value);
00287 #endif
00288         return CL_StringA(buf);
00289 }
00290         
00291 CL_StringW CL_StringHelp::int_to_ucs2(int value)
00292 {
00293 #ifdef WIN32
00294         WCHAR buf[32];
00295         memset(buf, 0, 32 * sizeof(WCHAR));
00296         swprintf(buf, L"%d", value);
00297         return CL_StringW(buf);
00298 #else
00299         wchar_t buf[32];
00300         memset(buf, 0, 32 * sizeof(wchar_t));
00301         swprintf(buf, 31, L"%d", value);
00302         return CL_StringW(buf);
00303 #endif
00304 }
00305 
00306 int CL_StringHelp::text_to_int(const CL_String &value)
00307 {
00308 #ifdef UNICODE
00309         return ucs2_to_int(value);
00310 #else
00311         return local8_to_int(value);
00312 #endif
00313 }
00314         
00315 int CL_StringHelp::local8_to_int(const CL_StringA &value)
00316 {
00317         return atoi(value.c_str());
00318 }
00319         
00320 int CL_StringHelp::ucs2_to_int(const CL_StringW &value)
00321 {
00322 #ifdef WIN32
00323         return _wtoi(value.c_str());
00324 #else
00325         throw CL_Exception(TEXT("ucs2_to_int not implemented on unix yet"));
00326         return 0;
00327 #endif
00328 }
00329 
00330 CL_StringA CL_StringHelp::text_to_local8(const CL_String &text)
00331 {
00332 #ifdef UNICODE
00333         return ucs2_to_local8(text);
00334 #else
00335         return text;
00336 #endif
00337 }
00338 
00339 CL_StringA CL_StringHelp::text_to_utf8(const CL_String &text)
00340 {
00341 #ifdef UNICODE
00342         return ucs2_to_utf8(text);
00343 #else
00344         CL_StringW ucs2 = local8_to_ucs2(text);
00345         return ucs2_to_utf8(ucs2);
00346 #endif
00347 }
00348 
00349 CL_StringA CL_StringHelp::ucs2_to_latin1(const CL_StringW &ucs2)
00350 {
00351         CL_StringA::size_type i, length = ucs2.length();
00352         CL_StringA latin1(length, ' ');
00353         for (i=0; i<length; i++)
00354                 latin1[i] = (char) ucs2[i];
00355         return latin1;
00356 }
00357 
00358 CL_StringA CL_StringHelp::ucs2_to_latin9(const CL_StringW &ucs2)
00359 {
00360         CL_StringA::size_type i, length = ucs2.length();
00361         CL_StringA latin1(length, ' ');
00362         for (i=0; i<length; i++)
00363                 latin1[i] = (ucs2[i] != 0x20ac) ? ucs2[i] : 0xa4;
00364         return latin1;
00365 }
00366 
00367 CL_StringA CL_StringHelp::ucs2_to_local8(const CL_StringW &ucs2)
00368 {
00369         return ucs2_to_latin9(ucs2);
00370 }
00371 
00372 CL_StringA CL_StringHelp::ucs2_to_utf8(const CL_StringW &ucs2)
00373 {
00374         // Calculate length:
00375 
00376         CL_StringW::size_type length_ucs2 = ucs2.length();
00377         CL_StringA::size_type length_utf8 = 0;
00378         CL_StringW::size_type pos;
00379         for (pos = 0; pos < length_ucs2; pos++)
00380         {
00381                 if (ucs2[pos] < 0x0080)
00382                         length_utf8++;
00383                 else if (ucs2[pos] < 0x0800)
00384                         length_utf8 += 2;
00385                 else
00386                         length_utf8 += 3;
00387         }
00388         
00389         // Perform conversion:
00390         
00391         CL_StringA utf8(length_utf8, ' ');
00392         CL_StringA::size_type pos_utf8 = 0;
00393         for (pos = 0; pos < length_ucs2; pos++)
00394         {
00395                 if (ucs2[pos] < 0x0080)
00396                 {
00397                         utf8[pos_utf8++] = (char) ucs2[pos];
00398                 }
00399                 else if (ucs2[pos] < 0x0800)
00400                 {
00401                         utf8[pos_utf8++] = 0xc0 + (ucs2[pos] >> 6);
00402                         utf8[pos_utf8++] = (ucs2[pos] & 0x3f);
00403                 }
00404                 else
00405                 {
00406                         utf8[pos_utf8++] = 0xe0 + (ucs2[pos] >> 12);
00407                         utf8[pos_utf8++] = ((ucs2[pos] >> 6) & 0x3f);
00408                         utf8[pos_utf8++] = (ucs2[pos] & 0x3f);
00409                 }
00410         }
00411 
00412         return utf8;
00413 }
00414 
00415 CL_StringW CL_StringHelp::latin1_to_ucs2(const CL_StringA &latin1)
00416 {
00417         CL_StringW::size_type i, length = latin1.length();
00418         CL_StringW ucs2(length, ' ');
00419         for (i=0; i<length; i++)
00420                 ucs2[i] = latin1[i];
00421         return ucs2;
00422 }
00423 
00424 CL_StringW CL_StringHelp::latin9_to_ucs2(const CL_StringA &latin9)
00425 {
00426         CL_StringW::size_type i, length = latin9.length();
00427         CL_StringW ucs2(length, ' ');
00428         for (i=0; i<length; i++)
00429                 ucs2[i] = ((unsigned char) latin9[i] != 0xa4) ? latin9[i] : 0x20ac;
00430         return ucs2;
00431 }
00432 
00433 CL_String CL_StringHelp::local8_to_text(const CL_StringA &local8)
00434 {
00435 #ifdef UNICODE
00436         return local8_to_ucs2(local8);
00437 #else
00438         return local8;
00439 #endif
00440 }
00441 
00442 CL_String CL_StringHelp::ucs2_to_text(const CL_StringW &ucs2)
00443 {
00444 #ifdef UNICODE
00445         return ucs2;
00446 #else
00447         return ucs2_to_local8(ucs2);
00448 #endif
00449 }
00450 
00451 CL_StringW CL_StringHelp::local8_to_ucs2(const CL_StringA &local8)
00452 {
00453         return latin9_to_ucs2(local8);
00454 }
00455 
00456 CL_StringW CL_StringHelp::utf8_to_ucs2(const CL_StringA &utf8)
00457 {
00458         // Calculate length:
00459 
00460         CL_StringW::size_type length_ucs2 = 0;
00461         CL_StringA::size_type length_utf8 = utf8.length();
00462         CL_StringA::size_type pos = 0;
00463         while (pos < length_utf8)
00464         {
00465                 unsigned char c = utf8[pos++];
00466                 int trailing_bytes = trailing_bytes_for_utf8[c];
00467                 length_ucs2++;
00468                 pos += trailing_bytes;
00469         }
00470         if (pos != length_utf8)
00471         {
00472                 // error in utf8 string
00473                 length_ucs2--;
00474         }
00475         if (pos <= 0)
00476                 return CL_StringW();
00477 
00478         // Perform conversion:
00479         
00480         CL_StringW ucs2(length_ucs2, L'?');
00481         pos = 0;
00482         CL_StringW::size_type ucs2_pos = 0;
00483         while (pos < length_utf8 && ucs2_pos < length_ucs2)
00484         {
00485                 unsigned char c = utf8[pos++];
00486                 int trailing_bytes = trailing_bytes_for_utf8[c];
00487                 if (trailing_bytes == 0)
00488                 {
00489                         ucs2[ucs2_pos] = c;
00490                 }
00491                 else
00492                 {
00493                         unsigned int ucs4 = (c & bitmask_leadbyte_for_utf8[trailing_bytes]);
00494                         for (int i=1; i<=trailing_bytes; i++)
00495                         {
00496                                 c = (unsigned char) utf8[pos+i];
00497                                 if (c < 0xc0)
00498                                 {
00499                                         ucs4 = (ucs4 << 6) + (c & 0x3f);
00500                                 }
00501                                 else
00502                                 {
00503                                         // error in utf8 string
00504                                         ucs4 = 0;
00505                                         break;
00506                                 }
00507                         }
00508                         if (ucs4 > 0 && ucs4 <= 0xffff)
00509                                 ucs2[ucs2_pos] = ucs4;
00510                         else
00511                                 ucs2[ucs2_pos] = L'?';
00512                 }
00513                 ucs2_pos++;
00514                 pos += trailing_bytes;
00515         }
00516         return ucs2;
00517 }
00518 
00519 CL_String CL_StringHelp::utf8_to_text(const CL_StringA &utf8)
00520 {
00521 #ifdef UNICODE
00522         return utf8_to_ucs2(utf8);
00523 #else
00524         CL_StringW ucs2 = utf8_to_ucs2(utf8);
00525         return ucs2_to_local8(ucs2);
00526 #endif
00527 }
00528 
00530 // CL_StringHelp Implementation:
00531 
00532 const char CL_StringHelp::trailing_bytes_for_utf8[256] =
00533 {
00534         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00535         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00536         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00537         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00538         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00539         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00540         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00541         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
00542 };
00543 
00544 const unsigned char CL_StringHelp::bitmask_leadbyte_for_utf8[6] =
00545 {
00546         0x7f,
00547         0x1f,
00548         0x0f,
00549         0x07,
00550         0x03,
00551         0x01
00552 };

Generated on Sat Feb 19 22:51:16 2005 for npcore by  doxygen 1.4.1