Dogcows Code - chaz/openbox/blob - otk/ustring.cc

   1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
   2
   3 #ifdef    HAVE_CONFIG_H
   4 #  include "../config.h"
   5 #endif // HAVE_CONFIG_H
   6
   7 #include "ustring.hh"
   8
   9 extern "C" {
  10 #include <assert.h>
  11 }
  12
  13 namespace otk {
  14
  15 // helper functions
  16
  17 // The number of bytes to skip to find the next character in the string
  18 static const char utf8_skip[256] = {
  19   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  20   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  21   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  22   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  23   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  24   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  25   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  26   3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
  27 };
  28
  29 // takes a pointer into a utf8 string and returns a unicode character for the
  30 // first character at the pointer
  31 unichar utf8_get_char (const char *p)
  32 {
  33   unichar result = static_cast<unsigned char>(*p);
  34
  35   // if its not a 7-bit ascii character
  36   if((result & 0x80) != 0) {
  37     // len is the number of bytes this character takes up in the string
  38     unsigned char len = utf8_skip[result];
  39     result &= 0x7F >> len;
  40
  41     while(--len != 0) {
  42       result <<= 6;
  43       result |= static_cast<unsigned char>(*++p) & 0x3F;
  44     }
  45   }
  46
  47   return result;
  48 }
  49
  50 // takes a pointer into a string and finds its offset
  51 static ustring::size_type utf8_ptr_to_offset(const char *str, const char *pos)
  52 {
  53   ustring::size_type offset = 0;
  54
  55   while (str < pos) {
  56     str += utf8_skip[static_cast<unsigned char>(*str)];
  57     offset++;
  58   }
  59
  60   return offset;
  61 }
  62
  63 // takes an offset into a string and returns a pointer to it
  64 const char *utf8_offset_to_ptr(const char *str, ustring::size_type offset)
  65 {
  66   while (offset--)
  67     str += utf8_skip[static_cast<unsigned char>(*str)];
  68   return str;
  69 }
  70
  71 // First overload: stop on '\0' character.
  72 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
  73 {
  74   if(offset == ustring::npos)
  75     return ustring::npos;
  76
  77   const char* p = str;
  78
  79   for(; offset != 0; --offset)
  80   {
  81     if(*p == '\0')
  82       return ustring::npos;
  83
  84     p += utf8_skip[static_cast<unsigned char>(*p)];
  85   }
  86
  87   return (p - str);
  88 }
  89
  90 // Second overload: stop when reaching maxlen.
  91 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
  92                                     ustring::size_type maxlen)
  93 {
  94   if(offset == ustring::npos)
  95     return ustring::npos;
  96
  97   const char *const pend = str + maxlen;
  98   const char* p = str;
  99
 100   for(; offset != 0; --offset)
 101   {
 102     if(p >= pend)
 103       return ustring::npos;
 104
 105     p += utf8_skip[static_cast<unsigned char>(*p)];
 106   }
 107
 108   return (p - str);
 109 }
 110
 111
 112 // ustring methods
 113
 114 ustring::ustring()
 115 {
 116 }
 117
 118 ustring::~ustring()
 119 {
 120 }
 121
 122 ustring::ustring(const ustring& other)
 123   : _string(other._string), _utf8(other._utf8)
 124 {
 125 }
 126
 127 ustring& ustring::operator=(const ustring& other)
 128 {
 129   _string = other._string;
 130   _utf8 = other._utf8;
 131   return *this;
 132 }
 133
 134 ustring::ustring(const std::string& src)
 135   : _string(src), _utf8(true)
 136 {
 137 }
 138
 139 ustring::ustring(const char* src)
 140   : _string(src), _utf8(true)
 141 {
 142 }
 143
 144 ustring& ustring::operator+=(const ustring& src)
 145 {
 146   assert(_utf8 == src._utf8);
 147   _string += src._string;
 148   return *this;
 149 }
 150
 151 ustring& ustring::operator+=(const char* src)
 152 {
 153   _string += src;
 154   return *this;
 155 }
 156
 157 ustring& ustring::operator+=(char c)
 158 {
 159   _string += c;
 160   return *this;
 161 }
 162
 163 ustring::size_type ustring::size() const
 164 {
 165   if (_utf8) {
 166     const char *const pdata = _string.data();
 167     return utf8_ptr_to_offset(pdata, pdata + _string.size());
 168   } else
 169     return _string.size();
 170 }
 171
 172 ustring::size_type ustring::bytes() const
 173 {
 174   return _string.size();
 175 }
 176
 177 ustring::size_type ustring::capacity() const
 178 {
 179   return _string.capacity();
 180 }
 181
 182 ustring::size_type ustring::max_size() const
 183 {
 184   return _string.max_size();
 185 }
 186
 187 bool ustring::empty() const
 188 {
 189   return _string.empty();
 190 }
 191
 192 void ustring::clear()
 193 {
 194   _string.erase();
 195 }
 196
 197 ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
 198 {
 199   if (_utf8) {
 200     // find a proper offset
 201     size_type utf_i = utf8_byte_offset(_string.c_str(), i);
 202     if (utf_i != npos) {
 203       // if the offset is not npos, find a proper length for 'n'
 204       size_type utf_n = utf8_byte_offset(_string.data() + utf_i, n,
 205                                          _string.size() - utf_i);
 206       _string.erase(utf_i, utf_n);
 207     }
 208   } else
 209     _string.erase(i, n);
 210
 211   return *this;
 212 }
 213
 214 void ustring::resize(ustring::size_type n, char c)
 215 {
 216   if (_utf8) {
 217     const size_type size_now = size();
 218     if(n < size_now)
 219       erase(n, npos);
 220     else if(n > size_now)
 221       _string.append(n - size_now, c);
 222   } else
 223     _string.resize(n, c);
 224 }
 225
 226 ustring::value_type ustring::operator[](ustring::size_type i) const
 227 {
 228   return utf8_get_char(utf8_offset_to_ptr(_string.data(), i));
 229 }
 230
 231 const char* ustring::data() const
 232 {
 233   return _string.data();
 234 }
 235
 236 const char* ustring::c_str() const
 237 {
 238   return _string.c_str();
 239 }
 240
 241 bool ustring::utf8() const
 242 {
 243   return _utf8;
 244 }
 245
 246 void ustring::setUtf8(bool utf8)
 247 {
 248   _utf8 = utf8;
 249 }
 250
 251 }