]> Dogcows Code - chaz/openbox/blob - otk/ustring.cc
make an optional bool param for ustring's other constructors
[chaz/openbox] / otk / ustring.cc
1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
2
3 #ifdef HAVE_CONFIG_H
4 # include "../config.h"
5 #endif // HAVE_CONFIG_H
6
7 #include "ustring.hh"
8
9 extern "C" {
10 #include <assert.h>
11 }
12
13 namespace otk {
14
15 // helper functions
16
// Lookup table indexed by the first byte of a UTF-8 character. Each entry is
// the number of bytes to step forward to reach the next character.
static const char utf8_skip[256] = {
  // 0x00-0x7F: 7-bit ASCII, one byte each
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0x80-0xBF: continuation bytes, stepped over one at a time
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0xC0-0xDF: first byte of a 2-byte sequence
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  // 0xE0-0xEF: first byte of a 3-byte sequence
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  // 0xF0-0xF7: 4 bytes, 0xF8-0xFB: 5 bytes, 0xFC-0xFD: 6 bytes,
  // 0xFE-0xFF: stepped over as a single byte
  4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
28
29 // takes a pointer into a utf8 string and returns a unicode character for the
30 // first character at the pointer
31 unichar utf8_get_char (const char *p)
32 {
33 unichar result = static_cast<unsigned char>(*p);
34
35 // if its not a 7-bit ascii character
36 if((result & 0x80) != 0) {
37 // len is the number of bytes this character takes up in the string
38 unsigned char len = utf8_skip[result];
39 result &= 0x7F >> len;
40
41 while(--len != 0) {
42 result <<= 6;
43 result |= static_cast<unsigned char>(*++p) & 0x3F;
44 }
45 }
46
47 return result;
48 }
49
50 // takes a pointer into a string and finds its offset
51 static ustring::size_type utf8_ptr_to_offset(const char *str, const char *pos)
52 {
53 ustring::size_type offset = 0;
54
55 while (str < pos) {
56 str += utf8_skip[static_cast<unsigned char>(*str)];
57 offset++;
58 }
59
60 return offset;
61 }
62
63 // takes an offset into a string and returns a pointer to it
64 const char *utf8_offset_to_ptr(const char *str, ustring::size_type offset)
65 {
66 while (offset--)
67 str += utf8_skip[static_cast<unsigned char>(*str)];
68 return str;
69 }
70
71 // First overload: stop on '\0' character.
72 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
73 {
74 if(offset == ustring::npos)
75 return ustring::npos;
76
77 const char* p = str;
78
79 for(; offset != 0; --offset)
80 {
81 if(*p == '\0')
82 return ustring::npos;
83
84 p += utf8_skip[static_cast<unsigned char>(*p)];
85 }
86
87 return (p - str);
88 }
89
90 // Second overload: stop when reaching maxlen.
91 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
92 ustring::size_type maxlen)
93 {
94 if(offset == ustring::npos)
95 return ustring::npos;
96
97 const char *const pend = str + maxlen;
98 const char* p = str;
99
100 for(; offset != 0; --offset)
101 {
102 if(p >= pend)
103 return ustring::npos;
104
105 p += utf8_skip[static_cast<unsigned char>(*p)];
106 }
107
108 return (p - str);
109 }
110
111
112 // ustring methods
113
114 ustring::ustring(bool utf8)
115 : _utf8(utf8)
116 {
117 }
118
119 ustring::~ustring()
120 {
121 }
122
123 ustring::ustring(const ustring& other)
124 : _string(other._string), _utf8(other._utf8)
125 {
126 }
127
128 ustring& ustring::operator=(const ustring& other)
129 {
130 _string = other._string;
131 _utf8 = other._utf8;
132 return *this;
133 }
134
135 ustring::ustring(const std::string& src, bool utf8)
136 : _string(src), _utf8(utf8)
137 {
138 }
139
140 ustring::ustring(const char* src, bool utf8)
141 : _string(src), _utf8(utf8)
142 {
143 }
144
145 ustring& ustring::operator+=(const ustring& src)
146 {
147 assert(_utf8 == src._utf8);
148 _string += src._string;
149 return *this;
150 }
151
152 ustring& ustring::operator+=(const char* src)
153 {
154 _string += src;
155 return *this;
156 }
157
158 ustring& ustring::operator+=(char c)
159 {
160 _string += c;
161 return *this;
162 }
163
164 ustring::size_type ustring::size() const
165 {
166 if (_utf8) {
167 const char *const pdata = _string.data();
168 return utf8_ptr_to_offset(pdata, pdata + _string.size());
169 } else
170 return _string.size();
171 }
172
173 ustring::size_type ustring::bytes() const
174 {
175 return _string.size();
176 }
177
178 ustring::size_type ustring::capacity() const
179 {
180 return _string.capacity();
181 }
182
183 ustring::size_type ustring::max_size() const
184 {
185 return _string.max_size();
186 }
187
188 bool ustring::empty() const
189 {
190 return _string.empty();
191 }
192
193 void ustring::clear()
194 {
195 _string.erase();
196 }
197
198 ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
199 {
200 if (_utf8) {
201 // find a proper offset
202 size_type utf_i = utf8_byte_offset(_string.c_str(), i);
203 if (utf_i != npos) {
204 // if the offset is not npos, find a proper length for 'n'
205 size_type utf_n = utf8_byte_offset(_string.data() + utf_i, n,
206 _string.size() - utf_i);
207 _string.erase(utf_i, utf_n);
208 }
209 } else
210 _string.erase(i, n);
211
212 return *this;
213 }
214
215 void ustring::resize(ustring::size_type n, char c)
216 {
217 if (_utf8) {
218 const size_type size_now = size();
219 if(n < size_now)
220 erase(n, npos);
221 else if(n > size_now)
222 _string.append(n - size_now, c);
223 } else
224 _string.resize(n, c);
225 }
226
227 ustring::value_type ustring::operator[](ustring::size_type i) const
228 {
229 return utf8_get_char(utf8_offset_to_ptr(_string.data(), i));
230 }
231
232 const char* ustring::data() const
233 {
234 return _string.data();
235 }
236
237 const char* ustring::c_str() const
238 {
239 return _string.c_str();
240 }
241
242 bool ustring::utf8() const
243 {
244 return _utf8;
245 }
246
247 void ustring::setUtf8(bool utf8)
248 {
249 _utf8 = utf8;
250 }
251
252 }
This page took 0.053966 seconds and 5 git commands to generate.