]>
Dogcows Code - chaz/openbox/blob - otk/ustring.cc
1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
13 // The number of bytes to skip to find the next character in the string
14 static const char utf8_skip
[256] = {
15 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
16 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
17 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
18 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
19 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
20 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
21 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
22 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
25 // takes a pointer into a utf8 string and returns a unicode character for the
26 // first character at the pointer
27 unichar
utf8_get_char (const char *p
)
29 unichar result
= static_cast<unsigned char>(*p
);
31 // if it's not a 7-bit ascii character
32 if((result
& 0x80) != 0) {
33 // len is the number of bytes this character takes up in the string
34 unsigned char len
= utf8_skip
[result
];
35 result
&= 0x7F >> len
;
39 result
|= static_cast<unsigned char>(*++p
) & 0x3F;
46 // takes a pointer into a string and finds its offset
47 static ustring::size_type
utf8_ptr_to_offset(const char *str
, const char *pos
)
49 ustring::size_type offset
= 0;
52 str
+= utf8_skip
[static_cast<unsigned char>(*str
)];
59 // takes an offset into a string and returns a pointer to it
60 const char *utf8_offset_to_ptr(const char *str
, ustring::size_type offset
)
63 str
+= utf8_skip
[static_cast<unsigned char>(*str
)];
67 // First overload: stop on '\0' character.
68 ustring::size_type
utf8_byte_offset(const char* str
, ustring::size_type offset
)
70 if(offset
== ustring::npos
)
75 for(; offset
!= 0; --offset
)
80 p
+= utf8_skip
[static_cast<unsigned char>(*p
)];
86 // Second overload: stop when reaching maxlen.
87 ustring::size_type
utf8_byte_offset(const char* str
, ustring::size_type offset
,
88 ustring::size_type maxlen
)
90 if(offset
== ustring::npos
)
93 const char *const pend
= str
+ maxlen
;
96 for(; offset
!= 0; --offset
)
101 p
+= utf8_skip
[static_cast<unsigned char>(*p
)];
110 ustring::ustring(bool utf8
)
119 ustring::ustring(const ustring
& other
)
120 : _string(other
._string
), _utf8(other
._utf8
)
124 ustring
& ustring::operator=(const ustring
& other
)
126 _string
= other
._string
;
131 ustring::ustring(const std::string
& src
, bool utf8
)
132 : _string(src
), _utf8(utf8
)
136 ustring::ustring(const char* src
, bool utf8
)
137 : _string(src
), _utf8(utf8
)
141 ustring
& ustring::operator+=(const ustring
& src
)
143 assert(_utf8
== src
._utf8
);
144 _string
+= src
._string
;
148 ustring
& ustring::operator+=(const char* src
)
154 ustring
& ustring::operator+=(char c
)
160 ustring::size_type
ustring::size() const
163 const char *const pdata
= _string
.data();
164 return utf8_ptr_to_offset(pdata
, pdata
+ _string
.size());
166 return _string
.size();
169 ustring::size_type
ustring::bytes() const
171 return _string
.size();
174 ustring::size_type
ustring::capacity() const
176 return _string
.capacity();
179 ustring::size_type
ustring::max_size() const
181 return _string
.max_size();
184 bool ustring::empty() const
186 return _string
.empty();
189 void ustring::clear()
194 ustring
& ustring::erase(ustring::size_type i
, ustring::size_type n
)
197 // find a proper offset
198 size_type utf_i
= utf8_byte_offset(_string
.c_str(), i
);
200 // if the offset is not npos, find a proper length for 'n'
201 size_type utf_n
= utf8_byte_offset(_string
.data() + utf_i
, n
,
202 _string
.size() - utf_i
);
203 _string
.erase(utf_i
, utf_n
);
211 void ustring::resize(ustring::size_type n
, char c
)
214 const size_type size_now
= size();
217 else if(n
> size_now
)
218 _string
.append(n
- size_now
, c
);
220 _string
.resize(n
, c
);
223 ustring::value_type
ustring::operator[](ustring::size_type i
) const
225 return utf8_get_char(utf8_offset_to_ptr(_string
.data(), i
));
228 bool ustring::operator==(const ustring
&other
) const
230 return _string
== other
._string
&& _utf8
== other
._utf8
;
233 bool ustring::operator==(const std::string
&other
) const
235 return _string
== other
;
238 bool ustring::operator==(const char *other
) const
240 return _string
== other
;
243 const char* ustring::data() const
245 return _string
.data();
248 const char* ustring::c_str() const
250 return _string
.c_str();
253 bool ustring::utf8() const
258 void ustring::setUtf8(bool utf8
)
This page took 0.05158 seconds and 4 git commands to generate.