]> Dogcows Code - chaz/openbox/blob - otk/ustring.hh
03f893d309333ea34b69203cbff93a9ae7d0c701
[chaz/openbox] / otk / ustring.hh
1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
2 #ifndef __ustring_hh
3 #define __ustring_hh
4
5 /*! @file ustring.hh
6 @brief Provides a simple UTF-8 encoded string
7 */
8
9 extern "C" {
10 #ifdef HAVE_STDINT_H
11 # include <stdint.h>
12 #else
13 # ifdef HAVE_SYS_TYPES_H
14 # include <sys/types.h>
15 # endif
16 #endif
17 }
18
19 #include <string>
20
21 namespace otk {
22
//! A single decoded character value, held in a 32-bit unsigned integer
/*!
  The underlying type name depends on which system header was available at
  configure time: <stdint.h> supplies uint32_t, otherwise the u_int32_t
  spelling (expected from <sys/types.h>) is used.
*/
#if defined(HAVE_STDINT_H)
typedef uint32_t unichar;
#else
typedef u_int32_t unichar;
#endif
28
29 #ifndef DOXYGEN_IGNORE
30
//! Byte length of a UTF-8 sequence, indexed by the value of its first byte
/*!
  Adding g_utf8_skip[b] to the position of a byte b that starts a character
  yields the position of the next character in the string. Bytes that cannot
  start a multi-byte sequence map to 1, so stepping always makes progress.
*/
const char g_utf8_skip[256] = {
  // 0x00 - 0x7F: 7-bit ASCII, one byte per character
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0x80 - 0xBF: continuation bytes (10xxxxxx); skip a single byte
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0xC0 - 0xDF: first byte of a two-byte sequence
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  // 0xE0 - 0xEF: first byte of a three-byte sequence
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  // 0xF0 - 0xF7: four bytes; 0xF8 - 0xFB: five; 0xFC - 0xFD: six;
  // 0xFE - 0xFF: skip a single byte
  4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
42
43 //! The iterator type for ustring
44 /*!
45 Note this is not a random access iterator but a bidirectional one, since all
46 index operations need to iterate over the UTF-8 data. Use std::advance() to
47 move to a certain position.
48 <p>
49 A writeable iterator isn't provided because: The number of bytes of the old
50 UTF-8 character and the new one to write could be different. Therefore, any
51 write operation would invalidate all other iterators pointing into the same
52 string.
53 */
54 template <class T>
55 class ustring_Iterator
56 {
57 public:
58 typedef std::bidirectional_iterator_tag iterator_category;
59 typedef unichar value_type;
60 typedef std::string::difference_type difference_type;
61 typedef value_type reference;
62 typedef void pointer;
63
64 inline ustring_Iterator() {}
65 inline ustring_Iterator(const ustring_Iterator<std::string::iterator>&
66 other) : _pos(other.base()) {}
67
68 inline value_type operator*() const {
69 // get a unicode character from the iterator's position
70
71 // get an iterator to the internal string
72 std::string::const_iterator pos = _pos;
73
74 unichar result = static_cast<unsigned char>(*pos);
75
76 // if its not a 7-bit ascii character
77 if((result & 0x80) != 0) {
78 // len is the number of bytes this character takes up in the string
79 unsigned char len = g_utf8_skip[result];
80 result &= 0x7F >> len;
81
82 while(--len != 0) {
83 result <<= 6;
84 result |= static_cast<unsigned char>(*++pos) & 0x3F;
85 }
86 }
87
88 return result;
89 }
90
91 inline ustring_Iterator<T> & operator++() {
92 pos_ += g_utf8_skip[static_cast<unsigned char>(*pos_)];
93 return *this;
94 }
95 inline ustring_Iterator<T> & operator--() {
96 do { --_pos; } while((*_pos & '\xC0') == '\x80');
97 return *this;
98 }
99
100 explicit inline ustring_Iterator(T pos) : _pos(pos) {}
101 inline T base() const { return _pos; }
102
103 private:
104 T _pos;
105 };
106
107 #endif // DOXYGEN_IGNORE
108
109 //! This class provides a simple wrapper to a std::string that is encoded as
110 //! UTF-8.
111 /*!
112 This class does <b>not</b> handle extended 8-bit ASCII charsets like
113 ISO-8859-1.
114 <p>
115 More info on Unicode and UTF-8 can be found here:
116 http://www.cl.cam.ac.uk/~mgk25/unicode.html
117 <p>
118 This does not subclass std::string, because std::string was intended to be a
119 final class. For instance, it does not have a virtual destructor.
120 */
121 class ustring {
122 std::string _string;
123
124 public:
125 typedef std::string::size_type size_type;
126 typedef std::string::difference_type difference_type;
127
128 typedef unichar value_type;
129 typedef unichar & reference;
130 typedef const unichar & const_reference;
131
132 typedef ustring_Iterator<std::string::iterator> iterator;
133 typedef ustring_Iterator<std::string::const_iterator> const_iterator;
134
135 static const size_type npos = std::string::npos;
136
137 ustring();
138 ~ustring();
139
140 // make new strings
141
142 ustring(const ustring& other);
143 ustring& operator=(const ustring& other);
144 ustring(const std::string& src);
145 ustring::ustring(const char* src);
146
147 // sizes
148
149 ustring::size_type size() const;
150 ustring::size_type length() const;
151 ustring::size_type bytes() const;
152 ustring::size_type capacity() const;
153 ustring::size_type max_size() const;
154
155 // internal data
156
157 const char* data() const;
158 const char* c_str() const;
159
160 };
161
162 }
163
164 #endif // __ustring_hh
This page took 0.040105 seconds and 4 git commands to generate.