1 /* HomeBank -- Free, easy, personal accounting for everyone.
2 * Copyright (C) 1995-2016 Maxime DOYEN
4 * This file is part of HomeBank.
6 * HomeBank is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * HomeBank is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "hb-encoding.h"
31 /* our global data */
32 extern struct HomeBank
*GLOBALS
;
34 /* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = */
39 * The original versions of the following tables are taken from profterm
41 * Copyright (C) 2002 Red Hat, Inc.
/* Descriptor for the UTF-8 encoding; gedit_encoding_get_utf8() hands out
 * the address of this object.
 * NOTE(review): the initializer values are not visible in this view. */
46 static const GeditEncoding utf8_encoding
= {
/*
 * Table of candidate encodings. Each complete row holds an encoding id
 * (GEDIT_ENCODING_*), the charset name handed to g_convert(), and a
 * descriptive label. Rows are addressed by position through
 * gedit_encoding_get_from_index() and probed in index order by
 * homebank_utf8_convert().
 * NOTE(review): several rows look truncated in this view (UTF-7, UCS-2,
 * UCS-4, HZ, JOHAB, TIS-620, UHC) and the closing brace is not visible;
 * verify against the full file. Presumably the row order must match the
 * GEDIT_ENCODING_* numbering -- confirm against hb-encoding.h.
 */
52 static const GeditEncoding encodings
[] = {
54 { GEDIT_ENCODING_ISO_8859_1
,
55 "ISO-8859-1", "Western" },
56 { GEDIT_ENCODING_ISO_8859_2
,
57 "ISO-8859-2", "Central European" },
58 { GEDIT_ENCODING_ISO_8859_3
,
59 "ISO-8859-3", "South European" },
60 { GEDIT_ENCODING_ISO_8859_4
,
61 "ISO-8859-4", "Baltic" },
62 { GEDIT_ENCODING_ISO_8859_5
,
63 "ISO-8859-5", "Cyrillic" },
64 { GEDIT_ENCODING_ISO_8859_6
,
65 "ISO-8859-6", "Arabic" },
66 { GEDIT_ENCODING_ISO_8859_7
,
67 "ISO-8859-7", "Greek" },
68 { GEDIT_ENCODING_ISO_8859_8
,
69 "ISO-8859-8", "Hebrew Visual" },
70 { GEDIT_ENCODING_ISO_8859_8_I
,
71 "ISO-8859-8-I", "Hebrew" },
72 { GEDIT_ENCODING_ISO_8859_9
,
73 "ISO-8859-9", "Turkish" },
74 { GEDIT_ENCODING_ISO_8859_10
,
75 "ISO-8859-10", "Nordic" },
76 { GEDIT_ENCODING_ISO_8859_13
,
77 "ISO-8859-13", "Baltic" },
78 { GEDIT_ENCODING_ISO_8859_14
,
79 "ISO-8859-14", "Celtic" },
80 { GEDIT_ENCODING_ISO_8859_15
,
81 "ISO-8859-15", "Western" },
82 { GEDIT_ENCODING_ISO_8859_16
,
83 "ISO-8859-16", "Romanian" },
85 { GEDIT_ENCODING_UTF_7
,
87 { GEDIT_ENCODING_UTF_16
,
88 "UTF-16", "Unicode" },
89 { GEDIT_ENCODING_UTF_16_BE
,
90 "UTF-16BE", "Unicode" },
91 { GEDIT_ENCODING_UTF_16_LE
,
92 "UTF-16LE", "Unicode" },
93 { GEDIT_ENCODING_UTF_32
,
94 "UTF-32", "Unicode" },
95 { GEDIT_ENCODING_UCS_2
,
97 { GEDIT_ENCODING_UCS_4
,
100 { GEDIT_ENCODING_ARMSCII_8
,
101 "ARMSCII-8", "Armenian" },
102 { GEDIT_ENCODING_BIG5
,
103 "BIG5", "Chinese Traditional" },
104 { GEDIT_ENCODING_BIG5_HKSCS
,
105 "BIG5-HKSCS", "Chinese Traditional" },
106 { GEDIT_ENCODING_CP_866
,
107 "CP866", "Cyrillic/Russian" },
109 { GEDIT_ENCODING_EUC_JP
,
110 "EUC-JP", "Japanese" },
111 { GEDIT_ENCODING_EUC_JP_MS
,
112 "EUC-JP-MS", "Japanese" },
113 { GEDIT_ENCODING_CP932
,
114 "CP932", "Japanese" },
116 { GEDIT_ENCODING_EUC_KR
,
117 "EUC-KR", "Korean" },
118 { GEDIT_ENCODING_EUC_TW
,
119 "EUC-TW", "Chinese Traditional" },
121 { GEDIT_ENCODING_GB18030
,
122 "GB18030", "Chinese Simplified" },
123 { GEDIT_ENCODING_GB2312
,
124 "GB2312", "Chinese Simplified" },
125 { GEDIT_ENCODING_GBK
,
126 "GBK", "Chinese Simplified" },
127 { GEDIT_ENCODING_GEOSTD8
,
128 "GEORGIAN-ACADEMY", "Georgian" }, /* FIXME GEOSTD8 ? */
130 "HZ", "Chinese Simplified" },
132 { GEDIT_ENCODING_IBM_850
,
133 "IBM850", "Western" },
134 { GEDIT_ENCODING_IBM_852
,
135 "IBM852", "Central European" },
136 { GEDIT_ENCODING_IBM_855
,
137 "IBM855", "Cyrillic" },
138 { GEDIT_ENCODING_IBM_857
,
139 "IBM857", "Turkish" },
140 { GEDIT_ENCODING_IBM_862
,
141 "IBM862", "Hebrew" },
142 { GEDIT_ENCODING_IBM_864
,
143 "IBM864", "Arabic" },
145 { GEDIT_ENCODING_ISO_2022_JP
,
146 "ISO-2022-JP", "Japanese" },
147 { GEDIT_ENCODING_ISO_2022_KR
,
148 "ISO-2022-KR", "Korean" },
149 { GEDIT_ENCODING_ISO_IR_111
,
150 "ISO-IR-111", "Cyrillic" },
151 { GEDIT_ENCODING_JOHAB
,
153 { GEDIT_ENCODING_KOI8_R
,
154 "KOI8R", "Cyrillic" },
155 { GEDIT_ENCODING_KOI8__R
,
156 "KOI8-R", "Cyrillic" },
157 { GEDIT_ENCODING_KOI8_U
,
158 "KOI8U", "Cyrillic/Ukrainian" },
160 { GEDIT_ENCODING_SHIFT_JIS
,
161 "SHIFT_JIS", "Japanese" },
162 { GEDIT_ENCODING_TCVN
,
163 "TCVN", "Vietnamese" },
164 { GEDIT_ENCODING_TIS_620
,
166 { GEDIT_ENCODING_UHC
,
168 { GEDIT_ENCODING_VISCII
,
169 "VISCII", "Vietnamese" },
171 { GEDIT_ENCODING_WINDOWS_1250
,
172 "WINDOWS-1250", "Central European" },
173 { GEDIT_ENCODING_WINDOWS_1251
,
174 "WINDOWS-1251", "Cyrillic" },
175 { GEDIT_ENCODING_WINDOWS_1252
,
176 "WINDOWS-1252", "Western" },
177 { GEDIT_ENCODING_WINDOWS_1253
,
178 "WINDOWS-1253", "Greek" },
179 { GEDIT_ENCODING_WINDOWS_1254
,
180 "WINDOWS-1254", "Turkish" },
181 { GEDIT_ENCODING_WINDOWS_1255
,
182 "WINDOWS-1255", "Hebrew" },
183 { GEDIT_ENCODING_WINDOWS_1256
,
184 "WINDOWS-1256", "Arabic" },
185 { GEDIT_ENCODING_WINDOWS_1257
,
186 "WINDOWS-1257", "Baltic" },
187 { GEDIT_ENCODING_WINDOWS_1258
,
188 "WINDOWS-1258", "Vietnamese" }
191 const GeditEncoding
*
192 gedit_encoding_get_from_index (gint index
)
194 //g_return_val_if_fail (index >= 0, NULL);
196 if (index
>= GEDIT_ENCODING_LAST
)
199 //gedit_encoding_lazy_init ();
201 return &encodings
[index
];
204 const GeditEncoding
*
205 gedit_encoding_get_utf8 (void)
207 //gedit_encoding_lazy_init ();
209 return &utf8_encoding
;
213 static gchar
*homebank_utf8_convert(gchar
*buffer
, const gchar
**charset
)
216 gchar
* conv_buffer
= NULL
;
220 const struct _GeditEncoding
*enc
;
222 DB( g_print("(homebank) homebank_utf8_convert\n") );
224 for (i
=0 ; i
<GEDIT_ENCODING_LAST
; i
++)
228 enc
= gedit_encoding_get_from_index(i
);
229 DB( g_print("-> should try %s\n", enc
->charset
) );
231 conv_buffer
= g_convert(buffer
, -1, "UTF-8", enc
->charset
, NULL
, &new_len
, &conv_error
);
232 valid
= g_utf8_validate (conv_buffer
, -1, NULL
);
233 if ((conv_error
!= NULL
) || !valid
)
235 DB( g_print (" -> Couldn't convert from %s to UTF-8.\n", enc
->charset
) );
239 DB( g_print (" -> file compatible with '%s'\n", enc
->charset
) );
241 *charset
= enc
->charset
;
253 * Ensure a buffer is valid UTF-8, converting it if necessary
/*
 * homebank_utf8_ensure: checks whether `buffer` is valid UTF-8 and, on the
 * conversion path, delegates to homebank_utf8_convert() without asking for
 * the charset name (NULL is passed as second argument).
 *
 * NOTE(review): the statements guarding the conversion and the return
 * statements are not visible in this view. Whether `buffer` is freed or
 * handed back unchanged, and which pointer is returned after a successful
 * conversion, must be confirmed against the full file before relying on it.
 */
256 gchar
*homebank_utf8_ensure(gchar
*buffer
)
261 DB( g_print("(homebank) homebank_utf8_ensure\n") );
/* -1: the buffer is treated as NUL-terminated */
266 isvalid
= g_utf8_validate(buffer
, -1, NULL
);
267 DB( g_print(" -> is valid utf8: %d\n", isvalid
) );
/* probe the known encodings; the matching charset name is not requested */
271 converted
= homebank_utf8_convert(buffer
, NULL
);
/* a NULL result is tested here; presumably it means no encoding matched -- confirm */
272 if(converted
!= NULL
)
284 const gchar
*homebank_file_getencoding(gchar
*filename
)
286 const gchar
*charset
= NULL
;
289 GError
*error
= NULL
;
291 const gchar
*locale_charset
;
292 const struct _GeditEncoding
*enc
;
294 DB( g_print("(homebank) test encoding\n") );
296 if (g_get_charset (&locale_charset
) == FALSE
)
298 //unknown_encoding.charset = g_strdup (locale_charset);
302 DB( g_print(" -> locale charset is '%s'\n", locale_charset
) );
304 if (g_file_get_contents (filename
, &buffer
, &length
, &error
))
307 isutf8
= g_utf8_validate(buffer
, -1, NULL
);
308 DB( g_print(" -> is valid utf8: %d\n", isutf8
) );
310 if( isutf8
== FALSE
)
314 converted
= homebank_utf8_convert(buffer
, &charset
);
316 DB( g_print(" -> converted charset match: '%s'\n", charset
) );
317 DB( g_print(" -> converted: '%p' %s\n", converted
, converted
) );
319 if(converted
!= NULL
)
324 enc
= gedit_encoding_get_utf8();
325 charset
= enc
->charset
;
332 DB( g_print (" -> charset is '%s'\n", charset
) );