2006-05-07 03:45:43 +00:00
|
|
|
// FbString.cc for fluxbox
// Copyright (c) 2006 Henrik Kinnunen (fluxgen at fluxbox dot org)
// Copyright (c) 2006 Simon Bowden (rathnor at fluxbox dot org)
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
|
|
|
|
|
2007-12-30 15:32:53 +00:00
|
|
|
#include "FbString.hh"
|
2006-05-07 03:45:43 +00:00
|
|
|
|
|
|
|
#ifdef HAVE_CERRNO
|
|
|
|
#include <cerrno>
|
|
|
|
#else
|
|
|
|
#include <errno.h>
|
|
|
|
#endif
|
2007-06-29 17:25:24 +00:00
|
|
|
#ifdef HAVE_CSTRING
|
|
|
|
#include <cstring>
|
|
|
|
#else
|
|
|
|
#include <string.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_CSTDLIB
|
|
|
|
#include <cstdlib>
|
|
|
|
#else
|
|
|
|
#include <stdlib.h>
|
|
|
|
#endif
|
2006-05-07 03:45:43 +00:00
|
|
|
|
2010-09-05 08:47:12 +00:00
|
|
|
#ifdef HAVE_CSTDIO
|
|
|
|
#include <cstdio>
|
|
|
|
#else
|
|
|
|
#include <stdio.h>
|
|
|
|
#endif
|
2006-05-20 15:27:40 +00:00
|
|
|
|
2006-05-07 03:45:43 +00:00
|
|
|
#include <langinfo.h>
|
|
|
|
#include <locale.h>
|
|
|
|
|
|
|
|
#include <iostream>
|
2010-09-05 08:47:01 +00:00
|
|
|
#include <vector>
|
2006-10-27 06:57:43 +00:00
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
#ifndef HAVE_ICONV
|
|
|
|
typedef int iconv_t;
|
|
|
|
#endif // HAVE_ICONV
|
|
|
|
|
2010-09-04 13:01:33 +00:00
|
|
|
#ifdef HAVE_FRIBIDI
|
2010-09-05 08:47:12 +00:00
|
|
|
#include <fribidi/fribidi.h>
|
2010-09-04 13:01:33 +00:00
|
|
|
#endif
|
|
|
|
|
2006-10-27 06:57:43 +00:00
|
|
|
#ifdef DEBUG
|
|
|
|
using std::cerr;
|
|
|
|
using std::endl;
|
|
|
|
#endif // DEBUG
|
2006-05-07 03:45:43 +00:00
|
|
|
|
2010-09-08 18:17:21 +00:00
|
|
|
namespace {
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
/// Sentinel descriptor value meaning "no usable conversion"; code in this
/// file compares iconv_open() results and stored descriptors against it.
const iconv_t ICONV_NULL = (iconv_t)(-1);
|
|
|
|
|
2010-09-08 18:17:21 +00:00
|
|
|
#ifdef HAVE_FRIBIDI
|
|
|
|
/**
   Reorders a UTF-8 string from logical (storage) order into visual
   (display) order with FriBidi.

   The scratch buffers are function-local statics so their allocations are
   reused between calls; because they are shared, concurrent or re-entrant
   calls would overwrite each other's data.

   @param src UTF-8 text in logical order
   @return the reordered UTF-8 text, or a copy of @p src when
           fribidi_log2vis() reports a failure
*/
FbTk::FbString makeVisualFromLogical(const FbTk::FbString& src) {

    FriBidiCharType base = FRIBIDI_TYPE_N;

    // reuse allocated memory for reencoding / reordering
    static std::vector<FriBidiChar> us;
    static std::vector<FriBidiChar> out_us;
    static FbTk::FbString result;

    // +1 keeps the buffers non-empty (so &buf[0] is valid) and leaves room
    // for a terminator; the UTF-8 output buffer is sized at 4 bytes per
    // input byte.
    const size_t S = src.size() + 1;
    const size_t S4 = S * 4;

    // resize() only reallocates when the buffer has to grow, so the
    // previously allocated storage is kept whenever it is large enough.
    us.resize(S);
    FriBidiStrIndex len = fribidi_charset_to_unicode(FRIBIDI_CHAR_SET_UTF8,
                                                     const_cast<char*>(src.c_str()), S - 1,
                                                     &us[0]);

    out_us.resize(S);
    // A zero return signals failure; out_us would then still hold whatever a
    // previous call left behind, so fall back to the unmodified input.
    if (!fribidi_log2vis(&us[0], len, &base, &out_us[0], NULL, NULL, NULL))
        return src;

    result.resize(S4);
    len = fribidi_unicode_to_charset(FRIBIDI_CHAR_SET_UTF8, &out_us[0], len, &result[0]);
    result.resize(len); // trim to currently used chars

    return result;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
} // end of anonymous namespace
|
|
|
|
|
|
|
|
|
2006-05-07 03:45:43 +00:00
|
|
|
namespace FbTk {
|
|
|
|
|
2010-09-08 18:17:21 +00:00
|
|
|
/// Constructs a BiDiString holding @p logical. An empty argument leaves the
/// members in their initial state; anything else is routed through
/// setLogical().
BiDiString::BiDiString(const FbString& logical)
#ifdef HAVE_FRIBIDI
    : m_visual_dirty(false)
#endif
{
    if (logical.empty()) {
        return;
    }
    setLogical(logical);
}
|
|
|
|
|
|
|
|
/**
   Stores a new logical-order string.

   With FriBidi support the cached visual string is invalidated: it is
   cleared right away for empty input, otherwise it is marked dirty so that
   visual() recomputes it on demand.

   @param logical the new text in logical order
   @return reference to the stored logical string
*/
const FbString& BiDiString::setLogical(const FbString& logical) {
    m_logical = logical;
    // #ifdef (not #if) to match the guard used by the constructor and by
    // makeVisualFromLogical(); all FriBidi code is then enabled or disabled
    // by the same condition.
#ifdef HAVE_FRIBIDI
    if (m_logical.empty()) {
        m_visual_dirty = false;
        m_visual.clear();
    } else {
        m_visual_dirty = true;
    }
#endif
    return m_logical;
}
|
|
|
|
|
|
|
|
/**
   Returns the string in visual (display) order.

   With FriBidi support the visual form is computed from logical() the first
   time it is requested after a change and cached in m_visual; without
   FriBidi the logical string is returned unchanged.

   @return reference to the visual-order string
*/
const FbString& BiDiString::visual() const {
    // #ifdef (not #if) to match the guard used by the constructor and by
    // makeVisualFromLogical().
#ifdef HAVE_FRIBIDI
    if (m_visual_dirty) {
        m_visual = ::makeVisualFromLogical(logical());
        m_visual_dirty = false;
    }
    return m_visual;
#else
    return m_logical;
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2006-05-07 03:45:43 +00:00
|
|
|
namespace FbStringUtil {
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
/// Index of each conversion descriptor kept in s_iconv_convs.
enum ConvType {
    FB2X      = 0, ///< UTF-8 -> ISO8859-1
    X2FB      = 1, ///< ISO8859-1 -> UTF-8
    LOCALE2FB = 2, ///< locale codeset -> UTF-8
    FB2LOCALE = 3, ///< UTF-8 -> locale codeset
    CONVSIZE  = 4  ///< number of descriptors, used as the array size
};
|
2006-05-07 03:45:43 +00:00
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
static bool s_inited = false;
|
|
|
|
static iconv_t s_iconv_convs[CONVSIZE];
|
|
|
|
static std::string s_locale_codeset;
|
2006-06-25 09:05:58 +00:00
|
|
|
|
2006-05-07 03:45:43 +00:00
|
|
|
/// Initialise all of the iconv conversion descriptors
|
|
|
|
void init() {
|
2006-06-10 16:42:39 +00:00
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
if (s_inited)
|
2006-05-20 15:23:54 +00:00
|
|
|
return;
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
s_inited = true;
|
|
|
|
setlocale(LC_CTYPE, "");
|
2006-05-07 03:45:43 +00:00
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
#ifdef HAVE_ICONV
|
2006-05-07 03:45:43 +00:00
|
|
|
#ifdef CODESET
|
2011-10-23 07:01:19 +00:00
|
|
|
s_locale_codeset = nl_langinfo(CODESET);
|
2006-05-07 03:45:43 +00:00
|
|
|
#else // openbsd doesnt have this (yet?)
|
2011-10-23 07:01:19 +00:00
|
|
|
std::string locale = setlocale(LC_CTYPE, NULL);
|
2006-05-07 03:45:43 +00:00
|
|
|
size_t pos = locale.find('.');
|
2006-10-27 06:57:43 +00:00
|
|
|
if (pos != string::npos)
|
2011-10-23 07:01:19 +00:00
|
|
|
s_locale_codeset = locale.substr(pos+1);
|
2006-05-07 03:45:43 +00:00
|
|
|
#endif // CODESET
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
2011-10-23 07:01:19 +00:00
|
|
|
cerr << "FbTk::FbString: setup converts for local codeset = " << s_locale_codeset << endl;
|
2006-05-07 03:45:43 +00:00
|
|
|
#endif // DEBUG
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
s_iconv_convs[FB2X] = iconv_open("ISO8859-1", "UTF-8");
|
|
|
|
s_iconv_convs[X2FB] = iconv_open("UTF-8", "ISO8859-1");
|
|
|
|
s_iconv_convs[FB2LOCALE] = iconv_open(s_locale_codeset.c_str(), "UTF-8");
|
|
|
|
s_iconv_convs[LOCALE2FB] = iconv_open("UTF-8", s_locale_codeset.c_str());
|
2006-05-07 03:45:43 +00:00
|
|
|
#else
|
2011-10-23 07:01:19 +00:00
|
|
|
memset(s_iconv_convs, 0, sizeof(s_iconv_convs));
|
2006-05-07 03:45:43 +00:00
|
|
|
#endif // HAVE_ICONV
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Closes the conversion descriptors opened by init() and allows init() to
/// run again. Does nothing when init() has not run (or shutdown() already
/// did), and nothing at all without iconv support.
void shutdown() {
#ifdef HAVE_ICONV
    // Descriptors are only meaningful between init() and shutdown().
    if (!s_inited)
        return;

    for (int i = 0; i < CONVSIZE; ++i) {
        if (s_iconv_convs[i] != ICONV_NULL) {
            iconv_close(s_iconv_convs[i]);
            // Reset to the sentinel rather than 0: a zeroed slot would pass
            // the "!= ICONV_NULL" checks and be used or closed again.
            s_iconv_convs[i] = ICONV_NULL;
        }
    }

    s_inited = false;
#endif // HAVE_ICONV
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Recodes the text from one encoding to another
|
|
|
|
assuming cd is correct
|
|
|
|
@param cd the iconv type
|
|
|
|
@param msg text to be converted, **NOT** necessarily NULL terminated
|
|
|
|
@param size number of BYTES to convert
|
|
|
|
@return the recoded string, or 0 on failure
|
|
|
|
*/
|
2011-10-23 07:01:19 +00:00
|
|
|
std::string recode(iconv_t cd, const std::string &in) {
|
2006-05-07 10:08:25 +00:00
|
|
|
|
2010-09-08 18:17:21 +00:00
|
|
|
#ifdef HAVE_ICONV
|
2006-10-27 06:57:43 +00:00
|
|
|
/**
|
2006-05-07 10:08:25 +00:00
|
|
|
--NOTE--
|
|
|
|
In the "C" locale, this will strip any high-bit characters
|
|
|
|
because C means 7-bit ASCII charset. If you don't want this
|
|
|
|
then you need to set your locale to something UTF-8, OR something
|
|
|
|
ISO8859-1.
|
|
|
|
*/
|
2006-05-07 03:45:43 +00:00
|
|
|
|
|
|
|
// If empty message, yes this can happen, return
|
2006-10-27 06:57:43 +00:00
|
|
|
if (in.empty())
|
2006-05-07 03:45:43 +00:00
|
|
|
return "";
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
if (cd == ICONV_NULL)
|
2006-06-25 07:18:41 +00:00
|
|
|
return in; // can't convert
|
|
|
|
|
2006-05-07 03:45:43 +00:00
|
|
|
size_t insize = in.size();
|
|
|
|
size_t outsize = insize;
|
2011-10-23 07:01:19 +00:00
|
|
|
std::vector<char> out(outsize);
|
|
|
|
char* out_ptr = &out[0];
|
2006-05-07 03:45:43 +00:00
|
|
|
|
|
|
|
size_t inbytesleft = insize;
|
|
|
|
size_t outbytesleft = outsize;
|
|
|
|
|
2010-09-08 18:17:21 +00:00
|
|
|
#ifdef HAVE_CONST_ICONV
|
|
|
|
const char* in_ptr = in.data();
|
|
|
|
#else
|
2011-10-23 07:01:19 +00:00
|
|
|
char* in_ptr = const_cast<char*>(in.data());
|
2010-09-08 18:17:21 +00:00
|
|
|
#endif
|
2006-05-07 03:45:43 +00:00
|
|
|
size_t result = (size_t)(-1);
|
|
|
|
bool again = true;
|
|
|
|
|
|
|
|
while (again) {
|
|
|
|
again = false;
|
2006-05-22 07:34:25 +00:00
|
|
|
|
|
|
|
result = iconv(cd, &in_ptr, &inbytesleft, &out_ptr, &outbytesleft);
|
|
|
|
|
2006-05-07 03:45:43 +00:00
|
|
|
if (result == (size_t)(-1)) {
|
|
|
|
switch(errno) {
|
|
|
|
case EILSEQ:
|
|
|
|
// Try skipping a byte
|
|
|
|
in_ptr++;
|
|
|
|
inbytesleft--;
|
|
|
|
again = true;
|
|
|
|
case EINVAL:
|
|
|
|
break;
|
|
|
|
case E2BIG:
|
|
|
|
// need more space!
|
|
|
|
outsize += insize;
|
2011-10-23 07:01:19 +00:00
|
|
|
out.resize(outsize);
|
|
|
|
if (out.capacity() != outsize)
|
2006-05-07 03:45:43 +00:00
|
|
|
again = true;
|
|
|
|
outbytesleft += insize;
|
2011-10-23 07:01:19 +00:00
|
|
|
out_ptr = (&out[0] + outsize) - outbytesleft;
|
2006-05-07 03:45:43 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// something else broke
|
|
|
|
perror("iconv");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// copy to our return string
|
2011-10-23 07:01:19 +00:00
|
|
|
std::string ret;
|
|
|
|
ret.append(&out[0], outsize - outbytesleft);
|
2006-05-07 03:45:43 +00:00
|
|
|
|
|
|
|
// reset the conversion descriptor
|
|
|
|
iconv(cd, NULL, NULL, NULL, NULL);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
#else
|
2011-09-09 18:41:17 +00:00
|
|
|
return in;
|
2006-05-07 03:45:43 +00:00
|
|
|
#endif // HAVE_ICONV
|
2010-09-08 18:17:21 +00:00
|
|
|
}
|
2006-05-07 03:45:43 +00:00
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
/// Runs @p src through the X2FB descriptor (opened in init() as
/// ISO8859-1 -> UTF-8) and returns the result.
FbString XStrToFb(const std::string &src) {
    const iconv_t converter = s_iconv_convs[X2FB];
    return recode(converter, src);
}
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
std::string FbStrToX(const FbString &src) {
|
|
|
|
return recode(s_iconv_convs[FB2X], src);
|
2006-05-07 03:45:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handle locale string encodings (strings coming from userspace)
|
2011-10-23 07:01:19 +00:00
|
|
|
/// Runs @p src through the LOCALE2FB descriptor (opened in init() as
/// locale codeset -> UTF-8) and returns the result.
FbString LocaleStrToFb(const std::string &src) {
    const iconv_t converter = s_iconv_convs[LOCALE2FB];
    return recode(converter, src);
}
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
std::string FbStrToLocale(const FbString &src) {
|
|
|
|
return recode(s_iconv_convs[FB2LOCALE], src);
|
2006-05-07 03:45:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// @return true when iconv support is compiled in and the LOCALE2FB
///         descriptor is not ICONV_NULL; false otherwise
bool haveUTF8() {
#ifdef HAVE_ICONV
    const bool locale_converter_ok = (s_iconv_convs[LOCALE2FB] != ICONV_NULL);
    return locale_converter_ok;
#else
    return false;
#endif // HAVE_ICONV
}
|
|
|
|
|
|
|
|
|
2008-04-21 22:43:10 +00:00
|
|
|
} // end namespace FbStringUtil
|
2006-05-07 03:45:43 +00:00
|
|
|
|
2006-06-25 09:05:58 +00:00
|
|
|
#ifdef HAVE_ICONV
|
2011-10-23 07:01:19 +00:00
|
|
|
StringConvertor::StringConvertor(EncodingTarget target) : m_iconv(ICONV_NULL) {
|
2006-06-25 09:05:58 +00:00
|
|
|
if (target == ToLocaleStr)
|
2011-10-23 07:01:19 +00:00
|
|
|
m_destencoding = FbStringUtil::s_locale_codeset;
|
2006-06-25 09:05:58 +00:00
|
|
|
else
|
|
|
|
m_destencoding = "UTF-8";
|
|
|
|
}
|
2006-06-26 01:36:27 +00:00
|
|
|
#else
|
2010-09-08 18:17:21 +00:00
|
|
|
StringConvertor::StringConvertor(EncodingTarget target) { }
|
2006-06-26 01:36:27 +00:00
|
|
|
#endif
|
|
|
|
|
2006-06-25 09:05:58 +00:00
|
|
|
/// Releases the conversion descriptor, if any, by delegating to reset().
StringConvertor::~StringConvertor() {
    this->reset();
}
|
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
/**
   Selects the encoding that recode() converts from.

   @param encoding name of the source encoding; an empty string selects the
          locale codeset
   @return true if the convertor is usable afterwards: either a descriptor
           was opened, or source and destination are identical and no
           descriptor was open; false if iconv_open() failed (the previous
           descriptor is kept) or iconv support is not compiled in
*/
bool StringConvertor::setSource(const std::string &encoding) {
#ifdef HAVE_ICONV
    std::string source_enc(encoding);
    if (source_enc.empty())
        source_enc = FbStringUtil::s_locale_codeset;

    // Same encoding on both sides and nothing open: no conversion required.
    const bool same_encoding = (source_enc == m_destencoding);
    if (same_encoding && m_iconv == ICONV_NULL)
        return true;

    // Open the replacement first so a failure leaves the old one untouched.
    const iconv_t fresh = iconv_open(m_destencoding.c_str(), source_enc.c_str());
    if (fresh == ICONV_NULL)
        return false;

    if (m_iconv != ICONV_NULL)
        iconv_close(m_iconv);
    m_iconv = fresh;
    return true;
#else
    return false;
#endif
}
|
2006-10-27 06:57:43 +00:00
|
|
|
|
2011-10-23 07:01:19 +00:00
|
|
|
/// Converts @p src with this convertor's descriptor via
/// FbStringUtil::recode(); without iconv support @p src is returned as is.
FbString StringConvertor::recode(const std::string &src) {
#ifndef HAVE_ICONV
    return src;
#else
    const iconv_t converter = m_iconv;
    return FbStringUtil::recode(converter, src);
#endif
}
|
|
|
|
|
2010-09-08 18:17:21 +00:00
|
|
|
/// Closes the current conversion descriptor, if one is open, and puts the
/// convertor back into the "no descriptor" state. No-op without iconv.
void StringConvertor::reset() {
#ifdef HAVE_ICONV
    if (m_iconv == ICONV_NULL)
        return; // nothing open, state is already reset

    iconv_close(m_iconv);
    m_iconv = ICONV_NULL;
#endif
}
|
|
|
|
|
2010-09-04 13:01:33 +00:00
|
|
|
|
2008-04-21 22:43:10 +00:00
|
|
|
} // end namespace FbTk
|