- added some bug fixes to check whether lng files are UTF-8 encoded or not and handle each case appropriately

2025-09-25 23:19:07 +02:00 · 2011-06-08 07:18:06 +00:00
parent bf9702cdca
commit f65bfe8710
29 changed files with 3072 additions and 83 deletions
--- a/source/shared_lib/sources/util/properties.cpp
+++ b/source/shared_lib/sources/util/properties.cpp
@@ -25,11 +25,21 @@
 #include <shlobj.h>
 #endif

+#include "utf8.h"
+#include "font.h"
+
+//#include <locale>
+//#include <iostream>
+//#include <string>
+//#include <sstream>
+#include "string_utils.h"
+
 #include "leak_dumper.h"

 using namespace std;
 using namespace Shared::PlatformCommon;
 using namespace Shared::Platform;
+using namespace Shared::Graphics;

 namespace Shared{ namespace Util{

@@ -39,42 +49,329 @@ string Properties::applicationPath = "";
 //	class Properties
 // =====================================================

+//wstring widen( const string& str )
+//{
+//      wostringstream wstm ;
+//      wstm.imbue(std::locale("en_US.UTF-8"));
+//      const ctype<wchar_t>& ctfacet =
+//      use_facet< ctype<wchar_t> >( wstm.getloc() ) ;
+//      for( size_t i=0 ; i<str.size() ; ++i )
+//      wstm << ctfacet.widen( str[i] ) ;
+//      return wstm.str() ;
+//}
+
+// Convert a narrow string to a wide string
+//std::wstring widen(const std::string& str) {
+//	// Make space for wide string
+//	wchar_t* buffer = new wchar_t[str.size() + 1];
+//	// convert ASCII to UNICODE
+//	mbstowcs( buffer, str.c_str(), str.size() );
+//	// NULL terminate it
+//	buffer[str.size()] = 0;
+//	// Clean memory and return it
+//	std::wstring wstr = buffer;
+//	delete [] buffer;
+//	return wstr;
+//}
+// Widen an individual character
+
+//wstring fromUtf8(const char* str, size_t length) {
+//	wchar_t result[4097]= L"";
+//	int len = 0;
+//	for(int i = 0 ; i < length; i++)
+//	{
+//		if (((byte)str[i]) < 0x80)
+//		{
+//			result[len++] = ((byte)str[i]);
+//			continue;
+//		}
+//		if (((byte)str[i]) >= 0xC0)
+//		{
+//			wchar_t c = ((byte)str[i++]) - 0xC0;
+//			while(((byte)str[i]) >= 0x80)
+//				c = (c << 6) | (((byte)str[i++]) - 0x80);
+//			--i;
+//			result[len++] = c;
+//			continue;
+//		}
+//	}
+//	result[len] = 0;
+//	return result;
+//}
+
+//string conv_utf8_iso8859_7(string s) {
+//    int len = s.size();
+//    string out = "";
+//    string curr_char = "";
+//    for(int i=0; i < len; i++) {
+//        curr_char = curr_char + s[i];
+//        if( ( (s[i]) & (128+64) ) == 128) {
+//            //character end found
+//            if ( curr_char.size() == 2) {
+//                // 2-byte character check for it is greek one and convert
+//                if      ((curr_char[0])==205) out = out + (char)(curr_char[1]+16);
+//                else if ((curr_char[0])==206) out = out + (char)(curr_char[1]+48);
+//                else if ((curr_char[0])==207) out = out + (char)(curr_char[1]+112);
+//                else ; // non greek 2-byte character, discard character
+//            } else ;// n-byte character, n>2, discard character
+//            curr_char = "";
+//        }
+//        else if ((s[i]) < 128) {
+//            // character is one byte (ascii)
+//            out = out + curr_char;
+//            curr_char = "";
+//        }
+//    }
+//    return out;
+//}
+
+// Map from the most-significant 6 bits of the first byte to the total number of bytes in a
+// UTF-8 character.
+//static char UTF8_2_ISO_8859_1_len[] =
+//{
+//  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+//  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+//  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* erroneous */
+//  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6
+//};
+//
+//static char UTF8_2_ISO_8859_1_mask[] = {0x3F, 0x7F, 0x1F, 0x0F, 0x07,
+//0x03, 0x01};
+
+
+/*----------------------------------------------------------------------
+-------
+   Convert a UTF-8 string to an ISO-8859-1 MultiByte string.
+   No more than 'count' bytes will be written to the output buffer.
+   Return the size of the converted string in bytes, excluding the
+   null terminator.
+*/
+//int ldap_x_utf8s_to_iso_8859_1s( char *mbstr, const char *utf8str, size_t count )
+//{
+//  int res = 0;
+//
+//  while (*utf8str != '\0')
+//  {
+//    int           len = UTF8_2_ISO_8859_1_len[(*utf8str >> 2) & 0x3F];
+//    unsigned long u   = *utf8str & UTF8_2_ISO_8859_1_mask[len];
+//
+//    // erroneous
+//    if (len == 0)
+//      len = 5;
+//
+//    for (++utf8str; --len > 0 && (*utf8str != '\0'); ++utf8str)
+//    {
+//      // be sure this is not an unexpected start of a new character
+//      if ((*utf8str & 0xC0) != 0x80)
+//        break;
+//
+//      u = (u << 6) | (*utf8str & 0x3F);
+//    }
+//
+//    if (mbstr != 0 && count != 0)
+//    {
+//      // be sure there is enough space left in the destination buffer
+//      if (res >= count)
+//        return res;
+//
+//      // add the mapped character to the destination string or '?'(0x1A, SUB) if character
+//      // can't be represented in ISO-8859-1
+//      *mbstr++ = (u <= 0xFF ? (char)u : '?');
+//    }
+//    ++res;
+//  }
+//
+//  // add the terminating null character
+//  if (mbstr != 0 && count != 0)
+//  {
+//    // be sure there is enough space left in the destination buffer
+//    if (res >= count)
+//      return res;
+//    *mbstr = 0;
+//  }
+//
+//  return res;
+//} // ldap_x_utf8s_to_iso_8859_1s
+//
+//
+///*----------------------------------------------------------------------
+//-------
+//   Convert an ISO-8859-1 MultiByte string to a UTF-8 string.
+//   No more than 'count' bytes will be written to the output buffer.
+//   Return the size of the converted string in bytes, excluding the
+//   null terminator.
+//*/
+//int ldap_x_iso_8859_1s_to_utf8s(char *utf8str, const char *mbstr, size_t count)
+//{
+//  int res = 0;
+//
+//  // loop until we reach the end of the mb string
+//  for (; *mbstr != '\0'; ++mbstr)
+//  {
+//    // the character needs no mapping if the highest bit is not set
+//    if ((*mbstr & 0x80) == 0)
+//    {
+//      if (utf8str != 0 && count != 0)
+//      {
+//        // be sure there is enough space left in the destination buffer
+//        if (res >= count)
+//          return res;
+//
+//        *utf8str++ = *mbstr;
+//      }
+//      ++res;
+//    }
+//
+//    // otherwise mapping is necessary
+//    else
+//    {
+//      if (utf8str != 0 && count != 0)
+//      {
+//        // be sure there is enough space left in the destination buffer
+//        if (res+1 >= count)
+//          return res;
+//
+//        *utf8str++ = (0xC0 | (0x03 & (*mbstr >> 6)));
+//        *utf8str++ = (0x80 | (0x3F & *mbstr));
+//      }
+//      res += 2;
+//    }
+//  }
+//
+//  // add the terminating null character
+//  if (utf8str != 0 && count != 0)
+//  {
+//    // be sure there is enough space left in the destination buffer
+//    if (res >= count)
+//      return res;
+//    *utf8str = 0;
+//  }
+//
+//  return res;
+//} // ldap_x_iso_8859_1s_to_utf8s
+
 void Properties::load(const string &path, bool clearCurrentProperties) {

+	//wchar_t lineBuffer[maxLine]=L"";
 	char lineBuffer[maxLine]="";
 	string line, key, value;
 	size_t pos=0;
 	this->path= path;

+	//std::locale::global(std::locale(""));
+	bool is_utf8_language = valid_utf8_file(path.c_str());
+
 #if defined(WIN32) && !defined(__MINGW32__)
 	wstring wstr = utf8_decode(path);
 	FILE *fp = _wfopen(wstr.c_str(), L"r");
+	//wifstream fileStream(fp);
 	ifstream fileStream(fp);
 #else
+	//wifstream fileStream;
 	ifstream fileStream;
 	fileStream.open(path.c_str(), ios_base::in);
 #endif
 	
 	if(fileStream.is_open() == false){
 		if(SystemFlags::getSystemSettingType(SystemFlags::debugSystem).enabled) SystemFlags::OutputDebug(SystemFlags::debugSystem,"In [%s::%s Line: %d] path = [%s]\n",__FILE__,__FUNCTION__,__LINE__,path.c_str());
-		throw runtime_error("Can't open propertyMap file: " + path);
+		throw runtime_error("File NOT FOUND, can't open file: [" + path + "]");
 	}

 	if(clearCurrentProperties == true) {
 		propertyMap.clear();
 	}
-	while(!fileStream.eof()){
+
+	while(fileStream.eof() == false) {
+		lineBuffer[0]='\0';
 		fileStream.getline(lineBuffer, maxLine);
 		lineBuffer[maxLine-1]='\0';
-		//printf("\n[%s]\n",lineBuffer);
+
+		//printf("\n[%ls]\n",lineBuffer);
+		//printf("\n[%s]\n",&lineBuffer[0]);
+
+		// If the file is NOT in UTF-8 format convert each line
+		if(is_utf8_language == false && Font::forceLegacyFonts == false) {
+			char *utfStr = String::ConvertToUTF8(&lineBuffer[0]);
+
+			//printf("\nBefore [%s] After [%s]\n",&lineBuffer[0],utfStr);
+
+			memset(&lineBuffer[0],0,maxLine);
+			memcpy(&lineBuffer[0],&utfStr[0],strlen(utfStr));
+		}
+
+		//if(is_utf8_language == true && Font::forceLegacyFonts == true) {
+			//string line = lineBuffer;
+			//wstring wstr = fromUtf8(line.c_str(), line.size());
+
+			//vector <unsigned short> utf16result;
+			//utf8::utf8to16(line.begin(), line.end(), back_inserter(utf16result));
+			//vector <int> utf16result;
+			//utf8::utf8to32(line.begin(), line.end(), back_inserter(utf16result));
+
+			//printf("\nConverted UTF-8 from [%s] to [%s]\n",line.c_str(),utf16result[0]);
+
+			//char newBuf[4097]="";
+			//int newSize = ldap_x_utf8s_to_iso_8859_1s( &newBuf[0], line.c_str(), 4096 );
+
+			//std::wstring wstr = widen(newBuf);
+			//String st(wstr.c_str());
+			//String st(line.c_str());
+
+			//printf("\nConverted UTF-8 from [%s] to [%ls]\n",line.c_str(),wstr.c_str());
+
+ 		    //const wchar_t *wBuf = &szPath[0];
+			//setlocale(LC_ALL, "en_CA.ISO-8559-15");
+			//std::locale::global(std::locale("en_CA.ISO-8559-15"));
+		    //size_t size = 4096;
+		    //char pMBBuffer[4096 + 1]="";
+		    //wcstombs(&pMBBuffer[0], &lineBuffer[0], (size_t)size);// Convert to char* from TCHAR[]
+	  	    //string newStr="";
+	  	    //newStr.assign(&pMBBuffer[0]); // Now assign the char* to the string, and there you have it!!! :)
+	  	    //printf("\nConverted UTF-8 from [%ls] to [%s]\n",&lineBuffer[0],newStr.c_str());
+	  	    //std::locale::global(std::locale(""));
+
+			//char newBuf[4097]="";
+			//int newSize = ldap_x_utf8s_to_iso_8859_1s( &newBuf[0], &pMBBuffer[0], 4096 );
+
+			//String st(&lineBuffer[0]);
+			//printf("\nConverted UTF-8 from [%ls] to [%s]\n",&lineBuffer[0],&newBuf[0]);
+
+			//char newBuf[4097]="";
+			//int newSize = ldap_x_utf8s_to_iso_8859_1s( &newBuf[0], line.c_str(), 4096 );
+
+			//string newStr = conv_utf8_iso8859_7(line);
+			//printf("\nConverted UTF-8 from [%s] to [%s]\n",line.c_str(),newBuf);
+//			for(int i = 0; i < line.size(); ++i) {
+//				printf("to [%c][%d]\n",line[i],line[i]);
+//			}
+			//for(int i = 0; i < newStr.size(); ++i) {
+			//	printf("to [%c][%d]\n",newStr[i],newStr[i]);
+			//}
+
+//			for(int i = 0; i < utf16result.size(); ++i) {
+//				printf("to [%c]\n",utf16result[i]);
+//			}
+//
+			//memset(&lineBuffer[0],0,maxLine);
+			//memcpy(&lineBuffer[0],&newBuf[0],newSize);
+		//}
+		//else {
+			//string line = lineBuffer;
+			//printf("\nNON UTF-8 from [%s]\n",line.c_str());
+			//for(int i = 0; i < line.size(); ++i) {
+			//	printf("to [%c][%d]\n",line[i],line[i]);
+			//}
+		//}

 		//process line if it it not a comment
-		if(lineBuffer[0]!=';'){
+		if(lineBuffer[0] != ';') {
+			//wstring wstr = lineBuffer;
+			//line.assign(wstr.begin(),wstr.end());

 			// gracefully handle win32 \r\n line endings
 			size_t len= strlen(lineBuffer);
-   			if(len > 0 && lineBuffer[len-1] == '\r'){
-				lineBuffer[len-1]= 0;
+   			if(len > 0 && lineBuffer[len-1] == '\r') {
+   				lineBuffer[len-1]= 0;
 			}

 			line= lineBuffer;