Skip to content

Commit d1b1c3e

Browse files
saurabh95swmitra
authored and committed
Now BOM is preserved in Win and MAC (adobe#610)
* Now BOM is preserved in Win and MAC * Added error strings for failure in encode/decode and utf-16 * Mac related error string changes * Addressed review comments * minor change * Addressed win review comments
1 parent d1c7e6f commit d1b1c3e

6 files changed

+110
-72
lines changed

appshell/appshell_extensions.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,32 +304,36 @@ class ProcessMessageDelegate : public ClientHandler::ProcessMessageDelegate {
304304
ExtensionString filename = argList->GetString(1);
305305
ExtensionString encoding = argList->GetString(2);
306306
std::string contents = "";
307+
bool preserveBOM = false;
307308

308-
error = ReadFile(filename, encoding, contents);
309+
error = ReadFile(filename, encoding, contents, preserveBOM);
309310

310311
// Set response args for this function
311312
responseArgs->SetString(2, contents);
312313
responseArgs->SetString(3, encoding);
314+
responseArgs->SetBool(4, preserveBOM);
313315
}
314316
} else if (message_name == "WriteFile") {
315317
// Parameters:
316318
// 0: int32 - callback id
317319
// 1: string - filename
318320
// 2: string - data
319321
// 3: string - encoding
// 4: bool - preserveBOM
320-
if (argList->GetSize() != 4 ||
322+
if (argList->GetSize() != 5 ||
321323
argList->GetType(1) != VTYPE_STRING ||
322324
argList->GetType(2) != VTYPE_STRING ||
323-
argList->GetType(3) != VTYPE_STRING) {
325+
argList->GetType(3) != VTYPE_STRING ||
326+
argList->GetType(4) != VTYPE_BOOL) {
324327
error = ERR_INVALID_PARAMS;
325328
}
326329

327330
if (error == NO_ERROR) {
328331
ExtensionString filename = argList->GetString(1);
329332
std::string contents = argList->GetString(2);
330333
ExtensionString encoding = argList->GetString(3);
334+
bool preserveBOM = argList->GetBool(4);
331335

332-
error = WriteFile(filename, contents, encoding);
336+
error = WriteFile(filename, contents, encoding, preserveBOM);
333337
// No additional response args for this function
334338
}
335339
} else if (message_name == "SetPosixPermissions") {

appshell/appshell_extensions.js

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,21 @@ if (!appshell.app) {
7070
* @constant An unsupported encoding value was specified.
7171
*/
7272
appshell.fs.ERR_UNSUPPORTED_ENCODING = 5;
73+
74+
/**
75+
* @constant File could not be encoded.
76+
*/
77+
appshell.fs.ERR_ENCODE_FILE_FAILED = 18;
78+
79+
/**
80+
* @constant File could not be decoded.
81+
*/
82+
appshell.fs.ERR_DECODE_FILE_FAILED = 19;
83+
84+
/**
85+
* @constant File is encoded as UTF-16, which is not supported.
86+
*/
87+
appshell.fs.ERR_UNSUPPORTED_UTF16_ENCODING = 20;
7388

7489
/**
7590
* @constant File could not be written.
@@ -394,8 +409,8 @@ if (!appshell.app) {
394409
* @return None. This is an asynchronous call that sends all return information to the callback.
395410
*/
396411
native function WriteFile();
397-
appshell.fs.writeFile = function (path, data, encoding, callback) {
398-
WriteFile(callback || _dummyCallback, path, data, encoding);
412+
appshell.fs.writeFile = function (path, data, encoding, preserveBOM, callback) {
413+
WriteFile(callback || _dummyCallback, path, data, encoding, preserveBOM);
399414
};
400415

401416
/**

appshell/appshell_extensions_mac.mm

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#include <sys/utsname.h>
5757
#include <mach-o/arch.h>
5858

59+
#define UTF8_BOM "\xEF\xBB\xBF"
60+
5961
NSMutableArray* pendingOpenFiles;
6062

6163
@interface ChromeWindowsTerminatedObserver : NSObject
@@ -647,62 +649,53 @@ int32 GetFileInfo(ExtensionString filename, uint32& modtime, bool& isDir, double
647649
return ConvertNSErrorCode(error, true);
648650
}
649651

650-
int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string& contents)
652+
int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string& contents, bool& preserveBOM)
651653
{
652654
if (encoding == "utf8") {
653655
encoding = "UTF-8";
654656
}
655-
NSString* path = [NSString stringWithUTF8String:filename.c_str()];
656-
657-
NSStringEncoding enc;
658657
int32 error = NO_ERROR;
659-
660-
NSString* fileContents = nil;
661-
if (encoding == "UTF-8") {
662-
enc = NSUTF8StringEncoding;
663-
NSError* NSerror = nil;
664-
fileContents = [NSString stringWithContentsOfFile:path encoding:enc error:&NSerror];
665-
}
666-
667-
if (fileContents)
668-
{
669-
contents = [fileContents UTF8String];
670-
return NO_ERROR;
671-
} else {
658+
659+
try {
660+
std::ifstream file(filename.c_str());
661+
std::stringstream ss;
662+
ss << file.rdbuf();
663+
contents = ss.str();
664+
std::string detectedCharSet;
672665
try {
673-
std::ifstream file(filename.c_str());
674-
std::stringstream ss;
675-
ss << file.rdbuf();
676-
contents = ss.str();
677-
std::string detectedCharSet;
678-
try {
679-
if (encoding == "UTF-8") {
680-
CharSetDetect ICUDetector;
681-
ICUDetector(contents.c_str(), contents.size(), detectedCharSet);
682-
}
683-
else {
684-
detectedCharSet = encoding;
685-
}
686-
if (!detectedCharSet.empty()) {
666+
if (encoding == "UTF-8") {
667+
CharSetDetect ICUDetector;
668+
ICUDetector(contents.c_str(), contents.size(), detectedCharSet);
669+
}
670+
else {
671+
detectedCharSet = encoding;
672+
}
673+
if (detectedCharSet == "UTF-16LE" || detectedCharSet == "UTF-16BE") {
674+
return ERR_UNSUPPORTED_UTF16_ENCODING;
675+
}
676+
if (detectedCharSet != "UTF-8") {
677+
try {
687678
std::transform(detectedCharSet.begin(), detectedCharSet.end(), detectedCharSet.begin(), ::toupper);
688679
DecodeContents(contents, detectedCharSet);
689680
encoding = detectedCharSet;
681+
} catch (...) {
682+
error = ERR_DECODE_FILE_FAILED;
690683
}
691-
else {
692-
error = ERR_UNSUPPORTED_ENCODING;
693-
}
694-
} catch (...) {
695-
error = ERR_UNSUPPORTED_ENCODING;
684+
}
685+
else {
686+
CheckAndRemoveUTF8BOM(contents, preserveBOM);
696687
}
697688
} catch (...) {
698-
error = ERR_CANT_READ;
689+
error = ERR_UNSUPPORTED_ENCODING;
699690
}
691+
} catch (...) {
692+
error = ERR_CANT_READ;
700693
}
701694

702695
return error;
703696
}
704697

705-
int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString encoding)
698+
int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString encoding, bool preserveBOM)
706699
{
707700
const char *filenameStr = filename.c_str();
708701
int32 error = NO_ERROR;
@@ -715,8 +708,10 @@ int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString
715708
CharSetEncode ICUEncoder(encoding);
716709
ICUEncoder(contents);
717710
} catch (...) {
718-
error = ERR_CANT_READ;
711+
error = ERR_ENCODE_FILE_FAILED;
719712
}
713+
} else if (encoding == "UTF-8" && preserveBOM) {
714+
contents = UTF8_BOM + contents;
720715
}
721716

722717
try {

appshell/appshell_extensions_platform.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include <unicode/ucsdet.h>
33
#include <unicode/ucnv.h>
44

5+
#define UTF8_BOM "\xEF\xBB\xBF"
6+
57
CharSetDetect::CharSetDetect() {
68
m_charsetDetector_ = NULL;
79
m_icuError = U_ZERO_ERROR;
@@ -88,3 +90,11 @@ void DecodeContents(std::string &contents, const std::string& encoding) {
8890
}
8991
}
9092
#endif
93+
94+
// Strips a leading UTF-8 byte order mark (UTF8_BOM) from `contents`.
//   contents    - raw file bytes; modified in place when a BOM is present.
//   preserveBOM - set to true when a BOM was found and removed; left
//                 untouched otherwise, so callers should initialise it.
void CheckAndRemoveUTF8BOM(std::string& contents, bool& preserveBOM) {
95+
if (contents.compare(0, 3, UTF8_BOM) == 0) {
96+
preserveBOM = true;
97+
contents.erase(0, 3);
98+
}
99+
}
100+

appshell/appshell_extensions_platform.h

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -38,27 +38,30 @@
3838
// Extension error codes. These MUST be in sync with the error
3939
// codes in appshell_extensions.js
4040
#if !defined(OS_WIN) // NO_ERROR is defined on windows
41-
static const int NO_ERROR = 0;
41+
static const int NO_ERROR = 0;
4242
#endif
43-
static const int ERR_UNKNOWN = 1;
44-
static const int ERR_INVALID_PARAMS = 2;
45-
static const int ERR_NOT_FOUND = 3;
46-
static const int ERR_CANT_READ = 4;
47-
static const int ERR_UNSUPPORTED_ENCODING = 5;
48-
static const int ERR_CANT_WRITE = 6;
49-
static const int ERR_OUT_OF_SPACE = 7;
50-
static const int ERR_NOT_FILE = 8;
51-
static const int ERR_NOT_DIRECTORY = 9;
52-
static const int ERR_FILE_EXISTS = 10;
53-
static const int ERR_BROWSER_NOT_INSTALLED = 11;
54-
static const int ERR_CL_TOOLS_CANCELLED = 12;
55-
static const int ERR_CL_TOOLS_RMFAILED = 13;
56-
static const int ERR_CL_TOOLS_MKDIRFAILED = 14;
43+
static const int ERR_UNKNOWN = 1;
44+
static const int ERR_INVALID_PARAMS = 2;
45+
static const int ERR_NOT_FOUND = 3;
46+
static const int ERR_CANT_READ = 4;
47+
static const int ERR_UNSUPPORTED_ENCODING = 5;
48+
static const int ERR_CANT_WRITE = 6;
49+
static const int ERR_OUT_OF_SPACE = 7;
50+
static const int ERR_NOT_FILE = 8;
51+
static const int ERR_NOT_DIRECTORY = 9;
52+
static const int ERR_FILE_EXISTS = 10;
53+
static const int ERR_BROWSER_NOT_INSTALLED = 11;
54+
static const int ERR_CL_TOOLS_CANCELLED = 12;
55+
static const int ERR_CL_TOOLS_RMFAILED = 13;
56+
static const int ERR_CL_TOOLS_MKDIRFAILED = 14;
5757
static const int ERR_CL_TOOLS_SYMLINKFAILED = 15;
58-
static const int ERR_CL_TOOLS_SERVFAILED = 16;
59-
static const int ERR_CL_TOOLS_NOTSUPPORTED = 17;
58+
static const int ERR_CL_TOOLS_SERVFAILED = 16;
59+
static const int ERR_CL_TOOLS_NOTSUPPORTED = 17;
60+
static const int ERR_ENCODE_FILE_FAILED = 18;
61+
static const int ERR_DECODE_FILE_FAILED = 19;
62+
static const int ERR_UNSUPPORTED_UTF16_ENCODING = 20;
6063

61-
static const int ERR_PID_NOT_FOUND = -9999; // negative int to avoid confusion with real PIDs
64+
static const int ERR_PID_NOT_FOUND = -9999; // negative int to avoid confusion with real PIDs
6265

6366
typedef uint8_t u8;
6467
typedef uint16_t u16;
@@ -112,6 +115,8 @@ class CharSetEncode
112115
void DecodeContents(std::string &contents, const std::string& encoding);
113116
#endif
114117

118+
void CheckAndRemoveUTF8BOM(std::string& contents, bool& preserveBOM);
119+
115120
// Native extension code. These are implemented in appshell_extensions_mac.mm
116121
// and appshell_extensions_win.cpp
117122
int32 OpenLiveBrowser(ExtensionString argURL, bool enableRemoteDebugging);
@@ -158,9 +163,9 @@ int32 Rename(ExtensionString oldName, ExtensionString newName);
158163

159164
int32 GetFileInfo(ExtensionString filename, uint32& modtime, bool& isDir, double& size, ExtensionString& realPath);
160165

161-
int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string& contents);
166+
// Reads `filename` into `contents`. `encoding` is in/out: the platform
// implementations overwrite it with the detected character set when the
// contents are decoded. `preserveBOM` is set to true when a UTF-8 BOM was
// stripped from the contents, so it can be passed back to WriteFile.
int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string& contents, bool& preserveBOM);
162167

163-
int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString encoding);
168+
int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString encoding, bool preserveBOM);
164169

165170
int32 SetPosixPermissions(ExtensionString filename, int32 mode);
166171

@@ -207,4 +212,4 @@ int32 GetMenuPosition(CefRefPtr browser, const ExtensionString& comm
207212

208213
void DragWindow(CefRefPtr browser);
209214

210-
std::string GetSystemUniqueID();
215+
std::string GetSystemUniqueID();

appshell/appshell_extensions_win.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
#define UNICODE_LEFT_ARROW 0x2190
4747
#define UNICODE_DOWN_ARROW 0x2193
4848

49+
#define UTF8_BOM "\xEF\xBB\xBF"
50+
4951
// Forward declarations for functions at the bottom of this file
5052
void ConvertToNativePath(ExtensionString& filename);
5153
void ConvertToUnixPath(ExtensionString& filename);
@@ -858,8 +860,6 @@ CharSetMap charSetMap =
858860
{ "SHIFT_JIS", 932 },
859861
{ "ISO-8859-3", 28593 },
860862
{ "ISO-8859-4", 28594 },
861-
{ "UTF-16LE", 1200 },
862-
{ "UTF-16BE", 1201},
863863
{ "WINDOWS-1257", 1257 },
864864
{ "WINDOWS-1258", 1258 },
865865
{ "GB2312", 936 },
@@ -933,7 +933,7 @@ class ReadFileHandle {
933933

934934

935935

936-
int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string& contents)
936+
int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string& contents, bool& preserveBOM)
937937
{
938938
if (encoding == L"utf8") {
939939
encoding = L"UTF-8";
@@ -985,6 +985,9 @@ int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string&
985985
std::wstring_convertwchar_t>> conv;
986986
detectedCharSet = conv.to_bytes(encoding);
987987
}
988+
// Reject UTF-16 in both byte orders (little- and big-endian) before the
// code-page lookup below; neither variant is supported.
if (detectedCharSet == "UTF-16LE" || detectedCharSet == "UTF-16BE") {
989+
return ERR_UNSUPPORTED_UTF16_ENCODING;
990+
}
988991
std::transform(detectedCharSet.begin(), detectedCharSet.end(), detectedCharSet.begin(), ::toupper);
989992
CharSetMap::iterator iter = charSetMap.find(detectedCharSet);
990993

@@ -1001,13 +1004,16 @@ int32 ReadFile(ExtensionString filename, ExtensionString& encoding, std::string&
10011004
error = NO_ERROR;
10021005
}
10031006
catch (...) {
1004-
error = ERR_UNSUPPORTED_ENCODING;
1007+
error = ERR_DECODE_FILE_FAILED;
10051008
}
10061009
}
10071010
} catch (...) {
10081011
error = ERR_UNSUPPORTED_ENCODING;
10091012
}
10101013
}
1014+
if (encoding == L"UTF-8") {
1015+
CheckAndRemoveUTF8BOM(contents, preserveBOM);
1016+
}
10111017
}
10121018
else {
10131019
error = ConvertWinErrorCode(GetLastError(), false);
@@ -1057,7 +1063,7 @@ static void WideToCharSet(const std::wstring &aUTF16string, long codePage, std::
10571063

10581064

10591065

1060-
int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString encoding)
1066+
int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString encoding, bool preserveBOM)
10611067
{
10621068
if (encoding == L"utf8") {
10631069
encoding = L"UTF-8";
@@ -1071,9 +1077,12 @@ int32 WriteFile(ExtensionString filename, std::string contents, ExtensionString
10711077
WideToCharSet(content, iter->second, contents);
10721078
}
10731079
else {
1074-
error = ERR_UNSUPPORTED_ENCODING;
1080+
error = ERR_ENCODE_FILE_FAILED;
10751081
}
10761082
}
1083+
if (encoding == L"UTF-8" && preserveBOM) {
1084+
contents = UTF8_BOM + contents;
1085+
}
10771086

10781087
HANDLE hFile = CreateFile(filename.c_str(), GENERIC_WRITE,
10791088
FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);

0 commit comments

Comments
 (0)