Repo created
This commit is contained in:
parent
81b91f4139
commit
f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
1
TMessagesProj/jni/voip/webrtc/base/i18n/OWNERS
Normal file
1
TMessagesProj/jni/voip/webrtc/base/i18n/OWNERS
Normal file
|
|
@ -0,0 +1 @@
|
|||
jshin@chromium.org
|
||||
29
TMessagesProj/jni/voip/webrtc/base/i18n/base_i18n_export.h
Normal file
29
TMessagesProj/jni/voip/webrtc/base/i18n/base_i18n_export.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_BASE_I18N_EXPORT_H_
#define BASE_I18N_BASE_I18N_EXPORT_H_

// BASE_I18N_EXPORT expands to the symbol-visibility annotation for the
// base/i18n library:
//   - no COMPONENT_BUILD:                 nothing
//   - COMPONENT_BUILD, WIN32:             dllexport when
//                                         BASE_I18N_IMPLEMENTATION is defined,
//                                         dllimport otherwise
//   - COMPONENT_BUILD, other platforms:   default visibility when
//                                         BASE_I18N_IMPLEMENTATION is defined,
//                                         nothing otherwise
#if !defined(COMPONENT_BUILD)
#define BASE_I18N_EXPORT
#elif defined(WIN32)
#if defined(BASE_I18N_IMPLEMENTATION)
#define BASE_I18N_EXPORT __declspec(dllexport)
#else
#define BASE_I18N_EXPORT __declspec(dllimport)
#endif  // defined(BASE_I18N_IMPLEMENTATION)
#elif defined(BASE_I18N_IMPLEMENTATION)
#define BASE_I18N_EXPORT __attribute__((visibility("default")))
#else
#define BASE_I18N_EXPORT
#endif  // !defined(COMPONENT_BUILD)

#endif  // BASE_I18N_BASE_I18N_EXPORT_H_
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/base_i18n_switches.h"
|
||||
|
||||
namespace switches {

// Command-line switch that pins the UI layout direction. Its value must be
// one of kForceDirectionLTR ("ltr", left-to-right) or kForceDirectionRTL
// ("rtl", right-to-left).
const char kForceUIDirection[] = "force-ui-direction";

// Command-line switch that pins the text rendering direction. It takes the
// same "ltr" / "rtl" values as above. Only tested meaningfully with RTL.
const char kForceTextDirection[] = "force-text-direction";

// Values accepted by the two kForce*Direction switches.
const char kForceDirectionLTR[] = "ltr";
const char kForceDirectionRTL[] = "rtl";

}  // namespace switches
|
||||
21
TMessagesProj/jni/voip/webrtc/base/i18n/base_i18n_switches.h
Normal file
21
TMessagesProj/jni/voip/webrtc/base/i18n/base_i18n_switches.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_BASE_I18N_SWITCHES_H_
|
||||
#define BASE_I18N_BASE_I18N_SWITCHES_H_
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
|
||||
namespace switches {
|
||||
|
||||
BASE_I18N_EXPORT extern const char kForceUIDirection[];
|
||||
BASE_I18N_EXPORT extern const char kForceTextDirection[];
|
||||
|
||||
// kForce*Direction choices for the switches above.
|
||||
BASE_I18N_EXPORT extern const char kForceDirectionLTR[];
|
||||
BASE_I18N_EXPORT extern const char kForceDirectionRTL[];
|
||||
|
||||
} // namespace switches
|
||||
|
||||
#endif // BASE_I18N_BASE_I18N_SWITCHES_H_
|
||||
203
TMessagesProj/jni/voip/webrtc/base/i18n/break_iterator.cc
Normal file
203
TMessagesProj/jni/voip/webrtc/base/i18n/break_iterator.cc
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/break_iterator.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "third_party/icu/source/common/unicode/ubrk.h"
|
||||
#include "third_party/icu/source/common/unicode/uchar.h"
|
||||
#include "third_party/icu/source/common/unicode/ustring.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Sentinel position: the largest size_t value. Used as the initial |prev_| and
// stored in |pos_| once Advance() runs past the end of the text.
constexpr size_t npos = static_cast<size_t>(-1);
|
||||
|
||||
// Prepares an iterator over |str| that uses one of the built-in break types.
// No ICU iterator is created here; that happens in Init().
BreakIterator::BreakIterator(const StringPiece16& str, BreakType break_type)
    : iter_(nullptr),
      string_(str),
      rules_(),
      break_type_(break_type),
      prev_(npos),
      pos_(0) {}
|
||||
|
||||
// Prepares a RULE_BASED iterator over |str|. |rules| is copied and later
// handed to ubrk_openRules() by Init().
BreakIterator::BreakIterator(const StringPiece16& str, const string16& rules)
    : iter_(nullptr),
      string_(str),
      rules_(rules),
      break_type_(RULE_BASED),
      prev_(npos),
      pos_(0) {
}
|
||||
|
||||
// Closes the ICU iterator, if Init() ever created one.
BreakIterator::~BreakIterator() {
  if (iter_ == nullptr)
    return;
  ubrk_close(static_cast<UBreakIterator*>(iter_));
}
|
||||
|
||||
bool BreakIterator::Init() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError parse_error;
|
||||
UBreakIteratorType break_type;
|
||||
switch (break_type_) {
|
||||
case BREAK_CHARACTER:
|
||||
break_type = UBRK_CHARACTER;
|
||||
break;
|
||||
case BREAK_WORD:
|
||||
break_type = UBRK_WORD;
|
||||
break;
|
||||
case BREAK_SENTENCE:
|
||||
break_type = UBRK_SENTENCE;
|
||||
break;
|
||||
case BREAK_LINE:
|
||||
case BREAK_NEWLINE:
|
||||
case RULE_BASED: // (Keep compiler happy, break_type not used in this case)
|
||||
break_type = UBRK_LINE;
|
||||
break;
|
||||
default:
|
||||
NOTREACHED() << "invalid break_type_";
|
||||
return false;
|
||||
}
|
||||
if (break_type_ == RULE_BASED) {
|
||||
iter_ = ubrk_openRules(rules_.c_str(),
|
||||
static_cast<int32_t>(rules_.length()),
|
||||
string_.data(),
|
||||
static_cast<int32_t>(string_.size()),
|
||||
&parse_error,
|
||||
&status);
|
||||
if (U_FAILURE(status)) {
|
||||
NOTREACHED() << "ubrk_openRules failed to parse rule string at line "
|
||||
<< parse_error.line << ", offset " << parse_error.offset;
|
||||
}
|
||||
} else {
|
||||
iter_ = ubrk_open(break_type, nullptr, string_.data(),
|
||||
static_cast<int32_t>(string_.size()), &status);
|
||||
if (U_FAILURE(status)) {
|
||||
NOTREACHED() << "ubrk_open failed for type " << break_type
|
||||
<< " with error " << status;
|
||||
}
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Move the iterator to the beginning of the string.
|
||||
ubrk_first(static_cast<UBreakIterator*>(iter_));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BreakIterator::Advance() {
|
||||
int32_t pos;
|
||||
int32_t status;
|
||||
prev_ = pos_;
|
||||
switch (break_type_) {
|
||||
case BREAK_CHARACTER:
|
||||
case BREAK_WORD:
|
||||
case BREAK_LINE:
|
||||
case BREAK_SENTENCE:
|
||||
case RULE_BASED:
|
||||
pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
|
||||
if (pos == UBRK_DONE) {
|
||||
pos_ = npos;
|
||||
return false;
|
||||
}
|
||||
pos_ = static_cast<size_t>(pos);
|
||||
return true;
|
||||
case BREAK_NEWLINE:
|
||||
do {
|
||||
pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
|
||||
if (pos == UBRK_DONE)
|
||||
break;
|
||||
pos_ = static_cast<size_t>(pos);
|
||||
status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
|
||||
} while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
|
||||
if (pos == UBRK_DONE && prev_ == pos_) {
|
||||
pos_ = npos;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
default:
|
||||
NOTREACHED() << "invalid break_type_";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Points the existing ICU iterator at the |length| code units starting at
// |text|. The cached positions are reset whether or not ICU accepts the text;
// |string_| is only replaced on success. Returns false if ICU reports an error.
bool BreakIterator::SetText(const base::char16* text, const size_t length) {
  UErrorCode icu_status = U_ZERO_ERROR;
  ubrk_setText(static_cast<UBreakIterator*>(iter_), text,
               static_cast<int32_t>(length), &icu_status);

  // ubrk_setText implicitly rewinds the iterator, so mirror that here.
  pos_ = 0;
  prev_ = npos;

  if (!U_FAILURE(icu_status)) {
    string_ = StringPiece16(text, length);
    return true;
  }
  NOTREACHED() << "ubrk_setText failed";
  return false;
}
|
||||
|
||||
bool BreakIterator::IsWord() const {
|
||||
return GetWordBreakStatus() == IS_WORD_BREAK;
|
||||
}
|
||||
|
||||
// Classifies the break the iterator currently sits on. Outside BREAK_WORD and
// RULE_BASED modes the answer is always IS_LINE_OR_CHAR_BREAK.
BreakIterator::WordBreakStatus BreakIterator::GetWordBreakStatus() const {
  const int32_t rule_status =
      ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
  const bool word_mode = break_type_ == BREAK_WORD || break_type_ == RULE_BASED;
  if (!word_mode)
    return IS_LINE_OR_CHAR_BREAK;

  // In ICU 60, trying to advance past the end of the text does not change
  // the rule status, so |pos_| has to be checked as well.
  // See http://bugs.icu-project.org/trac/ticket/13447 .
  if (rule_status == UBRK_WORD_NONE || pos_ == npos)
    return IS_SKIPPABLE_WORD;
  return IS_WORD_BREAK;
}
|
||||
|
||||
bool BreakIterator::IsEndOfWord(size_t position) const {
|
||||
if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED)
|
||||
return false;
|
||||
|
||||
UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
|
||||
UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
|
||||
int32_t status = ubrk_getRuleStatus(iter);
|
||||
return (!!boundary && status != UBRK_WORD_NONE);
|
||||
}
|
||||
|
||||
bool BreakIterator::IsStartOfWord(size_t position) const {
|
||||
if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED)
|
||||
return false;
|
||||
|
||||
UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
|
||||
UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
|
||||
ubrk_next(iter);
|
||||
int32_t next_status = ubrk_getRuleStatus(iter);
|
||||
return (!!boundary && next_status != UBRK_WORD_NONE);
|
||||
}
|
||||
|
||||
bool BreakIterator::IsSentenceBoundary(size_t position) const {
|
||||
if (break_type_ != BREAK_SENTENCE && break_type_ != RULE_BASED)
|
||||
return false;
|
||||
|
||||
UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
|
||||
return !!ubrk_isBoundary(iter, static_cast<int32_t>(position));
|
||||
}
|
||||
|
||||
bool BreakIterator::IsGraphemeBoundary(size_t position) const {
|
||||
if (break_type_ != BREAK_CHARACTER)
|
||||
return false;
|
||||
|
||||
UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
|
||||
return !!ubrk_isBoundary(iter, static_cast<int32_t>(position));
|
||||
}
|
||||
|
||||
// Returns an owned copy of the text between prev() and pos().
string16 BreakIterator::GetString() const {
  const StringPiece16 current_piece = GetStringPiece();
  return current_piece.as_string();
}
|
||||
|
||||
// Returns a view of |string_| covering [prev(), pos()). Both positions must be
// valid (not npos), which is DCHECKed.
StringPiece16 BreakIterator::GetStringPiece() const {
  DCHECK(prev_ != npos && pos_ != npos);
  const size_t piece_length = pos_ - prev_;
  return string_.substr(prev_, piece_length);
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
195
TMessagesProj/jni/voip/webrtc/base/i18n/break_iterator.h
Normal file
195
TMessagesProj/jni/voip/webrtc/base/i18n/break_iterator.h
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_BREAK_ITERATOR_H_
|
||||
#define BASE_I18N_BREAK_ITERATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/macros.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
// The BreakIterator class iterates through the words, word breaks, and
|
||||
// line breaks in a UTF-16 string.
|
||||
//
|
||||
// It provides several modes, BREAK_WORD, BREAK_LINE, BREAK_NEWLINE, and
|
||||
// BREAK_SENTENCE which modify how characters are aggregated into the returned
|
||||
// string.
|
||||
//
|
||||
// Under BREAK_WORD mode, once a word is encountered any non-word
|
||||
// characters are not included in the returned string (e.g. in the
|
||||
// UTF-16 equivalent of the string " foo bar! ", the word breaks are at
|
||||
// the periods in ". .foo. .bar.!. .").
|
||||
// Note that Chinese/Japanese/Thai do not use spaces between words so that
|
||||
// boundaries can fall in the middle of a continuous run of non-space /
|
||||
// non-punctuation characters.
|
||||
//
|
||||
// Under BREAK_LINE mode, once a line breaking opportunity is encountered,
|
||||
// any non-word characters are included in the returned string, breaking
|
||||
// only when a space-equivalent character or a line breaking opportunity
|
||||
// is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ",
|
||||
// the breaks are at the periods in ". .foo .bar! .").
|
||||
//
|
||||
// Note that lines can be broken at any character/syllable/grapheme cluster
|
||||
// boundary in Chinese/Japanese/Korean and at word boundaries in Thai
|
||||
// (Thai does not use spaces between words). Therefore, this is NOT the same
|
||||
// as breaking only at space-equivalent characters where its former
|
||||
// name (BREAK_SPACE) implied.
|
||||
//
|
||||
// Under BREAK_NEWLINE mode, all characters are included in the returned
|
||||
// string, breaking only when a newline-equivalent character is encountered
|
||||
// (eg. in the UTF-16 equivalent of the string "foo\nbar!\n\n", the line
|
||||
// breaks are at the periods in ".foo\n.bar\n.\n.").
|
||||
//
|
||||
// Under BREAK_SENTENCE mode, all characters are included in the returned
|
||||
// string, breaking only on sentence boundaries defined in "Unicode Standard
|
||||
// Annex #29: Text Segmentation." Whitespace immediately following the sentence
|
||||
// is also included. For example, in the UTF-16 equivalent of the string
|
||||
// "foo bar! baz qux?" the breaks are at the periods in ".foo bar! .baz qux?."
|
||||
//
|
||||
// To extract the words from a string, move a BREAK_WORD BreakIterator
|
||||
// through the string and test whether IsWord() is true. E.g.,
|
||||
// BreakIterator iter(str, BreakIterator::BREAK_WORD);
|
||||
// if (!iter.Init())
|
||||
// return false;
|
||||
// while (iter.Advance()) {
|
||||
// if (iter.IsWord()) {
|
||||
// // Region [iter.prev(), iter.pos()) contains a word.
|
||||
// VLOG(1) << "word: " << iter.GetString();
|
||||
// }
|
||||
// }
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
class BASE_I18N_EXPORT BreakIterator {
|
||||
public:
|
||||
enum BreakType {
|
||||
BREAK_WORD,
|
||||
BREAK_LINE,
|
||||
// TODO(jshin): Remove this after reviewing call sites.
|
||||
// If call sites really need break only on space-like characters
|
||||
// implement it separately.
|
||||
BREAK_SPACE = BREAK_LINE,
|
||||
BREAK_NEWLINE,
|
||||
BREAK_CHARACTER,
|
||||
// But don't remove this one!
|
||||
RULE_BASED,
|
||||
BREAK_SENTENCE,
|
||||
};
|
||||
|
||||
enum WordBreakStatus {
|
||||
// The end of text that the iterator recognizes as word characters.
|
||||
// Non-word characters are things like punctuation and spaces.
|
||||
IS_WORD_BREAK,
|
||||
// Characters that the iterator can skip past, such as punctuation,
|
||||
// whitespace, and, if using RULE_BASED mode, characters from another
|
||||
// character set.
|
||||
IS_SKIPPABLE_WORD,
|
||||
// Only used if not in BREAK_WORD or RULE_BASED mode. This is returned for
|
||||
// newlines, line breaks, and character breaks.
|
||||
IS_LINE_OR_CHAR_BREAK
|
||||
};
|
||||
|
||||
// Requires |str| to live as long as the BreakIterator does.
|
||||
BreakIterator(const StringPiece16& str, BreakType break_type);
|
||||
// Make a rule-based iterator. BreakType == RULE_BASED is implied.
|
||||
// TODO(andrewhayden): This signature could easily be misinterpreted as
|
||||
// "(const string16& str, const string16& locale)". We should do something
|
||||
// better.
|
||||
BreakIterator(const StringPiece16& str, const string16& rules);
|
||||
~BreakIterator();
|
||||
|
||||
// Init() must be called before any of the iterators are valid.
|
||||
// Returns false if ICU failed to initialize.
|
||||
bool Init();
|
||||
|
||||
// Advance to the next break. Returns false if we've run past the end of
|
||||
// the string. (Note that the very last "break" is after the final
|
||||
// character in the string, and when we advance to that position it's the
|
||||
// last time Advance() returns true.)
|
||||
bool Advance();
|
||||
|
||||
// Updates the text used by the iterator, resetting the iterator as if
|
||||
// if Init() had been called again. Any old state is lost. Returns true
|
||||
// unless there is an error setting the text.
|
||||
bool SetText(const base::char16* text, const size_t length);
|
||||
|
||||
// Under BREAK_WORD mode, returns true if the break we just hit is the
|
||||
// end of a word. (Otherwise, the break iterator just skipped over e.g.
|
||||
// whitespace or punctuation.) Under BREAK_LINE and BREAK_NEWLINE modes,
|
||||
// this distinction doesn't apply and it always returns false.
|
||||
bool IsWord() const;
|
||||
|
||||
// Under BREAK_WORD mode:
|
||||
// - Returns IS_SKIPPABLE_WORD if non-word characters, such as punctuation or
|
||||
// spaces, are found.
|
||||
// - Returns IS_WORD_BREAK if the break we just hit is the end of a sequence
|
||||
// of word characters.
|
||||
// Under RULE_BASED mode:
|
||||
// - Returns IS_SKIPPABLE_WORD if characters outside the rules' character set
|
||||
// or non-word characters, such as punctuation or spaces, are found.
|
||||
// - Returns IS_WORD_BREAK if the break we just hit is the end of a sequence
|
||||
// of word characters that are in the rules' character set.
|
||||
// Not under BREAK_WORD or RULE_BASED mode:
|
||||
// - Returns IS_LINE_OR_CHAR_BREAK.
|
||||
BreakIterator::WordBreakStatus GetWordBreakStatus() const;
|
||||
|
||||
// Under BREAK_WORD mode, returns true if |position| is at the end of word or
|
||||
// at the start of word. It always returns false under modes that are not
|
||||
// BREAK_WORD or RULE_BASED.
|
||||
bool IsEndOfWord(size_t position) const;
|
||||
bool IsStartOfWord(size_t position) const;
|
||||
|
||||
// Under BREAK_SENTENCE mode, returns true if |position| is at a sentence
|
||||
// boundary. It always returns false under modes that are not BREAK_SENTENCE
|
||||
// or RULE_BASED.
|
||||
bool IsSentenceBoundary(size_t position) const;
|
||||
|
||||
// Under BREAK_CHARACTER mode, returns whether |position| is a Unicode
|
||||
// grapheme boundary.
|
||||
bool IsGraphemeBoundary(size_t position) const;
|
||||
|
||||
// Returns the string between prev() and pos().
|
||||
// Advance() must have been called successfully at least once for pos() to
|
||||
// have advanced to somewhere useful.
|
||||
string16 GetString() const;
|
||||
|
||||
StringPiece16 GetStringPiece() const;
|
||||
|
||||
// Returns the value of pos() returned before Advance() was last called.
|
||||
size_t prev() const { return prev_; }
|
||||
|
||||
// Returns the current break position within the string,
|
||||
// or BreakIterator::npos when done.
|
||||
size_t pos() const { return pos_; }
|
||||
|
||||
private:
|
||||
// ICU iterator, avoiding ICU ubrk.h dependence.
|
||||
// This is actually an ICU UBreakiterator* type, which turns out to be
|
||||
// a typedef for a void* in the ICU headers. Using void* directly prevents
|
||||
// callers from needing access to the ICU public headers directory.
|
||||
void* iter_;
|
||||
|
||||
// The string we're iterating over. Can be changed with SetText(...)
|
||||
StringPiece16 string_;
|
||||
|
||||
// Rules for our iterator. Mutually exclusive with break_type_.
|
||||
const string16 rules_;
|
||||
|
||||
// The breaking style (word/space/newline). Mutually exclusive with rules_
|
||||
BreakType break_type_;
|
||||
|
||||
// Previous and current iterator positions.
|
||||
size_t prev_, pos_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(BreakIterator);
|
||||
};
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_BREAK_ITERATOR_H_
|
||||
|
|
@ -0,0 +1,465 @@
|
|||
// Copyright 2014 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Create a state machine for validating UTF-8. The algorithm in brief:
|
||||
// 1. Convert the complete unicode range of code points, except for the
|
||||
// surrogate code points, to an ordered array of sequences of bytes in
|
||||
// UTF-8.
|
||||
// 2. Convert individual bytes to ranges, starting from the right of each byte
|
||||
// sequence. For each range, ensure the bytes on the left and the ranges
|
||||
// on the right are identical.
|
||||
// 3. Convert the resulting list of ranges into a state machine, collapsing
|
||||
// identical states.
|
||||
// 4. Convert the state machine to an array of bytes.
|
||||
// 5. Output as a C++ file.
|
||||
//
|
||||
// To use:
|
||||
// $ ninja -C out/Release build_utf8_validator_tables
|
||||
// $ out/Release/build_utf8_validator_tables
|
||||
// --output=base/i18n/utf8_validator_tables.cc
|
||||
// $ git add base/i18n/utf8_validator_tables.cc
|
||||
//
|
||||
// Because the table is not expected to ever change, it is checked into the
|
||||
// repository rather than being regenerated at build time.
|
||||
//
|
||||
// This code uses type uint8_t throughout to represent bytes, to avoid
|
||||
// signed/unsigned char confusion.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/command_line.h"
|
||||
#include "base/files/file_path.h"
|
||||
#include "base/files/file_util.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/numerics/safe_conversions.h"
|
||||
#include "base/stl_util.h"
|
||||
#include "base/strings/stringprintf.h"
|
||||
#include "third_party/icu/source/common/unicode/utf8.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Usage message for the tool.
const char kHelpText[] =
    "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n";

// Text of the generated file that precedes the table values: license header,
// include, namespace openers and the start of the array definition.
const char kProlog[] =
    "// Copyright 2013 The Chromium Authors. All rights reserved.\n"
    "// Use of this source code is governed by a BSD-style license that can be\n"
    "// found in the LICENSE file.\n"
    "\n"
    "// This file is auto-generated by build_utf8_validator_tables.\n"
    "// DO NOT EDIT.\n"
    "\n"
    "#include \"base/i18n/utf8_validator_tables.h\"\n"
    "\n"
    "namespace base {\n"
    "namespace internal {\n"
    "\n"
    "const uint8_t kUtf8ValidatorTables[] = {\n";

// Text of the generated file that follows the table values: end of the array,
// the size constant and the namespace closers.
const char kEpilog[] =
    "};\n"
    "\n"
    "const size_t kUtf8ValidatorTablesSize = base::size(kUtf8ValidatorTables);\n"
    "\n"
    "} // namespace internal\n"
    "} // namespace base\n";
|
||||
|
||||
// Ranges are inclusive at both ends--they represent [from, to]
|
||||
class Range {
|
||||
public:
|
||||
// Ranges always start with just one byte.
|
||||
explicit Range(uint8_t value) : from_(value), to_(value) {}
|
||||
|
||||
// Range objects are copyable and assignable to be used in STL
|
||||
// containers. Since they only contain non-pointer POD types, the default copy
|
||||
// constructor, assignment operator and destructor will work.
|
||||
|
||||
// Add a byte to the range. We intentionally only support adding a byte at the
|
||||
// end, since that is the only operation the code needs.
|
||||
void AddByte(uint8_t to) {
|
||||
CHECK(to == to_ + 1);
|
||||
to_ = to;
|
||||
}
|
||||
|
||||
uint8_t from() const { return from_; }
|
||||
uint8_t to() const { return to_; }
|
||||
|
||||
bool operator<(const Range& rhs) const {
|
||||
return (from() < rhs.from() || (from() == rhs.from() && to() < rhs.to()));
|
||||
}
|
||||
|
||||
bool operator==(const Range& rhs) const {
|
||||
return from() == rhs.from() && to() == rhs.to();
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t from_;
|
||||
uint8_t to_;
|
||||
};
|
||||
|
||||
// A vector of Ranges is like a simple regular expression--it corresponds to
|
||||
// a set of strings of the same length that have bytes in each position in
|
||||
// the appropriate range.
|
||||
typedef std::vector<Range> StringSet;
|
||||
|
||||
// A UTF-8 "character" is represented by a sequence of bytes.
|
||||
typedef std::vector<uint8_t> Character;
|
||||
|
||||
// In the second stage of the algorithm, we want to convert a large list of
|
||||
// Characters into a small list of StringSets.
|
||||
struct Pair {
|
||||
Character character;
|
||||
StringSet set;
|
||||
};
|
||||
|
||||
typedef std::vector<Pair> PairVector;
|
||||
|
||||
// A class to print a table of numbers in the same style as clang-format.
// Values are written kMaxValuesPerLine to a line, and every completed line
// ends with a comment holding the number of values printed so far.
class TablePrinter {
 public:
  // |stream| is borrowed; it must stay open for the lifetime of the printer.
  explicit TablePrinter(FILE* stream)
      : stream_(stream), values_on_this_line_(0), current_offset_(0) {}

  // Not copyable. Spelled out explicitly rather than via
  // DISALLOW_COPY_AND_ASSIGN, whose header this file does not include.
  TablePrinter(const TablePrinter&) = delete;
  TablePrinter& operator=(const TablePrinter&) = delete;

  // Appends one value, first terminating the current line if it is full.
  void PrintValue(uint8_t value) {
    if (values_on_this_line_ == 0) {
      fputs(" ", stream_);
    } else if (values_on_this_line_ == kMaxValuesPerLine) {
      // %x expects an unsigned argument, hence the cast.
      fprintf(stream_, " // 0x%02x\n ",
              static_cast<unsigned int>(current_offset_));
      values_on_this_line_ = 0;
    }
    fprintf(stream_, " 0x%02x,", static_cast<unsigned int>(value));
    ++values_on_this_line_;
    ++current_offset_;
  }

  // Terminates the current line early, padding the unused value slots so the
  // trailing offset comment is still emitted after them.
  void NewLine() {
    while (values_on_this_line_ < kMaxValuesPerLine) {
      fputs(" ", stream_);
      ++values_on_this_line_;
    }
    fprintf(stream_, " // 0x%02x\n",
            static_cast<unsigned int>(current_offset_));
    values_on_this_line_ = 0;
  }

 private:
  static constexpr int kMaxValuesPerLine = 8;

  // stdio stream. Not owned.
  FILE* stream_;

  // Number of values so far printed on this line.
  int values_on_this_line_;

  // Total values printed so far.
  int current_offset_;
};
|
||||
|
||||
// Start by filling a PairVector with characters. The resulting vector goes from
|
||||
// "\x00" to "\xf4\x8f\xbf\xbf".
|
||||
PairVector InitializeCharacters() {
|
||||
PairVector vector;
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (i >= 0xD800 && i < 0xE000) {
|
||||
// Surrogate codepoints are not permitted. Non-character code points are
|
||||
// explicitly permitted.
|
||||
continue;
|
||||
}
|
||||
uint8_t bytes[4];
|
||||
unsigned int offset = 0;
|
||||
UBool is_error = false;
|
||||
U8_APPEND(bytes, offset, base::size(bytes), i, is_error);
|
||||
DCHECK(!is_error);
|
||||
DCHECK_GT(offset, 0u);
|
||||
DCHECK_LE(offset, base::size(bytes));
|
||||
Pair pair = {Character(bytes, bytes + offset), StringSet()};
|
||||
vector.push_back(pair);
|
||||
}
|
||||
return vector;
|
||||
}
|
||||
|
||||
// Construct a new Pair from |character| and the concatenation of |new_range|
|
||||
// and |existing_set|, and append it to |pairs|.
|
||||
void ConstructPairAndAppend(const Character& character,
|
||||
const Range& new_range,
|
||||
const StringSet& existing_set,
|
||||
PairVector* pairs) {
|
||||
Pair new_pair = {character, StringSet(1, new_range)};
|
||||
new_pair.set.insert(
|
||||
new_pair.set.end(), existing_set.begin(), existing_set.end());
|
||||
pairs->push_back(new_pair);
|
||||
}
|
||||
|
||||
// Each pass over the PairVector strips one byte off the right-hand-side of the
|
||||
// characters and adds a range to the set on the right. For example, the first
|
||||
// pass converts the range from "\xe0\xa0\x80" to "\xe0\xa0\xbf" to ("\xe0\xa0",
|
||||
// [\x80-\xbf]), then the second pass converts the range from ("\xe0\xa0",
|
||||
// [\x80-\xbf]) to ("\xe0\xbf", [\x80-\xbf]) to ("\xe0",
|
||||
// [\xa0-\xbf][\x80-\xbf]).
|
||||
void MoveRightMostCharToSet(PairVector* pairs) {
|
||||
PairVector new_pairs;
|
||||
PairVector::const_iterator it = pairs->begin();
|
||||
while (it != pairs->end() && it->character.empty()) {
|
||||
new_pairs.push_back(*it);
|
||||
++it;
|
||||
}
|
||||
CHECK(it != pairs->end());
|
||||
Character unconverted_bytes(it->character.begin(), it->character.end() - 1);
|
||||
Range new_range(it->character.back());
|
||||
StringSet converted = it->set;
|
||||
++it;
|
||||
while (it != pairs->end()) {
|
||||
const Pair& current_pair = *it++;
|
||||
if (current_pair.character.size() == unconverted_bytes.size() + 1 &&
|
||||
std::equal(unconverted_bytes.begin(),
|
||||
unconverted_bytes.end(),
|
||||
current_pair.character.begin()) &&
|
||||
converted == current_pair.set) {
|
||||
// The particular set of UTF-8 codepoints we are validating guarantees
|
||||
// that each byte range will be contiguous. This would not necessarily be
|
||||
// true for an arbitrary set of UTF-8 codepoints.
|
||||
DCHECK_EQ(new_range.to() + 1, current_pair.character.back());
|
||||
new_range.AddByte(current_pair.character.back());
|
||||
continue;
|
||||
}
|
||||
ConstructPairAndAppend(unconverted_bytes, new_range, converted, &new_pairs);
|
||||
unconverted_bytes = Character(current_pair.character.begin(),
|
||||
current_pair.character.end() - 1);
|
||||
new_range = Range(current_pair.character.back());
|
||||
converted = current_pair.set;
|
||||
}
|
||||
ConstructPairAndAppend(unconverted_bytes, new_range, converted, &new_pairs);
|
||||
new_pairs.swap(*pairs);
|
||||
}
|
||||
|
||||
// Runs MoveRightMostCharToSet() four times. Each pass moves one byte per
// character and a UTF-8 sequence is at most four bytes long, so afterwards
// every character is empty (verified when DCHECKs are enabled).
void MoveAllCharsToSets(PairVector* pairs) {
  int passes_remaining = 4;
  while (passes_remaining-- > 0) {
    MoveRightMostCharToSet(pairs);
  }
#if DCHECK_IS_ON()
  for (auto it = pairs->cbegin(); it != pairs->cend(); ++it) {
    DCHECK(it->character.empty());
  }
#endif
}
|
||||
|
||||
// Logs the generated string sets in regular-expression style, ie. [\x00-\x7f],
|
||||
// [\xc2-\xdf][\x80-\xbf], etc. This can be a useful sanity-check that the
|
||||
// algorithm is working. Use the command-line option
|
||||
// --vmodule=build_utf8_validator_tables=1 to see this output.
|
||||
void LogStringSets(const PairVector& pairs) {
|
||||
for (const auto& pair_it : pairs) {
|
||||
std::string set_as_string;
|
||||
for (auto set_it = pair_it.set.begin(); set_it != pair_it.set.end();
|
||||
++set_it) {
|
||||
set_as_string += base::StringPrintf("[\\x%02x-\\x%02x]",
|
||||
static_cast<int>(set_it->from()),
|
||||
static_cast<int>(set_it->to()));
|
||||
}
|
||||
VLOG(1) << set_as_string;
|
||||
}
|
||||
}
|
||||
|
||||
// One transition of a state: input bytes from |from| up to (but excluding)
// the |from| of the next entry in the state, or up to 0xFF for the last
// entry, move the machine to |target_state|.
struct StateRange {
  uint8_t from;
  uint8_t target_state;
};

// A single state in the state machine: its StateRanges, sorted by |from|.
using State = std::vector<StateRange>;
|
||||
|
||||
// Generates a state where all bytes go to state 1 (invalid). This is also used
|
||||
// as an initialiser for other states (since bytes from outside the desired
|
||||
// range are invalid).
|
||||
State GenerateInvalidState() {
|
||||
const StateRange range = {0, 1};
|
||||
return State(1, range);
|
||||
}
|
||||
|
||||
// A map from a state (ie. a set of strings which will match from this state) to
|
||||
// a number (which is an index into the array of states).
|
||||
typedef std::map<StringSet, uint8_t> StateMap;
|
||||
|
||||
// Create a new state corresponding to |set|, add it |states| and |state_map|
|
||||
// and return the index it was given in |states|.
|
||||
uint8_t MakeState(const StringSet& set,
|
||||
std::vector<State>* states,
|
||||
StateMap* state_map) {
|
||||
DCHECK(!set.empty());
|
||||
const Range& range = set.front();
|
||||
const StringSet rest(set.begin() + 1, set.end());
|
||||
const StateMap::const_iterator where = state_map->find(rest);
|
||||
const uint8_t target_state = where == state_map->end()
|
||||
? MakeState(rest, states, state_map)
|
||||
: where->second;
|
||||
DCHECK_LT(0, range.from());
|
||||
DCHECK_LT(range.to(), 0xFF);
|
||||
const StateRange new_state_initializer[] = {
|
||||
{0, 1},
|
||||
{range.from(), target_state},
|
||||
{static_cast<uint8_t>(range.to() + 1), 1}};
|
||||
states->push_back(
|
||||
State(new_state_initializer,
|
||||
new_state_initializer + base::size(new_state_initializer)));
|
||||
const uint8_t new_state_number =
|
||||
base::checked_cast<uint8_t>(states->size() - 1);
|
||||
CHECK(state_map->insert(std::make_pair(set, new_state_number)).second);
|
||||
return new_state_number;
|
||||
}
|
||||
|
||||
std::vector<State> GenerateStates(const PairVector& pairs) {
|
||||
// States 0 and 1 are the initial/valid state and invalid state, respectively.
|
||||
std::vector<State> states(2, GenerateInvalidState());
|
||||
StateMap state_map;
|
||||
state_map.insert(std::make_pair(StringSet(), 0));
|
||||
for (auto it = pairs.begin(); it != pairs.end(); ++it) {
|
||||
DCHECK(it->character.empty());
|
||||
DCHECK(!it->set.empty());
|
||||
const Range& range = it->set.front();
|
||||
const StringSet rest(it->set.begin() + 1, it->set.end());
|
||||
const StateMap::const_iterator where = state_map.find(rest);
|
||||
const uint8_t target_state = where == state_map.end()
|
||||
? MakeState(rest, &states, &state_map)
|
||||
: where->second;
|
||||
if (states[0].back().from == range.from()) {
|
||||
DCHECK_EQ(1, states[0].back().target_state);
|
||||
states[0].back().target_state = target_state;
|
||||
DCHECK_LT(range.to(), 0xFF);
|
||||
const StateRange new_range = {static_cast<uint8_t>(range.to() + 1), 1};
|
||||
states[0].push_back(new_range);
|
||||
} else {
|
||||
DCHECK_LT(range.to(), 0xFF);
|
||||
const StateRange new_range_initializer[] = {
|
||||
{range.from(), target_state},
|
||||
{static_cast<uint8_t>(range.to() + 1), 1}};
|
||||
states[0].insert(
|
||||
states[0].end(), new_range_initializer,
|
||||
new_range_initializer + base::size(new_range_initializer));
|
||||
}
|
||||
}
|
||||
return states;
|
||||
}
|
||||
|
||||
// Output the generated states as a C++ table. Two tricks are used to compact
|
||||
// the table: each state in the table starts with a shift value which indicates
|
||||
// how many bits we can discard from the right-hand-side of the byte before
|
||||
// doing the table lookup. Secondly, only the state-transitions for bytes
|
||||
// with the top-bit set are included in the table; bytes without the top-bit set
|
||||
// are just ASCII and are handled directly by the code.
|
||||
void PrintStates(const std::vector<State>& states, FILE* stream) {
|
||||
// First calculate the start-offset of each state. This allows the state
|
||||
// machine to jump directly to the correct offset, avoiding an extra
|
||||
// indirection. State 0 starts at offset 0.
|
||||
std::vector<uint8_t> state_offset(1, 0);
|
||||
std::vector<uint8_t> shifts;
|
||||
uint8_t pos = 0;
|
||||
|
||||
for (const auto& state_it : states) {
|
||||
// We want to set |shift| to the (0-based) index of the least-significant
|
||||
// set bit in any of the ranges for this state, since this tells us how many
|
||||
// bits we can discard and still determine what range a byte lies in. Sadly
|
||||
// it appears that ffs() is not portable, so we do it clumsily.
|
||||
uint8_t shift = 7;
|
||||
for (auto range_it = state_it.begin(); range_it != state_it.end();
|
||||
++range_it) {
|
||||
while (shift > 0 && range_it->from % (1 << shift) != 0) {
|
||||
--shift;
|
||||
}
|
||||
}
|
||||
shifts.push_back(shift);
|
||||
pos += 1 + (1 << (7 - shift));
|
||||
state_offset.push_back(pos);
|
||||
}
|
||||
|
||||
DCHECK_EQ(129, state_offset[1]);
|
||||
|
||||
fputs(kProlog, stream);
|
||||
TablePrinter table_printer(stream);
|
||||
|
||||
for (uint8_t state_index = 0; state_index < states.size(); ++state_index) {
|
||||
const uint8_t shift = shifts[state_index];
|
||||
uint8_t next_range = 0;
|
||||
uint8_t target_state = 1;
|
||||
fprintf(stream,
|
||||
" // State %d, offset 0x%02x\n",
|
||||
static_cast<int>(state_index),
|
||||
static_cast<int>(state_offset[state_index]));
|
||||
table_printer.PrintValue(shift);
|
||||
for (int i = 0; i < 0x100; i += (1 << shift)) {
|
||||
if (next_range < states[state_index].size() &&
|
||||
states[state_index][next_range].from == i) {
|
||||
target_state = states[state_index][next_range].target_state;
|
||||
++next_range;
|
||||
}
|
||||
if (i >= 0x80) {
|
||||
table_printer.PrintValue(state_offset[target_state]);
|
||||
}
|
||||
}
|
||||
table_printer.NewLine();
|
||||
}
|
||||
|
||||
fputs(kEpilog, stream);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
base::CommandLine::Init(argc, argv);
|
||||
logging::LoggingSettings settings;
|
||||
settings.logging_dest =
|
||||
logging::LOG_TO_SYSTEM_DEBUG_LOG | logging::LOG_TO_STDERR;
|
||||
logging::InitLogging(settings);
|
||||
if (base::CommandLine::ForCurrentProcess()->HasSwitch("help")) {
|
||||
fwrite(kHelpText, 1, base::size(kHelpText), stdout);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
base::FilePath filename =
|
||||
base::CommandLine::ForCurrentProcess()->GetSwitchValuePath("output");
|
||||
|
||||
FILE* output = stdout;
|
||||
if (!filename.empty()) {
|
||||
output = base::OpenFile(filename, "wb");
|
||||
if (!output)
|
||||
PLOG(FATAL) << "Couldn't open '" << filename.AsUTF8Unsafe()
|
||||
<< "' for writing";
|
||||
}
|
||||
|
||||
// Step 1: Enumerate the characters
|
||||
PairVector pairs = InitializeCharacters();
|
||||
// Step 2: Convert to sets.
|
||||
MoveAllCharsToSets(&pairs);
|
||||
if (VLOG_IS_ON(1)) {
|
||||
LogStringSets(pairs);
|
||||
}
|
||||
// Step 3: Generate states.
|
||||
std::vector<State> states = GenerateStates(pairs);
|
||||
// Step 4/5: Print output
|
||||
PrintStates(states, output);
|
||||
|
||||
if (!filename.empty()) {
|
||||
if (!base::CloseFile(output))
|
||||
PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe()
|
||||
<< "'";
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
90
TMessagesProj/jni/voip/webrtc/base/i18n/case_conversion.cc
Normal file
90
TMessagesProj/jni/voip/webrtc/base/i18n/case_conversion.cc
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/case_conversion.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/numerics/safe_conversions.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "third_party/icu/source/common/unicode/uchar.h"
|
||||
#include "third_party/icu/source/common/unicode/unistr.h"
|
||||
#include "third_party/icu/source/common/unicode/ustring.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
namespace {
|
||||
|
||||
// Provides a uniform interface for upper/lower/folding which take take
|
||||
// slightly varying parameters.
|
||||
typedef int32_t (*CaseMapperFunction)(UChar* dest, int32_t dest_capacity,
|
||||
const UChar* src, int32_t src_length,
|
||||
UErrorCode* error);
|
||||
|
||||
// CaseMapperFunction adapter around u_strToUpper().
int32_t ToUpperMapper(UChar* dest, int32_t dest_capacity,
                      const UChar* src, int32_t src_length,
                      UErrorCode* error) {
  // Use default locale.
  const char* const default_locale = nullptr;
  return u_strToUpper(dest, dest_capacity, src, src_length, default_locale,
                      error);
}
|
||||
|
||||
// CaseMapperFunction adapter around u_strToLower().
int32_t ToLowerMapper(UChar* dest, int32_t dest_capacity,
                      const UChar* src, int32_t src_length,
                      UErrorCode* error) {
  // Use default locale.
  const char* const default_locale = nullptr;
  return u_strToLower(dest, dest_capacity, src, src_length, default_locale,
                      error);
}
|
||||
|
||||
// CaseMapperFunction adapter around u_strFoldCase(), always using the
// U_FOLD_CASE_DEFAULT option.
int32_t FoldCaseMapper(UChar* dest, int32_t dest_capacity,
                       const UChar* src, int32_t src_length,
                       UErrorCode* error) {
  const int32_t folded_length = u_strFoldCase(
      dest, dest_capacity, src, src_length, U_FOLD_CASE_DEFAULT, error);
  return folded_length;
}
|
||||
|
||||
// Provides similar functionality as UnicodeString::caseMap but on string16.
|
||||
string16 CaseMap(StringPiece16 string, CaseMapperFunction case_mapper) {
|
||||
string16 dest;
|
||||
if (string.empty())
|
||||
return dest;
|
||||
|
||||
// Provide an initial guess that the string length won't change. The typical
|
||||
// strings we use will very rarely change length in this process, so don't
|
||||
// optimize for that case.
|
||||
dest.resize(string.size());
|
||||
|
||||
UErrorCode error;
|
||||
do {
|
||||
error = U_ZERO_ERROR;
|
||||
|
||||
// ICU won't terminate the string if there's not enough room for the null
|
||||
// terminator, but will otherwise. So we don't need to save room for that.
|
||||
// Don't use WriteInto, which assumes null terminators.
|
||||
int32_t new_length = case_mapper(
|
||||
&dest[0], saturated_cast<int32_t>(dest.size()),
|
||||
string.data(), saturated_cast<int32_t>(string.size()),
|
||||
&error);
|
||||
dest.resize(new_length);
|
||||
} while (error == U_BUFFER_OVERFLOW_ERROR);
|
||||
return dest;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Lower-cases |string| through CaseMap() with the ToLowerMapper adapter.
string16 ToLower(StringPiece16 string) {
  const CaseMapperFunction mapper = &ToLowerMapper;
  return CaseMap(string, mapper);
}
|
||||
|
||||
// Upper-cases |string| through CaseMap() with the ToUpperMapper adapter.
string16 ToUpper(StringPiece16 string) {
  const CaseMapperFunction mapper = &ToUpperMapper;
  return CaseMap(string, mapper);
}
|
||||
|
||||
// Case-folds |string| through CaseMap() with the FoldCaseMapper adapter.
string16 FoldCase(StringPiece16 string) {
  const CaseMapperFunction mapper = &FoldCaseMapper;
  return CaseMap(string, mapper);
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
48
TMessagesProj/jni/voip/webrtc/base/i18n/case_conversion.h
Normal file
48
TMessagesProj/jni/voip/webrtc/base/i18n/case_conversion.h
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_CASE_CONVERSION_H_
|
||||
#define BASE_I18N_CASE_CONVERSION_H_
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// UNICODE CASE-HANDLING ADVICE
|
||||
//
|
||||
// In English it's always safe to convert to upper-case or lower-case text
|
||||
// and get a good answer. But some languages have rules specific to those
|
||||
// locales. One example is the Turkish I:
|
||||
// http://www.i18nguy.com/unicode/turkish-i18n.html
|
||||
//
|
||||
// ToLower/ToUpper use the current ICU locale which will take into account
|
||||
// the user language preference. Use this when dealing with user typing.
|
||||
//
|
||||
// FoldCase canonicalizes to a standardized form independent of the current
|
||||
// locale. Use this when comparing general Unicode strings that don't
|
||||
// necessarily belong in the user's current locale (like commands, protocol
|
||||
// names, other strings from the web) for case-insensitive equality.
|
||||
//
|
||||
// Note that case conversions will change the length of the string in some
|
||||
// not-uncommon cases. Never assume that the output is the same length as
|
||||
// the input.
|
||||
|
||||
// Returns the lower case equivalent of string. Uses ICU's current locale.
|
||||
BASE_I18N_EXPORT string16 ToLower(StringPiece16 string);
|
||||
|
||||
// Returns the upper case equivalent of string. Uses ICU's current locale.
|
||||
BASE_I18N_EXPORT string16 ToUpper(StringPiece16 string);
|
||||
|
||||
// Convert the given string to a canonical case, independent of the current
|
||||
// locale. For ASCII the canonical form is lower case.
|
||||
// See http://unicode.org/faq/casemap_charprop.html#2
|
||||
BASE_I18N_EXPORT string16 FoldCase(StringPiece16 string);
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_CASE_CONVERSION_H_
|
||||
152
TMessagesProj/jni/voip/webrtc/base/i18n/char_iterator.cc
Normal file
152
TMessagesProj/jni/voip/webrtc/base/i18n/char_iterator.cc
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/char_iterator.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "third_party/icu/source/common/unicode/utf16.h"
|
||||
#include "third_party/icu/source/common/unicode/utf8.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// UTF8CharIterator ------------------------------------------------------------
|
||||
|
||||
// Starts iterating at the beginning of |str|. For a non-empty string the first
// character is decoded right away, which also advances |next_pos_| past it.
UTF8CharIterator::UTF8CharIterator(const std::string* str)
    : str_(reinterpret_cast<const uint8_t*>(str->data())),
      len_(str->size()),
      array_pos_(0),
      next_pos_(0),
      char_pos_(0),
      char_(0) {
  if (len_ != 0) {
    U8_NEXT(str_, next_pos_, len_, char_);
  }
}
|
||||
|
||||
UTF8CharIterator::~UTF8CharIterator() = default;
|
||||
|
||||
bool UTF8CharIterator::Advance() {
|
||||
if (array_pos_ >= len_)
|
||||
return false;
|
||||
|
||||
array_pos_ = next_pos_;
|
||||
char_pos_++;
|
||||
if (next_pos_ < len_)
|
||||
U8_NEXT(str_, next_pos_, len_, char_);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// UTF16CharIterator -----------------------------------------------------------
|
||||
|
||||
// Both public constructors delegate to their private counterparts with an
// initial position of 0, i.e. iteration begins at the start of the string.
UTF16CharIterator::UTF16CharIterator(const string16* str)
    : UTF16CharIterator(str, 0) {}

UTF16CharIterator::UTF16CharIterator(const char16* str, size_t str_len)
    : UTF16CharIterator(str, str_len, 0) {}

// Moving and destruction use the compiler-generated behaviour.
UTF16CharIterator::UTF16CharIterator(UTF16CharIterator&& to_move) = default;

UTF16CharIterator::~UTF16CharIterator() = default;

UTF16CharIterator& UTF16CharIterator::operator=(UTF16CharIterator&& to_move) =
    default;
|
||||
|
||||
// static
|
||||
UTF16CharIterator UTF16CharIterator::LowerBound(const string16* str,
|
||||
size_t array_index) {
|
||||
return LowerBound(reinterpret_cast<const char16*>(str->data()), str->length(),
|
||||
array_index);
|
||||
}
|
||||
|
||||
// static
|
||||
UTF16CharIterator UTF16CharIterator::LowerBound(const char16* str,
|
||||
size_t length,
|
||||
size_t array_index) {
|
||||
DCHECK_LE(array_index, length);
|
||||
U16_SET_CP_START(str, 0, array_index);
|
||||
return UTF16CharIterator(str, length, array_index);
|
||||
}
|
||||
|
||||
// static
|
||||
UTF16CharIterator UTF16CharIterator::UpperBound(const string16* str,
|
||||
size_t array_index) {
|
||||
return UpperBound(reinterpret_cast<const char16*>(str->data()), str->length(),
|
||||
array_index);
|
||||
}
|
||||
|
||||
// static
|
||||
UTF16CharIterator UTF16CharIterator::UpperBound(const char16* str,
|
||||
size_t length,
|
||||
size_t array_index) {
|
||||
DCHECK_LE(array_index, length);
|
||||
U16_SET_CP_LIMIT(str, 0, array_index, length);
|
||||
return UTF16CharIterator(str, length, array_index);
|
||||
}
|
||||
|
||||
int32_t UTF16CharIterator::NextCodePoint() const {
|
||||
if (next_pos_ >= len_)
|
||||
return 0;
|
||||
|
||||
UChar32 c;
|
||||
U16_GET(str_, 0, next_pos_, len_, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
int32_t UTF16CharIterator::PreviousCodePoint() const {
|
||||
if (array_pos_ <= 0)
|
||||
return 0;
|
||||
|
||||
uint32_t pos = array_pos_;
|
||||
UChar32 c;
|
||||
U16_PREV(str_, 0, pos, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
bool UTF16CharIterator::Advance() {
|
||||
if (array_pos_ >= len_)
|
||||
return false;
|
||||
|
||||
array_pos_ = next_pos_;
|
||||
char_offset_++;
|
||||
if (next_pos_ < len_)
|
||||
ReadChar();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Moves to the previous code point. Returns false, without changing anything,
// when the iterator is already at the start of the string.
bool UTF16CharIterator::Rewind() {
  const bool can_rewind = array_pos_ > 0;
  if (can_rewind) {
    // The code point being left becomes the "next" one.
    next_pos_ = array_pos_;
    --char_offset_;
    // Steps |array_pos_| back and stores the code point found there.
    U16_PREV(str_, 0, array_pos_, char_);
  }
  return can_rewind;
}
|
||||
|
||||
// Private: iterates over |str| starting at array index |initial_pos|, by
// delegating to the char16* constructor.
UTF16CharIterator::UTF16CharIterator(const string16* str, int32_t initial_pos)
    : UTF16CharIterator(str->data(), str->length(), initial_pos) {}
|
||||
|
||||
// Private: iterates over the |str_len| code units at |str|, starting at array
// index |initial_pos|. The character offset always starts at zero.
UTF16CharIterator::UTF16CharIterator(const char16* str,
                                     size_t str_len,
                                     int32_t initial_pos)
    : str_(str),
      len_(str_len),
      array_pos_(initial_pos),
      next_pos_(initial_pos),
      char_offset_(0),
      char_(0) {
  // Decode the current code point if there is one. This has the side-effect
  // of advancing |next_pos_|.
  const bool has_current = array_pos_ < len_;
  if (has_current) {
    ReadChar();
  }
}
|
||||
|
||||
// Decodes the code point at |next_pos_| into |char_| and moves |next_pos_|
// past it.
void UTF16CharIterator::ReadChar() {
  // U16_NEXT is actually a huge macro, which is why it is wrapped in its own
  // function.
  U16_NEXT(str_, next_pos_, len_, char_);
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
175
TMessagesProj/jni/voip/webrtc/base/i18n/char_iterator.h
Normal file
175
TMessagesProj/jni/voip/webrtc/base/i18n/char_iterator.h
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_CHAR_ITERATOR_H_
|
||||
#define BASE_I18N_CHAR_ITERATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/gtest_prod_util.h"
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/macros.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
// The CharIterator classes iterate through the characters in UTF8 and
|
||||
// UTF16 strings. Example usage:
|
||||
//
|
||||
// UTF8CharIterator iter(&str);
|
||||
// while (!iter.end()) {
|
||||
// VLOG(1) << iter.get();
|
||||
// iter.Advance();
|
||||
// }
|
||||
|
||||
#if defined(OS_WIN)
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
class BASE_I18N_EXPORT UTF8CharIterator {
|
||||
public:
|
||||
// Requires |str| to live as long as the UTF8CharIterator does.
|
||||
explicit UTF8CharIterator(const std::string* str);
|
||||
~UTF8CharIterator();
|
||||
|
||||
// Return the starting array index of the current character within the
|
||||
// string.
|
||||
int32_t array_pos() const { return array_pos_; }
|
||||
|
||||
// Return the logical index of the current character, independent of the
|
||||
// number of bytes each character takes.
|
||||
int32_t char_pos() const { return char_pos_; }
|
||||
|
||||
// Return the current char.
|
||||
int32_t get() const { return char_; }
|
||||
|
||||
// Returns true if we're at the end of the string.
|
||||
bool end() const { return array_pos_ == len_; }
|
||||
|
||||
// Advance to the next actual character. Returns false if we're at the
|
||||
// end of the string.
|
||||
bool Advance();
|
||||
|
||||
private:
|
||||
// The string we're iterating over.
|
||||
const uint8_t* str_;
|
||||
|
||||
// The length of the encoded string.
|
||||
int32_t len_;
|
||||
|
||||
// Array index.
|
||||
int32_t array_pos_;
|
||||
|
||||
// The next array index.
|
||||
int32_t next_pos_;
|
||||
|
||||
// Character index.
|
||||
int32_t char_pos_;
|
||||
|
||||
// The current character.
|
||||
int32_t char_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
|
||||
};
|
||||
|
||||
class BASE_I18N_EXPORT UTF16CharIterator {
|
||||
public:
|
||||
// Requires |str| to live as long as the UTF16CharIterator does.
|
||||
explicit UTF16CharIterator(const string16* str);
|
||||
UTF16CharIterator(const char16* str, size_t str_len);
|
||||
UTF16CharIterator(UTF16CharIterator&& to_move);
|
||||
~UTF16CharIterator();
|
||||
UTF16CharIterator& operator=(UTF16CharIterator&& to_move);
|
||||
|
||||
// Returns an iterator starting on the unicode character at offset
|
||||
// |array_index| into the string, or the previous array offset if
|
||||
// |array_index| is the second half of a surrogate pair.
|
||||
static UTF16CharIterator LowerBound(const string16* str, size_t array_index);
|
||||
static UTF16CharIterator LowerBound(const char16* str,
|
||||
size_t str_len,
|
||||
size_t array_index);
|
||||
|
||||
// Returns an iterator starting on the unicode character at offset
|
||||
// |array_index| into the string, or the next offset if |array_index| is the
|
||||
// second half of a surrogate pair.
|
||||
static UTF16CharIterator UpperBound(const string16* str, size_t array_index);
|
||||
static UTF16CharIterator UpperBound(const char16* str,
|
||||
size_t str_len,
|
||||
size_t array_index);
|
||||
|
||||
// Return the starting array index of the current character within the
|
||||
// string.
|
||||
int32_t array_pos() const { return array_pos_; }
|
||||
|
||||
// Returns the offset in code points from the initial iterator position, which
|
||||
// could be negative if Rewind() is called. The initial value is always zero,
|
||||
// regardless of how the iterator is constructed.
|
||||
int32_t char_offset() const { return char_offset_; }
|
||||
|
||||
// Returns the code point at the current position.
|
||||
int32_t get() const { return char_; }
|
||||
|
||||
// Returns the code point (i.e. the full Unicode character, not half of a
|
||||
// surrogate pair) following the current one. Should not be called if end() is
|
||||
// true. If the current code point is the last one in the string, returns
|
||||
// zero.
|
||||
int32_t NextCodePoint() const;
|
||||
|
||||
// Returns the code point (i.e. the full Unicode character, not half of a
|
||||
// surrogate pair) preceding the current one. Should not be called if start()
|
||||
// is true.
|
||||
int32_t PreviousCodePoint() const;
|
||||
|
||||
// Returns true if we're at the start of the string.
|
||||
bool start() const { return array_pos_ == 0; }
|
||||
|
||||
// Returns true if we're at the end of the string.
|
||||
bool end() const { return array_pos_ == len_; }
|
||||
|
||||
// Advances to the next actual character. Returns false if we're at the
|
||||
// end of the string.
|
||||
bool Advance();
|
||||
|
||||
// Moves to the previous actual character. Returns false if we're at the start
|
||||
// of the string.
|
||||
bool Rewind();
|
||||
|
||||
private:
|
||||
UTF16CharIterator(const string16* str, int32_t initial_pos);
|
||||
UTF16CharIterator(const char16* str, size_t str_len, int32_t initial_pos);
|
||||
|
||||
// Fills in the current character we found and advances to the next
|
||||
// character, updating all flags as necessary.
|
||||
void ReadChar();
|
||||
|
||||
// The string we're iterating over.
|
||||
const char16* str_;
|
||||
|
||||
// The length of the encoded string.
|
||||
int32_t len_;
|
||||
|
||||
// Array index.
|
||||
int32_t array_pos_;
|
||||
|
||||
// The next array index.
|
||||
int32_t next_pos_;
|
||||
|
||||
// Character offset from the initial position of the iterator.
|
||||
int32_t char_offset_;
|
||||
|
||||
// The current character.
|
||||
int32_t char_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
|
||||
};
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_CHAR_ITERATOR_H_
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/character_encoding.h"
|
||||
|
||||
#include "base/macros.h"
|
||||
#include "third_party/icu/source/common/unicode/ucnv.h"
|
||||
|
||||
namespace base {
|
||||
namespace {
|
||||
|
||||
// An array of all supported canonical encoding names.
|
||||
const char* const kCanonicalEncodingNames[] = {
|
||||
"Big5", "EUC-JP", "EUC-KR", "gb18030",
|
||||
"GBK", "IBM866", "ISO-2022-JP", "ISO-8859-10",
|
||||
"ISO-8859-13", "ISO-8859-14", "ISO-8859-15", "ISO-8859-16",
|
||||
"ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5",
|
||||
"ISO-8859-6", "ISO-8859-7", "ISO-8859-8", "ISO-8859-8-I",
|
||||
"KOI8-R", "KOI8-U", "macintosh", "Shift_JIS",
|
||||
"UTF-16LE", "UTF-8", "windows-1250", "windows-1251",
|
||||
"windows-1252", "windows-1253", "windows-1254", "windows-1255",
|
||||
"windows-1256", "windows-1257", "windows-1258", "windows-874"};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::string GetCanonicalEncodingNameByAliasName(const std::string& alias_name) {
|
||||
for (auto* encoding_name : kCanonicalEncodingNames) {
|
||||
if (alias_name == encoding_name)
|
||||
return alias_name;
|
||||
}
|
||||
static const char* kStandards[3] = {"HTML", "MIME", "IANA"};
|
||||
for (auto* standard : kStandards) {
|
||||
UErrorCode error_code = U_ZERO_ERROR;
|
||||
const char* canonical_name =
|
||||
ucnv_getStandardName(alias_name.c_str(), standard, &error_code);
|
||||
if (U_SUCCESS(error_code) && canonical_name)
|
||||
return canonical_name;
|
||||
}
|
||||
return std::string();
|
||||
}
|
||||
} // namespace base
|
||||
20
TMessagesProj/jni/voip/webrtc/base/i18n/character_encoding.h
Normal file
20
TMessagesProj/jni/voip/webrtc/base/i18n/character_encoding.h
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_CHARACTER_ENCODING_H_
|
||||
#define BASE_I18N_CHARACTER_ENCODING_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Return canonical encoding name according to the encoding alias name.
|
||||
BASE_I18N_EXPORT std::string GetCanonicalEncodingNameByAliasName(
|
||||
const std::string& alias_name);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_CHARACTER_ENCODING_H_
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/encoding_detection.h"
|
||||
|
||||
#include "build/build_config.h"
|
||||
#include "third_party/ced/src/compact_enc_det/compact_enc_det.h"
|
||||
|
||||
// third_party/ced/src/util/encodings/encodings.h, which is included
|
||||
// by the include above, undefs UNICODE because that is a macro used
|
||||
// internally in ced. If we later in the same translation unit do
|
||||
// anything related to Windows or Windows headers those will then use
|
||||
// the ASCII versions which we do not want. To avoid that happening in
|
||||
// jumbo builds, we redefine UNICODE again here.
|
||||
#if defined(OS_WIN)
|
||||
#define UNICODE 1
|
||||
#endif // OS_WIN
|
||||
|
||||
namespace base {
|
||||
|
||||
bool DetectEncoding(const std::string& text, std::string* encoding) {
|
||||
int consumed_bytes;
|
||||
bool is_reliable;
|
||||
Encoding enc = CompactEncDet::DetectEncoding(
|
||||
text.c_str(), text.length(), nullptr, nullptr, nullptr,
|
||||
UNKNOWN_ENCODING,
|
||||
UNKNOWN_LANGUAGE,
|
||||
CompactEncDet::QUERY_CORPUS, // plain text
|
||||
false, // Include 7-bit encodings
|
||||
&consumed_bytes,
|
||||
&is_reliable);
|
||||
|
||||
if (enc == UNKNOWN_ENCODING)
|
||||
return false;
|
||||
|
||||
*encoding = MimeEncodingName(enc);
|
||||
return true;
|
||||
}
|
||||
} // namespace base
|
||||
21
TMessagesProj/jni/voip/webrtc/base/i18n/encoding_detection.h
Normal file
21
TMessagesProj/jni/voip/webrtc/base/i18n/encoding_detection.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_ENCODING_DETECTION_H_
|
||||
#define BASE_I18N_ENCODING_DETECTION_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/compiler_specific.h"
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Detect encoding of |text| and put the name of encoding in |encoding|.
|
||||
// Returns true on success.
|
||||
BASE_I18N_EXPORT bool DetectEncoding(const std::string& text,
|
||||
std::string* encoding) WARN_UNUSED_RESULT;
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_ENCODING_DETECTION_H_
|
||||
179
TMessagesProj/jni/voip/webrtc/base/i18n/file_util_icu.cc
Normal file
179
TMessagesProj/jni/voip/webrtc/base/i18n/file_util_icu.cc
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// File utilities that use the ICU library go in this file.
|
||||
|
||||
#include "base/i18n/file_util_icu.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "base/files/file_path.h"
|
||||
#include "base/i18n/icu_string_conversions.h"
|
||||
#include "base/i18n/string_compare.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/macros.h"
|
||||
#include "base/memory/singleton.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/sys_string_conversions.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "build/build_config.h"
|
||||
#include "third_party/icu/source/common/unicode/uniset.h"
|
||||
#include "third_party/icu/source/i18n/unicode/coll.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
namespace {
|
||||
|
||||
class IllegalCharacters {
|
||||
public:
|
||||
static IllegalCharacters* GetInstance() {
|
||||
return Singleton<IllegalCharacters>::get();
|
||||
}
|
||||
|
||||
bool DisallowedEverywhere(UChar32 ucs4) {
|
||||
return !!illegal_anywhere_->contains(ucs4);
|
||||
}
|
||||
|
||||
bool DisallowedLeadingOrTrailing(UChar32 ucs4) {
|
||||
return !!illegal_at_ends_->contains(ucs4);
|
||||
}
|
||||
|
||||
bool IsAllowedName(const string16& s) {
|
||||
return s.empty() || (!!illegal_anywhere_->containsNone(
|
||||
icu::UnicodeString(s.c_str(), s.size())) &&
|
||||
!illegal_at_ends_->contains(*s.begin()) &&
|
||||
!illegal_at_ends_->contains(*s.rbegin()));
|
||||
}
|
||||
|
||||
private:
|
||||
friend class Singleton<IllegalCharacters>;
|
||||
friend struct DefaultSingletonTraits<IllegalCharacters>;
|
||||
|
||||
IllegalCharacters();
|
||||
~IllegalCharacters() = default;
|
||||
|
||||
// set of characters considered invalid anywhere inside a filename.
|
||||
std::unique_ptr<icu::UnicodeSet> illegal_anywhere_;
|
||||
|
||||
// set of characters considered invalid at either end of a filename.
|
||||
std::unique_ptr<icu::UnicodeSet> illegal_at_ends_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);
|
||||
};
|
||||
|
||||
// Builds and freezes both character sets.
IllegalCharacters::IllegalCharacters() {
  UErrorCode everywhere_status = U_ZERO_ERROR;
  UErrorCode ends_status = U_ZERO_ERROR;
  // Illegal anywhere: control characters, formatting characters,
  // non-characters, path separators, and some printable ASCII characters
  // regarded as dangerous ('"*/:<>?\\').
  // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
  // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
  // Note that code points in the "Other, Format" (Cf) category are ignored on
  // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being
  // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is
  // also excluded due to the possibility of interacting poorly with short
  // filenames on VFAT. (Related to CVE-2014-9390)
  illegal_anywhere_.reset(new icu::UnicodeSet(
      UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),
      everywhere_status));
  // Illegal at either end: white space and the dot.
  illegal_at_ends_.reset(new icu::UnicodeSet(
      UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status));
  DCHECK(U_SUCCESS(everywhere_status));
  DCHECK(U_SUCCESS(ends_status));

  // Add non-characters. If this ever becomes a performance bottleneck, do not
  // add them to the set and change IsFilenameLegal() to check
  // |(ucs4 & 0xFFFEu) == 0xFFFEu| in addition to calling IsAllowedName().
  illegal_anywhere_->add(0xFDD0, 0xFDEF);
  // The last two code points of each of the 17 planes.
  for (int plane = 0; plane <= 0x10; ++plane) {
    const int first_in_plane = plane * 0x10000;
    illegal_anywhere_->add(first_in_plane + 0xFFFE, first_in_plane + 0xFFFF);
  }
  illegal_anywhere_->freeze();
  illegal_at_ends_->freeze();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool IsFilenameLegal(const string16& file_name) {
|
||||
return IllegalCharacters::GetInstance()->IsAllowedName(file_name);
|
||||
}
|
||||
|
||||
// Walks |file_name| one code point at a time and overwrites, in place, every
// code point that is disallowed everywhere, as well as a first or last code
// point that is disallowed at the ends, with a single |replace_char| unit.
void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
                                    char replace_char) {
  IllegalCharacters* illegal = IllegalCharacters::GetInstance();

  // The replacement itself must be acceptable in every position.
  DCHECK(!(illegal->DisallowedEverywhere(replace_char)));
  DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char)));

  // The ICU iteration macros expect an int offset.
  int pos = 0;
  while (pos < static_cast<int>(file_name->size())) {
    const int start = pos;
    uint32_t code_point;
#if defined(OS_WIN)
    // Windows uses UTF-16 encoding for filenames.
    U16_NEXT(file_name->data(), pos, static_cast<int>(file_name->length()),
             code_point);
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
    // Mac and Chrome OS use UTF-8 encoding for filenames.
    // Linux doesn't actually define file system encoding. Try to parse as
    // UTF-8.
    U8_NEXT(file_name->data(), pos, static_cast<int>(file_name->length()),
            code_point);
#else
#error Unsupported platform
#endif

    // |pos| now points just past the decoded code point, so the code point is
    // at an edge if it began at offset 0 or ended at the end of the string.
    bool needs_replacement = illegal->DisallowedEverywhere(code_point);
    if (!needs_replacement) {
      const bool at_edge =
          start == 0 || pos == static_cast<int>(file_name->length());
      needs_replacement =
          at_edge && illegal->DisallowedLeadingOrTrailing(code_point);
    }
    if (!needs_replacement)
      continue;

    file_name->replace(start, pos - start, 1, replace_char);
    // A potentially multi-unit code point was collapsed into exactly one
    // unit, so move the offset to just after the replacement.
    pos = start + 1;
  }
}
|
||||
|
||||
bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
|
||||
UErrorCode error_code = U_ZERO_ERROR;
|
||||
// Use the default collator. The default locale should have been properly
|
||||
// set by the time this constructor is called.
|
||||
std::unique_ptr<icu::Collator> collator(
|
||||
icu::Collator::createInstance(error_code));
|
||||
DCHECK(U_SUCCESS(error_code));
|
||||
// Make it case-sensitive.
|
||||
collator->setStrength(icu::Collator::TERTIARY);
|
||||
|
||||
#if defined(OS_WIN)
|
||||
return CompareString16WithCollator(*collator, AsStringPiece16(a.value()),
|
||||
AsStringPiece16(b.value())) == UCOL_LESS;
|
||||
|
||||
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
|
||||
// On linux, the file system encoding is not defined. We assume
|
||||
// SysNativeMBToWide takes care of it.
|
||||
return CompareString16WithCollator(
|
||||
*collator, WideToUTF16(SysNativeMBToWide(a.value())),
|
||||
WideToUTF16(SysNativeMBToWide(b.value()))) == UCOL_LESS;
|
||||
#endif
|
||||
}
|
||||
|
||||
// On Chrome OS, replaces the base name of |file_name| with the output of
// ConvertToUtf8AndNormalize(). The path is left untouched when that
// conversion fails or yields an empty string. Compiles to a no-op on every
// other platform.
void NormalizeFileNameEncoding(FilePath* file_name) {
#if defined(OS_CHROMEOS)
  std::string normalized_base_name;
  const bool converted = ConvertToUtf8AndNormalize(
      file_name->BaseName().value(), kCodepageUTF8, &normalized_base_name);
  if (!converted || normalized_base_name.empty())
    return;

  *file_name = file_name->DirName().Append(FilePath(normalized_base_name));
#endif
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
58
TMessagesProj/jni/voip/webrtc/base/i18n/file_util_icu.h
Normal file
58
TMessagesProj/jni/voip/webrtc/base/i18n/file_util_icu.h
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_FILE_UTIL_ICU_H_
|
||||
#define BASE_I18N_FILE_UTIL_ICU_H_
|
||||
|
||||
// File utilities that use the ICU library go in this file.
|
||||
|
||||
#include "base/files/file_path.h"
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Returns true if file_name does not have any illegal character. The input
|
||||
// param has the same restriction as that for ReplaceIllegalCharactersInPath.
|
||||
BASE_I18N_EXPORT bool IsFilenameLegal(const string16& file_name);
|
||||
|
||||
// Replaces characters in |file_name| that are illegal for file names with
|
||||
// |replace_char|. |file_name| must not be a full or relative path, but just the
|
||||
// file name component (since slashes are considered illegal). Any leading or
|
||||
// trailing whitespace or periods in |file_name| is also replaced with the
|
||||
// |replace_char|.
|
||||
//
|
||||
// Example:
|
||||
// "bad:file*name?.txt" will be turned into "bad_file_name_.txt" when
|
||||
// |replace_char| is '_'.
|
||||
//
|
||||
// Warning: Do not use this function as the sole means of sanitizing a filename.
|
||||
// While the resulting filename itself would be legal, it doesn't necessarily
|
||||
// mean that the file will behave safely. On Windows, certain reserved names
|
||||
// refer to devices rather than files (E.g. LPT1), and some filenames could be
|
||||
// interpreted as shell namespace extensions (E.g. Foo.{<GUID>}).
|
||||
//
|
||||
// On Windows, Chrome OS and Mac, the file system encoding is already known and
|
||||
// parsed as UTF-8 and UTF-16 accordingly.
|
||||
// On Linux, the file name will be parsed as UTF8.
|
||||
// TODO(asanka): Move full filename sanitization logic here.
|
||||
BASE_I18N_EXPORT void ReplaceIllegalCharactersInPath(
|
||||
FilePath::StringType* file_name,
|
||||
char replace_char);
|
||||
|
||||
// Compares two filenames using the current locale information. This can be
|
||||
// used to sort directory listings. It behaves like "operator<" for use in
|
||||
// std::sort.
|
||||
BASE_I18N_EXPORT bool LocaleAwareCompareFilenames(const FilePath& a,
|
||||
const FilePath& b);
|
||||
|
||||
// Calculates the canonical file-system representation of |file_name| base name.
|
||||
// Modifies |file_name| in place. No-op if not on ChromeOS.
|
||||
BASE_I18N_EXPORT void NormalizeFileNameEncoding(FilePath* file_name);
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_FILE_UTIL_ICU_H_
|
||||
13
TMessagesProj/jni/voip/webrtc/base/i18n/i18n_constants.cc
Normal file
13
TMessagesProj/jni/voip/webrtc/base/i18n/i18n_constants.cc
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/i18n_constants.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Codepage name for ISO 8859-1 (Latin-1).
const char kCodepageLatin1[] = "ISO-8859-1";
|
||||
// Codepage name for UTF-8.
const char kCodepageUTF8[] = "UTF-8";
|
||||
|
||||
} // namespace base
|
||||
|
||||
21
TMessagesProj/jni/voip/webrtc/base/i18n/i18n_constants.h
Normal file
21
TMessagesProj/jni/voip/webrtc/base/i18n/i18n_constants.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_I18N_CONSTANTS_H_
|
||||
#define BASE_I18N_I18N_CONSTANTS_H_
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Names of codepages (charsets) understood by icu.
|
||||
BASE_I18N_EXPORT extern const char kCodepageLatin1[]; // a.k.a. ISO 8859-1
|
||||
BASE_I18N_EXPORT extern const char kCodepageUTF8[];
|
||||
|
||||
// The other possible options are UTF-16BE and UTF-16LE, but they are unused in
|
||||
// Chromium as of this writing.
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_I18N_CONSTANTS_H_
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/icu_string_conversions.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "third_party/icu/source/common/unicode/normalizer2.h"
|
||||
#include "third_party/icu/source/common/unicode/ucnv.h"
|
||||
#include "third_party/icu/source/common/unicode/ucnv_cb.h"
|
||||
#include "third_party/icu/source/common/unicode/ucnv_err.h"
|
||||
#include "third_party/icu/source/common/unicode/ustring.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
// ToUnicodeCallbackSubstitute() is based on UCNV_TO_U_CALLBACK_SUBSTITUTE
|
||||
// in source/common/ucnv_err.c.
|
||||
|
||||
// Copyright (c) 1995-2006 International Business Machines Corporation
|
||||
// and others
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||||
// copy of this software and associated documentation files (the "Software"),
|
||||
// to deal in the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, provided that the above copyright notice(s) and
|
||||
// this permission notice appear in all copies of the Software and that
|
||||
// both the above copyright notice(s) and this permission notice appear in
|
||||
// supporting documentation.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
// INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
// OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
// OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
// OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// Except as contained in this notice, the name of a copyright holder
|
||||
// shall not be used in advertising or otherwise to promote the sale, use
|
||||
// or other dealings in this Software without prior written authorization
|
||||
// of the copyright holder.
|
||||
|
||||
// ___________________________________________________________________________
|
||||
//
|
||||
// All trademarks and registered trademarks mentioned herein are the property
|
||||
// of their respective owners.
|
||||
|
||||
void ToUnicodeCallbackSubstitute(const void* context,
|
||||
UConverterToUnicodeArgs *to_args,
|
||||
const char* code_units,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err) {
|
||||
static const UChar kReplacementChar = 0xFFFD;
|
||||
if (reason <= UCNV_IRREGULAR) {
|
||||
if (context == nullptr ||
|
||||
(*(reinterpret_cast<const char*>(context)) == 'i' &&
|
||||
reason == UCNV_UNASSIGNED)) {
|
||||
*err = U_ZERO_ERROR;
|
||||
ucnv_cbToUWriteUChars(to_args, &kReplacementChar, 1, 0, err);
|
||||
}
|
||||
// else the caller must have set the error code accordingly.
|
||||
}
|
||||
// else ignore the reset, close and clone calls.
|
||||
}
|
||||
|
||||
// Encodes |src| with |converter| into |encoded| and then closes |converter|
// (the converter is closed whether or not the conversion succeeded).
// |on_error| selects the from-Unicode error callback: FAIL stops the
// conversion, SKIP and SUBSTITUTE both skip the offending input.
// Returns true on success; on failure |encoded| is cleared and false is
// returned.
bool ConvertFromUTF16(UConverter* converter,
                      base::StringPiece16 src,
                      OnStringConversionError::Type on_error,
                      std::string* encoded) {
  // Size the output for the worst case up front.
  const int capacity = UCNV_GET_MAX_BYTES_FOR_STRING(
      src.length(), ucnv_getMaxCharSize(converter));
  encoded->resize(capacity);

  UErrorCode status = U_ZERO_ERROR;

  // Install the error handler matching |on_error|.
  if (on_error == OnStringConversionError::FAIL) {
    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, nullptr,
                          nullptr, nullptr, &status);
  } else if (on_error == OnStringConversionError::SKIP ||
             on_error == OnStringConversionError::SUBSTITUTE) {
    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, nullptr,
                          nullptr, nullptr, &status);
  } else {
    NOTREACHED();
  }

  // ucnv_fromUChars returns the size not including the terminating null.
  const int written = ucnv_fromUChars(converter, &(*encoded)[0], capacity,
                                      src.data(), src.length(), &status);
  encoded->resize(written);
  ucnv_close(converter);

  if (U_FAILURE(status)) {
    encoded->clear();  // Make sure the output is empty on error.
    return false;
  }
  return true;
}
|
||||
|
||||
// Set up our error handler for ToUTF-16 converters
|
||||
void SetUpErrorHandlerForToUChars(OnStringConversionError::Type on_error,
|
||||
UConverter* converter, UErrorCode* status) {
|
||||
switch (on_error) {
|
||||
case OnStringConversionError::FAIL:
|
||||
ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr,
|
||||
nullptr, status);
|
||||
break;
|
||||
case OnStringConversionError::SKIP:
|
||||
ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, nullptr, nullptr,
|
||||
nullptr, status);
|
||||
break;
|
||||
case OnStringConversionError::SUBSTITUTE:
|
||||
ucnv_setToUCallBack(converter, ToUnicodeCallbackSubstitute, nullptr,
|
||||
nullptr, nullptr, status);
|
||||
break;
|
||||
default:
|
||||
NOTREACHED();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Codepage <-> Wide/UTF-16 ---------------------------------------------------
|
||||
|
||||
// Opens an ICU converter for |codepage_name| and hands the conversion of
// |utf16| to ConvertFromUTF16(), which also closes the converter. |encoded|
// is cleared first; false is returned if the converter cannot be opened or
// the conversion fails.
bool UTF16ToCodepage(base::StringPiece16 utf16,
                     const char* codepage_name,
                     OnStringConversionError::Type on_error,
                     std::string* encoded) {
  encoded->clear();

  UErrorCode open_status = U_ZERO_ERROR;
  UConverter* const converter = ucnv_open(codepage_name, &open_status);
  return U_SUCCESS(open_status) &&
         ConvertFromUTF16(converter, utf16, on_error, encoded);
}
|
||||
|
||||
// Decodes |encoded|, interpreted in |codepage_name|, into |utf16|. |on_error|
// selects how undecodable input is treated. Returns false with |utf16| empty
// if the converter cannot be opened or the conversion reports a failure.
bool CodepageToUTF16(base::StringPiece encoded,
                     const char* codepage_name,
                     OnStringConversionError::Type on_error,
                     string16* utf16) {
  utf16->clear();

  UErrorCode status = U_ZERO_ERROR;
  UConverter* const converter = ucnv_open(codepage_name, &status);
  if (U_FAILURE(status))
    return false;

  // Even in the worst case, the output needs at most as many 2-byte UTF-16
  // units as there are input bytes: no single-byte encoding maps a character
  // to a non-BMP character requiring two 2-byte units, and non-BMP characters
  // in legacy multibyte encodings (e.g. EUC-JP, GB18030) take at least 2
  // bytes. The only exceptions are BOCU and SCSU, but we don't care about
  // them. One extra unit is reserved on top of that.
  const size_t capacity = encoded.length() + 1;

  SetUpErrorHandlerForToUChars(on_error, converter, &status);
  std::unique_ptr<char16[]> scratch(new char16[capacity]);
  const int written = ucnv_toUChars(
      converter, scratch.get(), static_cast<int>(capacity), encoded.data(),
      static_cast<int>(encoded.length()), &status);
  ucnv_close(converter);

  if (U_SUCCESS(status)) {
    utf16->assign(scratch.get(), written);
    return true;
  }

  utf16->clear();  // Make sure the output is empty on error.
  return false;
}
|
||||
|
||||
// Decodes |text| from |charset| (failing on any undecodable input), applies
// NFC normalization and stores the result in |result| as UTF-8. Returns
// false, leaving |result| empty, if decoding or normalization fails.
bool ConvertToUtf8AndNormalize(base::StringPiece text,
                               const std::string& charset,
                               std::string* result) {
  result->clear();

  string16 decoded;
  const bool decode_ok = CodepageToUTF16(
      text, charset.c_str(), OnStringConversionError::FAIL, &decoded);
  if (!decode_ok)
    return false;

  UErrorCode status = U_ZERO_ERROR;
  const icu::Normalizer2* nfc = icu::Normalizer2::getNFCInstance(status);
  DCHECK(U_SUCCESS(status));
  if (U_FAILURE(status))
    return false;

  const int32_t decoded_length = static_cast<int32_t>(decoded.length());
  icu::UnicodeString normalized(decoded.data(), decoded_length);

  // Only the part after the prefix that spanQuickCheckYes() accepts is run
  // through the normalizer.
  const int32_t clean_prefix_length =
      nfc->spanQuickCheckYes(normalized, status);
  if (clean_prefix_length < decoded_length) {
    icu::UnicodeString remainder(normalized, clean_prefix_length);
    normalized.truncate(clean_prefix_length);
    nfc->normalizeSecondAndAppend(normalized, remainder, status);
  }
  if (U_FAILURE(status))
    return false;

  normalized.toUTF8String(*result);
  return true;
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_ICU_STRING_CONVERSIONS_H_
|
||||
#define BASE_I18N_ICU_STRING_CONVERSIONS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/i18n/i18n_constants.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Defines the error handling modes of UTF16ToCodepage and CodepageToUTF16.
|
||||
// Scope holder for the conversion error-handling modes; it cannot be
// instantiated.
class OnStringConversionError {
 public:
  enum Type {
    // Conversion fails: the function returns false and the output buffer is
    // left empty.
    FAIL = 0,

    // Offending characters are dropped and conversion continues as though
    // they had never been present.
    SKIP = 1,

    // Towards Unicode, each offending byte sequence becomes the Unicode
    // replacement character (U+FFFD). From Unicode, this behaves like SKIP.
    SUBSTITUTE = 2,
  };

 private:
  OnStringConversionError() = delete;
};
|
||||
|
||||
// Converts between UTF-16 strings and the encoding specified. If the
|
||||
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
|
||||
// returns false.
|
||||
BASE_I18N_EXPORT bool UTF16ToCodepage(base::StringPiece16 utf16,
|
||||
const char* codepage_name,
|
||||
OnStringConversionError::Type on_error,
|
||||
std::string* encoded);
|
||||
BASE_I18N_EXPORT bool CodepageToUTF16(base::StringPiece encoded,
|
||||
const char* codepage_name,
|
||||
OnStringConversionError::Type on_error,
|
||||
string16* utf16);
|
||||
|
||||
// Converts from any codepage to UTF-8 and ensures the resulting UTF-8 is
|
||||
// normalized.
|
||||
BASE_I18N_EXPORT bool ConvertToUtf8AndNormalize(base::StringPiece text,
|
||||
const std::string& charset,
|
||||
std::string* result);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_ICU_STRING_CONVERSIONS_H_
|
||||
612
TMessagesProj/jni/voip/webrtc/base/i18n/icu_util.cc
Normal file
612
TMessagesProj/jni/voip/webrtc/base/i18n/icu_util.cc
Normal file
|
|
@ -0,0 +1,612 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/icu_util.h"
|
||||
|
||||
#if defined(OS_WIN)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/debug/alias.h"
|
||||
#include "base/environment.h"
|
||||
#include "base/files/file_path.h"
|
||||
#include "base/files/file_util.h"
|
||||
#include "base/files/memory_mapped_file.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/metrics/histogram_functions.h"
|
||||
#include "base/metrics/metrics_hashes.h"
|
||||
#include "base/path_service.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/sys_string_conversions.h"
|
||||
#include "build/build_config.h"
|
||||
#include "build/chromecast_buildflags.h"
|
||||
#include "third_party/icu/source/common/unicode/putil.h"
|
||||
#include "third_party/icu/source/common/unicode/udata.h"
|
||||
#include "third_party/icu/source/common/unicode/utrace.h"
|
||||
|
||||
#if defined(OS_ANDROID)
|
||||
#include "base/android/apk_assets.h"
|
||||
#include "base/android/timezone_utils.h"
|
||||
#endif
|
||||
|
||||
#if defined(OS_IOS)
|
||||
#include "base/ios/ios_util.h"
|
||||
#endif
|
||||
|
||||
#if defined(OS_MACOSX)
|
||||
#include "base/mac/foundation_util.h"
|
||||
#endif
|
||||
|
||||
#if defined(OS_FUCHSIA)
|
||||
#include "base/fuchsia/intl_profile_watcher.h"
|
||||
#endif
|
||||
|
||||
#if defined(OS_ANDROID) || defined(OS_FUCHSIA)
|
||||
#include "third_party/icu/source/common/unicode/unistr.h"
|
||||
#endif
|
||||
|
||||
#if defined(OS_ANDROID) || defined(OS_FUCHSIA) || \
|
||||
(defined(OS_LINUX) && !BUILDFLAG(IS_CHROMECAST))
|
||||
#include "third_party/icu/source/i18n/unicode/timezone.h"
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
#if !defined(OS_NACL)
|
||||
namespace {
|
||||
|
||||
#if DCHECK_IS_ON()
|
||||
// Assert that we are not called more than once. Even though calling this
|
||||
// function isn't harmful (ICU can handle it), being called twice probably
|
||||
// indicates a programming error.
|
||||
bool g_check_called_once = true;
|
||||
bool g_called_once = false;
|
||||
#endif // DCHECK_IS_ON()
|
||||
|
||||
#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
|
||||
|
||||
// To debug http://crbug.com/445616.
|
||||
int g_debug_icu_last_error;
|
||||
int g_debug_icu_load;
|
||||
int g_debug_icu_pf_error_details;
|
||||
int g_debug_icu_pf_last_error;
|
||||
#if defined(OS_WIN)
|
||||
wchar_t g_debug_icu_pf_filename[_MAX_PATH];
|
||||
#endif // OS_WIN
|
||||
// Use an unversioned file name to simplify a icu version update down the road.
|
||||
// No need to change the filename in multiple places (gyp files, windows
|
||||
// build pkg configurations, etc). 'l' stands for Little Endian.
|
||||
// This variable is exported through the header file.
|
||||
const char kIcuDataFileName[] = "icudtl.dat";
|
||||
const char kIcuExtraDataFileName[] = "icudtl_extra.dat";
|
||||
|
||||
// Time zone data loading.
|
||||
// For now, only Fuchsia has a meaningful use case for this feature, so it is
|
||||
// only implemented for OS_FUCHSIA.
|
||||
#if defined(OS_FUCHSIA)
|
||||
// The environment variable used to point the ICU data loader to the directory
|
||||
// containing time zone data. This is available from ICU version 54. The env
|
||||
// variable approach is antiquated by today's standards (2019), but is the
|
||||
// recommended way to configure ICU.
|
||||
//
|
||||
// See for details: http://userguide.icu-project.org/datetime/timezone
|
||||
const char kIcuTimeZoneEnvVariable[] = "ICU_TIMEZONE_FILES_DIR";
|
||||
|
||||
// We assume that Fuchsia will provide time zone data at this path for Chromium
|
||||
// to load, and that the path will be timely updated when Fuchsia needs to
|
||||
// uprev the ICU version it is using. There are unit tests that will fail at
|
||||
// Fuchsia roll time in case either Chromium or Fuchsia get upgraded to
|
||||
// mutually incompatible ICU versions. That should be enough to alert the
|
||||
// developers of the need to keep ICU library versions in ICU and Fuchsia in
|
||||
// reasonable sync.
|
||||
const char kIcuTimeZoneDataDir[] = "/config/data/tzdata/icu/44/le";
|
||||
#endif // defined(OS_FUCHSIA)
|
||||
|
||||
#if defined(OS_ANDROID)
|
||||
const char kAssetsPathPrefix[] = "assets/";
|
||||
#endif // defined(OS_ANDROID)
|
||||
|
||||
// File handle intentionally never closed. Not using File here because its
|
||||
// Windows implementation guards against two instances owning the same
|
||||
// PlatformFile (which we allow since we know it is never freed).
|
||||
PlatformFile g_icudtl_pf = kInvalidPlatformFile;
|
||||
MemoryMappedFile* g_icudtl_mapped_file = nullptr;
|
||||
MemoryMappedFile::Region g_icudtl_region;
|
||||
PlatformFile g_icudtl_extra_pf = kInvalidPlatformFile;
|
||||
MemoryMappedFile* g_icudtl_extra_mapped_file = nullptr;
|
||||
MemoryMappedFile::Region g_icudtl_extra_region;
|
||||
|
||||
#if defined(OS_FUCHSIA)
|
||||
// The directory from which the ICU data loader will be configured to load time
|
||||
// zone data. It is only changed by SetIcuTimeZoneDataDirForTesting().
|
||||
const char* g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
|
||||
#endif // defined(OS_FUCHSIA)
|
||||
|
||||
struct PfRegion {
|
||||
public:
|
||||
PlatformFile pf;
|
||||
MemoryMappedFile::Region region;
|
||||
};
|
||||
|
||||
std::unique_ptr<PfRegion> OpenIcuDataFile(const std::string& filename) {
|
||||
auto result = std::make_unique<PfRegion>();
|
||||
#if defined(OS_ANDROID)
|
||||
result->pf =
|
||||
android::OpenApkAsset(kAssetsPathPrefix + filename, &result->region);
|
||||
if (result->pf != -1) {
|
||||
return result;
|
||||
}
|
||||
#endif // defined(OS_ANDROID)
|
||||
// For unit tests, data file is located on disk, so try there as a fallback.
|
||||
#if !defined(OS_MACOSX)
|
||||
FilePath data_path;
|
||||
if (!PathService::Get(DIR_ASSETS, &data_path)) {
|
||||
LOG(ERROR) << "Can't find " << filename;
|
||||
return nullptr;
|
||||
}
|
||||
#if defined(OS_WIN)
|
||||
// TODO(brucedawson): http://crbug.com/445616
|
||||
wchar_t tmp_buffer[_MAX_PATH] = {0};
|
||||
wcscpy_s(tmp_buffer, data_path.value().c_str());
|
||||
debug::Alias(tmp_buffer);
|
||||
#endif
|
||||
data_path = data_path.AppendASCII(filename);
|
||||
|
||||
#if defined(OS_WIN)
|
||||
// TODO(brucedawson): http://crbug.com/445616
|
||||
wchar_t tmp_buffer2[_MAX_PATH] = {0};
|
||||
wcscpy_s(tmp_buffer2, data_path.value().c_str());
|
||||
debug::Alias(tmp_buffer2);
|
||||
#endif
|
||||
|
||||
#else // !defined(OS_MACOSX)
|
||||
// Assume it is in the framework bundle's Resources directory.
|
||||
ScopedCFTypeRef<CFStringRef> data_file_name(SysUTF8ToCFStringRef(filename));
|
||||
FilePath data_path = mac::PathForFrameworkBundleResource(data_file_name);
|
||||
#if defined(OS_IOS)
|
||||
FilePath override_data_path = ios::FilePathOfEmbeddedICU();
|
||||
if (!override_data_path.empty()) {
|
||||
data_path = override_data_path;
|
||||
}
|
||||
#endif // !defined(OS_IOS)
|
||||
if (data_path.empty()) {
|
||||
LOG(ERROR) << filename << " not found in bundle";
|
||||
return nullptr;
|
||||
}
|
||||
#endif // !defined(OS_MACOSX)
|
||||
File file(data_path, File::FLAG_OPEN | File::FLAG_READ);
|
||||
if (file.IsValid()) {
|
||||
// TODO(brucedawson): http://crbug.com/445616.
|
||||
g_debug_icu_pf_last_error = 0;
|
||||
g_debug_icu_pf_error_details = 0;
|
||||
#if defined(OS_WIN)
|
||||
g_debug_icu_pf_filename[0] = 0;
|
||||
#endif // OS_WIN
|
||||
|
||||
result->pf = file.TakePlatformFile();
|
||||
result->region = MemoryMappedFile::Region::kWholeFile;
|
||||
}
|
||||
#if defined(OS_WIN)
|
||||
else {
|
||||
// TODO(brucedawson): http://crbug.com/445616.
|
||||
g_debug_icu_pf_last_error = ::GetLastError();
|
||||
g_debug_icu_pf_error_details = file.error_details();
|
||||
wcscpy_s(g_debug_icu_pf_filename, data_path.value().c_str());
|
||||
}
|
||||
#endif // OS_WIN
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void LazyOpenIcuDataFile() {
|
||||
if (g_icudtl_pf != kInvalidPlatformFile) {
|
||||
return;
|
||||
}
|
||||
auto pf_region = OpenIcuDataFile(kIcuDataFileName);
|
||||
if (!pf_region) {
|
||||
return;
|
||||
}
|
||||
g_icudtl_pf = pf_region->pf;
|
||||
g_icudtl_region = pf_region->region;
|
||||
}
|
||||
|
||||
// Points ICU at an external time zone data directory where that applies.
// Only Fuchsia does anything here: when |g_icu_time_zone_data_dir| exists,
// its path is exported through the ICU time zone environment variable;
// otherwise a warning is logged and ICU keeps its built-in database.
void InitializeExternalTimeZoneData() {
#if defined(OS_FUCHSIA)
  if (base::DirectoryExists(base::FilePath(g_icu_time_zone_data_dir))) {
    // Set the environment variable to override the location used by ICU.
    // Loading can still fail if the directory is empty or its data is
    // invalid.
    std::unique_ptr<base::Environment> environment =
        base::Environment::Create();
    environment->SetVar(kIcuTimeZoneEnvVariable, g_icu_time_zone_data_dir);
    return;
  }

  // TODO(https://crbug.com/1061262): Make this FATAL unless expected.
  PLOG(WARNING) << "Could not open: '" << g_icu_time_zone_data_dir
                << "'. Using built-in timezone database";
#endif  // defined(OS_FUCHSIA)
}
|
||||
|
||||
// Memory-maps |data_region| of |data_fd| into |*out_mapped_data_file| and
// registers the mapping as ICU's common data.
// Returns 0 on success, 1 if |data_fd| is invalid, 2 if the file could not be
// mapped and 3 if ICU rejected the data. |*out_error_code| is written only on
// the paths that return 0 or 3. The numeric codes exist to debug
// http://crbug.com/445616.
int LoadIcuData(PlatformFile data_fd,
                const MemoryMappedFile::Region& data_region,
                std::unique_ptr<MemoryMappedFile>* out_mapped_data_file,
                UErrorCode* out_error_code) {
  InitializeExternalTimeZoneData();

  if (data_fd == kInvalidPlatformFile) {
    LOG(ERROR) << "Invalid file descriptor to ICU data received.";
    return 1;
  }

  // The mapping object is handed to the caller even if initializing it fails.
  *out_mapped_data_file = std::make_unique<MemoryMappedFile>();
  MemoryMappedFile& mapping = **out_mapped_data_file;
  if (!mapping.Initialize(File(data_fd), data_region)) {
    LOG(ERROR) << "Couldn't mmap icu data file";
    return 2;
  }

  *out_error_code = U_ZERO_ERROR;
  udata_setCommonData(const_cast<uint8_t*>(mapping.data()), out_error_code);
  if (U_FAILURE(*out_error_code)) {
    LOG(ERROR) << "Failed to initialize ICU with data file: "
               << u_errorName(*out_error_code);
    return 3;
  }

  return 0;
}
|
||||
|
||||
// Loads ICU data from |data_region| of |data_fd| unless a mapping is already
// held in |g_icudtl_mapped_file|, then restricts ICU to package data only.
// Records the LoadIcuData() result code in |g_debug_icu_load| (to debug
// http://crbug.com/445616). Returns false if the descriptor was invalid, the
// file could not be mapped, or the final ICU status is a failure.
bool InitializeICUWithFileDescriptorInternal(
    PlatformFile data_fd,
    const MemoryMappedFile::Region& data_region) {
  // This can be called multiple times in tests.
  if (g_icudtl_mapped_file) {
    g_debug_icu_load = 0;
    return true;
  }

  std::unique_ptr<MemoryMappedFile> mapping;
  // Written by LoadIcuData() on every path that is allowed to continue below.
  UErrorCode err;
  const int load_result = LoadIcuData(data_fd, data_region, &mapping, &err);
  g_debug_icu_load = load_result;
  if (load_result == 1 || load_result == 2)
    return false;

  // The mapping is intentionally kept for the rest of the process lifetime.
  g_icudtl_mapped_file = mapping.release();

  if (load_result == 3)
    g_debug_icu_last_error = err;

  // Never try to load ICU data from files.
  udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
  return U_SUCCESS(err);
}
|
||||
|
||||
bool InitializeICUFromDataFile() {
|
||||
// If the ICU data directory is set, ICU won't actually load the data until
|
||||
// it is needed. This can fail if the process is sandboxed at that time.
|
||||
// Instead, we map the file in and hand off the data so the sandbox won't
|
||||
// cause any problems.
|
||||
LazyOpenIcuDataFile();
|
||||
bool result =
|
||||
InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region);
|
||||
|
||||
#if defined(OS_WIN)
|
||||
int debug_icu_load = g_debug_icu_load;
|
||||
debug::Alias(&debug_icu_load);
|
||||
int debug_icu_last_error = g_debug_icu_last_error;
|
||||
debug::Alias(&debug_icu_last_error);
|
||||
int debug_icu_pf_last_error = g_debug_icu_pf_last_error;
|
||||
debug::Alias(&debug_icu_pf_last_error);
|
||||
int debug_icu_pf_error_details = g_debug_icu_pf_error_details;
|
||||
debug::Alias(&debug_icu_pf_error_details);
|
||||
wchar_t debug_icu_pf_filename[_MAX_PATH] = {0};
|
||||
wcscpy_s(debug_icu_pf_filename, g_debug_icu_pf_filename);
|
||||
debug::Alias(&debug_icu_pf_filename);
|
||||
CHECK(result); // TODO(brucedawson): http://crbug.com/445616
|
||||
#endif // defined(OS_WIN)
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
|
||||
|
||||
// Explicitly initializes ICU's default time zone where necessary.
// On some platforms the time zone must be set up explicitly rather than
// relying on ICU's internal initialization.
void InitializeIcuTimeZone() {
#if defined(OS_ANDROID)
  // On Android, we can't leave it up to ICU to set the default time zone
  // because ICU's time zone detection does not work in many time zones (e.g.
  // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host
  // time zone and set the ICU default time zone accordingly in advance of
  // actual use. See crbug.com/722821 and
  // https://ssl.icu-project.org/trac/ticket/13208 .
  string16 host_zone_id = android::GetDefaultTimeZoneId();
  icu::TimeZone* host_zone = icu::TimeZone::createTimeZone(
      icu::UnicodeString(FALSE, host_zone_id.data(), host_zone_id.length()));
  icu::TimeZone::adoptDefault(host_zone);
#elif defined(OS_FUCHSIA)
  // The platform-specific mechanisms used by ICU's detectHostTimeZone() to
  // determine the default time zone will not work on Fuchsia. Therefore,
  // proactively set the default system.
  // This is also required by TimeZoneMonitorFuchsia::ProfileMayHaveChanged(),
  // which uses the current default to detect whether the time zone changed in
  // the new profile.
  // If the system time zone cannot be obtained or is not understood by ICU,
  // the "unknown" time zone will be returned by createTimeZone() and used.
  std::string primary_zone_id =
      fuchsia::IntlProfileWatcher::GetPrimaryTimeZoneIdForIcuInitialization();
  icu::TimeZone* primary_zone = icu::TimeZone::createTimeZone(
      icu::UnicodeString::fromUTF8(primary_zone_id));
  icu::TimeZone::adoptDefault(primary_zone);
#elif defined(OS_LINUX) && !BUILDFLAG(IS_CHROMECAST)
  // To respond to the time zone change properly, the default time zone
  // cache in ICU has to be populated on starting up.
  // See TimeZoneMonitorLinux::NotifyClientsFromImpl().
  std::unique_ptr<icu::TimeZone> default_zone(icu::TimeZone::createDefault());
#endif  // defined(OS_ANDROID)
}
|
||||
|
||||
// Histogram name under which hashed ICU data file names are recorded.
constexpr char kICUDataFile[] = "ICU.DataFile";
// Histogram name under which ICUCreateInstance values are recorded.
constexpr char kICUCreateInstance[] = "ICU.CreateInstance";
|
||||
|
||||
// Buckets recorded in the "ICU.CreateInstance" histogram. The numeric values
// are spelled out explicitly; keep them stable when adding entries and update
// kMaxValue to name the last one.
enum class ICUCreateInstance {
  // Break iterators.
  kCharacterBreakIterator = 0,
  kWordBreakIterator = 1,
  kLineBreakIterator = 2,
  kLineBreakIteratorTypeLoose = 3,
  kLineBreakIteratorTypeNormal = 4,
  kLineBreakIteratorTypeStrict = 5,
  kSentenceBreakIterator = 6,
  kTitleBreakIterator = 7,

  // Script-specific break engines.
  kThaiBreakEngine = 8,
  kLaoBreakEngine = 9,
  kBurmeseBreakEngine = 10,
  kKhmerBreakEngine = 11,
  kChineseJapaneseBreakEngine = 12,

  kMaxValue = kChineseJapaneseBreakEngine
};
|
||||
|
||||
// Callback functions to report the opening of ICU Data File, and creation of
|
||||
// key objects to UMA. This helps us understand which built-in ICU data files
|
||||
// are rarely used on users' machines and the distribution of ICU usage.
|
||||
static void U_CALLCONV TraceICUEntry(const void*, int32_t fn_number) {
  // Translate the traced ICU function into its histogram bucket; any function
  // that is not one of the four break-iterator factories is ignored.
  ICUCreateInstance created;
  switch (fn_number) {
    case UTRACE_UBRK_CREATE_CHARACTER:
      created = ICUCreateInstance::kCharacterBreakIterator;
      break;
    case UTRACE_UBRK_CREATE_SENTENCE:
      created = ICUCreateInstance::kSentenceBreakIterator;
      break;
    case UTRACE_UBRK_CREATE_TITLE:
      created = ICUCreateInstance::kTitleBreakIterator;
      break;
    case UTRACE_UBRK_CREATE_WORD:
      created = ICUCreateInstance::kWordBreakIterator;
      break;
    default:
      return;
  }
  base::UmaHistogramEnumeration(kICUCreateInstance, created);
}
|
||||
|
||||
// ICU trace-data callback. Records to UMA:
//   * UTRACE_UDATA_DATA_FILE: a hash of the opened data file name, with any
//     "icudt<version>-" prefix removed, into the sparse kICUDataFile histogram.
//   * UTRACE_UBRK_CREATE_LINE / UTRACE_UBRK_CREATE_BREAK_ENGINE: the matching
//     ICUCreateInstance bucket into kICUCreateInstance.
// All other |fn_number| values are ignored. |context|, |level| and |fmt| are
// unused.
//
// |args| is a va_list owned by the caller. It is only read with va_arg here:
// va_end has to be paired with va_start in the function that started the list,
// so it must not be invoked from this callback.
static void U_CALLCONV TraceICUData(const void* context,
                                    int32_t fn_number,
                                    int32_t level,
                                    const char* fmt,
                                    va_list args) {
  switch (fn_number) {
    case UTRACE_UDATA_DATA_FILE: {
      const char* raw_name = va_arg(args, const char*);
      // Constructing std::string from a null pointer is undefined behavior.
      if (!raw_name)
        return;
      std::string icu_data_file_name(raw_name);
      // The path may start with an ICU version prefix such as "icudt65l-".
      // Only the part after the first '-' is histogrammed.
      constexpr char kVersionPrefix[] = "icudt";
      if (icu_data_file_name.compare(0, sizeof(kVersionPrefix) - 1,
                                     kVersionPrefix) == 0) {
        const size_t dash = icu_data_file_name.find('-');
        if (dash != std::string::npos) {
          icu_data_file_name = icu_data_file_name.substr(dash + 1);
        }
      }
      // UmaHistogramSparse should track fewer than 100 distinct values. There
      // are currently about 55 built-in ICU data files, so this usage fits.
      // The hash is stored in an int because that is what is passed on to
      // UmaHistogramSparse below.
      int hash = base::HashMetricName(icu_data_file_name);
      base::UmaHistogramSparse(kICUDataFile, hash);
      return;
    }
    case UTRACE_UBRK_CREATE_LINE: {
      const char* lb_type = va_arg(args, const char*);
      if (!lb_type)
        return;
      ICUCreateInstance value;
      // Dispatch on the first character; the DCHECKs verify the full name.
      switch (lb_type[0]) {
        case '\0':
          value = ICUCreateInstance::kLineBreakIterator;
          break;
        case 'l':
          DCHECK(strcmp(lb_type, "loose") == 0);
          value = ICUCreateInstance::kLineBreakIteratorTypeLoose;
          break;
        case 'n':
          DCHECK(strcmp(lb_type, "normal") == 0);
          value = ICUCreateInstance::kLineBreakIteratorTypeNormal;
          break;
        case 's':
          DCHECK(strcmp(lb_type, "strict") == 0);
          value = ICUCreateInstance::kLineBreakIteratorTypeStrict;
          break;
        default:
          return;
      }
      base::UmaHistogramEnumeration(kICUCreateInstance, value);
      return;
    }
    case UTRACE_UBRK_CREATE_BREAK_ENGINE: {
      const char* script = va_arg(args, const char*);
      if (!script)
        return;
      ICUCreateInstance value;
      // Dispatch on the first character of the script code; the DCHECKs verify
      // the full four-letter code.
      switch (script[0]) {
        case 'H':
          DCHECK(strcmp(script, "Hani") == 0);
          value = ICUCreateInstance::kChineseJapaneseBreakEngine;
          break;
        case 'K':
          DCHECK(strcmp(script, "Khmr") == 0);
          value = ICUCreateInstance::kKhmerBreakEngine;
          break;
        case 'L':
          DCHECK(strcmp(script, "Laoo") == 0);
          value = ICUCreateInstance::kLaoBreakEngine;
          break;
        case 'M':
          DCHECK(strcmp(script, "Mymr") == 0);
          value = ICUCreateInstance::kBurmeseBreakEngine;
          break;
        case 'T':
          DCHECK(strcmp(script, "Thai") == 0);
          value = ICUCreateInstance::kThaiBreakEngine;
          break;
        default:
          return;
      }
      base::UmaHistogramEnumeration(kICUCreateInstance, value);
      return;
    }
  }
}
|
||||
|
||||
// Common initialization to run regardless of how ICU is initialized.
|
||||
// There are multiple exposed InitializeIcu* functions. This should be called
|
||||
// at the end of (the last function in the sequence of) these functions.
|
||||
bool DoCommonInitialization() {
  // TODO(jungshik): Some callers do not care about tz at all. If necessary,
  // add a boolean argument to this function to init the default tz only
  // when requested.
  InitializeIcuTimeZone();

  // Hook ICU tracing up to the UMA reporters above. No context object and no
  // exit callback are registered.
  utrace_setFunctions(/*context=*/nullptr, TraceICUEntry, /*exit=*/nullptr,
                      TraceICUData);
  utrace_setLevel(UTRACE_VERBOSE);

  // Nothing above reports failure, so this always succeeds.
  return true;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
|
||||
// Loads the extra ICU data from |data_fd| / |data_region| and keeps the
// resulting mapping in g_icudtl_extra_mapped_file. Returns false if the main
// ICU data file handle has already been set (this must run before
// InitializeICUWithFileDescriptor()) or if LoadIcuData() reports a non-zero
// status.
bool InitializeExtraICUWithFileDescriptor(
    PlatformFile data_fd,
    const MemoryMappedFile::Region& data_region) {
  if (g_icudtl_pf != kInvalidPlatformFile) {
    // Must call InitializeExtraICUWithFileDescriptor() before
    // InitializeICUWithFileDescriptor().
    return false;
  }
  std::unique_ptr<MemoryMappedFile> mapped_file;
  // ICU error codes are in/out parameters; never hand over an indeterminate
  // value.
  UErrorCode err = U_ZERO_ERROR;
  if (LoadIcuData(data_fd, data_region, &mapped_file, &err) != 0) {
    return false;
  }
  // Ownership moves to the global; it is not freed in this function.
  g_icudtl_extra_mapped_file = mapped_file.release();
  return true;
}
|
||||
|
||||
// Initializes ICU from an already-open data file handle and then runs the
// common post-initialization steps. Returns false if either stage fails.
bool InitializeICUWithFileDescriptor(
    PlatformFile data_fd,
    const MemoryMappedFile::Region& data_region) {
#if DCHECK_IS_ON()
  // Guards against a second initialization unless tests have opted out.
  DCHECK(!g_check_called_once || !g_called_once);
  g_called_once = true;
#endif
  // Short-circuit: the common initialization only runs after a successful
  // data load.
  return InitializeICUWithFileDescriptorInternal(data_fd, data_region) &&
         DoCommonInitialization();
}
|
||||
|
||||
// Returns the main ICU data file handle and writes its region to
// |out_region|. CHECK-fails if the handle has not been set.
PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) {
  CHECK_NE(g_icudtl_pf, kInvalidPlatformFile);
  const PlatformFile data_file = g_icudtl_pf;
  *out_region = g_icudtl_region;
  return data_file;
}
|
||||
|
||||
// Returns the extra ICU data file handle and writes its region to
// |out_region|. When no extra handle has been set, returns
// kInvalidPlatformFile and leaves |out_region| untouched.
PlatformFile GetIcuExtraDataFileHandle(MemoryMappedFile::Region* out_region) {
  if (g_icudtl_extra_pf != kInvalidPlatformFile) {
    *out_region = g_icudtl_extra_region;
    return g_icudtl_extra_pf;
  }
  return kInvalidPlatformFile;
}
|
||||
|
||||
bool InitializeExtraICU() {
|
||||
if (g_icudtl_pf != kInvalidPlatformFile) {
|
||||
// Must call InitializeExtraICU() before InitializeICU().
|
||||
return false;
|
||||
}
|
||||
auto pf_region = OpenIcuDataFile(kIcuExtraDataFileName);
|
||||
if (!pf_region) {
|
||||
return false;
|
||||
}
|
||||
g_icudtl_extra_pf = pf_region->pf;
|
||||
g_icudtl_extra_region = pf_region->region;
|
||||
std::unique_ptr<MemoryMappedFile> mapped_file;
|
||||
UErrorCode err;
|
||||
if (LoadIcuData(g_icudtl_extra_pf, g_icudtl_extra_region, &mapped_file,
|
||||
&err) != 0) {
|
||||
return false;
|
||||
}
|
||||
g_icudtl_extra_mapped_file = mapped_file.release();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Puts the file-handle and mapping globals back to their "unset" values.
// The mapped-file pointers are only cleared here, not deleted.
void ResetGlobalsForTesting() {
  // Main data file.
  g_icudtl_mapped_file = nullptr;
  g_icudtl_pf = kInvalidPlatformFile;

  // Extra data file.
  g_icudtl_extra_mapped_file = nullptr;
  g_icudtl_extra_pf = kInvalidPlatformFile;

#if defined(OS_FUCHSIA)
  // Undo any SetIcuTimeZoneDataDirForTesting() override.
  g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
#endif  // defined(OS_FUCHSIA)
}
|
||||
|
||||
#if defined(OS_FUCHSIA)
|
||||
// |dir| must remain valid until ResetGlobalsForTesting() is called.
|
||||
void SetIcuTimeZoneDataDirForTesting(const char* dir) {
|
||||
g_icu_time_zone_data_dir = dir;
|
||||
}
|
||||
#endif // defined(OS_FUCHSIA)
|
||||
#endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
|
||||
|
||||
bool InitializeICU() {
|
||||
#if DCHECK_IS_ON()
|
||||
DCHECK(!g_check_called_once || !g_called_once);
|
||||
g_called_once = true;
|
||||
#endif
|
||||
|
||||
#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
|
||||
// The ICU data is statically linked.
|
||||
#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
|
||||
if (!InitializeICUFromDataFile())
|
||||
return false;
|
||||
#else
|
||||
#error Unsupported ICU_UTIL_DATA_IMPL value
|
||||
#endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
|
||||
|
||||
return DoCommonInitialization();
|
||||
}
|
||||
|
||||
// Turns off the "initialize only once" DCHECK used by the InitializeICU*
// entry points. Does nothing in builds where DCHECKs are compiled out.
void AllowMultipleInitializeCallsForTesting() {
#if DCHECK_IS_ON()
  g_check_called_once = false;
#endif
}
|
||||
|
||||
#endif // !defined(OS_NACL)
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
66
TMessagesProj/jni/voip/webrtc/base/i18n/icu_util.h
Normal file
66
TMessagesProj/jni/voip/webrtc/base/i18n/icu_util.h
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_ICU_UTIL_H_
|
||||
#define BASE_I18N_ICU_UTIL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/files/memory_mapped_file.h"
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
#define ICU_UTIL_DATA_FILE 0
|
||||
#define ICU_UTIL_DATA_STATIC 1
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
#if !defined(OS_NACL)
|
||||
// Call this function to load ICU's data tables for the current process. This
|
||||
// function should be called before ICU is used.
|
||||
BASE_I18N_EXPORT bool InitializeICU();
|
||||
|
||||
#if ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE
|
||||
// Loads ICU's extra data tables from disk for the current process. If used must
|
||||
// be called before InitializeICU().
|
||||
BASE_I18N_EXPORT bool InitializeExtraICU();
|
||||
|
||||
// Returns the PlatformFile and Region that was initialized by InitializeICU()
|
||||
// or InitializeExtraICU(). Use with InitializeICUWithFileDescriptor() or
|
||||
// InitializeExtraICUWithFileDescriptor().
|
||||
BASE_I18N_EXPORT PlatformFile GetIcuDataFileHandle(
|
||||
MemoryMappedFile::Region* out_region);
|
||||
BASE_I18N_EXPORT PlatformFile
|
||||
GetIcuExtraDataFileHandle(MemoryMappedFile::Region* out_region);
|
||||
|
||||
// Loads ICU data file from file descriptor passed by browser process to
|
||||
// initialize ICU in render processes.
|
||||
BASE_I18N_EXPORT bool InitializeICUWithFileDescriptor(
|
||||
PlatformFile data_fd,
|
||||
const MemoryMappedFile::Region& data_region);
|
||||
|
||||
// Loads ICU extra data file from file descriptor passed by browser process to
|
||||
// initialize ICU in render processes. If used must be called before
|
||||
// InitializeICUWithFileDescriptor().
|
||||
BASE_I18N_EXPORT bool InitializeExtraICUWithFileDescriptor(
|
||||
PlatformFile data_fd,
|
||||
const MemoryMappedFile::Region& data_region);
|
||||
|
||||
BASE_I18N_EXPORT void ResetGlobalsForTesting();
|
||||
|
||||
#if defined(OS_FUCHSIA)
|
||||
// Overrides the directory used by ICU for external time zone data.
|
||||
BASE_I18N_EXPORT void SetIcuTimeZoneDataDirForTesting(const char* dir);
|
||||
#endif // defined(OS_FUCHSIA)
|
||||
#endif // ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE
|
||||
|
||||
// In a test binary, initialize functions might be called twice.
|
||||
BASE_I18N_EXPORT void AllowMultipleInitializeCallsForTesting();
|
||||
#endif // !defined(OS_NACL)
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_ICU_UTIL_H_
|
||||
142
TMessagesProj/jni/voip/webrtc/base/i18n/message_formatter.cc
Normal file
142
TMessagesProj/jni/voip/webrtc/base/i18n/message_formatter.cc
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/message_formatter.h"
|
||||
|
||||
#include "base/i18n/unicodestring.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/numerics/safe_conversions.h"
|
||||
#include "base/time/time.h"
|
||||
#include "third_party/icu/source/common/unicode/unistr.h"
|
||||
#include "third_party/icu/source/common/unicode/utypes.h"
|
||||
#include "third_party/icu/source/i18n/unicode/fmtable.h"
|
||||
#include "third_party/icu/source/i18n/unicode/msgfmt.h"
|
||||
|
||||
using icu::UnicodeString;
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
namespace {
|
||||
// Converts UTF-8 |str| into an ICU UnicodeString. checked_cast guards the
// size_t -> int32_t length conversion that ICU's StringPiece requires.
UnicodeString UnicodeStringFromStringPiece(StringPiece str) {
  const int32_t utf8_length = base::checked_cast<int32_t>(str.size());
  const icu::StringPiece utf8_view(str.data(), utf8_length);
  return UnicodeString::fromUTF8(utf8_view);
}
|
||||
} // anonymous namespace
|
||||
|
||||
namespace internal {
|
||||
// An argument with no value; has_value() reports false for it.
MessageArg::MessageArg() : formattable(nullptr) {}

// UTF-8 string overloads.
MessageArg::MessageArg(const char* s)
    : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {}

MessageArg::MessageArg(StringPiece s)
    : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {}

MessageArg::MessageArg(const std::string& s)
    : formattable(new icu::Formattable(UnicodeString::fromUTF8(s))) {}

// UTF-16 string overload. The length is range-checked, like in
// UnicodeStringFromStringPiece(), instead of being silently narrowed from
// size_t to ICU's int32_t length.
MessageArg::MessageArg(const string16& s)
    : formattable(new icu::Formattable(
          UnicodeString(s.data(), base::checked_cast<int32_t>(s.size())))) {}

// Numeric overloads.
MessageArg::MessageArg(int i) : formattable(new icu::Formattable(i)) {}

MessageArg::MessageArg(int64_t i) : formattable(new icu::Formattable(i)) {}

MessageArg::MessageArg(double d) : formattable(new icu::Formattable(d)) {}

// Time overload: the value handed to ICU is |t|.ToJsTime() converted to UDate.
MessageArg::MessageArg(const Time& t)
    : formattable(new icu::Formattable(static_cast<UDate>(t.ToJsTime()))) {}

MessageArg::~MessageArg() = default;
|
||||
|
||||
// Tests if this argument has a value, and if so increments *count.
|
||||
bool MessageArg::has_value(int *count) const {
|
||||
if (formattable == nullptr)
|
||||
return false;
|
||||
|
||||
++*count;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// Formats the ICU message pattern |msg| with up to seven positional
// arguments. Returns the formatted text, or an empty string (after logging
// the ICU error name) if ICU reports a failure.
string16 MessageFormatter::FormatWithNumberedArgs(
    StringPiece16 msg,
    const internal::MessageArg& arg0,
    const internal::MessageArg& arg1,
    const internal::MessageArg& arg2,
    const internal::MessageArg& arg3,
    const internal::MessageArg& arg4,
    const internal::MessageArg& arg5,
    const internal::MessageArg& arg6) {
  // has_value() bumps |args_count| for every argument that carries a value;
  // slots for absent arguments are filled with an empty Formattable.
  int32_t args_count = 0;
  icu::Formattable args[] = {
      arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(),
      arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(),
      arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(),
      arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(),
      arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(),
      arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(),
      arg6.has_value(&args_count) ? *arg6.formattable : icu::Formattable(),
  };

  // Range-check the size_t -> int32_t length conversion, as
  // UnicodeStringFromStringPiece() does, rather than narrowing silently.
  UnicodeString msg_string(msg.data(),
                           base::checked_cast<int32_t>(msg.size()));
  // |error| is shared by construction and formatting, so a pattern that
  // fails to parse is caught by the single check below.
  UErrorCode error = U_ZERO_ERROR;
  icu::MessageFormat format(msg_string, error);
  icu::UnicodeString formatted;
  icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE);
  format.format(args, args_count, formatted, ignore, error);
  if (U_FAILURE(error)) {
    LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with "
               << u_errorName(error);
    return string16();
  }
  return i18n::UnicodeStringToString16(formatted);
}
|
||||
|
||||
// Formats the ICU message pattern |msg| with up to seven (name, value)
// pairs. Returns the formatted text, or an empty string (after logging the
// ICU error name) if ICU reports a failure.
string16 MessageFormatter::FormatWithNamedArgs(
    StringPiece16 msg,
    StringPiece name0, const internal::MessageArg& arg0,
    StringPiece name1, const internal::MessageArg& arg1,
    StringPiece name2, const internal::MessageArg& arg2,
    StringPiece name3, const internal::MessageArg& arg3,
    StringPiece name4, const internal::MessageArg& arg4,
    StringPiece name5, const internal::MessageArg& arg5,
    StringPiece name6, const internal::MessageArg& arg6) {
  // |names| and |args| are parallel arrays: names[i] labels args[i].
  icu::UnicodeString names[] = {
      UnicodeStringFromStringPiece(name0),
      UnicodeStringFromStringPiece(name1),
      UnicodeStringFromStringPiece(name2),
      UnicodeStringFromStringPiece(name3),
      UnicodeStringFromStringPiece(name4),
      UnicodeStringFromStringPiece(name5),
      UnicodeStringFromStringPiece(name6),
  };
  // has_value() bumps |args_count| for every argument that carries a value;
  // slots for absent arguments are filled with an empty Formattable.
  int32_t args_count = 0;
  icu::Formattable args[] = {
      arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(),
      arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(),
      arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(),
      arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(),
      arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(),
      arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(),
      arg6.has_value(&args_count) ? *arg6.formattable : icu::Formattable(),
  };

  // Range-check the size_t -> int32_t length conversion, as
  // UnicodeStringFromStringPiece() does, rather than narrowing silently.
  UnicodeString msg_string(msg.data(),
                           base::checked_cast<int32_t>(msg.size()));
  // |error| is shared by construction and formatting, so a pattern that
  // fails to parse is caught by the single check below.
  UErrorCode error = U_ZERO_ERROR;
  icu::MessageFormat format(msg_string, error);

  icu::UnicodeString formatted;
  format.format(names, args, args_count, formatted, error);
  if (U_FAILURE(error)) {
    LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with "
               << u_errorName(error);
    return string16();
  }
  return i18n::UnicodeStringToString16(formatted);
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
128
TMessagesProj/jni/voip/webrtc/base/i18n/message_formatter.h
Normal file
128
TMessagesProj/jni/voip/webrtc/base/i18n/message_formatter.h
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_MESSAGE_FORMATTER_H_
|
||||
#define BASE_I18N_MESSAGE_FORMATTER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/macros.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "third_party/icu/source/common/unicode/uversion.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
class Formattable;
|
||||
U_NAMESPACE_END
|
||||
|
||||
namespace base {
|
||||
|
||||
class Time;
|
||||
|
||||
namespace i18n {
|
||||
|
||||
class MessageFormatter;
|
||||
|
||||
namespace internal {
|
||||
|
||||
class BASE_I18N_EXPORT MessageArg {
|
||||
public:
|
||||
MessageArg(const char* s);
|
||||
MessageArg(StringPiece s);
|
||||
MessageArg(const std::string& s);
|
||||
MessageArg(const string16& s);
|
||||
MessageArg(int i);
|
||||
MessageArg(int64_t i);
|
||||
MessageArg(double d);
|
||||
MessageArg(const Time& t);
|
||||
~MessageArg();
|
||||
|
||||
private:
|
||||
friend class base::i18n::MessageFormatter;
|
||||
MessageArg();
|
||||
// Tests if this argument has a value, and if so increments *count.
|
||||
bool has_value(int* count) const;
|
||||
std::unique_ptr<icu::Formattable> formattable;
|
||||
DISALLOW_COPY_AND_ASSIGN(MessageArg);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// Message Formatter with the ICU message format syntax support.
|
||||
// It can format strings (UTF-8 and UTF-16), numbers and base::Time with
|
||||
// plural, gender and other 'selectors' support. This is handy if you
|
||||
// have multiple parameters of different types and some of them require
|
||||
// plural or gender/selector support.
|
||||
//
|
||||
// To use this API for locale-sensitive formatting, retrieve a 'message
|
||||
// template' in the ICU message format from a message bundle (e.g. with
|
||||
// l10n_util::GetStringUTF16()) and pass it to FormatWith{Named,Numbered}Args.
|
||||
//
|
||||
// MessageFormat specs:
|
||||
// http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html
|
||||
// http://icu-project.org/apiref/icu4c/classicu_1_1DecimalFormat.html#details
|
||||
// Examples:
|
||||
// http://userguide.icu-project.org/formatparse/messages
|
||||
// message_formatter_unittest.cc
|
||||
// go/plurals inside Google.
|
||||
// TODO(jshin): Document this API in md format docs.
|
||||
// Caveat:
|
||||
// When plural/select/gender is used along with other format specifiers such
|
||||
// as date or number, plural/select/gender should be at the top level. It's
|
||||
// not an ICU restriction but a constraint imposed by Google's translation
|
||||
// infrastructure. Message A does not work. It must be revised to Message B.
|
||||
//
|
||||
// A.
|
||||
// Rated <ph name="RATING">{0, number,0.0}<ex>3.2</ex></ph>
|
||||
// by {1, plural, =1{a user} other{# users}}
|
||||
//
|
||||
// B.
|
||||
// {1, plural,
|
||||
// =1{Rated <ph name="RATING">{0, number,0.0}<ex>3.2</ex></ph>
|
||||
// by a user.}
|
||||
// other{Rated <ph name="RATING">{0, number,0.0}<ex>3.2</ex></ph>
|
||||
// by # users.}}
|
||||
|
||||
class BASE_I18N_EXPORT MessageFormatter {
|
||||
public:
|
||||
static string16 FormatWithNamedArgs(
|
||||
StringPiece16 msg,
|
||||
StringPiece name0 = StringPiece(),
|
||||
const internal::MessageArg& arg0 = internal::MessageArg(),
|
||||
StringPiece name1 = StringPiece(),
|
||||
const internal::MessageArg& arg1 = internal::MessageArg(),
|
||||
StringPiece name2 = StringPiece(),
|
||||
const internal::MessageArg& arg2 = internal::MessageArg(),
|
||||
StringPiece name3 = StringPiece(),
|
||||
const internal::MessageArg& arg3 = internal::MessageArg(),
|
||||
StringPiece name4 = StringPiece(),
|
||||
const internal::MessageArg& arg4 = internal::MessageArg(),
|
||||
StringPiece name5 = StringPiece(),
|
||||
const internal::MessageArg& arg5 = internal::MessageArg(),
|
||||
StringPiece name6 = StringPiece(),
|
||||
const internal::MessageArg& arg6 = internal::MessageArg());
|
||||
|
||||
static string16 FormatWithNumberedArgs(
|
||||
StringPiece16 msg,
|
||||
const internal::MessageArg& arg0 = internal::MessageArg(),
|
||||
const internal::MessageArg& arg1 = internal::MessageArg(),
|
||||
const internal::MessageArg& arg2 = internal::MessageArg(),
|
||||
const internal::MessageArg& arg3 = internal::MessageArg(),
|
||||
const internal::MessageArg& arg4 = internal::MessageArg(),
|
||||
const internal::MessageArg& arg5 = internal::MessageArg(),
|
||||
const internal::MessageArg& arg6 = internal::MessageArg());
|
||||
|
||||
private:
|
||||
MessageFormatter() = delete;
|
||||
DISALLOW_COPY_AND_ASSIGN(MessageFormatter);
|
||||
};
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_MESSAGE_FORMATTER_H_
|
||||
93
TMessagesProj/jni/voip/webrtc/base/i18n/number_formatting.cc
Normal file
93
TMessagesProj/jni/voip/webrtc/base/i18n/number_formatting.cc
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/number_formatting.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "base/format_macros.h"
|
||||
#include "base/i18n/message_formatter.h"
|
||||
#include "base/i18n/unicodestring.h"
|
||||
#include "base/lazy_instance.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/stringprintf.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "third_party/icu/source/common/unicode/ustring.h"
|
||||
#include "third_party/icu/source/i18n/unicode/numfmt.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
// A simple wrapper around icu::NumberFormat that allows for resetting it
|
||||
// (as LazyInstance does not).
|
||||
struct NumberFormatWrapper {
|
||||
NumberFormatWrapper() {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
// There's no ICU call to destroy a NumberFormat object other than
|
||||
// operator delete, so use the default Delete, which calls operator delete.
|
||||
// This can cause problems if a different allocator is used by this file
|
||||
// than by ICU.
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
number_format.reset(icu::NumberFormat::createInstance(status));
|
||||
DCHECK(U_SUCCESS(status));
|
||||
}
|
||||
|
||||
std::unique_ptr<icu::NumberFormat> number_format;
|
||||
};
|
||||
|
||||
// Lazily created formatter wrapper used by FormatNumber().
LazyInstance<NumberFormatWrapper>::DestructorAtExit g_number_format_int =
|
||||
LAZY_INSTANCE_INITIALIZER;
|
||||
// Lazily created formatter wrapper used by FormatDouble(); kept separate from
// the integer one because FormatDouble() changes its fraction-digit settings.
LazyInstance<NumberFormatWrapper>::DestructorAtExit g_number_format_float =
|
||||
LAZY_INSTANCE_INITIALIZER;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Formats |number| with the cached ICU integer formatter. If no formatter
// is available, the plain decimal digits are returned instead.
string16 FormatNumber(int64_t number) {
  icu::NumberFormat* const formatter =
      g_number_format_int.Get().number_format.get();
  if (formatter) {
    icu::UnicodeString formatted;
    formatter->format(number, formatted);
    return i18n::UnicodeStringToString16(formatted);
  }

  // As a fallback, just return the raw number in a string.
  return ASCIIToUTF16(StringPrintf("%" PRId64, number));
}
|
||||
|
||||
// Formats |number| with exactly |fractional_digits| digits after the decimal
// separator using the cached ICU formatter.
//
// If no ICU formatter is available, falls back to printf-style output. The
// fallback honors |fractional_digits| too, so both paths emit the same number
// of fraction digits. A negative |fractional_digits| makes printf use its
// default precision.
string16 FormatDouble(double number, int fractional_digits) {
  icu::NumberFormat* number_format =
      g_number_format_float.Get().number_format.get();

  if (!number_format) {
    // As a fallback, just return the raw number in a string.
    return ASCIIToUTF16(StringPrintf("%.*f", fractional_digits, number));
  }
  // Pinning min and max to the same value forces a fixed digit count. This
  // mutates the shared cached formatter.
  number_format->setMaximumFractionDigits(fractional_digits);
  number_format->setMinimumFractionDigits(fractional_digits);
  icu::UnicodeString ustr;
  number_format->format(number, ustr);

  return i18n::UnicodeStringToString16(ustr);
}
|
||||
|
||||
// Formats |number| as a percentage by passing number / 100 to ICU's
// "{0,number,percent}" message pattern.
string16 FormatPercent(int number) {
  const string16 percent_pattern = ASCIIToUTF16("{0,number,percent}");
  const double fraction = static_cast<double>(number) / 100.0;
  return i18n::MessageFormatter::FormatWithNumberedArgs(percent_pattern,
                                                        fraction);
}
|
||||
|
||||
void ResetFormattersForTesting() {
|
||||
g_number_format_int.Get().Reset();
|
||||
g_number_format_float.Get().Reset();
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
34
TMessagesProj/jni/voip/webrtc/base/i18n/number_formatting.h
Normal file
34
TMessagesProj/jni/voip/webrtc/base/i18n/number_formatting.h
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_NUMBER_FORMATTING_H_
|
||||
#define BASE_I18N_NUMBER_FORMATTING_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Return a number formatted with separators in the user's locale.
|
||||
// Ex: FormatNumber(1234567) => "1,234,567" in English, "1.234.567" in German
|
||||
BASE_I18N_EXPORT string16 FormatNumber(int64_t number);
|
||||
|
||||
// Return a number formatted with separators in the user's locale.
|
||||
// Ex: FormatDouble(1234567.8, 1)
|
||||
// => "1,234,567.8" in English, "1.234.567,8" in German
|
||||
BASE_I18N_EXPORT string16 FormatDouble(double number, int fractional_digits);
|
||||
|
||||
// Return a percentage formatted with space and symbol in the user's locale.
|
||||
// Ex: FormatPercent(12) => "12%" in English, "12 %" in Romanian
|
||||
BASE_I18N_EXPORT string16 FormatPercent(int number);
|
||||
|
||||
// Causes cached formatters to be discarded and recreated. Only useful for
|
||||
// testing.
|
||||
BASE_I18N_EXPORT void ResetFormattersForTesting();
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_NUMBER_FORMATTING_H_
|
||||
497
TMessagesProj/jni/voip/webrtc/base/i18n/rtl.cc
Normal file
497
TMessagesProj/jni/voip/webrtc/base/i18n/rtl.cc
Normal file
|
|
@ -0,0 +1,497 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/rtl.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "base/command_line.h"
|
||||
#include "base/files/file_path.h"
|
||||
#include "base/i18n/base_i18n_switches.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/stl_util.h"
|
||||
#include "base/strings/string_split.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/sys_string_conversions.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "build/build_config.h"
|
||||
#include "third_party/icu/source/common/unicode/locid.h"
|
||||
#include "third_party/icu/source/common/unicode/uchar.h"
|
||||
#include "third_party/icu/source/common/unicode/uscript.h"
|
||||
#include "third_party/icu/source/i18n/unicode/coll.h"
|
||||
|
||||
#if defined(OS_IOS)
|
||||
#include "base/debug/crash_logging.h"
|
||||
#include "base/ios/ios_util.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Extract language, country and variant, but ignore keywords. For example,
|
||||
// en-US, ca@valencia, ca-ES@valencia.
|
||||
std::string GetLocaleString(const icu::Locale& locale) {
  // A component counts as present when it is neither null nor empty.
  const auto is_present = [](const char* text) {
    return text != nullptr && *text != '\0';
  };

  // A missing language is reported as "und".
  const char* const language = locale.getLanguage();
  std::string locale_string(is_present(language) ? language : "und");

  const char* const country = locale.getCountry();
  if (is_present(country)) {
    locale_string.push_back('-');
    locale_string.append(country);
  }

  // The variant is appended lower-cased after an '@'.
  const char* const variant = locale.getVariant();
  if (is_present(variant)) {
    locale_string.push_back('@');
    locale_string.append(base::ToLowerASCII(variant));
  }

  return locale_string;
}
|
||||
|
||||
// Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
|
||||
// directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
|
||||
// http://unicode.org/reports/tr9/ for more information.
|
||||
base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
|
||||
static bool has_switch = base::CommandLine::ForCurrentProcess()->HasSwitch(
|
||||
switches::kForceTextDirection);
|
||||
if (has_switch) {
|
||||
base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
|
||||
std::string force_flag =
|
||||
command_line->GetSwitchValueASCII(switches::kForceTextDirection);
|
||||
|
||||
if (force_flag == switches::kForceDirectionRTL)
|
||||
return base::i18n::RIGHT_TO_LEFT;
|
||||
if (force_flag == switches::kForceDirectionLTR)
|
||||
return base::i18n::LEFT_TO_RIGHT;
|
||||
}
|
||||
// Now that we have the character, we use ICU in order to query for the
|
||||
// appropriate Unicode BiDi character type.
|
||||
int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
|
||||
switch (property) {
|
||||
case U_RIGHT_TO_LEFT:
|
||||
case U_RIGHT_TO_LEFT_ARABIC:
|
||||
case U_RIGHT_TO_LEFT_EMBEDDING:
|
||||
case U_RIGHT_TO_LEFT_OVERRIDE:
|
||||
return base::i18n::RIGHT_TO_LEFT;
|
||||
case U_LEFT_TO_RIGHT:
|
||||
case U_LEFT_TO_RIGHT_EMBEDDING:
|
||||
case U_LEFT_TO_RIGHT_OVERRIDE:
|
||||
return base::i18n::LEFT_TO_RIGHT;
|
||||
}
|
||||
return base::i18n::UNKNOWN_DIRECTION;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Cached text direction of the ICU default locale. UNKNOWN_DIRECTION means
// "not computed yet": SetICUDefaultLocale() resets it to that value and
// ICUIsRTL() fills it in on the next query.
|
||||
static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
|
||||
|
||||
// Convert the ICU default locale to a string.
|
||||
std::string GetConfiguredLocale() {
|
||||
return GetLocaleString(icu::Locale::getDefault());
|
||||
}
|
||||
|
||||
// Convert the ICU canonicalized locale to a string.
|
||||
std::string GetCanonicalLocale(const std::string& locale) {
|
||||
return GetLocaleString(icu::Locale::createCanonical(locale.c_str()));
|
||||
}
|
||||
|
||||
// Convert Chrome locale name to ICU locale name
|
||||
std::string ICULocaleName(const std::string& locale_string) {
|
||||
// If not Spanish, just return it.
|
||||
if (locale_string.substr(0, 2) != "es")
|
||||
return locale_string;
|
||||
// Expand es to es-ES.
|
||||
if (LowerCaseEqualsASCII(locale_string, "es"))
|
||||
return "es-ES";
|
||||
// Map es-419 (Latin American Spanish) to es-FOO depending on the system
|
||||
// locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
|
||||
// to es-MX (the most populous in Spanish-speaking Latin America).
|
||||
if (LowerCaseEqualsASCII(locale_string, "es-419")) {
|
||||
const icu::Locale& locale = icu::Locale::getDefault();
|
||||
std::string language = locale.getLanguage();
|
||||
const char* country = locale.getCountry();
|
||||
if (LowerCaseEqualsASCII(language, "es") &&
|
||||
!LowerCaseEqualsASCII(country, "es")) {
|
||||
language += '-';
|
||||
language += country;
|
||||
return language;
|
||||
}
|
||||
return "es-MX";
|
||||
}
|
||||
// Currently, Chrome has only "es" and "es-419", but later we may have
|
||||
// more specific "es-RR".
|
||||
return locale_string;
|
||||
}
|
||||
|
||||
void SetICUDefaultLocale(const std::string& locale_string) {
|
||||
#if defined(OS_IOS)
|
||||
static base::debug::CrashKeyString* crash_key_locale =
|
||||
base::debug::AllocateCrashKeyString("icu_locale_input",
|
||||
base::debug::CrashKeySize::Size256);
|
||||
base::debug::SetCrashKeyString(crash_key_locale, locale_string);
|
||||
#endif
|
||||
icu::Locale locale(ICULocaleName(locale_string).c_str());
|
||||
UErrorCode error_code = U_ZERO_ERROR;
|
||||
const char* lang = locale.getLanguage();
|
||||
if (lang != nullptr && *lang != '\0') {
|
||||
icu::Locale::setDefault(locale, error_code);
|
||||
} else {
|
||||
LOG(ERROR) << "Failed to set the ICU default locale to " << locale_string
|
||||
<< ". Falling back to en-US.";
|
||||
icu::Locale::setDefault(icu::Locale::getUS(), error_code);
|
||||
}
|
||||
g_icu_text_direction = UNKNOWN_DIRECTION;
|
||||
}
|
||||
|
||||
bool IsRTL() {
|
||||
return ICUIsRTL();
|
||||
}
|
||||
|
||||
// Test helper: switches the ICU default locale to "he" when |rtl| is true and
// to "en" otherwise, then checks that IsRTL() agrees with |rtl|.
void SetRTLForTesting(bool rtl) {
  const char* const test_locale = rtl ? "he" : "en";
  SetICUDefaultLocale(test_locale);
  DCHECK_EQ(rtl, IsRTL());
}
|
||||
|
||||
bool ICUIsRTL() {
|
||||
if (g_icu_text_direction == UNKNOWN_DIRECTION) {
|
||||
const icu::Locale& locale = icu::Locale::getDefault();
|
||||
g_icu_text_direction = GetTextDirectionForLocaleInStartUp(locale.getName());
|
||||
}
|
||||
return g_icu_text_direction == RIGHT_TO_LEFT;
|
||||
}
|
||||
|
||||
// Returns the UI direction that has been explicitly forced, or
// UNKNOWN_DIRECTION when nothing is forced. On iOS the forced-RTL mode is
// consulted first; after that the kForceUIDirection switch is checked for the
// values kForceDirectionLTR / kForceDirectionRTL.
TextDirection GetForcedTextDirection() {
#if defined(OS_IOS)
  // On iOS, check for RTL forcing.
  if (base::ios::IsInForcedRTL())
    return base::i18n::RIGHT_TO_LEFT;
#endif

  base::CommandLine* const command_line =
      base::CommandLine::ForCurrentProcess();
  if (!command_line->HasSwitch(switches::kForceUIDirection))
    return base::i18n::UNKNOWN_DIRECTION;

  const std::string requested =
      command_line->GetSwitchValueASCII(switches::kForceUIDirection);
  if (requested == switches::kForceDirectionLTR)
    return base::i18n::LEFT_TO_RIGHT;
  if (requested == switches::kForceDirectionRTL)
    return base::i18n::RIGHT_TO_LEFT;

  // The switch is present but its value is not one of the two known ones.
  return base::i18n::UNKNOWN_DIRECTION;
}
|
||||
|
||||
// Returns the text direction for |locale_name| without consulting ICU data:
// a forced direction (see GetForcedTextDirection()) wins; otherwise the
// language code, i.e. the part of |locale_name| before the first '-' or '_',
// is looked up in a fixed list of RTL languages. Anything not in that list is
// treated as LEFT_TO_RIGHT.
TextDirection GetTextDirectionForLocaleInStartUp(const char* locale_name) {
  // Check for direction forcing.
  const TextDirection forced_direction = GetForcedTextDirection();
  if (forced_direction != UNKNOWN_DIRECTION)
    return forced_direction;

  // This list needs to be updated in alphabetical order if we add more RTL
  // locales. (It is searched with std::binary_search below.)
  static const char kRTLLanguageCodes[][3] = {"ar", "fa", "he", "iw", "ur"};
  std::vector<StringPiece> locale_split =
      SplitStringPiece(locale_name, "-_", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  // If the split yields no pieces (expected for an empty |locale_name|) there
  // is no language code to inspect. Indexing element 0 would be out of
  // bounds, so treat it as a non-RTL locale.
  if (locale_split.empty())
    return LEFT_TO_RIGHT;

  const StringPiece& language_code = locale_split[0];
  if (std::binary_search(kRTLLanguageCodes,
                         kRTLLanguageCodes + base::size(kRTLLanguageCodes),
                         language_code))
    return RIGHT_TO_LEFT;
  return LEFT_TO_RIGHT;
}
|
||||
|
||||
// Returns the text direction for |locale_name|. A forced direction (see
// GetForcedTextDirection()) takes precedence; otherwise ICU's character
// orientation for the locale decides, and every orientation other than RTL is
// reported as LEFT_TO_RIGHT.
TextDirection GetTextDirectionForLocale(const char* locale_name) {
  const TextDirection forced = GetForcedTextDirection();
  if (forced != UNKNOWN_DIRECTION)
    return forced;

  UErrorCode status = U_ZERO_ERROR;
  const ULayoutType orientation =
      uloc_getCharacterOrientation(locale_name, &status);
  DCHECK(U_SUCCESS(status));

  if (orientation == ULOC_LAYOUT_RTL)
    return RIGHT_TO_LEFT;
  return LEFT_TO_RIGHT;
}
|
||||
|
||||
// Scans |text| front to back and returns the direction of the first code
// point for which GetCharacterDirection() reports a strong direction.
// Returns LEFT_TO_RIGHT when no such code point exists.
TextDirection GetFirstStrongCharacterDirection(const string16& text) {
  const UChar* const units = text.c_str();
  const size_t unit_count = text.length();

  for (size_t offset = 0; offset < unit_count;) {
    UChar32 code_point;
    // U16_NEXT decodes one code point and advances |offset| past it.
    U16_NEXT(units, offset, unit_count, code_point);

    const TextDirection found = GetCharacterDirection(code_point);
    if (found != UNKNOWN_DIRECTION)
      return found;
  }
  return LEFT_TO_RIGHT;
}
|
||||
|
||||
// Scans |text| back to front and returns the direction of the last code point
// for which GetCharacterDirection() reports a strong direction. Returns
// LEFT_TO_RIGHT when no such code point exists.
TextDirection GetLastStrongCharacterDirection(const string16& text) {
  const UChar* const units = text.c_str();

  for (size_t offset = text.length(); offset > 0;) {
    UChar32 code_point;
    // U16_PREV steps |offset| back over one code point and decodes it.
    U16_PREV(units, 0, offset, code_point);

    const TextDirection found = GetCharacterDirection(code_point);
    if (found != UNKNOWN_DIRECTION)
      return found;
  }
  return LEFT_TO_RIGHT;
}
|
||||
|
||||
// Returns the common direction of all strongly directional code points in
// |text|:
//   * LEFT_TO_RIGHT or RIGHT_TO_LEFT when they all agree,
//   * UNKNOWN_DIRECTION as soon as two of them disagree,
//   * LEFT_TO_RIGHT when |text| has no strongly directional code point.
TextDirection GetStringDirection(const string16& text) {
  const UChar* const units = text.c_str();
  const size_t unit_count = text.length();

  TextDirection seen = UNKNOWN_DIRECTION;
  for (size_t offset = 0; offset < unit_count;) {
    UChar32 code_point;
    // U16_NEXT decodes one code point and advances |offset| past it.
    U16_NEXT(units, offset, unit_count, code_point);

    const TextDirection current = GetCharacterDirection(code_point);
    if (current == UNKNOWN_DIRECTION)
      continue;

    if (seen == UNKNOWN_DIRECTION) {
      seen = current;
    } else if (seen != current) {
      // Mixed LTR and RTL content.
      return UNKNOWN_DIRECTION;
    }
  }

  // A string without any strong directionality defaults to LEFT_TO_RIGHT.
  return seen == UNKNOWN_DIRECTION ? LEFT_TO_RIGHT : seen;
}
|
||||
|
||||
#if defined(OS_WIN)
|
||||
// Windows variant. Does nothing and returns false unless the UI is RTL and
// |text| is non-empty. Otherwise wraps |text| in RTL formatting when it
// contains strong RTL characters, or in LTR formatting when it does not, and
// returns true.
bool AdjustStringForLocaleDirection(string16* text) {
  if (!IsRTL())
    return false;
  if (text->empty())
    return false;

  if (StringContainsStrongRTLChars(*text)) {
    WrapStringWithRTLFormatting(text);
  } else {
    // No strong RTL characters: mark the string as LTR inside the RTL UI.
    WrapStringWithLTRFormatting(text);
  }
  return true;
}
|
||||
|
||||
// Windows variant. Undoes AdjustStringForLocaleDirection(): when the UI is RTL
// and |text| is non-empty, strips the wrapping bidi control characters from
// |text| and returns true; otherwise leaves |text| alone and returns false.
bool UnadjustStringForLocaleDirection(string16* text) {
  if (!IsRTL())
    return false;
  if (text->empty())
    return false;

  const string16 stripped = StripWrappingBidiControlCharacters(*text);
  *text = stripped;
  return true;
}
|
||||
#else
|
||||
// Non-Windows variant.
//
// On OS X & GTK a label takes its directionality from its first strongly
// directional character, but all strings should follow the UI direction
// instead. A string that begins with user input of the opposite direction
// would otherwise flip as a whole. For example, in an LTR UI
//
//   EMAN_NOISNETXE is now installed.
//
// would be laid out as
//
//   .is now installed EMAN_NOISNETXE
//
// because the leading name is strongly RTL. To prevent this, the string is
// wrapped in an embedding that matches its content and then surrounded by
// direction marks that match the UI (LRM for an LTR UI, RLM for an RTL UI).
//
// Unlike Windows, Linux and OS X display RTL glyphs out of the box, so
// zero-width bidi control characters are safe to insert on any system and no
// !IsRTL() early-out is needed.
//
// Returns false, leaving |text| untouched, when |text| is empty or when the
// UI is LTR and |text| has no strong RTL characters; returns true otherwise.
bool AdjustStringForLocaleDirection(string16* text) {
  if (text->empty())
    return false;

  const bool ui_is_rtl = IsRTL();
  const bool text_has_rtl = StringContainsStrongRTLChars(*text);

  // An LTR string in an LTR UI needs no help.
  if (!ui_is_rtl && !text_has_rtl)
    return false;

  // The embedding follows the content of the string...
  if (text_has_rtl)
    WrapStringWithRTLFormatting(text);
  else
    WrapStringWithLTRFormatting(text);

  // ...while the surrounding marks follow the UI direction.
  const char16 ui_mark = ui_is_rtl ? kRightToLeftMark : kLeftToRightMark;
  text->insert(text->begin(), ui_mark);
  text->push_back(ui_mark);

  return true;
}
|
||||
|
||||
// Non-Windows variant. Undoes AdjustStringForLocaleDirection(): removes one
// leading and one trailing LRM/RLM mark if present, then strips the wrapping
// bidi control characters from what remains. Returns false only for an empty
// |text|.
bool UnadjustStringForLocaleDirection(string16* text) {
  if (text->empty())
    return false;

  const auto is_direction_mark = [](char16 unit) {
    return unit == kLeftToRightMark || unit == kRightToLeftMark;
  };

  // Half-open range [first, past_last) of the code units to keep.
  const size_t first = is_direction_mark(text->front()) ? 1 : 0;
  size_t past_last = text->length();
  if (is_direction_mark(text->back()))
    --past_last;
  // A lone mark counts as both the leading and the trailing mark; nothing is
  // left in that case.
  if (past_last < first)
    past_last = first;

  const string16 unmarked_text = text->substr(first, past_last - first);
  *text = StripWrappingBidiControlCharacters(unmarked_text);
  return true;
}
|
||||
|
||||
#endif // !OS_WIN
|
||||
|
||||
// Appends to |text| one pop-directional-formatting character for every
// embedding/override character (LRE, RLE, LRO, RLO) in |text| that is not
// already closed by a later PDF. PDF characters with nothing open to close
// are ignored.
void EnsureTerminatedDirectionalFormatting(string16* text) {
  int open_count = 0;
  for (const auto unit : *text) {
    switch (unit) {
      case kLeftToRightEmbeddingMark:
      case kRightToLeftEmbeddingMark:
      case kLeftToRightOverride:
      case kRightToLeftOverride:
        ++open_count;
        break;
      case kPopDirectionalFormatting:
        if (open_count > 0)
          --open_count;
        break;
      default:
        break;
    }
  }
  // Close whatever is still open.
  text->append(static_cast<size_t>(open_count), kPopDirectionalFormatting);
}
|
||||
|
||||
// Prepares user-supplied |text| for display in two steps, in this order:
// first terminates any unclosed directional formatting, then adjusts the
// string for the locale direction. The result of the adjustment call is
// ignored.
void SanitizeUserSuppliedString(string16* text) {
  // Step 1: close any dangling embeddings or overrides.
  EnsureTerminatedDirectionalFormatting(text);
  // Step 2: add direction marks appropriate for the UI.
  AdjustStringForLocaleDirection(text);
}
|
||||
|
||||
bool StringContainsStrongRTLChars(const string16& text) {
|
||||
const UChar* string = text.c_str();
|
||||
size_t length = text.length();
|
||||
size_t position = 0;
|
||||
while (position < length) {
|
||||
UChar32 character;
|
||||
size_t next_position = position;
|
||||
U16_NEXT(string, next_position, length, character);
|
||||
|
||||
// Now that we have the character, we use ICU in order to query for the
|
||||
// appropriate Unicode BiDi character type.
|
||||
int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
|
||||
if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
|
||||
return true;
|
||||
|
||||
position = next_position;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Surrounds a non-empty |text| with an LRE (Left-To-Right Embedding) ... PDF
// (Pop Directional Formatting) pair. An empty |text| is left unchanged.
void WrapStringWithLTRFormatting(string16* text) {
  if (!text->empty()) {
    // LRE goes in front of the first character...
    text->insert(text->begin(), kLeftToRightEmbeddingMark);
    // ...and PDF after the last one.
    text->push_back(kPopDirectionalFormatting);
  }
}
|
||||
|
||||
// Surrounds a non-empty |text| with an RLE (Right-To-Left Embedding) ... PDF
// (Pop Directional Formatting) pair. An empty |text| is left unchanged.
void WrapStringWithRTLFormatting(string16* text) {
  if (!text->empty()) {
    // RLE goes in front of the first character...
    text->insert(text->begin(), kRightToLeftEmbeddingMark);
    // ...and PDF after the last one.
    text->push_back(kPopDirectionalFormatting);
  }
}
|
||||
|
||||
void WrapPathWithLTRFormatting(const FilePath& path,
|
||||
string16* rtl_safe_path) {
|
||||
// Wrap the overall path with LRE-PDF pair which essentialy marks the
|
||||
// string as a Left-To-Right string.
|
||||
// Inserting an LRE (Left-To-Right Embedding) mark as the first character.
|
||||
rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
|
||||
#if defined(OS_MACOSX)
|
||||
rtl_safe_path->append(UTF8ToUTF16(path.value()));
|
||||
#elif defined(OS_WIN)
|
||||
rtl_safe_path->append(AsString16(path.value()));
|
||||
#else // defined(OS_POSIX) && !defined(OS_MACOSX)
|
||||
std::wstring wide_path = base::SysNativeMBToWide(path.value());
|
||||
rtl_safe_path->append(WideToUTF16(wide_path));
|
||||
#endif
|
||||
// Inserting a PDF (Pop Directional Formatting) mark as the last character.
|
||||
rtl_safe_path->push_back(kPopDirectionalFormatting);
|
||||
}
|
||||
|
||||
// Returns |text| wrapped in LTR formatting when that is needed for display,
// and an unmodified copy otherwise. Wrapping happens
//   * always in an RTL UI (the text may be appended to an RTL string), and
//   * in an LTR UI when the first strong character of |text| is RTL.
string16 GetDisplayStringInLTRDirectionality(const string16& text) {
  if (!IsRTL() && GetFirstStrongCharacterDirection(text) != RIGHT_TO_LEFT)
    return text;

  string16 wrapped(text);
  WrapStringWithLTRFormatting(&wrapped);
  return wrapped;
}
|
||||
|
||||
// Returns |text| without a leading LRE/RLE/LRO/RLO character and without a
// trailing PDF character. Each end is handled independently, so an unpaired
// control character is removed too. An empty |text| is returned as is.
string16 StripWrappingBidiControlCharacters(const string16& text) {
  if (text.empty())
    return text;

  // Half-open range [first, past_last) of the code units to keep.
  size_t first = 0;
  switch (text.front()) {
    case kLeftToRightEmbeddingMark:
    case kRightToLeftEmbeddingMark:
    case kLeftToRightOverride:
    case kRightToLeftOverride:
      first = 1;
      break;
    default:
      break;
  }

  size_t past_last = text.length();
  if (text.back() == kPopDirectionalFormatting)
    --past_last;

  return text.substr(first, past_last - first);
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
171
TMessagesProj/jni/voip/webrtc/base/i18n/rtl.h
Normal file
171
TMessagesProj/jni/voip/webrtc/base/i18n/rtl.h
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_RTL_H_
|
||||
#define BASE_I18N_RTL_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/compiler_specific.h"
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
class FilePath;
|
||||
|
||||
namespace i18n {
|
||||
|
||||
const char16 kRightToLeftMark = 0x200F;
|
||||
const char16 kLeftToRightMark = 0x200E;
|
||||
const char16 kLeftToRightEmbeddingMark = 0x202A;
|
||||
const char16 kRightToLeftEmbeddingMark = 0x202B;
|
||||
const char16 kPopDirectionalFormatting = 0x202C;
|
||||
const char16 kLeftToRightOverride = 0x202D;
|
||||
const char16 kRightToLeftOverride = 0x202E;
|
||||
|
||||
// Locale.java mirrored this enum TextDirection. Please keep in sync.
|
||||
enum TextDirection {
|
||||
UNKNOWN_DIRECTION = 0,
|
||||
RIGHT_TO_LEFT = 1,
|
||||
LEFT_TO_RIGHT = 2,
|
||||
TEXT_DIRECTION_MAX = LEFT_TO_RIGHT,
|
||||
};
|
||||
|
||||
// Get the locale that the currently running process has been configured to use.
|
||||
// The return value is of the form language[-country] (e.g., en-US) where the
|
||||
// language is the 2 or 3 letter code from ISO-639.
|
||||
BASE_I18N_EXPORT std::string GetConfiguredLocale();
|
||||
|
||||
// Canonicalize a string (eg. a POSIX locale string) to a Chrome locale name.
|
||||
BASE_I18N_EXPORT std::string GetCanonicalLocale(const std::string& locale);
|
||||
|
||||
// Sets the default locale of ICU.
|
||||
// Once the application locale of Chrome in GetApplicationLocale is determined,
|
||||
// the default locale of ICU needs to be changed to match the application locale
|
||||
// so that ICU functions work correctly in a locale-dependent manner.
|
||||
// This is handy in that we don't have to call GetApplicationLocale()
|
||||
// every time we call locale-dependent ICU APIs as long as we make sure
|
||||
// that this is called before any locale-dependent API is called.
|
||||
BASE_I18N_EXPORT void SetICUDefaultLocale(const std::string& locale_string);
|
||||
|
||||
// Returns true if the application text direction is right-to-left.
|
||||
BASE_I18N_EXPORT bool IsRTL();
|
||||
|
||||
// A test utility function to set the application default text direction.
|
||||
BASE_I18N_EXPORT void SetRTLForTesting(bool rtl);
|
||||
|
||||
// Returns whether the text direction for the default ICU locale is RTL. This
|
||||
// assumes that SetICUDefaultLocale has been called to set the default locale to
|
||||
// the UI locale of Chrome.
|
||||
// NOTE: Generally, you should call IsRTL() instead of this.
|
||||
BASE_I18N_EXPORT bool ICUIsRTL();
|
||||
|
||||
// Gets the explicitly forced text direction for debugging. If no forcing is
|
||||
// applied, returns UNKNOWN_DIRECTION.
|
||||
BASE_I18N_EXPORT TextDirection GetForcedTextDirection();
|
||||
|
||||
// Returns the text direction for |locale_name|.
|
||||
// As a startup optimization, this method checks the locale against a list of
|
||||
// Chrome-supported RTL locales.
|
||||
BASE_I18N_EXPORT TextDirection
|
||||
GetTextDirectionForLocaleInStartUp(const char* locale_name);
|
||||
|
||||
// Returns the text direction for |locale_name|.
|
||||
BASE_I18N_EXPORT TextDirection GetTextDirectionForLocale(
|
||||
const char* locale_name);
|
||||
|
||||
// Given the string in |text|, returns the directionality of the first or last
|
||||
// character with strong directionality in the string. If no character in the
|
||||
// text has strong directionality, LEFT_TO_RIGHT is returned. The Bidi
|
||||
// character types L, LRE, LRO, R, AL, RLE, and RLO are considered as strong
|
||||
// directionality characters. Please refer to http://unicode.org/reports/tr9/
|
||||
// for more information.
|
||||
BASE_I18N_EXPORT TextDirection GetFirstStrongCharacterDirection(
|
||||
const string16& text);
|
||||
BASE_I18N_EXPORT TextDirection GetLastStrongCharacterDirection(
|
||||
const string16& text);
|
||||
|
||||
// Given the string in |text|, returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if all the
|
||||
// strong directionality characters in the string are of the same
|
||||
// directionality. It returns UNKNOWN_DIRECTION if the string contains a mix of
|
||||
// LTR and RTL strong directionality characters. Defaults to LEFT_TO_RIGHT if
|
||||
// the string does not contain directionality characters. Please refer to
|
||||
// http://unicode.org/reports/tr9/ for more information.
|
||||
BASE_I18N_EXPORT TextDirection GetStringDirection(const string16& text);
|
||||
|
||||
// Given the string in |text|, this function modifies the string in place with
|
||||
// the appropriate Unicode formatting marks that mark the string direction
|
||||
// (either left-to-right or right-to-left). The function checks both the current
|
||||
// locale and the contents of the string in order to determine the direction of
|
||||
// the returned string. The function returns true if the string in |text| was
|
||||
// properly adjusted.
|
||||
//
|
||||
// Certain LTR strings are not rendered correctly when the context is RTL. For
|
||||
// example, the string "Foo!" will appear as "!Foo" if it is rendered as is in
|
||||
// an RTL context. Calling this function will make sure the returned localized
|
||||
// string is always treated as a right-to-left string. This is done by
|
||||
// inserting certain Unicode formatting marks into the returned string.
|
||||
//
|
||||
// ** Notes about the Windows version of this function:
|
||||
// TODO(idana) bug 6806: this function adjusts the string in question only
|
||||
// if the current locale is right-to-left. The function does not take care of
|
||||
// the opposite case (an RTL string displayed in an LTR context) since
|
||||
// adjusting the string involves inserting Unicode formatting characters that
|
||||
// Windows does not handle well unless right-to-left language support is
|
||||
// installed. Since the English version of Windows doesn't have right-to-left
|
||||
// language support installed by default, inserting the direction Unicode mark
|
||||
// results in Windows displaying squares.
|
||||
BASE_I18N_EXPORT bool AdjustStringForLocaleDirection(string16* text);
|
||||
|
||||
// Undoes the actions of the above function (AdjustStringForLocaleDirection).
|
||||
BASE_I18N_EXPORT bool UnadjustStringForLocaleDirection(string16* text);
|
||||
|
||||
// Ensures |text| contains no unterminated directional formatting characters, by
|
||||
// appending the appropriate pop-directional-formatting characters to the end of
|
||||
// |text|.
|
||||
BASE_I18N_EXPORT void EnsureTerminatedDirectionalFormatting(string16* text);
|
||||
|
||||
// Sanitizes the |text| by terminating any directional override/embedding
|
||||
// characters and then adjusting the string for locale direction.
|
||||
BASE_I18N_EXPORT void SanitizeUserSuppliedString(string16* text);
|
||||
|
||||
// Returns true if the string contains at least one character with strong right
|
||||
// to left directionality; that is, a character with either R or AL Unicode
|
||||
// BiDi character type.
|
||||
BASE_I18N_EXPORT bool StringContainsStrongRTLChars(const string16& text);
|
||||
|
||||
// Wraps a string with an LRE-PDF pair which essentially marks the string as a
|
||||
// Left-To-Right string. Doing this is useful in order to make sure LTR
|
||||
// strings are rendered properly in an RTL context.
|
||||
BASE_I18N_EXPORT void WrapStringWithLTRFormatting(string16* text);
|
||||
|
||||
// Wraps a string with an RLE-PDF pair which essentially marks the string as a
|
||||
// Right-To-Left string. Doing this is useful in order to make sure RTL
|
||||
// strings are rendered properly in an LTR context.
|
||||
BASE_I18N_EXPORT void WrapStringWithRTLFormatting(string16* text);
|
||||
|
||||
// Wraps file path to get it to display correctly in RTL UI. All filepaths
|
||||
// should be passed through this function before display in UI for RTL locales.
|
||||
BASE_I18N_EXPORT void WrapPathWithLTRFormatting(const FilePath& path,
|
||||
string16* rtl_safe_path);
|
||||
|
||||
// Return the string in |text| wrapped with LRE (Left-To-Right Embedding) and
|
||||
// PDF (Pop Directional Formatting) marks, if needed for UI display purposes.
|
||||
BASE_I18N_EXPORT string16 GetDisplayStringInLTRDirectionality(
|
||||
const string16& text) WARN_UNUSED_RESULT;
|
||||
|
||||
// Strip the beginning (U+202A..U+202B, U+202D..U+202E) and/or ending (U+202C)
|
||||
// explicit bidi control characters from |text|, if there are any. Otherwise,
|
||||
// return the text itself. Explicit bidi control characters display and have
|
||||
// semantic effect. They can be deleted so they might not always appear in a
|
||||
// pair.
|
||||
BASE_I18N_EXPORT string16 StripWrappingBidiControlCharacters(
|
||||
const string16& text) WARN_UNUSED_RESULT;
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_RTL_H_
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright 2014 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// This implementation doesn't use ICU. The ICU macros are oriented towards
|
||||
// character-at-a-time processing, whereas byte-at-a-time processing is easier
|
||||
// with streaming input.
|
||||
|
||||
#include "base/i18n/streaming_utf8_validator.h"
|
||||
|
||||
#include "base/i18n/utf8_validator_tables.h"
|
||||
#include "base/logging.h"
|
||||
|
||||
namespace base {
|
||||
namespace {
|
||||
|
||||
// Returns the entry of internal::kUtf8ValidatorTables at |offset|. Debug
// builds check that |offset| is below internal::kUtf8ValidatorTablesSize.
uint8_t StateTableLookup(uint8_t offset) {
  DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
  const uint8_t entry = internal::kUtf8ValidatorTables[offset];
  return entry;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Feeds |size| bytes starting at |data| through the validator, continuing
// from the state left by earlier calls, and stores the resulting state.
// Returns VALID_ENDPOINT when the state is 0, INVALID when it is
// internal::I18N_UTF8_VALIDATOR_INVALID_INDEX, and VALID_MIDPOINT otherwise.
StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data,
                                                               size_t size) {
  // Work on a local copy of |state_| so that the compiler doesn't have to be
  // careful of aliasing.
  uint8_t current = state_;
  const char* const end = data + size;
  const char* cursor = data;
  while (cursor != end) {
    const char byte = *cursor++;
    if ((byte & 0x80) == 0) {
      // A byte without the high bit is only acceptable in state 0.
      if (current != 0) {
        current = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
        break;
      }
      continue;
    }
    // The table entry at the current state is a shift amount; the shifted low
    // seven bits of the byte select the entry that holds the next state.
    const uint8_t shift = StateTableLookup(current);
    const uint8_t shifted_byte = (byte & 0x7F) >> shift;
    current = StateTableLookup(current + shifted_byte + 1);
    // |current| may be INVALID here, but this code is optimised for the case
    // of valid UTF-8 and it is more efficient (by about 2%) to not attempt an
    // early loop exit unless we hit an ASCII character.
  }
  state_ = current;

  if (current == 0)
    return VALID_ENDPOINT;
  if (current == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX)
    return INVALID;
  return VALID_MIDPOINT;
}
|
||||
|
||||
// Puts the validator back into state 0, the same state the constructor sets,
// so the object can be reused for a new stream.
void StreamingUtf8Validator::Reset() {
  state_ = 0u;  // Matches the constructor's initial value.
}
|
||||
|
||||
bool StreamingUtf8Validator::Validate(const std::string& string) {
|
||||
return StreamingUtf8Validator().AddBytes(string.data(), string.size()) ==
|
||||
VALID_ENDPOINT;
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2014 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// A streaming validator for UTF-8. Validation is based on the definition in
|
||||
// RFC-3629. In particular, it does not reject the invalid characters rejected
|
||||
// by base::IsStringUTF8().
|
||||
//
|
||||
// The implementation detects errors on the first possible byte.
|
||||
|
||||
#ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
|
||||
#define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/macros.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
class BASE_I18N_EXPORT StreamingUtf8Validator {
|
||||
public:
|
||||
// The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it
|
||||
// processes characters it alternates between VALID_ENDPOINT and
|
||||
// VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the
|
||||
// state changes permanently to INVALID.
|
||||
enum State {
|
||||
VALID_ENDPOINT,
|
||||
VALID_MIDPOINT,
|
||||
INVALID
|
||||
};
|
||||
|
||||
StreamingUtf8Validator() : state_(0u) {}
|
||||
// Trivial destructor intentionally omitted.
|
||||
|
||||
// Validate |size| bytes starting at |data|. If the concatenation of all calls
|
||||
// to AddBytes() since this object was constructed or reset is a valid UTF-8
|
||||
// string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8
|
||||
// string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was
|
||||
// present, returns INVALID.
|
||||
State AddBytes(const char* data, size_t size);
|
||||
|
||||
// Return the object to a freshly-constructed state so that it can be re-used.
|
||||
void Reset();
|
||||
|
||||
// Validate a complete string using the same criteria. Returns true if the
|
||||
// string only contains complete, valid UTF-8 codepoints.
|
||||
static bool Validate(const std::string& string);
|
||||
|
||||
private:
|
||||
// The current state of the validator. Value 0 is the initial/valid state.
|
||||
// The state is stored as an offset into |kUtf8ValidatorTables|. The special
|
||||
// state |internal::I18N_UTF8_VALIDATOR_INVALID_INDEX| is invalid.
|
||||
uint8_t state_;
|
||||
|
||||
// This type could be made copyable but there is currently no use-case for
|
||||
// it.
|
||||
DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator);
|
||||
};
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
|
||||
29
TMessagesProj/jni/voip/webrtc/base/i18n/string_compare.cc
Normal file
29
TMessagesProj/jni/voip/webrtc/base/i18n/string_compare.cc
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/string_compare.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "third_party/icu/source/common/unicode/unistr.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Compares the character data stored in two different string16 strings by
|
||||
// specified Collator instance.
|
||||
UCollationResult CompareString16WithCollator(const icu::Collator& collator,
|
||||
StringPiece16 lhs,
|
||||
StringPiece16 rhs) {
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
UCollationResult result = collator.compare(
|
||||
icu::UnicodeString(FALSE, lhs.data(), static_cast<int>(lhs.length())),
|
||||
icu::UnicodeString(FALSE, rhs.data(), static_cast<int>(rhs.length())),
|
||||
error);
|
||||
DCHECK(U_SUCCESS(error));
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
28
TMessagesProj/jni/voip/webrtc/base/i18n/string_compare.h
Normal file
28
TMessagesProj/jni/voip/webrtc/base/i18n/string_compare.h
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_STRING_COMPARE_H_
|
||||
#define BASE_I18N_STRING_COMPARE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "third_party/icu/source/i18n/unicode/coll.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Compares the two strings using the specified collator.
|
||||
BASE_I18N_EXPORT UCollationResult
|
||||
CompareString16WithCollator(const icu::Collator& collator,
|
||||
const StringPiece16 lhs,
|
||||
const StringPiece16 rhs);
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_STRING_COMPARE_H_
|
||||
111
TMessagesProj/jni/voip/webrtc/base/i18n/string_search.cc
Normal file
111
TMessagesProj/jni/voip/webrtc/base/i18n/string_search.cc
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/i18n/string_search.h"
|
||||
#include "base/logging.h"
|
||||
|
||||
#include "third_party/icu/source/i18n/unicode/usearch.h"
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Prepares an ICU string-search handle for the pattern |find_this|.
//
// |case_sensitive| picks the collation strength used for matching: tertiary
// when true, primary when false.
FixedPatternStringSearch::FixedPatternStringSearch(const string16& find_this,
                                                   bool case_sensitive)
    : find_this_(find_this) {
  // usearch_open() requires a valid text to search even though the real text
  // is supplied later via usearch_setText(), so the pattern itself doubles as
  // the placeholder text here.
  UErrorCode open_status = U_ZERO_ERROR;
  search_ = usearch_open(find_this_.data(), find_this_.size(),
                         find_this_.data(), find_this_.size(),
                         uloc_getDefault(),
                         nullptr,  // breakiter
                         &open_status);
  if (!U_SUCCESS(open_status))
    return;

  // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
  // UCOL_PRIMARY ignores secondary and tertiary differences; UCOL_TERTIARY
  // takes every comparison difference into account.
  //  - Diacritical differences on the same base letter are secondary.
  //  - Uppercase vs. lowercase of the same character is tertiary.
  ucol_setStrength(usearch_getCollator(search_),
                   case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
  usearch_reset(search_);
}
|
||||
|
||||
// Closes the ICU search handle, if one was obtained by the constructor.
FixedPatternStringSearch::~FixedPatternStringSearch() {
  if (search_ != nullptr) {
    usearch_close(search_);
  }
}
|
||||
|
||||
bool FixedPatternStringSearch::Search(const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length,
|
||||
bool forward_search) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
usearch_setText(search_, in_this.data(), in_this.size(), &status);
|
||||
|
||||
// Default to basic substring search if usearch fails. According to
|
||||
// http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
|
||||
// if either |find_this| or |in_this| are empty. In either case basic
|
||||
// substring search will give the correct return value.
|
||||
if (!U_SUCCESS(status)) {
|
||||
size_t index = in_this.find(find_this_);
|
||||
if (index == string16::npos)
|
||||
return false;
|
||||
if (match_index)
|
||||
*match_index = index;
|
||||
if (match_length)
|
||||
*match_length = find_this_.size();
|
||||
return true;
|
||||
}
|
||||
|
||||
int32_t index = forward_search ? usearch_first(search_, &status)
|
||||
: usearch_last(search_, &status);
|
||||
if (!U_SUCCESS(status) || index == USEARCH_DONE)
|
||||
return false;
|
||||
if (match_index)
|
||||
*match_index = static_cast<size_t>(index);
|
||||
if (match_length)
|
||||
*match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the wrapped searcher for |find_this| with case sensitivity turned
// off.
FixedPatternStringSearchIgnoringCaseAndAccents::
    FixedPatternStringSearchIgnoringCaseAndAccents(const string16& find_this)
    : base_search_(find_this, false /* case_sensitive */) {
}
|
||||
|
||||
bool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
|
||||
const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length) {
|
||||
return base_search_.Search(in_this, match_index, match_length,
|
||||
/*forward_search=*/true);
|
||||
}
|
||||
|
||||
bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
|
||||
const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length) {
|
||||
return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
|
||||
in_this, match_index, match_length);
|
||||
}
|
||||
|
||||
bool StringSearch(const string16& find_this,
|
||||
const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length,
|
||||
bool case_sensitive,
|
||||
bool forward_search) {
|
||||
return FixedPatternStringSearch(find_this, case_sensitive)
|
||||
.Search(in_this, match_index, match_length, forward_search);
|
||||
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
93
TMessagesProj/jni/voip/webrtc/base/i18n/string_search.h
Normal file
93
TMessagesProj/jni/voip/webrtc/base/i18n/string_search.h
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_STRING_SEARCH_H_
|
||||
#define BASE_I18N_STRING_SEARCH_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
struct UStringSearch;
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Returns true if |in_this| contains |find_this|. If |match_index| or
|
||||
// |match_length| are non-NULL, they are assigned the start position and total
|
||||
// length of the match.
|
||||
//
|
||||
// Only differences between base letters are taken into consideration. Case and
|
||||
// accent differences are ignored. Please refer to 'primary level' in
|
||||
// http://userguide.icu-project.org/collation/concepts for additional details.
|
||||
BASE_I18N_EXPORT
|
||||
bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
|
||||
const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length);
|
||||
|
||||
// Returns true if |in_this| contains |find_this|. If |match_index| or
|
||||
// |match_length| are non-NULL, they are assigned the start position and total
|
||||
// length of the match.
|
||||
//
|
||||
// When |case_sensitive| is false, only differences between base letters are
|
||||
// taken into consideration. Case and accent differences are ignored.
|
||||
// Please refer to 'primary level' in
|
||||
// http://userguide.icu-project.org/collation/concepts for additional details.
|
||||
// When |forward_search| is true, finds the first instance of |find_this|,
|
||||
// otherwise finds the last instance
|
||||
BASE_I18N_EXPORT
|
||||
bool StringSearch(const string16& find_this,
|
||||
const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length,
|
||||
bool case_sensitive,
|
||||
bool forward_search);
|
||||
|
||||
// This class is for speeding up multiple StringSearch()
|
||||
// with the same |find_this| argument. |find_this| is passed as the constructor
|
||||
// argument, and precomputation for searching is done only at that time.
|
||||
class BASE_I18N_EXPORT FixedPatternStringSearch {
|
||||
public:
|
||||
explicit FixedPatternStringSearch(const string16& find_this,
|
||||
bool case_sensitive);
|
||||
~FixedPatternStringSearch();
|
||||
|
||||
// Returns true if |in_this| contains |find_this|. If |match_index| or
|
||||
// |match_length| are non-NULL, they are assigned the start position and total
|
||||
// length of the match.
|
||||
bool Search(const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length,
|
||||
bool forward_search);
|
||||
|
||||
private:
|
||||
string16 find_this_;
|
||||
UStringSearch* search_;
|
||||
};
|
||||
|
||||
// This class is for speeding up multiple StringSearchIgnoringCaseAndAccents()
|
||||
// with the same |find_this| argument. |find_this| is passed as the constructor
|
||||
// argument, and precomputation for searching is done only at that time.
|
||||
class BASE_I18N_EXPORT FixedPatternStringSearchIgnoringCaseAndAccents {
|
||||
public:
|
||||
explicit FixedPatternStringSearchIgnoringCaseAndAccents(
|
||||
const string16& find_this);
|
||||
|
||||
// Returns true if |in_this| contains |find_this|. If |match_index| or
|
||||
// |match_length| are non-NULL, they are assigned the start position and total
|
||||
// length of the match.
|
||||
bool Search(const string16& in_this,
|
||||
size_t* match_index,
|
||||
size_t* match_length);
|
||||
|
||||
private:
|
||||
FixedPatternStringSearch base_search_;
|
||||
};
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_STRING_SEARCH_H_
|
||||
296
TMessagesProj/jni/voip/webrtc/base/i18n/time_formatting.cc
Normal file
296
TMessagesProj/jni/voip/webrtc/base/i18n/time_formatting.cc
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/time_formatting.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "base/i18n/unicodestring.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "base/time/time.h"
|
||||
#include "third_party/icu/source/common/unicode/utypes.h"
|
||||
#include "third_party/icu/source/i18n/unicode/datefmt.h"
|
||||
#include "third_party/icu/source/i18n/unicode/dtitvfmt.h"
|
||||
#include "third_party/icu/source/i18n/unicode/dtptngen.h"
|
||||
#include "third_party/icu/source/i18n/unicode/fmtable.h"
|
||||
#include "third_party/icu/source/i18n/unicode/measfmt.h"
|
||||
#include "third_party/icu/source/i18n/unicode/smpdtfmt.h"
|
||||
|
||||
namespace base {
|
||||
namespace {
|
||||
|
||||
// Formats |time| with |formatter| and returns the text as a string16.
// |formatter| must be non-null (DCHECKed).
string16 TimeFormat(const icu::DateFormat* formatter,
                    const Time& time) {
  DCHECK(formatter);

  // ToDoubleT() is scaled by 1000 before being handed to ICU as a UDate
  // (presumably seconds -> milliseconds).
  const UDate icu_date = static_cast<UDate>(time.ToDoubleT() * 1000);
  icu::UnicodeString formatted;
  formatter->format(icu_date, formatted);
  return i18n::UnicodeStringToString16(formatted);
}
|
||||
|
||||
// Formats |time| with |formatter| and then strips the AM/PM marker (if the
// formatted text contains one) before returning it as a string16.
// |formatter| must be non-null (DCHECKed).
string16 TimeFormatWithoutAmPm(const icu::DateFormat* formatter,
                               const Time& time) {
  DCHECK(formatter);

  const UDate icu_date = static_cast<UDate>(time.ToDoubleT() * 1000);
  icu::UnicodeString formatted;
  // Ask ICU to report where the AM/PM field landed in the output.
  icu::FieldPosition marker(icu::DateFormat::kAmPmField);
  formatter->format(icu_date, formatted, marker);

  const int marker_begin = marker.getBeginIndex();
  const int marker_end = marker.getEndIndex();
  if (marker_end != marker_begin) {
    // The reported field does not cover any spacing in front of the marker,
    // so also drop the single character preceding it, unless the marker sits
    // at the very start of the string.
    const int erase_from =
        (marker_begin != 0) ? marker_begin - 1 : marker_begin;
    formatted.removeBetween(erase_from, marker_end);
  }
  return i18n::UnicodeStringToString16(formatted);
}
|
||||
|
||||
// Builds a SimpleDateFormat for the skeleton |pattern|.
//
// The skeleton is first expanded by ICU's pattern generator into a
// locale-dependent pattern, which takes care of locale-specific details such
// as the separator to use (some locales use '.' instead of ':') and where the
// am/pm marker goes. ICU failures are only caught by DCHECKs.
icu::SimpleDateFormat CreateSimpleDateFormatter(const char* pattern) {
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::DateTimePatternGenerator> pattern_generator(
      icu::DateTimePatternGenerator::createInstance(status));
  DCHECK(U_SUCCESS(status));

  const icu::UnicodeString skeleton(pattern);
  icu::UnicodeString localized_pattern =
      pattern_generator->getBestPattern(skeleton, status);
  DCHECK(U_SUCCESS(status));

  // The formatter is created from the generated, locale-aware pattern.
  icu::SimpleDateFormat formatter(localized_pattern, status);
  DCHECK(U_SUCCESS(status));
  return formatter;
}
|
||||
|
||||
// Maps a base::DurationFormatWidth onto the equivalent ICU
// UMeasureFormatWidth. Any value outside the enum hits NOTREACHED() and
// yields UMEASFMT_WIDTH_COUNT.
UMeasureFormatWidth DurationWidthToMeasureWidth(DurationFormatWidth width) {
  switch (width) {
    case DURATION_WIDTH_WIDE:
      return UMEASFMT_WIDTH_WIDE;
    case DURATION_WIDTH_SHORT:
      return UMEASFMT_WIDTH_SHORT;
    case DURATION_WIDTH_NARROW:
      return UMEASFMT_WIDTH_NARROW;
    case DURATION_WIDTH_NUMERIC:
      return UMEASFMT_WIDTH_NUMERIC;
  }
  NOTREACHED();
  return UMEASFMT_WIDTH_COUNT;
}
|
||||
|
||||
// Maps a base::DateFormat onto the matching ICU skeleton string. Any value
// outside the enum hits NOTREACHED() and yields UDAT_YEAR_MONTH_DAY.
const char* DateFormatToString(DateFormat format) {
  const char* skeleton = nullptr;
  switch (format) {
    case DATE_FORMAT_YEAR_MONTH:
      skeleton = UDAT_YEAR_MONTH;
      break;
    case DATE_FORMAT_MONTH_WEEKDAY_DAY:
      skeleton = UDAT_MONTH_WEEKDAY_DAY;
      break;
  }
  if (skeleton)
    return skeleton;

  NOTREACHED();
  return UDAT_YEAR_MONTH_DAY;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Formats |time| with ICU's short time style.
string16 TimeFormatTimeOfDay(const Time& time) {
  // No locale is passed: the ICU default is expected to match Chrome's
  // application locale.
  const std::unique_ptr<icu::DateFormat> short_time(
      icu::DateFormat::createTimeInstance(icu::DateFormat::kShort));
  return TimeFormat(short_time.get(), time);
}
|
||||
|
||||
// Formats |time| using the "HmsSSS" skeleton, with any AM/PM marker removed
// from the result.
string16 TimeFormatTimeOfDayWithMilliseconds(const Time& time) {
  icu::SimpleDateFormat millis_formatter = CreateSimpleDateFormatter("HmsSSS");
  const icu::DateFormat* as_date_format = &millis_formatter;
  return TimeFormatWithoutAmPm(as_date_format, time);
}
|
||||
|
||||
// Formats the time of day of |time| using the hour clock |type|, keeping or
// dropping the AM/PM marker according to |ampm|.
string16 TimeFormatTimeOfDayWithHourClockType(const Time& time,
                                              HourClockType type,
                                              AmPmClockType ampm) {
  const bool keep_ampm = (ampm == kKeepAmPm);

  // When the locale's own clock type already matches the request (and no
  // marker would have to be stripped), the plain formatter does the job.
  if (GetHourClockType() == type && (type == k24HourClock || keep_ampm))
    return TimeFormatTimeOfDay(time);

  const char* skeleton = "Hm";
  if (type == k12HourClock)
    skeleton = "ahm";
  icu::SimpleDateFormat formatter = CreateSimpleDateFormatter(skeleton);

  return keep_ampm ? TimeFormat(&formatter, time)
                   : TimeFormatWithoutAmPm(&formatter, time);
}
|
||||
|
||||
// Formats the date of |time| with ICU's medium date style.
string16 TimeFormatShortDate(const Time& time) {
  const std::unique_ptr<icu::DateFormat> medium_date(
      icu::DateFormat::createDateInstance(icu::DateFormat::kMedium));
  return TimeFormat(medium_date.get(), time);
}
|
||||
|
||||
// Formats the date of |time| with ICU's short date style.
string16 TimeFormatShortDateNumeric(const Time& time) {
  const std::unique_ptr<icu::DateFormat> short_date(
      icu::DateFormat::createDateInstance(icu::DateFormat::kShort));
  return TimeFormat(short_date.get(), time);
}
|
||||
|
||||
// Formats |time| as date plus time using ICU's short date-time style.
string16 TimeFormatShortDateAndTime(const Time& time) {
  const std::unique_ptr<icu::DateFormat> short_date_time(
      icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort));
  return TimeFormat(short_date_time.get(), time);
}
|
||||
|
||||
// Formats |time| as date plus time, passing kShort and kLong as the two style
// arguments of ICU's date-time factory.
string16 TimeFormatShortDateAndTimeWithTimeZone(const Time& time) {
  const std::unique_ptr<icu::DateFormat> date_time_with_zone(
      icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort,
                                              icu::DateFormat::kLong));
  return TimeFormat(date_time_with_zone.get(), time);
}
|
||||
|
||||
// Formats |time| using the skeleton for DATE_FORMAT_YEAR_MONTH.
string16 TimeFormatMonthAndYear(const Time& time) {
  const char* skeleton = DateFormatToString(DATE_FORMAT_YEAR_MONTH);
  icu::SimpleDateFormat month_year_formatter =
      CreateSimpleDateFormatter(skeleton);
  return TimeFormat(&month_year_formatter, time);
}
|
||||
|
||||
// Formats |time| as date plus time using ICU's full date-time style.
string16 TimeFormatFriendlyDateAndTime(const Time& time) {
  const std::unique_ptr<icu::DateFormat> full_date_time(
      icu::DateFormat::createDateTimeInstance(icu::DateFormat::kFull));
  return TimeFormat(full_date_time.get(), time);
}
|
||||
|
||||
// Formats the date of |time| with ICU's full date style.
string16 TimeFormatFriendlyDate(const Time& time) {
  const std::unique_ptr<icu::DateFormat> full_date(
      icu::DateFormat::createDateInstance(icu::DateFormat::kFull));
  return TimeFormat(full_date.get(), time);
}
|
||||
|
||||
// Formats |time| with a formatter built from the caller-supplied skeleton
// |pattern|.
string16 TimeFormatWithPattern(const Time& time, const char* pattern) {
  icu::SimpleDateFormat custom_formatter = CreateSimpleDateFormatter(pattern);
  const icu::DateFormat* as_date_format = &custom_formatter;
  return TimeFormat(as_date_format, time);
}
|
||||
|
||||
bool TimeDurationFormat(const TimeDelta time,
|
||||
const DurationFormatWidth width,
|
||||
string16* out) {
|
||||
DCHECK(out);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const int total_minutes = static_cast<int>(time.InSecondsF() / 60 + 0.5);
|
||||
const int hours = total_minutes / 60;
|
||||
const int minutes = total_minutes % 60;
|
||||
UMeasureFormatWidth u_width = DurationWidthToMeasureWidth(width);
|
||||
|
||||
// TODO(derat): Delete the |status| checks and LOG(ERROR) calls throughout
|
||||
// this function once the cause of http://crbug.com/677043 is tracked down.
|
||||
const icu::Measure measures[] = {
|
||||
icu::Measure(hours, icu::MeasureUnit::createHour(status), status),
|
||||
icu::Measure(minutes, icu::MeasureUnit::createMinute(status), status)};
|
||||
if (U_FAILURE(status)) {
|
||||
LOG(ERROR) << "Creating MeasureUnit or Measure for " << hours << "h"
|
||||
<< minutes << "m failed: " << u_errorName(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
icu::MeasureFormat measure_format(icu::Locale::getDefault(), u_width, status);
|
||||
if (U_FAILURE(status)) {
|
||||
LOG(ERROR) << "Creating MeasureFormat for "
|
||||
<< icu::Locale::getDefault().getName()
|
||||
<< " failed: " << u_errorName(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
icu::UnicodeString formatted;
|
||||
icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE);
|
||||
measure_format.formatMeasures(measures, 2, formatted, ignore, status);
|
||||
if (U_FAILURE(status)) {
|
||||
LOG(ERROR) << "formatMeasures failed: " << u_errorName(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
*out = i18n::UnicodeStringToString16(formatted);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TimeDurationFormatWithSeconds(const TimeDelta time,
|
||||
const DurationFormatWidth width,
|
||||
string16* out) {
|
||||
DCHECK(out);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const int64_t total_seconds = static_cast<int64_t>(time.InSecondsF() + 0.5);
|
||||
const int64_t hours = total_seconds / 3600;
|
||||
const int64_t minutes = (total_seconds - hours * 3600) / 60;
|
||||
const int64_t seconds = total_seconds % 60;
|
||||
UMeasureFormatWidth u_width = DurationWidthToMeasureWidth(width);
|
||||
|
||||
const icu::Measure measures[] = {
|
||||
icu::Measure(hours, icu::MeasureUnit::createHour(status), status),
|
||||
icu::Measure(minutes, icu::MeasureUnit::createMinute(status), status),
|
||||
icu::Measure(seconds, icu::MeasureUnit::createSecond(status), status)};
|
||||
icu::MeasureFormat measure_format(icu::Locale::getDefault(), u_width, status);
|
||||
icu::UnicodeString formatted;
|
||||
icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE);
|
||||
measure_format.formatMeasures(measures, 3, formatted, ignore, status);
|
||||
*out = i18n::UnicodeStringToString16(formatted);
|
||||
return U_SUCCESS(status) == TRUE;
|
||||
}
|
||||
|
||||
// Formats the interval [|begin_time|, |end_time|] using the skeleton selected
// by |format|.
//
// Returns the formatted text, or an empty string (after logging an error) when
// ICU cannot create the interval formatter or fails to format the interval.
string16 DateIntervalFormat(const Time& begin_time,
                            const Time& end_time,
                            DateFormat format) {
  UErrorCode status = U_ZERO_ERROR;

  std::unique_ptr<icu::DateIntervalFormat> formatter(
      icu::DateIntervalFormat::createInstance(DateFormatToString(format),
                                              status));
  // createInstance() reports failure through |status| and may not hand back
  // an object; never dereference |formatter| in that case.
  if (U_FAILURE(status) || !formatter) {
    LOG(ERROR) << "Creating DateIntervalFormat failed: "
               << u_errorName(status);
    return string16();
  }

  icu::FieldPosition pos = 0;
  const UDate start_date = static_cast<UDate>(begin_time.ToDoubleT() * 1000);
  const UDate end_date = static_cast<UDate>(end_time.ToDoubleT() * 1000);
  icu::DateInterval interval(start_date, end_date);
  icu::UnicodeString formatted;
  formatter->format(&interval, formatted, pos, status);
  if (U_FAILURE(status)) {
    LOG(ERROR) << "DateIntervalFormat::format failed: " << u_errorName(status);
    return string16();
  }
  return i18n::UnicodeStringToString16(formatted);
}
|
||||
|
||||
// Reports whether the current locale's short time format is a 12-hour or a
// 24-hour clock.
HourClockType GetHourClockType() {
  // TODO(satorux,jshin): Rework this with ures_getByKeyWithFallback()
  // once it becomes public. The short time format can be found at
  // "calendar/gregorian/DateTimePatterns/3" in the resources.
  std::unique_ptr<icu::SimpleDateFormat> short_time_formatter(
      static_cast<icu::SimpleDateFormat*>(
          icu::DateFormat::createTimeInstance(icu::DateFormat::kShort)));

  // Pull out the pattern behind the short time format.
  icu::UnicodeString short_time_pattern;
  short_time_formatter->toPattern(short_time_pattern);

  // The clock type is derived from the presence of "a" (the am/pm marker) in
  // that pattern. This is reliable because every 12-hour clock format uses
  // "a" and no 24-hour clock format does, as the survey below shows.
  //
  // % grep -A4 DateTimePatterns third_party/icu/source/data/locales/*.txt |
  //   grep -B1 -- -- |grep -v -- '--' |
  //   perl -nle 'print $1 if /^\S+\s+"(.*)"/' |sort -u
  //
  // H.mm
  // H:mm
  // HH.mm
  // HH:mm
  // a h:mm
  // ah:mm
  // ahh:mm
  // h-mm a
  // h:mm a
  // hh:mm a
  //
  // See http://userguide.icu-project.org/formatparse/datetime for details
  // about the date/time format syntax.
  const bool has_ampm_marker = short_time_pattern.indexOf('a') != -1;
  return has_ampm_marker ? k12HourClock : k24HourClock;
}
|
||||
|
||||
} // namespace base
|
||||
138
TMessagesProj/jni/voip/webrtc/base/i18n/time_formatting.h
Normal file
138
TMessagesProj/jni/voip/webrtc/base/i18n/time_formatting.h
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Basic time formatting methods. These methods use the current locale
|
||||
// formatting for displaying the time.
|
||||
|
||||
#ifndef BASE_I18N_TIME_FORMATTING_H_
|
||||
#define BASE_I18N_TIME_FORMATTING_H_
|
||||
|
||||
#include "base/compiler_specific.h"
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
class Time;
|
||||
class TimeDelta;
|
||||
|
||||
// Argument type used to specify the hour clock type.
|
||||
enum HourClockType {
|
||||
k12HourClock, // Uses 1-12. e.g., "3:07 PM"
|
||||
k24HourClock, // Uses 0-23. e.g., "15:07"
|
||||
};
|
||||
|
||||
// Argument type used to specify whether or not to include AM/PM sign.
|
||||
enum AmPmClockType {
|
||||
kDropAmPm, // Drops AM/PM sign. e.g., "3:07"
|
||||
kKeepAmPm, // Keeps AM/PM sign. e.g., "3:07 PM"
|
||||
};
|
||||
|
||||
// Should match UMeasureFormatWidth in measfmt.h; replicated here to avoid
|
||||
// requiring third_party/icu dependencies with this file.
|
||||
enum DurationFormatWidth {
|
||||
DURATION_WIDTH_WIDE, // "3 hours, 7 minutes"
|
||||
DURATION_WIDTH_SHORT, // "3 hr, 7 min"
|
||||
DURATION_WIDTH_NARROW, // "3h 7m"
|
||||
DURATION_WIDTH_NUMERIC // "3:07"
|
||||
};
|
||||
|
||||
// Date formats from third_party/icu/source/i18n/unicode/udat.h. Add more as
|
||||
// necessary.
|
||||
enum DateFormat {
|
||||
// November 2007
|
||||
DATE_FORMAT_YEAR_MONTH,
|
||||
// Tuesday, 7 November
|
||||
DATE_FORMAT_MONTH_WEEKDAY_DAY,
|
||||
};
|
||||
|
||||
// Returns the time of day, e.g., "3:07 PM".
|
||||
BASE_I18N_EXPORT string16 TimeFormatTimeOfDay(const Time& time);
|
||||
|
||||
// Returns the time of day in 24-hour clock format with millisecond accuracy,
|
||||
// e.g., "15:07:30.568"
|
||||
BASE_I18N_EXPORT string16 TimeFormatTimeOfDayWithMilliseconds(const Time& time);
|
||||
|
||||
// Returns the time of day in the specified hour clock type. e.g.
|
||||
// "3:07 PM" (type == k12HourClock, ampm == kKeepAmPm).
|
||||
// "3:07" (type == k12HourClock, ampm == kDropAmPm).
|
||||
// "15:07" (type == k24HourClock).
|
||||
BASE_I18N_EXPORT string16 TimeFormatTimeOfDayWithHourClockType(
|
||||
const Time& time,
|
||||
HourClockType type,
|
||||
AmPmClockType ampm);
|
||||
|
||||
// Returns a shortened date, e.g. "Nov 7, 2007"
|
||||
BASE_I18N_EXPORT string16 TimeFormatShortDate(const Time& time);
|
||||
|
||||
// Returns a numeric date such as 12/13/52.
|
||||
BASE_I18N_EXPORT string16 TimeFormatShortDateNumeric(const Time& time);
|
||||
|
||||
// Returns a numeric date and time such as "12/13/52 2:44:30 PM".
|
||||
BASE_I18N_EXPORT string16 TimeFormatShortDateAndTime(const Time& time);
|
||||
|
||||
// Returns a month and year, e.g. "November 2007"
|
||||
BASE_I18N_EXPORT string16 TimeFormatMonthAndYear(const Time& time);
|
||||
|
||||
// Returns a numeric date and time with time zone such as
|
||||
// "12/13/52 2:44:30 PM PST".
|
||||
BASE_I18N_EXPORT string16
|
||||
TimeFormatShortDateAndTimeWithTimeZone(const Time& time);
|
||||
|
||||
// Formats a time in a friendly sentence format, e.g.
|
||||
// "Monday, March 6, 2008 2:44:30 PM".
|
||||
BASE_I18N_EXPORT string16 TimeFormatFriendlyDateAndTime(const Time& time);
|
||||
|
||||
// Formats a time in a friendly sentence format, e.g.
|
||||
// "Monday, March 6, 2008".
|
||||
BASE_I18N_EXPORT string16 TimeFormatFriendlyDate(const Time& time);
|
||||
|
||||
// Formats a time using a skeleton to produce a format for different locales
|
||||
// when an unusual time format is needed, e.g. "Feb. 2, 18:00".
|
||||
//
|
||||
// See http://userguide.icu-project.org/formatparse/datetime for details.
|
||||
BASE_I18N_EXPORT string16 TimeFormatWithPattern(const Time& time,
|
||||
const char* pattern);
|
||||
|
||||
// Formats a time duration of hours and minutes into various formats, e.g.,
|
||||
// "3:07" or "3 hours, 7 minutes", and returns true on success. See
|
||||
// DurationFormatWidth for details.
|
||||
//
|
||||
// Please don't use width = DURATION_WIDTH_NUMERIC when the time duration
|
||||
// can possibly be larger than 24h, as the hour value will be cut below 24
|
||||
// after formatting.
|
||||
// TODO(crbug.com/675791): fix function output when width =
|
||||
// DURATION_WIDTH_NUMERIC.
|
||||
BASE_I18N_EXPORT bool TimeDurationFormat(const TimeDelta time,
|
||||
const DurationFormatWidth width,
|
||||
string16* out) WARN_UNUSED_RESULT;
|
||||
|
||||
// Formats a time duration of hours, minutes and seconds into various formats,
|
||||
// e.g., "3:07:30" or "3 hours, 7 minutes, 30 seconds", and returns true on
|
||||
// success. See DurationFormatWidth for details.
|
||||
//
|
||||
// Please don't use width = DURATION_WIDTH_NUMERIC when the time duration
|
||||
// can possibly be larger than 24h, as the hour value will be cut below 24
|
||||
// after formatting.
|
||||
// TODO(crbug.com/675791): fix function output when width =
|
||||
// DURATION_WIDTH_NUMERIC.
|
||||
BASE_I18N_EXPORT bool TimeDurationFormatWithSeconds(
|
||||
const TimeDelta time,
|
||||
const DurationFormatWidth width,
|
||||
string16* out) WARN_UNUSED_RESULT;
|
||||
|
||||
// Formats a date interval into various formats, e.g. "2 December - 4 December"
|
||||
// or "March 2016 - December 2016". See DateFormat for details.
|
||||
BASE_I18N_EXPORT string16 DateIntervalFormat(const Time& begin_time,
|
||||
const Time& end_time,
|
||||
DateFormat format);
|
||||
|
||||
// Gets the hour clock type of the current locale. e.g.
|
||||
// k12HourClock (en-US).
|
||||
// k24HourClock (en-GB).
|
||||
BASE_I18N_EXPORT HourClockType GetHourClockType();
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_TIME_FORMATTING_H_
|
||||
34
TMessagesProj/jni/voip/webrtc/base/i18n/timezone.cc
Normal file
34
TMessagesProj/jni/voip/webrtc/base/i18n/timezone.cc
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/i18n/timezone.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "third_party/icu/source/common/unicode/unistr.h"
|
||||
#include "third_party/icu/source/i18n/unicode/timezone.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
std::string CountryCodeForCurrentTimezone() {
|
||||
std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault());
|
||||
icu::UnicodeString id;
|
||||
// ICU returns '001' (world) for Etc/GMT. Preserve the old behavior
|
||||
// only for Etc/GMT while returning an empty string for Etc/UTC and
|
||||
// Etc/UCT because they're less likely to be chosen by mistake in UK in
|
||||
// place of Europe/London (Briitish Time).
|
||||
if (zone->getID(id) == UNICODE_STRING_SIMPLE("Etc/GMT"))
|
||||
return "GB";
|
||||
char region_code[4];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int length = zone->getRegion(id, region_code, 4, status);
|
||||
// Return an empty string if region_code is a 3-digit numeric code such
|
||||
// as 001 (World) for Etc/UTC, Etc/UCT.
|
||||
return (U_SUCCESS(status) && length == 2)
|
||||
? std::string(region_code, static_cast<size_t>(length))
|
||||
: std::string();
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
24
TMessagesProj/jni/voip/webrtc/base/i18n/timezone.h
Normal file
24
TMessagesProj/jni/voip/webrtc/base/i18n/timezone.h
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_TIMEZONE_H_
|
||||
#define BASE_I18N_TIMEZONE_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/i18n/base_i18n_export.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Checks the system timezone and turns it into a two-character ISO 3166 country
|
||||
// code. This may fail (for example, it used to always fail on Android), in
|
||||
// which case it will return an empty string. It'll also return an empty string
|
||||
// when the timezone is Etc/UTC or Etc/UCT, but will return "GB" for Etc/GMT
|
||||
// because people in the UK tend to select Etc/GMT by mistake instead of
|
||||
// Europe/London (British Time).
|
||||
BASE_I18N_EXPORT std::string CountryCodeForCurrentTimezone();
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_TIMEZONE_H_
|
||||
32
TMessagesProj/jni/voip/webrtc/base/i18n/unicodestring.h
Normal file
32
TMessagesProj/jni/voip/webrtc/base/i18n/unicodestring.h
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) 2017 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_UNICODESTRING_H_
|
||||
#define BASE_I18N_UNICODESTRING_H_
|
||||
|
||||
#include "base/strings/string16.h"
|
||||
#include "third_party/icu/source/common/unicode/unistr.h"
|
||||
#include "third_party/icu/source/common/unicode/uvernum.h"
|
||||
|
||||
#if U_ICU_VERSION_MAJOR_NUM >= 59
|
||||
#include "third_party/icu/source/common/unicode/char16ptr.h"
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
namespace i18n {
|
||||
|
||||
// Copies the contents of an icu::UnicodeString into a new base::string16.
// The result is built from unistr.getBuffer() and holds exactly
// unistr.length() code units.
// For ICU major versions >= 59 the buffer pointer is first passed through
// icu::toUCharPtr() (declared in char16ptr.h, which is only included for those
// versions); older versions hand the buffer to string16 directly.
// NOTE(review): presumably needed because ICU 59+ exposes the buffer with a
// different pointer type than string16 expects -- confirm against the ICU docs.
inline string16 UnicodeStringToString16(const icu::UnicodeString& unistr) {
|
||||
#if U_ICU_VERSION_MAJOR_NUM >= 59
|
||||
return base::string16(icu::toUCharPtr(unistr.getBuffer()),
|
||||
static_cast<size_t>(unistr.length()));
|
||||
#else
|
||||
return base::string16(unistr.getBuffer(),
|
||||
static_cast<size_t>(unistr.length()));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_UNICODESTRING_H_
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2014 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// This file is auto-generated by build_utf8_validator_tables.
|
||||
// DO NOT EDIT.
|
||||
|
||||
#include "base/i18n/utf8_validator_tables.h"
|
||||
#include "base/stl_util.h"
|
||||
|
||||
namespace base {
|
||||
namespace internal {
|
||||
|
||||
const uint8_t kUtf8ValidatorTables[] = {
|
||||
// State 0, offset 0x00
|
||||
0x00, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x08
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x10
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x18
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x20
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x28
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x30
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x38
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x40
|
||||
0x81, 0x81, 0x81, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x48
|
||||
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x50
|
||||
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x58
|
||||
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x60
|
||||
0x83, 0x86, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, // 0x68
|
||||
0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8e, 0x8b, // 0x70
|
||||
0x8b, 0x93, 0x9c, 0x9c, 0x9c, 0x9f, 0x81, 0x81, // 0x78
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x80
|
||||
0x81, // 0x81
|
||||
// State 1, offset 0x81
|
||||
0x07, 0x81, // 0x83
|
||||
// State 2, offset 0x83
|
||||
0x06, 0x00, 0x81, // 0x86
|
||||
// State 3, offset 0x86
|
||||
0x05, 0x81, 0x83, 0x81, 0x81, // 0x8b
|
||||
// State 4, offset 0x8b
|
||||
0x06, 0x83, 0x81, // 0x8e
|
||||
// State 5, offset 0x8e
|
||||
0x05, 0x83, 0x81, 0x81, 0x81, // 0x93
|
||||
// State 6, offset 0x93
|
||||
0x04, 0x81, 0x8b, 0x8b, 0x8b, 0x81, 0x81, 0x81, // 0x9b
|
||||
0x81, // 0x9c
|
||||
// State 7, offset 0x9c
|
||||
0x06, 0x8b, 0x81, // 0x9f
|
||||
// State 8, offset 0x9f
|
||||
0x04, 0x8b, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0xa7
|
||||
0x81, // 0xa8
|
||||
};
|
||||
|
||||
const size_t kUtf8ValidatorTablesSize = base::size(kUtf8ValidatorTables);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2014 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_I18N_UTF8_VALIDATOR_TABLES_H_
|
||||
#define BASE_I18N_UTF8_VALIDATOR_TABLES_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/macros.h"
|
||||
|
||||
namespace base {
|
||||
namespace internal {
|
||||
|
||||
// The tables for all states; a list of entries of the form (right_shift,
|
||||
// next_state, next_state, ....). The right_shifts are used to reduce the
|
||||
// overall size of the table. The table only covers bytes in the range
|
||||
// [0x80, 0xFF] to save space.
|
||||
extern const uint8_t kUtf8ValidatorTables[];
|
||||
|
||||
extern const size_t kUtf8ValidatorTablesSize;
|
||||
|
||||
// The offset of the INVALID state in kUtf8ValidatorTables.
|
||||
// 129 == 0x81, which is the offset annotated as "State 1" in the generated
// kUtf8ValidatorTables data; this value has to stay in sync with that table.
enum {
|
||||
I18N_UTF8_VALIDATOR_INVALID_INDEX = 129
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_I18N_UTF8_VALIDATOR_TABLES_H_
|
||||
Loading…
Add table
Add a link
Reference in a new issue