|
mbxmlutils
1.3.0
Multi-Body XML Utils
|
#include <simdutf.h>
Public Member Functions | |
| virtual std::string | name () const |
| virtual std::string | description () const |
| bool | supported_by_runtime_system () const |
| virtual simdutf_warn_unused bool | validate_utf8 (const char *buf, size_t len) const noexcept=0 |
| virtual simdutf_warn_unused result | validate_utf8_with_errors (const char *buf, size_t len) const noexcept=0 |
| virtual simdutf_warn_unused bool | validate_utf16le (const char16_t *buf, size_t len) const noexcept=0 |
| virtual simdutf_warn_unused bool | validate_utf16be (const char16_t *buf, size_t len) const noexcept=0 |
| virtual simdutf_warn_unused result | validate_utf16le_with_errors (const char16_t *buf, size_t len) const noexcept=0 |
| virtual simdutf_warn_unused result | validate_utf16be_with_errors (const char16_t *buf, size_t len) const noexcept=0 |
| virtual void | to_well_formed_utf16le (const char16_t *input, size_t len, char16_t *output) const noexcept=0 |
| virtual void | to_well_formed_utf16be (const char16_t *input, size_t len, char16_t *output) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_utf8_to_utf16le (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_utf8_to_utf16be (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
| virtual simdutf_warn_unused result | convert_utf8_to_utf16le_with_errors (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
| virtual simdutf_warn_unused result | convert_utf8_to_utf16be_with_errors (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
| virtual simdutf_warn_unused result | utf8_length_from_utf16le_with_replacement (const char16_t *input, size_t length) const noexcept=0 |
| virtual simdutf_warn_unused result | utf8_length_from_utf16be_with_replacement (const char16_t *input, size_t length) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_valid_utf8_to_utf16le (const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_valid_utf8_to_utf16be (const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
| virtual simdutf_warn_unused size_t | utf16_length_from_utf8 (const char *input, size_t length) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_utf16le_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_utf16be_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
| virtual simdutf_warn_unused result | convert_utf16le_to_utf8_with_errors (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
| virtual simdutf_warn_unused result | convert_utf16be_to_utf8_with_errors (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_valid_utf16le_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
| virtual simdutf_warn_unused size_t | convert_valid_utf16be_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
| virtual simdutf_warn_unused size_t | utf8_length_from_utf16le (const char16_t *input, size_t length) const noexcept=0 |
| virtual simdutf_warn_unused size_t | utf8_length_from_utf16be (const char16_t *input, size_t length) const noexcept=0 |
| virtual void | change_endianness_utf16 (const char16_t *input, size_t length, char16_t *output) const noexcept=0 |
| virtual simdutf_warn_unused size_t | count_utf16le (const char16_t *input, size_t length) const noexcept=0 |
| virtual simdutf_warn_unused size_t | count_utf16be (const char16_t *input, size_t length) const noexcept=0 |
| virtual simdutf_warn_unused size_t | count_utf8 (const char *input, size_t length) const noexcept=0 |
Private Member Functions | |
| virtual uint32_t | required_instruction_sets () const |
| simdutf_really_inline | implementation (const char *name, const char *description, uint32_t required_instruction_sets) |
Private Attributes | |
| const char * | _name |
| const char * | _description |
| const uint32_t | _required_instruction_sets |
An implementation of simdutf for a particular CPU architecture.
Also used to maintain the currently active implementation. The active implementation is automatically initialized on first use to the most advanced implementation supported by the host.
|
inline private |
Construct an implementation with the given name and description. For subclasses.
|
pure virtual noexcept |
Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or from UTF-16BE to UTF-16LE.
This function does not validate the input.
This function is not BOM-aware.
| input | the UTF-16 string to process |
| length | the length of the string in 2-byte code units (char16_t) |
| output | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Convert possibly broken UTF-16BE string into UTF-8 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
| input | the UTF-16BE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
| utf8_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
| input | the UTF-16BE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
| utf8_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-16LE string into UTF-8 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
| input | the UTF-16LE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
| utf8_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
| input | the UTF-16LE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
| utf8_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-8 string into UTF-16BE string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
| input | the UTF-8 string to convert |
| length | the length of the string in bytes |
| utf16_output | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
| input | the UTF-8 string to convert |
| length | the length of the string in bytes |
| utf16_output | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-8 string into UTF-16LE string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
| input | the UTF-8 string to convert |
| length | the length of the string in bytes |
| utf16_output | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
| input | the UTF-8 string to convert |
| length | the length of the string in bytes |
| utf16_output | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert valid UTF-16BE string into UTF-8 string.
This function assumes that the input string is valid UTF-16BE.
This function is not BOM-aware.
| input | the UTF-16BE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
| utf8_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert valid UTF-16LE string into UTF-8 string.
This function assumes that the input string is valid UTF-16LE.
This function is not BOM-aware.
| input | the UTF-16LE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
| utf8_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert valid UTF-8 string into UTF-16BE string.
This function assumes that the input string is valid UTF-8.
| input | the UTF-8 string to convert |
| length | the length of the string in bytes |
| utf16_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Convert valid UTF-8 string into UTF-16LE string.
This function assumes that the input string is valid UTF-8.
| input | the UTF-8 string to convert |
| length | the length of the string in bytes |
| utf16_buffer | the pointer to a buffer that can hold the conversion result |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Count the number of code points (characters) in the string assuming that it is valid.
This function assumes that the input string is valid UTF-16BE. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
| input | the UTF-16BE string to process |
| length | the length of the string in 2-byte code units (char16_t) |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Count the number of code points (characters) in the string assuming that it is valid.
This function assumes that the input string is valid UTF-16LE. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
| input | the UTF-16LE string to process |
| length | the length of the string in 2-byte code units (char16_t) |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Count the number of code points (characters) in the string assuming that it is valid.
This function assumes that the input string is valid UTF-8. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.
| input | the UTF-8 string to process |
| length | the length of the string in bytes |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
inline virtual |
The description of this implementation.
const implementation *impl = simdutf::active_implementation;
cout << "simdutf is optimized for " << impl->name() << "(" <<
impl->description() << ")" << endl;
Reimplemented in simdutf::internal::detect_best_supported_implementation_on_first_use.
|
inline virtual |
The name of this implementation.
const implementation *impl = simdutf::active_implementation;
cout << "simdutf is optimized for " << impl->name() << "(" <<
impl->description() << ")" << endl;
Reimplemented in simdutf::internal::detect_best_supported_implementation_on_first_use.
|
inline private virtual |
For internal implementation use.
The instruction sets this implementation is compiled against.
Returns a mask of the required internal::instruction_set:: values. Reimplemented in simdutf::internal::detect_best_supported_implementation_on_first_use.
| bool simdutf::implementation::supported_by_runtime_system | ( | ) | const |
Reports whether the instruction sets this implementation is compiled against are supported by the current CPU. This function may poll the current CPU/system and should therefore not be called too often if performance is a concern.
|
pure virtual noexcept |
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement character U+FFFD. We allow the input and output to be the same buffer so that the correction is done in-place.
Overridden by each implementation.
| input | the UTF-16BE string to correct. |
| len | the length of the string in number of 2-byte code units (char16_t). |
| output | the output buffer. |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement character U+FFFD. We allow the input and output to be the same buffer so that the correction is done in-place.
Overridden by each implementation.
| input | the UTF-16LE string to correct. |
| len | the length of the string in number of 2-byte code units (char16_t). |
| output | the output buffer. |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.
| input | the UTF-8 string to process |
| length | the length of the string in bytes |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
| input | the UTF-16BE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format even when the UTF-16BE content contains mismatched surrogates that have to be replaced by the Unicode replacement character U+FFFD.
| input | the UTF-16BE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
| input | the UTF-16LE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format even when the UTF-16LE content contains mismatched surrogates that have to be replaced by the Unicode replacement character U+FFFD.
| input | the UTF-16LE string to convert |
| length | the length of the string in 2-byte code units (char16_t) |
Implemented in simdutf::internal::unsupported_implementation, simdutf::internal::detect_best_supported_implementation_on_first_use, and simdutf::fallback::implementation.
|
pure virtual noexcept |
Validate the UTF-16BE string. This function may be best when you expect the input to be almost always valid. Otherwise, consider using validate_utf16be_with_errors.
Overridden by each implementation.
This function is not BOM-aware.
| buf | the UTF-16BE string to validate. |
| len | the length of the string in number of 2-byte code units (char16_t). |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Validate the UTF-16BE string and stop on error. It might be faster than validate_utf16be when an error is expected to occur early.
Overridden by each implementation.
This function is not BOM-aware.
| buf | the UTF-16BE string to validate. |
| len | the length of the string in number of 2-byte code units (char16_t). |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always valid. Otherwise, consider using validate_utf16le_with_errors.
Overridden by each implementation.
This function is not BOM-aware.
| buf | the UTF-16LE string to validate. |
| len | the length of the string in number of 2-byte code units (char16_t). |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Validate the UTF-16LE string and stop on error. It might be faster than validate_utf16le when an error is expected to occur early.
Overridden by each implementation.
This function is not BOM-aware.
| buf | the UTF-16LE string to validate. |
| len | the length of the string in number of 2-byte code units (char16_t). |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Validate the UTF-8 string.
Overridden by each implementation.
| buf | the UTF-8 string to validate. |
| len | the length of the string in bytes. |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
pure virtual noexcept |
Validate the UTF-8 string and stop on errors.
Overridden by each implementation.
| buf | the UTF-8 string to validate. |
| len | the length of the string in bytes. |
Implemented in simdutf::internal::unsupported_implementation, simdutf::fallback::implementation, and simdutf::internal::detect_best_supported_implementation_on_first_use.
|
private |
The description of this implementation.
|
private |
The name of this implementation.
|
private |
Instruction sets required for this implementation.