gcdb::strip_invalid_text()
最后更新于:2021-11-26 09:05:06
gcdb::strip_invalid_text( array$data)Strips any invalid characters based on value/charset pairs.
参数
- $data
-
(array) (Required) Array of value arrays. Each value array has the keys ‘value’ and ‘charset’. An optional ‘ascii’ key can be set to false to avoid redundant ASCII checks.
响应
(array|GC_Error) The $data parameter, with invalid characters removed from each value. This works as a passthrough: any additional keys such as ‘field’ are retained in each value array. If we cannot remove invalid characters, a GC_Error object is returned.
源文件
文件: gc-includes/gc-db.php
protected function strip_invalid_text( $data ) {
$db_check_string = false;
foreach ( $data as &$value ) {
$charset = $value['charset'];
if ( is_array( $value['length'] ) ) {
$length = $value['length']['length'];
$truncate_by_byte_length = 'byte' === $value['length']['type'];
} else {
$length = false;
// Since we have no length, we'll never truncate. Initialize the variable to false.
// True would take us through an unnecessary (for this case) codepath below.
$truncate_by_byte_length = false;
}
// There's no charset to work with.
if ( false === $charset ) {
continue;
}
// Column isn't a string.
if ( ! is_string( $value['value'] ) ) {
continue;
}
$needs_validation = true;
if (
// latin1 can store any byte sequence.
'latin1' === $charset
||
// ASCII is always OK.
( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
) {
$truncate_by_byte_length = true;
$needs_validation = false;
}
if ( $truncate_by_byte_length ) {
mbstring_binary_safe_encoding();
if ( false !== $length && strlen( $value['value'] ) > $length ) {
$value['value'] = substr( $value['value'], 0, $length );
}
reset_mbstring_encoding();
if ( ! $needs_validation ) {
continue;
}
}
// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
$regex = '/
(
(?: [x00-x7F] # single-byte sequences 0xxxxxxx
| [xC2-xDF][x80-xBF] # double-byte sequences 110xxxxx 10xxxxxx
| xE0[xA0-xBF][x80-xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [xE1-xEC][x80-xBF]{2}
| xED[x80-x9F][x80-xBF]
| [xEE-xEF][x80-xBF]{2}';
if ( 'utf8mb4' === $charset ) {
$regex .= '
| xF0[x90-xBF][x80-xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
| [xF1-xF3][x80-xBF]{3}
| xF4[x80-x8F][x80-xBF]{2}
';
}
$regex .= '){1,40} # ...one or more times
)
| . # anything else
/x';
$value['value'] = preg_replace( $regex, '$1', $value['value'] );
if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
$value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
}
continue;
}
// We couldn't use any local conversions, send it to the DB.
$value['db'] = true;
$db_check_string = true;
}
unset( $value ); // Remove by reference.
if ( $db_check_string ) {
$queries = array();
foreach ( $data as $col => $value ) {
if ( ! empty( $value['db'] ) ) {
// We're going to need to truncate by characters or bytes, depending on the length value we have.
if ( isset( $value['length']['type'] ) && 'byte' === $value['length']['type'] ) {
// Using binary causes LEFT() to truncate by bytes.
$charset = 'binary';
} else {
$charset = $value['charset'];
}
if ( $this->charset ) {
$connection_charset = $this->charset;
} else {
if ( $this->use_mysqli ) {
$connection_charset = mysqli_character_set_name( $this->dbh );
} else {
$connection_charset = mysql_client_encoding();
}
}
if ( is_array( $value['length'] ) ) {
$length = sprintf( '%.0f', $value['length']['length'] );
$queries[ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING $charset ), $length ) USING $connection_charset )", $value['value'] );
} elseif ( 'binary' !== $charset ) {
// If we don't have a length, there's no need to convert binary - it will always return the same result.
$queries[ $col ] = $this->prepare( "CONVERT( CONVERT( %s USING $charset ) USING $connection_charset )", $value['value'] );
}
unset( $data[ $col ]['db'] );
}
}
$sql = array();
foreach ( $queries as $column => $query ) {
if ( ! $query ) {
continue;
}
$sql[] = $query . " AS x_$column";
}
$this->check_current_query = false;
$row = $this->get_row( 'SELECT ' . implode( ', ', $sql ), ARRAY_A );
if ( ! $row ) {
return new GC_Error( 'gcdb_strip_invalid_text_failure' );
}
foreach ( array_keys( $data ) as $column ) {
if ( isset( $row[ "x_$column" ] ) ) {
$data[ $column ]['value'] = $row[ "x_$column" ];
}
}
}
return $data;
}