SQL Reference > String Functions

String Functions

This page covers functions to manipulate strings in Rockset.

String functions can be used in any part of a query where a string value is in scope, for example in the SELECT, WHERE, and HAVING clauses to name a few.

Basic Functions

CONCAT

CONCAT(x, ...)

Returns the concatenation of the strings given in the arguments. Supports variable number of arguments.

x || ...

Operator form equivalent to CONCAT.

LENGTH

LENGTH(x)

Returns length of string x.

SELECT
    LENGTH('foo')
3
SELECT
    LENGTH('')
0
SELECT
    LENGTH(null)
null

LOWER

LOWER(x)

Returns locale-independent lowercase string x.

SELECT
    LOWER('foo')
'foo'
SELECT
    LOWER('FOO')
'foo'
SELECT
    LOWER('123')
'123'
SELECT
    LOWER('')
''
SELECT
    LOWER(null)
null

LTRIM

LTRIM(string)

Returns string with leading whitespace removed.

SELECT
    LTRIM('    foo')
'foo'
SELECT
    LTRIM('foo    ')
'foo    '
SELECT
    LTRIM('\n\t foo')
'foo'

NORMALIZE

NORMALIZE(string[, form])

Returns Unicode-normalized form of string. form is an identifier and must be one of NFC, NFD, NFKC, NFKD, which are the four Unicode normalization methods; NFC is default.

Note that NORMALIZE uses special syntax; form is an identifier, not a string.

-- no quotes around NFC!
SELECT
    NORMALIZE('hello', NFC)
'hello'

REPLACE

REPLACE(string, search[, replacement])

Returns a string with all instances of search replaced with replacement in string. replacement is optional, which if not specified removes all instance of search from string. When search is an empty string, string is not modified in any way.

SELECT
    REPLACE('foobar', 'bar')
'foo'
SELECT
    REPLACE('foobar', 'bar', '')
'foo'
SELECT
    REPLACE('foobar', 'bar', 'baz')
'foobaz'
SELECT
    REPLACE('barfoobar', 'bar', 'baz')
'bazfoobaz'
SELECT
    REPLACE('foo', 'bar', '')
'foo'
SELECT
    REPLACE('foo', '', 'bar')
'foo'
SELECT
    REPLACE('foo', 'bar', null)
null

RTRIM

RTRIM(string)

Returns string with trailing whitespace removed.

SELECT
    RTRIM('foo    ')
'foo'
SELECT
    RTRIM('    foo')
'    foo'
SELECT
    RTRIM('foo \t\n')
'foo'

SPLIT

SPLIT(string, delimiter[, limit])

Splits string on delimiter and returns an array. With limit, only the first limit - 1 delimiters are split upon, thereby returning an array of size at most limit. The last element in the array always contains everything left in the string in the case where there are >= limit occurrences of the delimiter in the string. limit must be a positive number.

SELECT
    SPLIT('foo.bar', '.')
['foo', 'bar']
SELECT
    SPLIT('foo.bar.', '.')
['foo', 'bar', '']
SELECT
    SPLIT('foo.bar', 'foo')
['', '.bar']
SELECT
    SPLIT('foo.bar', '.', 1)
['foo.bar']
SELECT
    SPLIT('foo.bar', '.', 2)
['foo', 'bar']
SELECT
    SPLIT('foo.bar', '.', 3)
['foo', 'bar']
SELECT
    SPLIT('foo.bar.baz', '.', 2)
['foo', 'bar.baz']
SELECT
    SPLIT('foo.bar.baz', '.', 3)
['foo', 'bar', 'baz']
SELECT
    SPLIT('foo.bar.baz', '.')
['foo', 'bar', 'baz']

STRPOS

STRPOS(string, substring)

Returns the starting position of the first instance of substring in string. Positions start with 1. If not found, 0 is returned.

SELECT
    STRPOS('foo', 'foo')
1
SELECT
    STRPOS('foo', 'bar')
0
SELECT
    STRPOS('foo bar baz', 'bar')
5
SELECT
    STRPOS('foo', '')
1
SELECT
    STRPOS('foo', null)
null

SUBSTR

SUBSTR(string, start[, length])

Returns substring of string starting at character at index given by start (1-based index) and of length length. If length is not given, returns the substring starting at start until the end of string. If start is negative, it rolls over to the end of the string and counts backwards from there.

SELECT
    SUBSTR('abcdef', 2, 3)
'bcd'
SELECT
    SUBSTR('abcdef', 2)
'bcdef'
SELECT
    SUBSTR('abcdef', -1)
'f'
SELECT
    SUBSTR('abcdef', -4, 2)
'cd'

TRIM

TRIM(string)

Returns string with leading and trailing whitespace removed.

SELECT
    TRIM('    foo    ')
'foo'
SELECT
    TRIM('\n\t foo \t\n')
'foo'

UPPER

UPPER(x)

Returns locale-independent uppercase string x.

SELECT
    UPPER('foo')
'FOO'
SELECT
    LOWER('FOO')
'FOO'
SELECT
    LOWER('123')
'123'
SELECT
    UPPER('')
''
SELECT
    UPPER(null)
null

Encoding Functions

FROM_HEX

FROM_HEX(s)

Decodes the hex string s into a bytes value.

SELECT
    FROM_HEX('666f6f')
'Zm9v'
SELECT
    FROM_HEX('626172')
'YmFy'

FROM_UTF8

FROM_UTF8(b)

If b is a bytes value that represents a valid UTF-8 string, return it as a string. Otherwise, raise an error.

SELECT
    FROM_UTF8(bytes 'foo')
'foo'
SELECT
    FROM_UTF8(bytes 'bar')
'bar'

TO_HEX

TO_HEX(b)

Encodes the bytes value b into a hex string representation.

SELECT
    TO_HEX(bytes 'foo')
'666f6f'
SELECT
    TO_HEX(bytes 'bar')
'626172'

TO_UTF8

TO_UTF8(s)

Return the bytes UTF-8 representation of the string value s.

SELECT
    TO_UTF8('foo')
'Zm9v'
SELECT
    TO_UTF8('bar')
'YmFy'

Regular Expression Functions

Rockset uses RE2 syntax for specifying regular expressions. Named capturing groups is not supported, only numbered groups.

REGEXP_EXTRACT

REGEXP_EXTRACT(string, pattern[, group])

Returns the first match of pattern in string, or null if the pattern does not match. If group is specified and greater than zero, returns the groupth capturing group; if group is not specified or is zero, returns the full match.

SELECT
    REGEXP_EXTRACT('foo bar baz', '[a-z]+')
'foo bar baz'
SELECT
    REGEXP_EXTRACT('foo bar baz', '.*bar')
'foo bar'
SELECT
    REGEXP_EXTRACT('foo bar baz', ' [a-z]+ ')
' bar '
SELECT
    REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 1)
'foo'
SELECT
    REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 2)
'baz'

REGEXP_EXTRACT_ALL

REGEXP_EXTRACT_ALL(string, pattern[, group])

Returns an array containing all matches of pattern in string. If group is specified and greater than zero, extracts the groupth capturing group from each match; if group is not specified or is zero, returns the full matches.

SELECT
    REGEXP_EXTRACT_ALL('foo bar baz', '[a-z]+')
['foo', 'bar', 'baz']
SELECT
    REGEXP_EXTRACT_ALL('foo bar baz', 'ba([a-z])', 1)
['r', 'z']
SELECT
    REGEXP_EXTRACT_ALL('foo bar baz', '([a-z])([a-z]+)', 2)
['oo', 'ar', 'az']

REGEXP_LIKE

REGEXP_LIKE(string, pattern)

Returns true if string matches the regular expression pattern, false otherwise.

SELECT
    REGEXP_LIKE('foo bar baz', '.*')
True
SELECT
    REGEXP_LIKE('foo bar baz', '^$')
False
SELECT
    REGEXP_LIKE('foo bar baz', '^foo')
True
SELECT
    REGEXP_LIKE('foo bar baz', '[a-zA-Z0-9 ]+')
True
SELECT
    REGEXP_LIKE('foo bar baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
False
SELECT
    REGEXP_LIKE('foo@bar.baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
True

REGEXP_REPLACE

REGEXP_REPLACE(string, pattern[, replacement])

Returns string with all places where pattern matches replaced with replacement (or erased if replacement is not specified). replacement may use \1\9 escape sequences to refer to capturing groups, or \0 to refer to the entire match.

SELECT
    REGEXP_REPLACE('foo bar baz', ' ', '')
'foobarbaz'
SELECT
    REGEXP_REPLACE('foo bar baz', 'b[a-z]+ ', '')
'foo baz'
SELECT
    REGEXP_REPLACE('foo bar baz', '.* b', 'b')
'baz'
SELECT
    REGEXP_REPLACE(
        'John A. Doe',
        '([A-Z][a-z]+) ([A-Z]\.?) ([A-Z][a-z]+)',
        '\3, \1 \2'
    )
'Doe, John A.'

REGEXP_SPLIT

REGEXP_SPLIT(string, pattern)

Returns an array with the components of string when split by pattern.

SELECT
    REGEXP_SPLIT('foo bar baz', ' ')
['foo', 'bar', 'baz']
SELECT
    REGEXP_SPLIT('foo  bar       baz', ' +')
['foo', 'bar', 'baz']
SELECT
    REGEXP_SPLIT('foo123bar456baz', '[0-9]+')
['foo', 'bar', 'baz']