String Functions

This page covers functions to manipulate strings in Rockset.

String functions can be used in any part of a query where a string value is in scope, for example in the SELECT, WHERE, and HAVING clauses to name a few.

#Basic Functions

#CONCAT

CONCAT(x, ...)

Returns the concatenation of the strings given in the arguments. Supports variable number of arguments.

SELECT
    CONCAT('foo')
'foo'
SELECT
    CONCAT('foo', '')
'foo'
SELECT
    CONCAT('foo', 'bar')
'foobar'
SELECT
    CONCAT('foo', 'bar', 'baz')
'foobarbaz'
SELECT
    CONCAT('foo', 3)
Error: 'Arguments to concat() must be string'

Operator form equivalent to CONCAT.

SELECT
    'foo' || ''
'foo'
SELECT
    'foo' || 'bar'
'foobar'
SELECT
    'foo' || 'bar' || 'baz'
'foobarbaz'
SELECT
    'foo' || 3
Error: 'Arguments to concat() must be string'

#JSON_FORMAT

JSON_FORMAT(x)

Converts given JSON to string.

SELECT
    JSON_FORMAT(ARRAY_CREATE(3, 6, 9))
'[3,6,9]'
SELECT
    JSON_FORMAT('hello')
'"hello"'
SELECT
    JSON_FORMAT(true)
'true'
SELECT
    JSON_FORMAT(
        OBJECT(
            ARRAY_CREATE('name', 'age', 'city'),
            ARRAY_CREATE('John', 30, 'New
  York')
        )
    )
'{ "age": 30, "name": "John", "city": "New York" }'
-- Format `null`.
SELECT
    JSON_FORMAT(null)
'null'
-- Format `undefined`.
SELECT
    JSON_FORMAT(undefined)
'{ "__rockset_type": "undefined" }'

#JSON_PARSE

JSON_PARSE(x)

Parses given string as JSON.

SELECT
    JSON_PARSE('[3, 6, 9]')
[3, 6, 9]
SELECT
    JSON_PARSE('"hello"')
'hello'
SELECT
    JSON_PARSE('true')
true
SELECT
    JSON_PARSE('{ name: ' John ', age: 30, city: ' New York ' }')
{ name: 'John', age: 30, city: 'New York' }
SELECT
    JSON_PARSE('{ name: ' John ', age: 30, city: ' New York ' }').city
'New York'
-- Parse `null`.
SELECT
    TYPEOF(JSON_PARSE('null'))
'null'
-- Parse `undefined`.
SELECT
    TYPEOF(JSON_PARSE('{ __rockset_type: ' undefined ' }'))
'undefined'
SELECT
    JSON_PARSE('hello')
Error: 'json parse error on line 0 near `hello`: expected json value'

#LENGTH

LENGTH(obj)

Returns number of elements in obj.

SELECT
    LENGTH('foo')
3
SELECT
    LENGTH('')
0
SELECT
    LENGTH(null)
null

#LOWER

LOWER(x)

Returns locale-independent lowercase string x.

SELECT
    LOWER('foo')
'foo'
SELECT
    LOWER('FOO')
'foo'
SELECT
    LOWER('123')
'123'
SELECT
    LOWER('')
''
SELECT
    LOWER(null)
null

#LTRIM

LTRIM(string [, characters])

Remove the longest string containing only characters from characters (a space by default) from the start of string

SELECT
    LTRIM(' foo')
'foo'
SELECT
    LTRIM('foo ')
'foo '
SELECT
    LTRIM('\n\t foo')
'foo'
SELECT
    LTRIM('yyyyzabc', 'xyz')
'abc'

#NORMALIZE

NORMALIZE(string[, form])

Returns Unicode-normalized form of string. form is an identifier and must be one of NFC, NFD, NFKC, NFKD, which are the four Unicode normalization methods; NFC is default.

Note that NORMALIZE uses special syntax; form is an identifier, not a string.

-- no quotes around NFC!
SELECT
    NORMALIZE('hello', NFC)
'hello'

#REPLACE

REPLACE(string, search[, replacement])

Returns a string with all instances of search replaced with replacement in string. replacement is optional, which if not specified removes all instance of search from string. When search is an empty string, string is not modified in any way.

SELECT
    REPLACE('foobar', 'bar')
'foo'
SELECT
    REPLACE('foobar', 'bar', '')
'foo'
SELECT
    REPLACE('foobar', 'bar', 'baz')
'foobaz'
SELECT
    REPLACE('barfoobar', 'bar', 'baz')
'bazfoobaz'
SELECT
    REPLACE('foo', 'bar', '')
'foo'
SELECT
    REPLACE('foo', '', 'bar')
'foo'
SELECT
    REPLACE('foo', 'bar', null)
null

#RTRIM

RTRIM(string[, characters])

Remove the longest string containing only characters from characters (a space by default) from the end of string

SELECT
    RTRIM('foo ')
'foo'
SELECT
    RTRIM(' foo')
' foo'
SELECT
    RTRIM('foo \t\n')
'foo'
SELECT
    RTRIM('trimxxxx', 'x')
'trim'

#SPLIT

SPLIT(string, delimiter[, limit])

Splits string on delimiter and returns an array. With limit, only the first limit - 1 delimiters are split upon, thereby returning an array of size at most limit. The last element in the array always contains everything left in the string in the case where there are >= limit occurrences of the delimiter in the string. limit must be a positive number.

SELECT
    SPLIT('foo.bar', '.')
["foo", "bar"]
SELECT
    SPLIT('foo.bar.', '.')
["foo", "bar", ""]
SELECT
    SPLIT('foo.bar', 'foo')
["", ".bar"]
SELECT
    SPLIT('foo.bar', '.', 1)
["foo.bar"]
SELECT
    SPLIT('foo.bar', '.', 2)
["foo", "bar"]
SELECT
    SPLIT('foo.bar', '.', 3)
["foo", "bar"]
SELECT
    SPLIT('foo.bar.baz', '.', 2)
["foo", "bar.baz"]
SELECT
    SPLIT('foo.bar.baz', '.', 3)
["foo", "bar", "baz"]
SELECT
    SPLIT('foo.bar.baz', '.')
["foo", "bar", "baz"]

#STRPOS

STRPOS(string, substring)

Returns the starting position of the first instance of substring in string. Positions start with 1. If not found, 0 is returned.

SELECT
    STRPOS('foo', 'foo')
1
SELECT
    STRPOS('foo', 'bar')
0
SELECT
    STRPOS('foo bar baz', 'bar')
5

-- STRPOS of empty string is always 1

SELECT
    STRPOS('foo', '')
1
SELECT
    STRPOS('foo', null)
null

#SUBSTR

SUBSTR(string, start[, length])

Returns substring of string starting at character at index given by start (1-based index) and of length length. If length is not given, returns the substring starting at start until the end of string. If start is negative, it rolls over to the end of the string and counts backwards from there.

SELECT
    SUBSTR('abcdef', 2, 3)
'bcd'
SELECT
    SUBSTR('abcdef', 2)
'bcdef'
SELECT
    SUBSTR('abcdef', -1)
'f'
SELECT
    SUBSTR('abcdef', -4, 2)
'cd'

#TRIM

TRIM(string)

Returns string with leading and trailing whitespace removed.

SELECT
    TRIM(' foo ')
'foo'
SELECT
    TRIM('\n\t foo \t\n')
'foo'

#UPPER

UPPER(x)

Returns locale-independent uppercase string x.

SELECT
    UPPER('foo')
'FOO'
SELECT
    UPPER('FOO')
'FOO'
SELECT
    UPPER('123')
'123'
SELECT
    UPPER('')
''
SELECT
    UPPER(null)
null

#Encoding Functions

#FROM_HEX

FROM_HEX(s)

Decodes the hex string s into a bytes value.

SELECT
    FROM_HEX('666f6f')
'Zm9v'
SELECT
    FROM_HEX('626172')
'YmFy'

#FROM_UTF8

FROM_UTF8(b)

If b is a bytes value that represents a valid UTF-8 string, return it as a string. Otherwise, raise an error.

SELECT
    FROM_UTF8(bytes 'foo')
'foo'
SELECT
    FROM_UTF8(bytes 'bar')
'bar'

#TO_HEX

TO_HEX(b)

Encodes the bytes value b into a hex string representation.

SELECT
    TO_HEX(bytes 'foo')
'666f6f'
SELECT
    TO_HEX(bytes 'bar')
'626172'

#TO_UTF8

TO_UTF8(s)

Return the bytes UTF-8 representation of the string value s.

SELECT
    TO_UTF8('foo')
'Zm9v'
SELECT
    TO_UTF8('bar')
'YmFy'

#Regular Expression Functions

Rockset uses RE2 syntax for specifying regular expressions. Named capturing groups is not supported, only numbered groups.

#REGEXP_EXTRACT

REGEXP_EXTRACT(string, pattern[, group])

Returns the first match of pattern in string, or null if the pattern does not match. If group is specified and greater than zero, returns the groupth capturing group; if group is not specified or is zero, returns the full match.

SELECT
    REGEXP_EXTRACT('foo bar baz', '[a-z]+')
'foo'
SELECT
    REGEXP_EXTRACT('foo bar baz', '.*bar')
'foo bar'
SELECT
    REGEXP_EXTRACT('foo bar baz', ' [a-z]+ ')
' bar '
SELECT
    REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 1)
'foo'
SELECT
    REGEXP_EXTRACT('foo bar baz', '(.*) bar (.*)', 2)
'baz'

#REGEXP_EXTRACT_ALL

REGEXP_EXTRACT_ALL(string, pattern[, group])

Returns an array containing all matches of pattern in string. If group is specified and greater than zero, extracts the groupth capturing group from each match; if group is not specified or is zero, returns the full matches.

SELECT
    REGEXP_EXTRACT_ALL('foo bar baz', '[a-z]+')
["foo", "bar", "baz"]
SELECT
    REGEXP_EXTRACT_ALL('foo bar baz', 'ba([a-z])', 1)
["r", "z"]
SELECT
    REGEXP_EXTRACT_ALL('foo bar baz', '([a-z])([a-z]+)', 2)
["oo", "ar", "az"]

#REGEXP_LIKE

REGEXP_LIKE(string, pattern)

Returns true if string matches the regular expression pattern, false otherwise.

SELECT
    REGEXP_LIKE('foo bar baz', '.*')
true
SELECT
    REGEXP_LIKE('foo bar baz', '^$')
false
SELECT
    REGEXP_LIKE('foo bar baz', '^foo')
true
SELECT
    REGEXP_LIKE('foo bar baz', '[a-zA-Z0-9 ]+')
true
SELECT
    REGEXP_LIKE('foo bar baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
false
SELECT
    REGEXP_LIKE('foo@bar.baz', '[a-z0-9]@[a-z0-9]+\.[a-z]+')
true

#REGEXP_REPLACE

REGEXP_REPLACE(string, pattern[, replacement])

Returns string with all places where pattern matches replaced with replacement (or erased if replacement is not specified). replacement may use \1 .. \9 escape sequences to refer to capturing groups, or \0 to refer to the entire match.

SELECT
    REGEXP_REPLACE('foo bar baz', ' ', '')
'foobarbaz'
SELECT
    REGEXP_REPLACE('foo bar baz', 'b[a-z]+ ', '')
'foo baz'
SELECT
    REGEXP_REPLACE('foo bar baz', '.* b', 'b')
'baz'
SELECT
    REGEXP_REPLACE(
        'John A. Doe',
        '([A-Z][a-z]+) ([A-Z]\.?) ([A-Z][a-z]+)',
        '\3, \1 \2'
    )
'Doe, John A.'

#REGEXP_SPLIT

REGEXP_SPLIT(string, pattern)

Returns an array with the components of string when split by pattern.

SELECT
    REGEXP_SPLIT('foo bar baz', ' ')
["foo", "bar", "baz"]
SELECT
    REGEXP_SPLIT('foo bar baz', ' +')
["foo", "bar", "baz"]
SELECT
    REGEXP_SPLIT('foo123bar456baz', '[0-9]+')
["foo", "bar", "baz"]

#SPLIT_PART

SPLIT_PART(string, delimiter, index)

Splits string on delimiter and returns the field index. Field indexes start with 1. If the index is larger than than the number of fields, then null is returned.

SELECT
    SPLIT_PART('a:b:c:d', ':', 2)
'b'
SELECT
    SPLIT_PART('a:b:c', ',', 3)
null
SELECT
    SPLIT_PART('a:b:c:d', ':', 10)
null
SELECT
    SPLIT_PART('a::c:d', ':', 2)
''
SELECT
    SPLIT_PART('a:b:c', ',', 1)
'a:b:c'
Join us on Slack!
Building on Rockset? Come chat with us!