Basic Functions

CONCAT

CONCAT(x, ...) Returns the concatenation of the strings given in the arguments. Supports variable number of arguments. null arguments are ignored.

SELECT CONCAT('foo')
'foo'
 SELECT CONCAT('foo', '')
'foo'
 SELECT CONCAT('foo', 'bar')
'foobar'
 SELECT CONCAT('foo', 'bar', 'baz')
'foobarbaz'
 SELECT CONCAT('foo', null, 'bar')
'foobar'

Operator form equivalent to CONCAT, except that an argument of null will result in null.


 SELECT 'foo' || ''
'foo'

 SELECT 'foo' || 'bar'
'foobar'

 SELECT 'foo' || 'bar' || 'baz'
'foobarbaz'

 SELECT 'foo' || null || 'bar'
null

EDIT_DISTANCE

EDIT_DISTANCE(s1, s2[ALGORITHM algorithm]) can be used to calculate the edit distance between two strings s1 and s2 according to the algorithm specified (which defaults to 'damerau-levenshtein' if not specified).

Valid algorithms are 'levenshtein' and 'damerau-levenshtein' (default).

SELECT EDIT_DISTANCE('rockset', 'rocket')
1
SELECT EDIT_DISTANCE('rockset', 'rockest')
1
SELECT EDIT_DISTANCE('rockset', 'rockest' ALGORITHM 'levenshtein')
2
SELECT EDIT_DISTANCE('rockset', 'rocky')
3

Rockset will automatically optimize queries (like the below), that are predicated on the edit distance being less than a boundary.

SELECT s1, s2 FROM c WHERE EDIT_DISTANCE(s1, s2) <= 2

FORMAT

FORMAT(format_str, ...) Formats the arguments according to the format string. Will work with integers, floating point numbers, and strings. Follows the format string syntax of fmt: https://fmt.dev/latest/syntax.html

SELECT FORMAT('{} + {} + {}', 'one', 'two', 'three')
'one + two + three'
SELECT FORMAT('{:.2f}', 3.14159)
'3.14'
SELECT FORMAT('hex: {:#x}', 97)
'hex: 0x61'

JSON_FORMAT

JSON_FORMAT(x) Converts given JSON to string.

 SELECT JSON_FORMAT(ARRAY_CREATE(3, 6, 9))
'[3,6,9]'
 SELECT JSON_FORMAT('hello')
'"hello"'

 SELECT JSON_FORMAT(true)
'true'
  SELECT JSON_FORMAT(OBJECT(
    ARRAY_CREATE('name', 'age', 'city'),
    ARRAY_CREATE('John', 30, 'New York')))
'{"name":"John","city":"New York","age":30}'

 -- Format `null`.
 SELECT JSON_FORMAT(null)
'null'

 -- Format `undefined`.
 SELECT JSON_FORMAT(undefined)
'{"__rockset_type":"undefined"}'

JSON_PARSE

JSON_PARSE(x) Parses given string as JSON.


 SELECT JSON_PARSE('[3, 6, 9]')
[3, 6, 9]

 SELECT JSON_PARSE('"hello"')
'hello'

 SELECT JSON_PARSE('true')
true

 SELECT JSON_PARSE('{ "name": "John", "age": 30, "city": "New York" }')
{"name": "John", "age": 30, "city": "New York"}

 SELECT JSON_PARSE('{ "name": "John", "age": 30, "city": "New York" }').city
'New York'

 -- Parse `null`.
SELECT TYPEOF(JSON_PARSE('null'))
'null'

  -- Parse `undefined`.
 SELECT TYPEOF(JSON_PARSE('{ "__rockset_type": "undefined" }'))
'undefined'
 SELECT JSON_PARSE('hello')
Error: 'json parse error on line 0 near `hello`: expected json value'
SELECT JSON_PARSE('{ "name": "John", "age": 30, "city": "New York" }') AS *
+-----+------------+--------+ | age | city | name | +-----+------------+--------+ | 30 | "New York" | "John" | +-----+------------+--------+

PG_ARRAY_PARSE

PG_ARRAY_PARSE(x, typestr) Parses a string encoded using Postgres array output syntax. typestr should be a string holding the Postgres array type, such as 'integer[]' or 'text[]'. Int, float, and string arrays of any depth can be decoded.


 SELECT PG_ARRAY_PARSE('{foo,bar,NULL,null}', 'text[]')
["foo", "bar", null, null]

 SELECT PG_ARRAY_PARSE('{"foo \\","\"bar\"","NULL"}', 'char[]')
["foo \\", "\"bar\"", "NULL"]

 SELECT PG_ARRAY_PARSE('{{1,2},{3,4}}', 'integer[][]')
[[1, 2], [3, 4]]

 SELECT PG_ARRAY_PARSE('{{{Infinity}},{{-Infinity}}}', 'float[][]')
[[[Infinity]], [[-Infinity]]]

Supported types for the second argument of PG_ARRAY_PARSE:

  • bigint[]
  • char[]
  • double precision[]
  • double[]
  • float[]
  • int64[]
  • int[]
  • integer[]
  • smallint[]
  • string[]
  • text[]
  • varchar[]

DEC128_FORMAT

DEC128_FORMAT(low, high) Converts from the IEEE 754 decimal128 floating point format to a string using scientific notation. low and high should be integers holding the bottom and top 64 bits of the binary representation, respectively.

 SELECT DEC128_FORMAT(10, 3476215962376601600)
"+10E-1"
 SELECT DEC128_FORMAT(5000, 3475653012423180288)
"+5000E-2"
 /* Casting to float may lose precision, but allows arithmetic */
 SELECT CAST(DEC128_FORMAT(5000, 3475653012423180288) AS float)
50
 SELECT DEC128_FORMAT(0, 8646911284551352320)
"+Inf"
 SELECT DEC128_FORMAT(0, 8935141660703064064)
"+NaN"

LENGTH

LENGTH(obj) Returns number of elements in obj.

 SELECT LENGTH('foo')
3
 SELECT LENGTH('')
0
 SELECT LENGTH(null)
null

LOWER

LOWER(x) Returns locale-independent lowercase string x.

 SELECT LOWER('foo')
'foo'
 SELECT LOWER('FOO')
'foo'
 SELECT LOWER('123')
'123'
 SELECT LOWER('')
''
 SELECT LOWER(null)
null

LPAD

LPAD(string, targetLength, padString) Pads string from the left to targetLength using padString. If targetLength is less than the length of string, the result will be truncated to the targetLength.

 SELECT LPAD('foo', 6, '!$')
'!$!foo'
 SELECT LPAD('foo', 5, '!')
'!!foo'
 SELECT LPAD('foobar', 3, '!')
'foo'

LTRIM

LTRIM(string [, characters]) Remove the longest string containing only characters from characters (a space by default) from the start of string

 SELECT LTRIM(' foo')
'foo'
 SELECT LTRIM('foo ')
'foo '
 SELECT LTRIM('
    foo')
'foo'
 SELECT LTRIM('yyyyzabc', 'xyz')
'abc'

NORMALIZE

NORMALIZE(string[, form]) Returns Unicode-normalized form of string. form is an identifier and must be one of NFC, NFD, NFKC, NFKD, which are the four Unicode normalization methods; NFC is default.

Note that NORMALIZE uses special syntax; form is an identifier, not a string.

 -- no quotes around NFC!
SELECT NORMALIZE('hello', NFC)
'hello'

REPLACE

REPLACE(string, search[, replacement]) Returns a string with all instances of search replaced with replacement in string. replacement is optional, which if not specified removes all instance of search from string. When search is an empty string, string is not modified in any way.

 SELECT REPLACE('foobar', 'bar')
'foo'
 SELECT REPLACE('foobar', 'bar', '')
'foo'
 SELECT REPLACE('foobar', 'bar', 'baz')
'foobaz'
 SELECT REPLACE('barfoobar', 'bar', 'baz')
'bazfoobaz'
 SELECT REPLACE('foo', 'bar', '')
'foo'
 SELECT REPLACE('foo', '', 'bar')
'foo'
 SELECT REPLACE('foo', 'bar', null)
null

RPAD

RPAD(string, targetLength, padString) Pads string from the right to targetLength using padString. If targetLength is less than the length of string, the result will be truncated to the targetLength.

 SELECT RPAD('foo', 6, '!$')
'foo!$!'
 SELECT RPAD('foo', 5, '!')
'foo!!'
 SELECT RPAD('foobar', 3, '!')
'foo'

RTRIM

RTRIM(string[, characters]) Remove the longest string containing only characters from characters (a space by default) from the end of string

 SELECT RTRIM('foo ')
'foo'
 SELECT RTRIM(' foo')
' foo'
 SELECT RTRIM('  foo  ')
' foo'
 SELECT RTRIM('trimxxxx', 'x')
'trim'

SPLIT

SPLIT(string, delimiter[, limit]) Splits string on delimiter and returns an array. With limit, only the first limit - 1 delimiters are split upon, thereby returning an array of size at most limit. The last element in the array always contains everything left in the string in the case where there are >= limit occurrences of the delimiter in the string. limit must be a positive number.

 SELECT SPLIT('foo.bar', '.')
["foo", "bar"]
 SELECT SPLIT('foo.bar.', '.')
["foo", "bar", ""]
 SELECT SPLIT('foo.bar', 'foo')
["", ".bar"]
 SELECT SPLIT('foo.bar', '.', 1)
["foo.bar"]
 SELECT SPLIT('foo.bar', '.', 2)
["foo", "bar"]
 SELECT SPLIT('foo.bar', '.', 3)
["foo", "bar"]
 SELECT SPLIT('foo.bar.baz', '.', 2)
["foo", "bar.baz"]
 SELECT SPLIT('foo.bar.baz', '.', 3)
["foo", "bar", "baz"]
 SELECT SPLIT('foo.bar.baz', '.')
["foo", "bar", "baz"]

STRPOS

STRPOS(string, substring) Returns the starting position of the first instance of substring in string. Positions start with 1. If not found, 0 is returned.

 SELECT STRPOS('foo', 'foo')
1
 SELECT STRPOS('foo', 'bar')
0
 SELECT STRPOS('foo bar baz', 'bar')
5

-- STRPOS of empty string is always 1

 SELECT STRPOS('foo', '')
1
 SELECT STRPOS('foo', null)
null

SUBSTR

SUBSTR(string, start[, length]) Returns substring of string starting at character at index given by start (1-based index) and of length length. If length is not given, returns the substring starting at start until the end of string. If start is negative, it rolls over to the end of the string and counts backwards from there.

 SELECT SUBSTR('abcdef', 2, 3)
'bcd'
 SELECT SUBSTR('abcdef', 2)
'bcdef'
 SELECT SUBSTR('abcdef', -1)
'f'
 SELECT SUBSTR('abcdef', -4, 2)
'cd'

SUFFIXES

SUFFIXES(string, truncate_length) Returns an array of all suffixes of string where each returned suffix is truncated to truncate_length. If not specified, the default truncate_length used is 100. The max length allowed for the input string is 100,000.

 SELECT SUFFIXES('hello')
["hello", "ello", "llo", "lo", "o"]
 SELECT SUFFIXES('hello', 2)
["he", "el", "ll", "lo", "o"]
 SELECT SUFFIXES('hello', 3)
["hel", "ell", "llo", "lo", "o"]
 SELECT SUFFIXES(null)
null

PREFIXES

PREFIXES(string, max_length) Returns an array of all prefixes of string whose length is less than or equal to max_length. If not specified, the default max_length used is 100. The max length allowed for the input string is 100,000.

 SELECT PREFIXES('hello')
["h", "he", "hel", "hell", "hello"]
 SELECT PREFIXES('hello', 2)
["h", "he"]
 SELECT PREFIXES(null, 1)
null

NGRAMS

NGRAMS(string, n_length) Returns an array of all ngrams of string of length n_length. The max length allowed for the input string is 100,000.

 SELECT NGRAMS('hello', 2)
["he", "el", "ll", "lo"]
 SELECT NGRAMS('hello', 1)
["h", "e", "l", "l", "o"]
 SELECT NGRAMS(null, 1)
null

NGRAMS(string, min_length, max_length) Returns an array of all ngrams of string that are between min_length and max_length in size. The max length allowed for the input string is 100,000.

 SELECT NGRAMS('hello', 1, 3)
["h", "he", "hel", "e", "el", "ell", "l", "ll", "llo", "l", "lo", "o"]
 SELECT NGRAMS('hello', 1, 1)
["h", "e", "l", "l", "o"]
 SELECT NGRAMS(null, 1, 1)
null

🚧

Using NGRAMS in search queries with CONTAINS

If you are using NGRAMS in search queries with CONTAINS, please note from the CONTAINS documentation that all strings are lower-cased when using the CONTAINS function. This means that for accurate search results, you must apply the LOWER function before the NGRAMS function to get an accurate search result.

SELECT NGRAMS(LOWER('Hello'), 2)

TOKENIZE

TOKENIZE(text[, locale]) Tokenize text into array of strings, normalized and lower-cased. text is interpreted as text in the language specified by locale. locale is specified using the ICU format. The default is en_US_POSIX. This function is usually used in an ingest transformation to convert a text field into array of strings at the time of document ingestion. Once an input text field is converted to array of strings, you can use SEARCH function in a SQL query to perform text search using this array of strings.

SQL commandResult
SELECT TOKENIZE('Hello world')["hello","world"]
SELECT TOKENIZE('Hi, how are you?')["hi","how","are","you"]

TRIM

TRIM(string [, characters]) Remove the longest string containing only characters from characters (a space by default) from the start of string

 SELECT TRIM(' foo ')
'foo'
 SELECT TRIM('
    foo
 ')
'foo'

UPPER

UPPER(x) Returns locale-independent uppercase string x.

 SELECT UPPER('foo')
'FOO'
 SELECT UPPER('FOO')
'FOO'
 SELECT UPPER('123')
'123'
 SELECT UPPER('')
''
 SELECT UPPER(null)
null