Source code for rockset.document

"""
Introduction
------------
Document object represents a single record or row in the result retrieved
from executing a query.

.. note:: Document objects are generally instantiated by the Cursor object \
and do not need to be instantiated directly by the application while \
retrieving results of a query.

This class adapts Rockset's SQL types to standard Python types for all the
top level fields retrieved by the query.

+----------------------+--------------------+-------------------------------------+
| Rockset Data Type    | Python Data Type   | Comments                            |
+======================+====================+=====================================+
| SQL NULL             | None               |                                     |
+----------------------+--------------------+-------------------------------------+
| JSON NULL            | None               |                                     |
+----------------------+--------------------+-------------------------------------+
| bool                 | bool               |                                     |
+----------------------+--------------------+-------------------------------------+
| int                  | int                |                                     |
+----------------------+--------------------+-------------------------------------+
| float                | float              |                                     |
+----------------------+--------------------+-------------------------------------+
| string               | str                |                                     |
+----------------------+--------------------+-------------------------------------+
| bytes                | str                | - NOTE: will be changed to bytes    |
|                      |                    |   in the future                     |
+----------------------+--------------------+-------------------------------------+
| array                | list               |                                     |
+----------------------+--------------------+-------------------------------------+
| object               | dict               |                                     |
+----------------------+--------------------+-------------------------------------+
| date                 | datetime.date      |                                     |
+----------------------+--------------------+-------------------------------------+
| datetime             | datetime.datetime  | tzinfo will be None                 |
+----------------------+--------------------+-------------------------------------+
| time                 | datetime.time      |                                     |
+----------------------+--------------------+-------------------------------------+
| timestamp            | str                | - ISO8601 format in UTC timezone    |
|                      |                    | - eg: '2019-11-09T23:14:31.561512Z' |
|                      |                    | - NOTE: will be changed to          |
|                      |                    |   datetime.datetime with tzinfo     |
|                      |                    |   in the future                     |
+----------------------+--------------------+-------------------------------------+
| month_interval       | dict               | - INTERVAL 10 MONTH                 |
|                      |                    |   will return {'value': 10}         |
|                      |                    | - INTERVAL '3-4' YEAR TO MONTH      |
|                      |                    |   will return {'value': 40}         |
+----------------------+--------------------+-------------------------------------+
| microsecond_interval | datetime.timedelta |                                     |
+----------------------+--------------------+-------------------------------------+
| geography.Point      | geojson.Point      |                                     |
+----------------------+--------------------+-------------------------------------+
| geography.LineString | geojson.LineString |                                     |
+----------------------+--------------------+-------------------------------------+
| geography.Polygon    | geojson.Polygon    |                                     |
+----------------------+--------------------+-------------------------------------+


.. note:: Please note that this type adaptation is only done for the top \
level fields returned in a query. If a top level field retrieved by the query \
is a map or an array, then fields nested within that map or an array are \
not adapted to the respective Python data types.

"""

import datetime
import geojson

# all Rockset data types
DATATYPE_META = '__rockset_type'
DATATYPE_INT = 'int'
DATATYPE_FLOAT = 'float'
DATATYPE_BOOL = 'bool'
DATATYPE_STRING = 'string'
DATATYPE_BYTES = 'bytes'
DATATYPE_NULL = 'null'
DATATYPE_NULL_TYPE = 'null_type'
DATATYPE_ARRAY = 'array'
DATATYPE_OBJECT = 'object'
DATATYPE_DATE = 'date'
DATATYPE_DATETIME = 'datetime'
DATATYPE_TIME = 'time'
DATATYPE_TIMESTAMP = 'timestamp'
DATATYPE_MONTH_INTERVAL = 'month_interval'
DATATYPE_MICROSECOND_INTERVAL = 'microsecond_interval'
DATATYPE_GEOGRAPHY = 'geography'


def _date_fromisoformat(s):
    dt = datetime.datetime.strptime(s, '%Y-%m-%d')
    return datetime.date(year=dt.year, month=dt.month, day=dt.day)


def _time_fromisoformat(s):
    try:
        dt = datetime.datetime.strptime(s, '%H:%M:%S.%f')
    except ValueError:
        dt = datetime.datetime.strptime(s, '%H:%M:%S')
    return datetime.time(
        hour=dt.hour,
        minute=dt.minute,
        second=dt.second,
        microsecond=dt.microsecond
    )


def _datetime_fromisoformat(s):
    try:
        dt = datetime.datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        dt = datetime.datetime.strptime(s, '%Y-%m-%dT%H:%M:%S')
    return dt


def _timedelta_from_microseconds(us):
    return datetime.timedelta(microseconds=us)


[docs]class Document(dict): """Represents a single record or row in query results. This is a sub-class of dict. So, treat this object as a dict for all practical purposes. Only the constructor is overridden to handle the type adaptations shown in the table above. """ def __init__(self, *args, **kwargs): """This is a sub-class of dict. So, treat this object as a dict for all practical purposes. Only contains the constructor to handle the type adaptations shown in the table above. """ super(Document, self).__init__(*args, **kwargs) for k in self.keys(): if not isinstance(self[k], dict): continue if DATATYPE_META not in self[k]: continue t = self[k][DATATYPE_META].lower() v = self[k]['value'] if t == DATATYPE_DATE: self[k] = _date_fromisoformat(v) elif t == DATATYPE_TIME: self[k] = _time_fromisoformat(v) elif t == DATATYPE_DATETIME: self[k] = _datetime_fromisoformat(v) elif t == DATATYPE_MICROSECOND_INTERVAL: self[k] = _timedelta_from_microseconds(v) elif t == DATATYPE_GEOGRAPHY: self[k] = geojson.GeoJSON.to_instance(v) def _py_type_to_rs_type(self, v): if isinstance(v, bool): # check for bool before int, since bools are ints too return DATATYPE_BOOL elif isinstance(v, int): return DATATYPE_INT elif isinstance(v, float): return DATATYPE_FLOAT elif isinstance(v, str): return DATATYPE_STRING elif isinstance(v, bytes): return DATATYPE_BYTES elif isinstance(v, type(None)): return DATATYPE_NULL elif isinstance(v, list): return DATATYPE_ARRAY elif isinstance(v, datetime.datetime): # check for datetime first, since datetimes are dates too return DATATYPE_DATETIME elif isinstance(v, datetime.date): return DATATYPE_DATE elif isinstance(v, datetime.time): return DATATYPE_TIME elif isinstance(v, datetime.timedelta): return DATATYPE_MICROSECOND_INTERVAL elif isinstance(v, geojson.GeoJSON): return DATATYPE_GEOGRAPHY elif isinstance(v, dict): # keep this in the end if DATATYPE_META not in v: return DATATYPE_OBJECT return v[DATATYPE_META].lower() def fields(self, columns=None): columns = columns or sorted(self) return [ { 'name': c, 'type': self._py_type_to_rs_type(self.get(c, type(None))) } for c in columns ]
__all__ = [ 'Document', ]