Testing and Validating Transforms

Using the validator API Endpoint

The validator API endpoint allows you to test your transform with sample data.
It'll interpret both the transform and data and output how Hydrolix will parse and enrich it.

Example:

curl -s \
     -H 'content-type: application/json' \
     -H 'x-hdx-table: sample_project.sample_table' \
     -H 'x-hdx-test: true' \
     https://$host/validator \
     --data-binary '
{
  "transform": {
    "settings": {
      "sql_transform": "SELECT primary, -some_int32 AS `some_int32`, COALESCE(some_other_int32, 789) AS `some_other_int32` FROM {STREAM}",
      "output_columns": [
        {
          "name": "primary",
          "datatype": {
            "type": "datetime",
            "format": "02/01/2006 15:04:05",
            "primary": true
          }
        },
        {
          "name": "some_int32",
          "datatype": {
            "type": "int32"
          }
        },
        {
          "name": "some_other_int32",
          "datatype": {
            "type": "int32"
          }
        }
      ]
    }
  },
  "data": [
    {"primary": "14/06/2022 13:06:47", "some_int32": 99, "some_other_int32": 1234},
    {"primary": "14/06/2022 13:06:48", "some_int32": 99},
    {"primary": "14/06/2022 13:06:49", "some_int32": 123},
    {"primary": "14/06/2022 13:06:50", "some_int32": 456}
  ]
}

Response:

HTTP/1.1 200 OK
Content-Type: application/json; charset=utf-8
Date: Mon, 18 Jul 2022 08:49:37 GMT
Connection: close
Transfer-Encoding: chunked

{
  "code": 200,
  "message": "success",
  "project": {
    "name": "",
    "uuid": ""
  },
  "table": {
    "name": "",
    "uuid": ""
  },
  "transform": {
    "name": "",
    "uuid": ""
  },
  "parsed": [
    [
      {
        "name": "primary",
        "datatype": "datetime",
        "value": "2022-06-14 13:06:47"
      },
      {
        "name": "some_int32",
        "datatype": "int32",
        "value": 99
      },
      {
        "name": "some_other_int32",
        "datatype": "int32",
        "value": 1234
      }
    ],
    [
      {
        "name": "primary",
        "datatype": "datetime",
        "value": "2022-06-14 13:06:48"
      },
      {
        "name": "some_int32",
        "datatype": "int32",
        "value": 99
      },
      {
        "name": "some_other_int32",
        "datatype": "int32",
        "value": null
      }
    ],
    [
      {
        "name": "primary",
        "datatype": "datetime",
        "value": "2022-06-14 13:06:49"
      },
      {
        "name": "some_int32",
        "datatype": "int32",
        "value": 123
      },
      {
        "name": "some_other_int32",
        "datatype": "int32",
        "value": null
      }
    ],
    [
      {
        "name": "primary",
        "datatype": "datetime",
        "value": "2022-06-14 13:06:50"
      },
      {
        "name": "some_int32",
        "datatype": "int32",
        "value": 456
      },
      {
        "name": "some_other_int32",
        "datatype": "int32",
        "value": null
      }
    ]
  ]
    "partition": "",
    "bytes": 0,
    "rows": 0,
    "columns": 0,
    "min_timestamp": 0,
    "max_timestamp": 0,
    "mem_size": 0,
    "shard_key_hash": "",
    "test_response": {
      "data": [
        [
          "2022-06-14 13:06:47",
          -99,
          1234
        ],
        [
          "2022-06-14 13:06:48",
          -99,
          789
        ],
        [
          "2022-06-14 13:06:49",
          -123,
          789
        ],
        [
          "2022-06-14 13:06:50",
          -456,
          789
        ]
      ],
      "meta": [
        {
          "name": "primary",
          "type": "DateTime"
        },
        {
          "name": "some_int32",
          "type": "Nullable(Int32)"
        },
        {
          "name": "some_other_int32",
          "type": "Nullable(Int32)"
        }
      ],
      "rows": 4,
      "statistics": {
        "bytes_read": 56,
        "elapsed": 0.000934846,
        "rows_read": 4
      }
    },
    "indexing_sql": "SELECT `primary`::DateTime as `primary`,`some_int32`::Nullable(Int32) as `some_int32`,`some_other_int32`::Nullable(Int32) as `some_other_int32` FROM (SELECT `primary`::DateTime as `primary`,`some_int32`::Nullable(Int32) as `some_int32`,`some_other_int32`::Nullable(Int32) as `some_other_int32` FROM (SELECT primary, -some_int32 AS `some_int32`, COALESCE(some_other_int32, 789) AS `some_other_int32` FROM file('e81c5d8c-4804-4dd0-812d-e331e3bad1c0.json', 'JSONCompactEachRow', '`primary` DateTime,`some_int32` Nullable(Int32),`some_other_int32` Nullable(Int32)'))) FORMAT JSONCompact SETTINGS hdx_format_work_dir='', hdx_format_sort_keys='', hdx_format_json_schema='[{\"name\":\"primary\",\"datatype\":{\"type\":\"datetime\",\"primary\":true,\"format\":\"%F %T\",\"resolution\":\"s\"}},{\"name\":\"some_int32\",\"datatype\":{\"type\":\"int32\"}},{\"name\":\"some_other_int32\",\"datatype\":{\"type\":\"int32\"}}]', hdx_format_metadata='{\"project\":{\"id\":\"bb8c48f4-7ac2-4de7-99c7-a3cbc195b265\",\"name\":\"sample_project\",\"revision\":5},\"table\":{\"id\":\"c21408ad-7b48-49e8-88c7-bdd8e0f716f1\",\"name\":\"sample_table\",\"revision\":4},\"transforms\":[{\"id\":\"\",\"name\":\"\"}]}', max_ast_elements=100000, max_expanded_ast_elements=100000"
  },
  "intermediate_data": {
    "/var/lib/indexer/e81c5d8c-4804-4dd0-812d-e331e3bad1c0.json": "WyIyMDIyLTA2LTE0IDEzOjA2OjQ3Iiw5OSwxMjM0XQpbIjIwMjItMDYtMTQgMTM6MDY6NDgiLDk5LG51bGxdClsiMjAyMi0wNi0xNCAxMzowNjo0OSIsMTIzLG51bGxdClsiMjAyMi0wNi0xNCAxMzowNjo1MCIsNDU2LG51bGxdCg=="
  }
}

The intermediate_data is the base64 JSON encoded interpretation of the data, and the indexing SQL is what is used as SQL statement to generate Hydrolix partition based the data.
If there's any errors from any component they'll be output in the response, for example if the indexer is not able to parse the timestamp or the format it'll output an error.

Using the catchall Feature

The catchall feature allows you to see your the shape of your data before you've applied a transform to it.

Example leveraging the catchall feature:

POST https://$host/validator
Content-Type: application/json
x-hdx-table: sample_project.sample_table
x-hdx-test: true

{
    "transform":
    {
    "name": "demo",
    "settings": {
        "is_default": true,
        "output_columns": [
            {
                "name": "date",
                "datatype": {
                    "type": "datetime",
                    "format": "2006-01-01",
                    "resolution": "seconds",
                    "primary": true
                }
            },
            {
                "name": "catchall",
                "datatype": {
                    "type": "map",
                    "elements":
                    [
                        {
                            "type": "string"
                        },
                        {
                            "type": "string"
                        }
                    ],
                    "catch_all": true
                }
            }
        ],
        "compression": "none",
        "format_details": {
            "flattening": {
                "active": true,
                "depth": 1,
                "map_flattening_strategy": {
                    "left": "."
                }
            }
        }
    },
    "type": "json"
  },
  "data": [ {"timestamp": "2022-06-13 06:03:05.579 +00:00", "component": "query_executor", "level":"info", "user":"anonymous", "duration": 552, "query": "'select version()'", "message": "comment='' admin_comment=''"},
{"timestamp": "2022-06-13 06:03:06.078 +00:00", "component": "query_executor", "level":"info", "user":"anonymous", "duration": 552, "query": "'select version()'", "message": "comment='' admin_comment=''"},
{"timestamp": "2022-06-13 06:03:06.086 +00:00", "component": "query_executor", "level":"info", "message": "user=anonymous duration_ms=0 comment='' admin_comment='' query='select version()'"}]
}

Here any json key value will be interpreted as a map and put in the catchall column:

"parsed": [
    [
      {
        "name": "date",
        "datatype": "datetime",
        "value": null
      },
      {
        "name": "catchall",
        "datatype": "map",
        "value": {
          "component": "query_executor",
          "duration": "552",
          "level": "info",
          "message": "comment='' admin_comment=''",
          "query": "'select version()'",
          "timestamp": "2022-06-13 06:03:05.579 +00:00",
          "user": "anonymous"
        }
      }
    ]

Did this page help you?