Test and Validate Transforms
Using the validator
API Endpoint
The validator
API endpoint allows you to test your transform with sample data. It will interpret both the transform and data and output how Hydrolix will parse and enrich it.
Example:
curl -s \
-H 'content-type: application/json' \
-H 'x-hdx-table: sample_project.sample_table' \
-H 'x-hdx-test: true' \
https://$host/validator \
--data-binary '
{
"transform": {
"settings": {
"sql_transform": "SELECT primary, -some_int32 AS `some_int32`, COALESCE(some_other_int32, 789) AS `some_other_int32` FROM {STREAM}",
"output_columns": [
{
"name": "primary",
"datatype": {
"type": "datetime",
"format": "02/01/2006 15:04:05",
"primary": true
}
},
{
"name": "some_int32",
"datatype": {
"type": "int32"
}
},
{
"name": "some_other_int32",
"datatype": {
"type": "int32"
}
}
]
}
},
"data": [
{"primary": "14/06/2022 13:06:47", "some_int32": 99, "some_other_int32": 1234},
{"primary": "14/06/2022 13:06:48", "some_int32": 99},
{"primary": "14/06/2022 13:06:49", "some_int32": 123},
{"primary": "14/06/2022 13:06:50", "some_int32": 456}
]
}
Response:
HTTP/1.1 200 OK
Content-Type: application/json; charset=utf-8
Date: Mon, 18 Jul 2022 08:49:37 GMT
Connection: close
Transfer-Encoding: chunked
{
"code": 200,
"message": "success",
"project": {
"name": "",
"uuid": ""
},
"table": {
"name": "",
"uuid": ""
},
"transform": {
"name": "",
"uuid": ""
},
"parsed": [
[
{
"name": "primary",
"datatype": "datetime",
"value": "2022-06-14 13:06:47"
},
{
"name": "some_int32",
"datatype": "int32",
"value": 99
},
{
"name": "some_other_int32",
"datatype": "int32",
"value": 1234
}
],
[
{
"name": "primary",
"datatype": "datetime",
"value": "2022-06-14 13:06:48"
},
{
"name": "some_int32",
"datatype": "int32",
"value": 99
},
{
"name": "some_other_int32",
"datatype": "int32",
"value": null
}
],
[
{
"name": "primary",
"datatype": "datetime",
"value": "2022-06-14 13:06:49"
},
{
"name": "some_int32",
"datatype": "int32",
"value": 123
},
{
"name": "some_other_int32",
"datatype": "int32",
"value": null
}
],
[
{
"name": "primary",
"datatype": "datetime",
"value": "2022-06-14 13:06:50"
},
{
"name": "some_int32",
"datatype": "int32",
"value": 456
},
{
"name": "some_other_int32",
"datatype": "int32",
"value": null
}
]
]
"partition": "",
"bytes": 0,
"rows": 0,
"columns": 0,
"min_timestamp": 0,
"max_timestamp": 0,
"mem_size": 0,
"shard_key_hash": "",
"test_response": {
"data": [
[
"2022-06-14 13:06:47",
-99,
1234
],
[
"2022-06-14 13:06:48",
-99,
789
],
[
"2022-06-14 13:06:49",
-123,
789
],
[
"2022-06-14 13:06:50",
-456,
789
]
],
"meta": [
{
"name": "primary",
"type": "DateTime"
},
{
"name": "some_int32",
"type": "Nullable(Int32)"
},
{
"name": "some_other_int32",
"type": "Nullable(Int32)"
}
],
"rows": 4,
"statistics": {
"bytes_read": 56,
"elapsed": 0.000934846,
"rows_read": 4
}
},
"indexing_sql": "SELECT `primary`::DateTime as `primary`,`some_int32`::Nullable(Int32) as `some_int32`,`some_other_int32`::Nullable(Int32) as `some_other_int32` FROM (SELECT `primary`::DateTime as `primary`,`some_int32`::Nullable(Int32) as `some_int32`,`some_other_int32`::Nullable(Int32) as `some_other_int32` FROM (SELECT primary, -some_int32 AS `some_int32`, COALESCE(some_other_int32, 789) AS `some_other_int32` FROM file('e81c5d8c-4804-4dd0-812d-e331e3bad1c0.json', 'JSONCompactEachRow', '`primary` DateTime,`some_int32` Nullable(Int32),`some_other_int32` Nullable(Int32)'))) FORMAT JSONCompact SETTINGS hdx_format_work_dir='', hdx_format_sort_keys='', hdx_format_json_schema='[{\"name\":\"primary\",\"datatype\":{\"type\":\"datetime\",\"primary\":true,\"format\":\"%F %T\",\"resolution\":\"s\"}},{\"name\":\"some_int32\",\"datatype\":{\"type\":\"int32\"}},{\"name\":\"some_other_int32\",\"datatype\":{\"type\":\"int32\"}}]', hdx_format_metadata='{\"project\":{\"id\":\"bb8c48f4-7ac2-4de7-99c7-a3cbc195b265\",\"name\":\"sample_project\",\"revision\":5},\"table\":{\"id\":\"c21408ad-7b48-49e8-88c7-bdd8e0f716f1\",\"name\":\"sample_table\",\"revision\":4},\"transforms\":[{\"id\":\"\",\"name\":\"\"}]}', max_ast_elements=100000, max_expanded_ast_elements=100000"
},
"intermediate_data": {
"/var/lib/indexer/e81c5d8c-4804-4dd0-812d-e331e3bad1c0.json": "WyIyMDIyLTA2LTE0IDEzOjA2OjQ3Iiw5OSwxMjM0XQpbIjIwMjItMDYtMTQgMTM6MDY6NDgiLDk5LG51bGxdClsiMjAyMi0wNi0xNCAxMzowNjo0OSIsMTIzLG51bGxdClsiMjAyMi0wNi0xNCAxMzowNjo1MCIsNDU2LG51bGxdCg=="
}
}
The intermediate_data
is the base64 JSON encoded interpretation of the data, and the indexing SQL is what is used as SQL statement to generate Hydrolix partition based the data.
If there's any errors from any component they'll be output in the response, for example if the indexer is not able to parse the timestamp or the format it'll output an error.
Using the catchall
Feature
catchall
FeatureThe catchall
feature allows you to see the shape of your data before you've applied a transform to it. Example leveraging the catchall
feature:
POST https://$host/validator
Content-Type: application/json
x-hdx-table: sample_project.sample_table
x-hdx-test: true
{
"transform":
{
"name": "demo",
"settings": {
"is_default": true,
"output_columns": [
{
"name": "date",
"datatype": {
"type": "datetime",
"format": "2006-01-01",
"resolution": "seconds",
"primary": true
}
},
{
"name": "catchall",
"datatype": {
"type": "map",
"elements":
[
{
"type": "string"
},
{
"type": "string"
}
],
"catch_all": true
}
}
],
"compression": "none",
"format_details": {
"flattening": {
"active": true,
"depth": 1,
"map_flattening_strategy": {
"left": "."
}
}
}
},
"type": "json"
},
"data": [ {"timestamp": "2022-06-13 06:03:05.579 +00:00", "component": "query_executor", "level":"info", "user":"anonymous", "duration": 552, "query": "'select version()'", "message": "comment='' admin_comment=''"},
{"timestamp": "2022-06-13 06:03:06.078 +00:00", "component": "query_executor", "level":"info", "user":"anonymous", "duration": 552, "query": "'select version()'", "message": "comment='' admin_comment=''"},
{"timestamp": "2022-06-13 06:03:06.086 +00:00", "component": "query_executor", "level":"info", "message": "user=anonymous duration_ms=0 comment='' admin_comment='' query='select version()'"}]
}
Here any json key value will be interpreted as a map and put in the catchall column:
"parsed": [
[
{
"name": "date",
"datatype": "datetime",
"value": null
},
{
"name": "catchall",
"datatype": "map",
"value": {
"component": "query_executor",
"duration": "552",
"level": "info",
"message": "comment='' admin_comment=''",
"query": "'select version()'",
"timestamp": "2022-06-13 06:03:05.579 +00:00",
"user": "anonymous"
}
}
]
Updated 6 months ago