From f722288ed17f6cfe23ab00346dbfbe4eb5ef1383 Mon Sep 17 00:00:00 2001 From: Sriramajeyam Sugumaran <153843+yesoreyeram@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:24:55 +0000 Subject: [PATCH 1/2] some tests for html parsing --- .../golden/html_backend_url_default.jsonc | 299 ++++++++++++++++++ .../golden/html_default_url_default.jsonc | 207 ++++++++++++ pkg/testsuite/handler_querydata_test.go | 172 ++++++++++ 3 files changed, 678 insertions(+) create mode 100644 pkg/testsuite/golden/html_backend_url_default.jsonc create mode 100644 pkg/testsuite/golden/html_default_url_default.jsonc diff --git a/pkg/testsuite/golden/html_backend_url_default.jsonc b/pkg/testsuite/golden/html_backend_url_default.jsonc new file mode 100644 index 00000000..d4b7dcda --- /dev/null +++ b/pkg/testsuite/golden/html_backend_url_default.jsonc @@ -0,0 +1,299 @@ +// 🌟 This was machine generated. Do not edit. 🌟 +// +// Frame[0] { +// "typeVersion": [ +// 0, +// 0 +// ], +// "custom": { +// "query": { +// "refId": "", +// "type": "html", +// "format": "", +// "source": "url", +// "url": "http://127.0.0.1:8080", +// "url_options": { +// "method": "", +// "params": null, +// "headers": null, +// "data": "", +// "body_type": "", +// "body_content_type": "", +// "body_form": null, +// "body_graphql_query": "", +// "body_graphql_variables": "" +// }, +// "data": "", +// "parser": "backend", +// "filterExpression": "", +// "summarizeExpression": "", +// "summarizeBy": "", +// "uql": "", +// "groq": "", +// "sqlite_query": "", +// "csv_options": { +// "delimiter": "", +// "skip_empty_lines": false, +// "skip_lines_with_error": false, +// "relax_column_count": false, +// "columns": "", +// "comment": "" +// }, +// "json_options": { +// "root_is_not_array": false, +// "columnar": false +// }, +// "root_selector": "html.body.table.tbody.tr", +// "columns": [ +// { +// "selector": "td.0", +// "text": "name", +// "type": "string", +// "timestampFormat": "" +// }, +// { +// "selector": 
"td.1.#content", +// "text": "age", +// "type": "number", +// "timestampFormat": "" +// }, +// { +// "selector": "td.2", +// "text": "country", +// "type": "string", +// "timestampFormat": "" +// }, +// { +// "selector": "td.3", +// "text": "occupation", +// "type": "string", +// "timestampFormat": "" +// }, +// { +// "selector": "td.4.#content", +// "text": "salary", +// "type": "number", +// "timestampFormat": "" +// } +// ], +// "computed_columns": [], +// "filters": null, +// "seriesCount": 0, +// "expression": "", +// "alias": "", +// "dataOverrides": null, +// "global_query_id": "", +// "query_mode": "" +// }, +// "data": "\u003c!DOCTYPE html\u003e\n\u003chtml lang=\"en\"\u003e\n \u003chead\u003e\n \u003cmeta charset=\"UTF-8\" /\u003e\n \u003cmeta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" /\u003e\n \u003cmeta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" /\u003e\n \u003ctitle\u003eUsers\u003c/title\u003e\n \u003c/head\u003e\n \u003cbody\u003e\n \u003ctable class=\"table table-bordered table-hover table-condensed\"\u003e\n \u003cthead\u003e\n \u003ctr\u003e\n \u003cth title=\"Field #1\"\u003ename\u003c/th\u003e\n \u003cth title=\"Field #2\"\u003eage\u003c/th\u003e\n \u003cth title=\"Field #3\"\u003ecountry\u003c/th\u003e\n \u003cth title=\"Field #4\"\u003eoccupation\u003c/th\u003e\n \u003cth title=\"Field #5\"\u003esalary\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Graham\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eDevops Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eErvin Howell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e27\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2300\u003c/td\u003e\n \u003c/tr\u003e\n 
\u003ctr\u003e\n \u003ctd\u003eClementine Bauch\u003c/td\u003e\n \u003ctd align=\"right\"\u003e17\u003c/td\u003e\n \u003ctd\u003eCanada\u003c/td\u003e\n \u003ctd\u003eStudent\u003c/td\u003e\n \u003ctd align=\"right\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003ePatricia Lebsack\u003c/td\u003e\n \u003ctd align=\"right\"\u003e42\u003c/td\u003e\n \u003ctd\u003eUK\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2800\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Bell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSenior Software Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e4000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eChelsey Dietrich\u003c/td\u003e\n \u003ctd align=\"right\"\u003e32\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3500\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n \u003c/table\u003e\n \u003c/body\u003e\n\u003c/html\u003e\n", +// "responseCodeFromServer": 200, +// "duration": 123, +// "error": "" +// }, +// "executedQueryString": "###############\n## URL\n###############\n\nhttp://127.0.0.1:8080\n\n###############\n## Curl Command\n###############\n\ncurl -X 'GET' 'http://127.0.0.1:8080'" +// } +// Name: response +// Dimensions: 5 Fields by 6 Rows +// +------------------+-----------------+------------------+--------------------------+------------------+ +// | Name: age | Name: country | Name: name | Name: occupation | Name: salary | +// | Labels: | Labels: | Labels: | Labels: | Labels: | +// | Type: []*float64 | Type: []*string | Type: []*string | Type: []*string | Type: []*float64 | +// +------------------+-----------------+------------------+--------------------------+------------------+ +// | 38 | USA | Leanne Graham | Devops Engineer | 3000 | +// | 27 | 
USA | Ervin Howell | Software Engineer | 2300 | +// | 17 | Canada | Clementine Bauch | Student | null | +// | 42 | UK | Patricia Lebsack | Software Engineer | 2800 | +// | 38 | USA | Leanne Bell | Senior Software Engineer | 4000 | +// | 32 | USA | Chelsey Dietrich | Software Engineer | 3500 | +// +------------------+-----------------+------------------+--------------------------+------------------+ +// +// +// 🌟 This was machine generated. Do not edit. 🌟 +{ + "status": 200, + "frames": [ + { + "schema": { + "name": "response", + "meta": { + "typeVersion": [ + 0, + 0 + ], + "custom": { + "query": { + "refId": "", + "type": "html", + "format": "", + "source": "url", + "url": "http://127.0.0.1:8080", + "url_options": { + "method": "", + "params": null, + "headers": null, + "data": "", + "body_type": "", + "body_content_type": "", + "body_form": null, + "body_graphql_query": "", + "body_graphql_variables": "" + }, + "data": "", + "parser": "backend", + "filterExpression": "", + "summarizeExpression": "", + "summarizeBy": "", + "uql": "", + "groq": "", + "sqlite_query": "", + "csv_options": { + "delimiter": "", + "skip_empty_lines": false, + "skip_lines_with_error": false, + "relax_column_count": false, + "columns": "", + "comment": "" + }, + "json_options": { + "root_is_not_array": false, + "columnar": false + }, + "root_selector": "html.body.table.tbody.tr", + "columns": [ + { + "selector": "td.0", + "text": "name", + "type": "string", + "timestampFormat": "" + }, + { + "selector": "td.1.#content", + "text": "age", + "type": "number", + "timestampFormat": "" + }, + { + "selector": "td.2", + "text": "country", + "type": "string", + "timestampFormat": "" + }, + { + "selector": "td.3", + "text": "occupation", + "type": "string", + "timestampFormat": "" + }, + { + "selector": "td.4.#content", + "text": "salary", + "type": "number", + "timestampFormat": "" + } + ], + "computed_columns": [], + "filters": null, + "seriesCount": 0, + "expression": "", + "alias": "", + 
"dataOverrides": null, + "global_query_id": "", + "query_mode": "" + }, + "data": "\u003c!DOCTYPE html\u003e\n\u003chtml lang=\"en\"\u003e\n \u003chead\u003e\n \u003cmeta charset=\"UTF-8\" /\u003e\n \u003cmeta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" /\u003e\n \u003cmeta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" /\u003e\n \u003ctitle\u003eUsers\u003c/title\u003e\n \u003c/head\u003e\n \u003cbody\u003e\n \u003ctable class=\"table table-bordered table-hover table-condensed\"\u003e\n \u003cthead\u003e\n \u003ctr\u003e\n \u003cth title=\"Field #1\"\u003ename\u003c/th\u003e\n \u003cth title=\"Field #2\"\u003eage\u003c/th\u003e\n \u003cth title=\"Field #3\"\u003ecountry\u003c/th\u003e\n \u003cth title=\"Field #4\"\u003eoccupation\u003c/th\u003e\n \u003cth title=\"Field #5\"\u003esalary\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Graham\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eDevops Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eErvin Howell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e27\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2300\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eClementine Bauch\u003c/td\u003e\n \u003ctd align=\"right\"\u003e17\u003c/td\u003e\n \u003ctd\u003eCanada\u003c/td\u003e\n \u003ctd\u003eStudent\u003c/td\u003e\n \u003ctd align=\"right\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003ePatricia Lebsack\u003c/td\u003e\n \u003ctd align=\"right\"\u003e42\u003c/td\u003e\n \u003ctd\u003eUK\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2800\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne 
Bell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSenior Software Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e4000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eChelsey Dietrich\u003c/td\u003e\n \u003ctd align=\"right\"\u003e32\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3500\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n \u003c/table\u003e\n \u003c/body\u003e\n\u003c/html\u003e\n", + "responseCodeFromServer": 200, + "duration": 123, + "error": "" + }, + "executedQueryString": "###############\n## URL\n###############\n\nhttp://127.0.0.1:8080\n\n###############\n## Curl Command\n###############\n\ncurl -X 'GET' 'http://127.0.0.1:8080'" + }, + "fields": [ + { + "name": "age", + "type": "number", + "typeInfo": { + "frame": "float64", + "nullable": true + } + }, + { + "name": "country", + "type": "string", + "typeInfo": { + "frame": "string", + "nullable": true + } + }, + { + "name": "name", + "type": "string", + "typeInfo": { + "frame": "string", + "nullable": true + } + }, + { + "name": "occupation", + "type": "string", + "typeInfo": { + "frame": "string", + "nullable": true + } + }, + { + "name": "salary", + "type": "number", + "typeInfo": { + "frame": "float64", + "nullable": true + } + } + ] + }, + "data": { + "values": [ + [ + 38, + 27, + 17, + 42, + 38, + 32 + ], + [ + "USA", + "USA", + "Canada", + "UK", + "USA", + "USA" + ], + [ + "Leanne Graham", + "Ervin Howell", + "Clementine Bauch", + "Patricia Lebsack", + "Leanne Bell", + "Chelsey Dietrich" + ], + [ + "Devops Engineer", + "Software Engineer", + "Student", + "Software Engineer", + "Senior Software Engineer", + "Software Engineer" + ], + [ + 3000, + 2300, + null, + 2800, + 4000, + 3500 + ] + ] + } + } + ] +} \ No newline at end of file diff --git a/pkg/testsuite/golden/html_default_url_default.jsonc 
b/pkg/testsuite/golden/html_default_url_default.jsonc new file mode 100644 index 00000000..aee6c857 --- /dev/null +++ b/pkg/testsuite/golden/html_default_url_default.jsonc @@ -0,0 +1,207 @@ +// 🌟 This was machine generated. Do not edit. 🌟 +// +// Frame[0] { +// "typeVersion": [ +// 0, +// 0 +// ], +// "custom": { +// "query": { +// "refId": "", +// "type": "html", +// "format": "", +// "source": "url", +// "url": "http://127.0.0.1:8080", +// "url_options": { +// "method": "", +// "params": null, +// "headers": null, +// "data": "", +// "body_type": "", +// "body_content_type": "", +// "body_form": null, +// "body_graphql_query": "", +// "body_graphql_variables": "" +// }, +// "data": "", +// "parser": "", +// "filterExpression": "", +// "summarizeExpression": "", +// "summarizeBy": "", +// "uql": "", +// "groq": "", +// "sqlite_query": "", +// "csv_options": { +// "delimiter": "", +// "skip_empty_lines": false, +// "skip_lines_with_error": false, +// "relax_column_count": false, +// "columns": "", +// "comment": "" +// }, +// "json_options": { +// "root_is_not_array": false, +// "columnar": false +// }, +// "root_selector": "tbody tr", +// "columns": [ +// { +// "selector": "td:nth-child(1)", +// "text": "name", +// "type": "string", +// "timestampFormat": "" +// }, +// { +// "selector": "td:nth-child(2)", +// "text": "age", +// "type": "number", +// "timestampFormat": "" +// }, +// { +// "selector": "td:nth-child(3)", +// "text": "country", +// "type": "string", +// "timestampFormat": "" +// }, +// { +// "selector": "td:nth-child(4)", +// "text": "occupation", +// "type": "string", +// "timestampFormat": "" +// }, +// { +// "selector": "td:nth-child(5)", +// "text": "salary", +// "type": "number", +// "timestampFormat": "" +// } +// ], +// "computed_columns": [], +// "filters": null, +// "seriesCount": 0, +// "expression": "", +// "alias": "", +// "dataOverrides": null, +// "global_query_id": "", +// "query_mode": "" +// }, +// "data": "\u003c!DOCTYPE 
html\u003e\n\u003chtml lang=\"en\"\u003e\n \u003chead\u003e\n \u003cmeta charset=\"UTF-8\" /\u003e\n \u003cmeta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" /\u003e\n \u003cmeta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" /\u003e\n \u003ctitle\u003eUsers\u003c/title\u003e\n \u003c/head\u003e\n \u003cbody\u003e\n \u003ctable class=\"table table-bordered table-hover table-condensed\"\u003e\n \u003cthead\u003e\n \u003ctr\u003e\n \u003cth title=\"Field #1\"\u003ename\u003c/th\u003e\n \u003cth title=\"Field #2\"\u003eage\u003c/th\u003e\n \u003cth title=\"Field #3\"\u003ecountry\u003c/th\u003e\n \u003cth title=\"Field #4\"\u003eoccupation\u003c/th\u003e\n \u003cth title=\"Field #5\"\u003esalary\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Graham\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eDevops Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eErvin Howell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e27\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2300\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eClementine Bauch\u003c/td\u003e\n \u003ctd align=\"right\"\u003e17\u003c/td\u003e\n \u003ctd\u003eCanada\u003c/td\u003e\n \u003ctd\u003eStudent\u003c/td\u003e\n \u003ctd align=\"right\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003ePatricia Lebsack\u003c/td\u003e\n \u003ctd align=\"right\"\u003e42\u003c/td\u003e\n \u003ctd\u003eUK\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2800\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Bell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n 
\u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSenior Software Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e4000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eChelsey Dietrich\u003c/td\u003e\n \u003ctd align=\"right\"\u003e32\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3500\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n \u003c/table\u003e\n \u003c/body\u003e\n\u003c/html\u003e\n", +// "responseCodeFromServer": 200, +// "duration": 123, +// "error": "" +// }, +// "executedQueryString": "###############\n## URL\n###############\n\nhttp://127.0.0.1:8080\n\n###############\n## Curl Command\n###############\n\ncurl -X 'GET' 'http://127.0.0.1:8080'" +// } +// Name: response +// Dimensions: 0 Fields by 0 Rows +// + +// + +// +// +// 🌟 This was machine generated. Do not edit. 🌟 +{ + "status": 200, + "frames": [ + { + "schema": { + "name": "response", + "meta": { + "typeVersion": [ + 0, + 0 + ], + "custom": { + "query": { + "refId": "", + "type": "html", + "format": "", + "source": "url", + "url": "http://127.0.0.1:8080", + "url_options": { + "method": "", + "params": null, + "headers": null, + "data": "", + "body_type": "", + "body_content_type": "", + "body_form": null, + "body_graphql_query": "", + "body_graphql_variables": "" + }, + "data": "", + "parser": "", + "filterExpression": "", + "summarizeExpression": "", + "summarizeBy": "", + "uql": "", + "groq": "", + "sqlite_query": "", + "csv_options": { + "delimiter": "", + "skip_empty_lines": false, + "skip_lines_with_error": false, + "relax_column_count": false, + "columns": "", + "comment": "" + }, + "json_options": { + "root_is_not_array": false, + "columnar": false + }, + "root_selector": "tbody tr", + "columns": [ + { + "selector": "td:nth-child(1)", + "text": "name", + "type": "string", + "timestampFormat": "" + }, + { + "selector": "td:nth-child(2)", + "text": "age", + "type": "number", 
+ "timestampFormat": "" + }, + { + "selector": "td:nth-child(3)", + "text": "country", + "type": "string", + "timestampFormat": "" + }, + { + "selector": "td:nth-child(4)", + "text": "occupation", + "type": "string", + "timestampFormat": "" + }, + { + "selector": "td:nth-child(5)", + "text": "salary", + "type": "number", + "timestampFormat": "" + } + ], + "computed_columns": [], + "filters": null, + "seriesCount": 0, + "expression": "", + "alias": "", + "dataOverrides": null, + "global_query_id": "", + "query_mode": "" + }, + "data": "\u003c!DOCTYPE html\u003e\n\u003chtml lang=\"en\"\u003e\n \u003chead\u003e\n \u003cmeta charset=\"UTF-8\" /\u003e\n \u003cmeta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" /\u003e\n \u003cmeta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" /\u003e\n \u003ctitle\u003eUsers\u003c/title\u003e\n \u003c/head\u003e\n \u003cbody\u003e\n \u003ctable class=\"table table-bordered table-hover table-condensed\"\u003e\n \u003cthead\u003e\n \u003ctr\u003e\n \u003cth title=\"Field #1\"\u003ename\u003c/th\u003e\n \u003cth title=\"Field #2\"\u003eage\u003c/th\u003e\n \u003cth title=\"Field #3\"\u003ecountry\u003c/th\u003e\n \u003cth title=\"Field #4\"\u003eoccupation\u003c/th\u003e\n \u003cth title=\"Field #5\"\u003esalary\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Graham\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eDevops Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eErvin Howell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e27\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2300\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eClementine Bauch\u003c/td\u003e\n \u003ctd 
align=\"right\"\u003e17\u003c/td\u003e\n \u003ctd\u003eCanada\u003c/td\u003e\n \u003ctd\u003eStudent\u003c/td\u003e\n \u003ctd align=\"right\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003ePatricia Lebsack\u003c/td\u003e\n \u003ctd align=\"right\"\u003e42\u003c/td\u003e\n \u003ctd\u003eUK\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e2800\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eLeanne Bell\u003c/td\u003e\n \u003ctd align=\"right\"\u003e38\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSenior Software Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e4000\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003ctd\u003eChelsey Dietrich\u003c/td\u003e\n \u003ctd align=\"right\"\u003e32\u003c/td\u003e\n \u003ctd\u003eUSA\u003c/td\u003e\n \u003ctd\u003eSoftware Engineer\u003c/td\u003e\n \u003ctd align=\"right\"\u003e3500\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n \u003c/table\u003e\n \u003c/body\u003e\n\u003c/html\u003e\n", + "responseCodeFromServer": 200, + "duration": 123, + "error": "" + }, + "executedQueryString": "###############\n## URL\n###############\n\nhttp://127.0.0.1:8080\n\n###############\n## Curl Command\n###############\n\ncurl -X 'GET' 'http://127.0.0.1:8080'" + }, + "fields": [] + }, + "data": { + "values": [] + } + } + ] +} \ No newline at end of file diff --git a/pkg/testsuite/handler_querydata_test.go b/pkg/testsuite/handler_querydata_test.go index 31bd3c9b..92502727 100644 --- a/pkg/testsuite/handler_querydata_test.go +++ b/pkg/testsuite/handler_querydata_test.go @@ -1138,6 +1138,178 @@ func TestQuery(t *testing.T) { resItem := res.Responses["A"] experimental.CheckGoldenJSONResponse(t, "golden", strings.ReplaceAll(t.Name(), "TestQuery/", ""), &resItem, UPDATE_GOLDEN_DATA) }) + t.Run("html", func(t *testing.T) { + sampleHtml := ` + + + + + + Users + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + +
nameagecountryoccupationsalary
Leanne Graham38USADevops Engineer3000
Ervin Howell27USASoftware Engineer2300
Clementine Bauch17CanadaStudent
Patricia Lebsack42UKSoftware Engineer2800
Leanne Bell38USASenior Software Engineer4000
Chelsey Dietrich32USASoftware Engineer3500
+ +` + fmt.Printf(sampleHtml) + server := getServerWithStaticResponse(t, "../../testdata/users.html", true) + server.Start() + defer server.Close() + t.Run("default url default", func(t *testing.T) { + res, err := host.QueryData(context.Background(), &backend.QueryDataRequest{ + PluginContext: backend.PluginContext{ + DataSourceInstanceSettings: &backend.DataSourceInstanceSettings{ + JSONData: []byte(`{"is_mock": true}`), + DecryptedSecureJSONData: map[string]string{}, + }, + }, + Queries: []backend.DataQuery{{RefID: "A", JSON: []byte(fmt.Sprintf(`{ + "type": "html", + "url": "%s", + "source": "url", + "root_selector": "tbody tr", + "columns": [ + { + "text": "name", + "selector": "td:nth-child(1)", + "type": "string" + }, + { + "text": "age", + "selector": "td:nth-child(2)", + "type": "number" + }, + { + "text": "country", + "selector": "td:nth-child(3)", + "type": "string" + }, + { + "text": "occupation", + "selector": "td:nth-child(4)", + "type": "string" + }, + { + "text": "salary", + "selector": "td:nth-child(5)", + "type": "number" + } + ] + }`, server.URL))}}, + }) + require.Nil(t, err) + require.NotNil(t, res) + resItem := res.Responses["A"] + experimental.CheckGoldenJSONResponse(t, "golden", strings.ReplaceAll(t.Name(), "TestQuery/html/", "html_"), &resItem, UPDATE_GOLDEN_DATA) + }) + t.Run("backend url default", func(t *testing.T) { + res, err := host.QueryData(context.Background(), &backend.QueryDataRequest{ + PluginContext: backend.PluginContext{ + DataSourceInstanceSettings: &backend.DataSourceInstanceSettings{ + JSONData: []byte(`{"is_mock": true}`), + DecryptedSecureJSONData: map[string]string{}, + }, + }, + Queries: []backend.DataQuery{{RefID: "A", JSON: []byte(fmt.Sprintf(`{ + "type": "html", + "url": "%s", + "source": "url", + "parser": "backend", + "root_selector": "html.body.table.tbody.tr", + "columns": [ + { + "selector": "td.0", + "text": "name", + "timestampFormat": "", + "type": "string" + }, + { + "selector": "td.1.#content", + "text": 
"age", + "timestampFormat": "", + "type": "number" + }, + { + "selector": "td.2", + "text": "country", + "timestampFormat": "", + "type": "string" + }, + { + "selector": "td.3", + "text": "occupation", + "timestampFormat": "", + "type": "string" + }, + { + "selector": "td.4.#content", + "text": "salary", + "timestampFormat": "", + "type": "number" + } + ] + }`, server.URL))}}, + }) + require.Nil(t, err) + require.NotNil(t, res) + resItem := res.Responses["A"] + experimental.CheckGoldenJSONResponse(t, "golden", strings.ReplaceAll(t.Name(), "TestQuery/html/", "html_"), &resItem, UPDATE_GOLDEN_DATA) + }) + }) t.Run("scenario azure cost management", func(t *testing.T) { server := getServerWithStaticResponse(t, "./../../testdata/misc/azure-cost-management-daily.json", true) server.Start() From 8f1e69c1d91291dae92be97ee9c3ed01a815be9d Mon Sep 17 00:00:00 2001 From: Sriramajeyam Sugumaran <153843+yesoreyeram@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:00:11 +0000 Subject: [PATCH 2/2] update docs for html --- docs/sources/html.md | 39 +++++++------- pkg/testsuite/handler_querydata_test.go | 67 ------------------------- 2 files changed, 22 insertions(+), 84 deletions(-) diff --git a/docs/sources/html.md b/docs/sources/html.md index 88918e83..83722114 100644 --- a/docs/sources/html.md +++ b/docs/sources/html.md @@ -26,30 +26,35 @@ weight: 25 # Visualizing HTML data -In the below example, we are going to convert the HTML URL `https://grafana.com/about/team/` into grafana datasource. +{{< admonition type="caution" >}} +The HTML query type should be used only for retrieving data from legacy systems where no alternative APIs exist. Instead of the HTML query type, we strongly recommend using other query types such as JSON, CSV, or XML.
+{{< /admonition >}} -![image](https://user-images.githubusercontent.com/153843/92399290-faabcf80-f121-11ea-9261-b06c708e81c0.png#center) +In the example below, we are going to retrieve data from [this](https://github.com/grafana/grafana-infinity-datasource/blob/main/testdata/users.html) sample HTML page. -Once you open the page in browser, right click and inspect the element (first element of the array you want to display). Then copy the selector as your root / rows element. +In the Query editor, fill in the following query details: -![image](https://user-images.githubusercontent.com/153843/92396876-ac94cd00-f11d-11ea-850d-f1754f980fc7.png#center) +1. Select **HTML** as the query type +2. Select **Default** (frontend) as the parser +3. Select **URL** as the source +4. Select **GET** as the HTTP method +5. Enter `https://github.com/grafana/grafana-infinity-datasource/blob/main/testdata/users.html` in the URL field of the query -Then you can select, individual properties of the row as columns of the table as shown in the example image. You can select any element with in the row context. +Once the above initial setup is done, you need to configure the selectors. -![image](https://user-images.githubusercontent.com/153843/92382094-f4a6f600-f103-11ea-8035-e1bbd9157629.png#center) +1. In the root selector, provide a selector that returns an array of symmetrical elements (for example, the rows of a table, or repeating div elements that share the same structure). In our case, we enter `table:nth-child(1) tbody tr` (a CSS selector) as the root selector. Alternatively, you can use `tr` as the selector if your HTML content has only one table. If the table has a unique selector such as an id, use that as the selector instead. +2. From our HTML structure, we know that each row contains several `td` elements, where each `td` represents a property of the user. So we need to uniquely identify the `td` element corresponding to each user property. +3.
Add a column and enter `td:nth-child(1)` as selector field. Also mark this as `Name` in the **as/alias** field. We can leave this as a string +4. Add another column and enter `td:nth-child(2)` as selector field. Also mark this as `Age` in the **as/alias** field. We know that this is a number, so we can change the field type to number. +5. Add another column and enter `td:nth-child(3)` as selector field. Also mark this as `Country` in the **as/alias** field +6. Likewise, add any other columns as per your need. -![image](https://user-images.githubusercontent.com/153843/92747321-fbd83900-f37b-11ea-8be9-9366386dc6e2.png#center) - -Example : - -- `h4` --> h4 element will be selected -- `.team__title` --> Element with the class `team__title` will be selected -- `td:nth-child(4)` --> 4th td element within the row context will be selected. This will be useful when you element doesn't have any id or duplicate class names. +Example of the above query is given in the [play.grafana](https://play.grafana.org/explore?schemaVersion=1&panes=%7B%22s9j%22:%7B%22datasource%22:%22infinity-universal%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22datasource%22:%7B%22type%22:%22yesoreyeram-infinity-datasource%22,%22uid%22:%22infinity-universal%22%7D,%22type%22:%22html%22,%22source%22:%22url%22,%22format%22:%22table%22,%22url%22:%22https:%2F%2Fgithub.com%2Fgrafana%2Fgrafana-infinity-datasource%2Fblob%2Fmain%2Ftestdata%2Fusers.html%22,%22url_options%22:%7B%22method%22:%22GET%22,%22data%22:%22%22%7D,%22root_selector%22:%22table:nth-child%281%29%20tbody%20tr%22,%22columns%22:%5B%7B%22text%22:%22Name%22,%22selector%22:%22td:nth-child%281%29%22,%22type%22:%22string%22%7D,%7B%22text%22:%22Age%22,%22selector%22:%22td:nth-child%282%29%22,%22type%22:%22number%22%7D,%7B%22text%22:%22Country%22,%22selector%22:%22td:nth-child%283%29%22,%22type%22:%22string%22%7D,%7B%22text%22:%22Occupation%22,%22selector%22:%22td:nth-child%284%29%22,%22type%22:%22string%22%7D,%7B%22text%22:%22Salary%22,%22selector%22:%
22td:nth-child%285%29%22,%22type%22:%22number%22%7D%5D,%22filters%22:%5B%5D,%22global_query_id%22:%22%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&orgId=1) site for reference. ## Limitations -- Only symmetrical data can be scrapped. (Example: `table` elements with `colspan` or `rowspan` will break the scrapping) -- Only text element is supported. Attribute scraping not available -- To scrap the AJAX content, use [JSON type](/docs/json) in the Query +- Only symmetrical data can be queries. (Example: `table` with `colspan` or `rowspan` will break the scrapping) +- Only text element querying is supported. Retrieving other html attributes are not supported +- If you prefer to use **backend** parser for html query type, be aware that the backend html query parser is experimental and subject to breaking changes. Also, only the html pages compatible to XML syntax, can be used with html backend query type. - Websites may block you/your IP address, If the scrapping is at high frequency/refresh rate. Be sensible and responsible about setting your refresh limits -- Caching is not implemented. So be aware of the rate limits. +- Caching is not implemented. So be aware of the rate limits diff --git a/pkg/testsuite/handler_querydata_test.go b/pkg/testsuite/handler_querydata_test.go index 92502727..810b3384 100644 --- a/pkg/testsuite/handler_querydata_test.go +++ b/pkg/testsuite/handler_querydata_test.go @@ -1139,73 +1139,6 @@ func TestQuery(t *testing.T) { experimental.CheckGoldenJSONResponse(t, "golden", strings.ReplaceAll(t.Name(), "TestQuery/", ""), &resItem, UPDATE_GOLDEN_DATA) }) t.Run("html", func(t *testing.T) { - sampleHtml := ` - - - - - - Users - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameagecountryoccupationsalary
Leanne Graham38USADevops Engineer3000
Ervin Howell27USASoftware Engineer2300
Clementine Bauch17CanadaStudent
Patricia Lebsack42UKSoftware Engineer2800
Leanne Bell38USASenior Software Engineer4000
Chelsey Dietrich32USASoftware Engineer3500
- -` - fmt.Printf(sampleHtml) server := getServerWithStaticResponse(t, "../../testdata/users.html", true) server.Start() defer server.Close()