diff --git a/README.md b/README.md index 25651bc..732cd9b 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,22 @@ Syntax: Example [query](https://github.com/CLARIAH/grlc-queries/blob/master/pagination.rq) and the equivalent [API operation](http://grlc.io/api-git/CLARIAH/grlc-queries/#/default/get_pagination). +### `limit` +Sets the SPARQL query's LIMIT clause to the given value (for example, 100). It also replaces any variable named ?_limit in the query with that value. + +Syntax: +``` +#+ limit: 100 +``` + +### `offset` +Sets the SPARQL query's OFFSET clause to the given value (for example, 50). It also replaces any variable named ?_offset in the query with that value. + +Syntax: +``` +#+ offset: 50 +``` + ### `method` Indicates the HTTP request method (`GET` and `POST` are supported). diff --git a/src/gquery.py b/src/gquery.py index 43e912e..c0d1c57 100644 --- a/src/gquery.py +++ b/src/gquery.py @@ -65,6 +65,19 @@ def guess_endpoint_uri(rq, loader): return endpoint, auth +def get_parameter_from_decorators(rq, parameter_name): + """ + Getting parameter by its name from decorators + """ + try: + decorators = get_yaml_decorators(rq) + parameter_value = decorators[parameter_name] + glogger.debug(f"Got parameter value from decorators {parameter_name}: {parameter_value}") + except (TypeError, KeyError): + return None + return parameter_value + + def count_query_results(query, endpoint): """ Returns the total number of results that query 'query' will generate @@ -72,26 +85,23 @@ def count_query_results(query, endpoint): Providing a dummy count for now """ - # number_results_query, repl = re.subn("SELECT.*FROM", "SELECT COUNT (*) FROM", query) - # if not repl: - # number_results_query = re.sub("SELECT.*{", "SELECT COUNT(*) {", query) - # number_results_query = re.sub("GROUP\s+BY\s+[\?\_\(\)a-zA-Z0-9]+", "", number_results_query) - # number_results_query = re.sub("ORDER\s+BY\s+[\?\_\(\)a-zA-Z0-9]+", "", number_results_query) - # number_results_query = re.sub("LIMIT\s+[0-9]+", "", number_results_query) - # number_results_query = 
re.sub("OFFSET\s+[0-9]+", "", number_results_query) - # - # glogger.debug("Query for result count: " + number_results_query) - # - # # Preapre HTTP request - # headers = { 'Accept' : 'application/json' } - # data = { 'query' : number_results_query } - # count_json = requests.get(endpoint, params=data, headers=headers).json() - # count = int(count_json['results']['bindings'][0]['callret-0']['value']) - # glogger.info("Paginated query has {} results in total".format(count)) - # - # return count - - return 1000 + number_results_query, repl = re.subn("SELECT.*FROM", "SELECT COUNT (*) FROM", query) + if not repl: + number_results_query = re.sub("SELECT.*{", "SELECT COUNT(*) {", query) + number_results_query = re.sub("GROUP\s+BY\s+[\?\_\(\)a-zA-Z0-9]+", "", number_results_query) + number_results_query = re.sub("ORDER\s+BY\s+[\?\_\(\)a-zA-Z0-9]+", "", number_results_query) + number_results_query = re.sub("LIMIT\s+[0-9]+", "", number_results_query) + number_results_query = re.sub("OFFSET\s+[0-9]+", "", number_results_query) + + glogger.debug("Query for result count: " + number_results_query) + # Preapre HTTP request + headers = { 'Accept' : 'application/json' } + data = { 'query' : number_results_query } + count_json = requests.get(endpoint, params=data, headers=headers).json() + count = len(count_json['results']['bindings']) + glogger.info(f"Paginated query has {count} results in total") + + return count def _getDictWithKey(key, dict_list): diff --git a/src/pagination.py b/src/pagination.py index 4262857..97d65ad 100644 --- a/src/pagination.py +++ b/src/pagination.py @@ -1,3 +1,4 @@ +import math from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode, ParseResult def getSwaggerPaginationDef(resultsPerPage): @@ -11,7 +12,7 @@ def getSwaggerPaginationDef(resultsPerPage): def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url): """Build link header for result pagination""" - lastPage = resultCount / resultsPerPage + lastPage = math.ceil(resultCount / 
resultsPerPage) url_parts = urlparse(url) query = dict(parse_qsl(url_parts.query)) # Use dict parse_qsl instead of parse_qs to ensure 'page' is unique @@ -19,9 +20,8 @@ def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url): first_url = _buildNewUrlWithPage(url_parts, query, page=1) last_url = _buildNewUrlWithPage(url_parts, query, page=lastPage) - if not pageArg: - next_url = _buildNewUrlWithPage(url_parts, query, page=1) - prev_url = "" + if not pageArg or int(pageArg) == 1 or int(pageArg) == 0: + next_url = _buildNewUrlWithPage(url_parts, query, page=2) headerLink = "<{}>; rel=next, <{}>; rel=last".format(next_url, last_url) else: page = int(pageArg) diff --git a/src/swagger.py b/src/swagger.py index 613512b..98d4894 100644 --- a/src/swagger.py +++ b/src/swagger.py @@ -207,6 +207,17 @@ def process_sparql_query_text(query_text, loader, call_name, extraMetadata): # We get the endpoint name first, since some query metadata fields (eg enums) require it endpoint, _ = gquery.guess_endpoint_uri(query_text, loader) glogger.debug("Read query endpoint: {}".format(endpoint)) + limit = gquery.get_parameter_from_decorators(query_text, "limit") + offset = gquery.get_parameter_from_decorators(query_text, "offset") + pagination = gquery.get_parameter_from_decorators(query_text, "pagination") + if pagination and (limit is not None or offset is not None): + raise Exception("Providing pagination and limit/offset decorator at the same time is forbidden!") + if limit is not None: + query_text = query_text.replace("?_limit", str(limit)) + query_text += f"LIMIT {limit} " + if offset is not None: + query_text = query_text.replace("?_offset", str(offset)) + query_text += f"OFFSET {offset} " try: query_metadata = gquery.get_metadata(query_text, endpoint) @@ -272,6 +283,12 @@ def process_sparql_query_text(query_text, loader, call_name, extraMetadata): endpoint_param['description'] = "Alternative endpoint for SPARQL query" endpoint_param['default'] = endpoint 
params.append(endpoint_param) + if limit is not None: + limit_param = {'name': "limit", 'type': "int", 'in': "query", 'description': "Alternative limit", 'default': limit} + params.append(limit_param) + if offset is not None: + offset_param = {'name': "offset", 'type': "int", 'in': "query", 'description': "Alternative offset", 'default': offset} + params.append(offset_param) # If this is a URL generated spec we need to force API calls with the specUrl parameter set if type(loader) is URLLoader: diff --git a/src/utils.py b/src/utils.py index 4f8ed7d..f0b28f4 100644 --- a/src/utils.py +++ b/src/utils.py @@ -77,8 +77,8 @@ def build_swagger_spec(user, repo, subdir, spec_url, sha, serverName): # TODO: Add bootstrap style to top level HTML # Without a better place to display warnings, we can make them part of the description. - if 'description' not in swag['info']: - swag['info']['description'] = '' + if 'description' not in swag['info'] or swag["info"]["description"] is None: + swag['info']['description'] = "" for warn in warnings: swag['info']['description'] += swagger.get_warning_div(warn) @@ -116,6 +116,12 @@ def dispatch_query(user, repo, query_name, subdir=None, spec_url=None, sha=None, def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, content, formData, requestUrl): """Executes the specified SPARQL query.""" + if "limit" in requestArgs: + raw_sparql_query = raw_sparql_query.replace("?_limit", str(requestArgs["limit"])) + raw_sparql_query += f"LIMIT {requestArgs['limit']} " + if "offset" in requestArgs: + raw_sparql_query = raw_sparql_query.replace("?_offset", str(requestArgs["offset"])) + raw_sparql_query += f"OFFSET {requestArgs['offset']} " endpoint, auth = gquery.guess_endpoint_uri(raw_sparql_query, loader) if endpoint == '': return 'No SPARQL endpoint indicated', 407, {} @@ -140,9 +146,9 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con rewritten_query = 
gquery.rewrite_query(query_metadata['original_query'], query_metadata['parameters'], requestArgs) # Rewrite query using pagination - if query_metadata['type'] == 'SelectQuery' and 'pagination' in query_metadata: - rewritten_query = gquery.paginate_query(rewritten_query, query_metadata['pagination'], requestArgs) - + if "limit" not in requestArgs and "offset" not in requestArgs: + if query_metadata['type'] == 'SelectQuery' and 'pagination' in query_metadata: + rewritten_query = gquery.paginate_query(rewritten_query, query_metadata['pagination'], requestArgs) resp = None headers = {} @@ -213,7 +219,7 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con headers['Content-Type'] = response.headers['Content-Type'] # If the query is paginated, set link HTTP headers - if pagination: + if pagination and "limit" not in requestArgs and "offset" not in requestArgs: # Get number of total results count = gquery.count_query_results(rewritten_query, endpoint) pageArg = requestArgs.get('page', None) @@ -229,6 +235,11 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con resp = SPARQLTransformer.post_process(json.loads(resp), query_metadata['transform'], opt) headers['Server'] = 'grlc/' + grlc_version + if isinstance(resp, list) and len(resp) == 1: + resp = resp[0] + if isinstance(resp, dict): + resp = json.dumps(resp) + return resp, 200, headers