Skip to content

Commit

Permalink
Added limit and offset decorators CLARIAH#379, fixing issues CLARIAH#367
Browse files Browse the repository at this point in the history
  • Loading branch information
Pawlik02 committed Mar 2, 2023
1 parent c6fdd61 commit 4191cc1
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 30 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,22 @@ Syntax:

Example [query](https://github.com/CLARIAH/grlc-queries/blob/master/pagination.rq) and the equivalent [API operation](http://grlc.io/api-git/CLARIAH/grlc-queries/#/default/get_pagination).

### `limit`
Sets the `LIMIT` of the SPARQL query (for example, to 100). Any occurrence of the variable `?_limit` in the query is also replaced with this value.

Syntax:
```
#+ limit: 100
```

### `offset`
Sets the `OFFSET` of the SPARQL query (for example, to 50). Any occurrence of the variable `?_offset` in the query is also replaced with this value.

Syntax:
```
#+ offset: 50
```

### `method`
Indicates the HTTP request method (`GET` and `POST` are supported).

Expand Down
50 changes: 30 additions & 20 deletions src/gquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,33 +65,43 @@ def guess_endpoint_uri(rq, loader):
return endpoint, auth


def get_parameter_from_decorators(rq, parameter_name):
    """
    Look up a single decorator value by its name.

    The decorators of `rq` are obtained through `get_yaml_decorators` and the
    entry stored under `parameter_name` is returned. When the lookup raises
    `TypeError` or `KeyError` (for instance, the decorator is not present),
    `None` is returned instead.
    """
    parameter_value = None
    try:
        # Index straight into the parsed decorators; a failed lookup is
        # reported to the caller as None rather than as an exception.
        parameter_value = get_yaml_decorators(rq)[parameter_name]
        glogger.debug(f"Got parameter value from decorators {parameter_name}: {parameter_value}")
    except (TypeError, KeyError):
        return None
    return parameter_value


def count_query_results(query, endpoint):
    """
    Returns the total number of results that query 'query' will generate.

    The query is rewritten into a counting query (projection replaced by
    COUNT, and GROUP BY / ORDER BY / LIMIT / OFFSET clauses stripped), sent to
    `endpoint` with an HTTP GET asking for JSON, and the number of bindings in
    the response is returned.

    WARNING: This is expensive just for providing a number of result pages,
    since it issues an additional request against the endpoint.
    """
    # Replace the projection with COUNT. Queries without a FROM clause fall
    # back to the "SELECT ... {" form.
    number_results_query, repl = re.subn(r"SELECT.*FROM", "SELECT COUNT (*) FROM", query)
    if not repl:
        number_results_query = re.sub(r"SELECT.*{", "SELECT COUNT(*) {", query)

    # Solution modifiers do not affect the total, so drop them. Raw strings
    # keep the regex escapes (\s, \?, \_, \(, \)) from being treated as
    # invalid string escape sequences.
    modifier_patterns = (
        r"GROUP\s+BY\s+[\?\_\(\)a-zA-Z0-9]+",
        r"ORDER\s+BY\s+[\?\_\(\)a-zA-Z0-9]+",
        r"LIMIT\s+[0-9]+",
        r"OFFSET\s+[0-9]+",
    )
    for pattern in modifier_patterns:
        number_results_query = re.sub(pattern, "", number_results_query)

    glogger.debug("Query for result count: " + number_results_query)

    # Prepare HTTP request
    headers = {'Accept': 'application/json'}
    data = {'query': number_results_query}
    count_json = requests.get(endpoint, params=data, headers=headers).json()

    # NOTE(review): when the COUNT rewrite above applies, the endpoint
    # presumably answers with a single aggregate row, in which case this
    # length would be 1 rather than the total -- confirm against the endpoint.
    count = len(count_json['results']['bindings'])
    glogger.info(f"Paginated query has {count} results in total")

    return count


def _getDictWithKey(key, dict_list):
Expand Down
8 changes: 4 additions & 4 deletions src/pagination.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode, ParseResult

def getSwaggerPaginationDef(resultsPerPage):
Expand All @@ -11,17 +12,16 @@ def getSwaggerPaginationDef(resultsPerPage):

def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url):
"""Build link header for result pagination"""
lastPage = resultCount / resultsPerPage
lastPage = math.ceil(resultCount / resultsPerPage)

url_parts = urlparse(url)
query = dict(parse_qsl(url_parts.query)) # Use dict parse_qsl instead of parse_qs to ensure 'page' is unique

first_url = _buildNewUrlWithPage(url_parts, query, page=1)
last_url = _buildNewUrlWithPage(url_parts, query, page=lastPage)

if not pageArg:
next_url = _buildNewUrlWithPage(url_parts, query, page=1)
prev_url = ""
if not pageArg or int(pageArg) == 1 or int(pageArg) == 0:
next_url = _buildNewUrlWithPage(url_parts, query, page=2)
headerLink = "<{}>; rel=next, <{}>; rel=last".format(next_url, last_url)
else:
page = int(pageArg)
Expand Down
17 changes: 17 additions & 0 deletions src/swagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,17 @@ def process_sparql_query_text(query_text, loader, call_name, extraMetadata):
# We get the endpoint name first, since some query metadata fields (eg enums) require it
endpoint, _ = gquery.guess_endpoint_uri(query_text, loader)
glogger.debug("Read query endpoint: {}".format(endpoint))
limit = gquery.get_parameter_from_decorators(query_text, "limit")
offset = gquery.get_parameter_from_decorators(query_text, "offset")
pagination = gquery.get_parameter_from_decorators(query_text, "pagination")
if pagination and (limit is not None or offset is not None):
raise Exception("Providing pagination and limit/offset decorator at the same time is forbidden!")
if limit is not None:
query_text = query_text.replace("?_limit", str(limit))
query_text += f"LIMIT {limit} "
if offset is not None:
query_text = query_text.replace("?_offset", str(offset))
query_text += f"OFFSET {offset} "

try:
query_metadata = gquery.get_metadata(query_text, endpoint)
Expand Down Expand Up @@ -272,6 +283,12 @@ def process_sparql_query_text(query_text, loader, call_name, extraMetadata):
endpoint_param['description'] = "Alternative endpoint for SPARQL query"
endpoint_param['default'] = endpoint
params.append(endpoint_param)
if limit is not None:
limit_param = {'name': "limit", 'type': "int", 'in': "query", 'description': "Alternative limit", 'default': limit}
params.append(limit_param)
if offset is not None:
offset_param = {'name': "offset", 'type': "int", 'in': "query", 'description': "Alternative offset", 'default': offset}
params.append(offset_param)

# If this is a URL generated spec we need to force API calls with the specUrl parameter set
if type(loader) is URLLoader:
Expand Down
23 changes: 17 additions & 6 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def build_swagger_spec(user, repo, subdir, spec_url, sha, serverName):

# TODO: Add bootstrap style to top level HTML
# Without a better place to display warnings, we can make them part of the description.
if 'description' not in swag['info']:
swag['info']['description'] = ''
if 'description' not in swag['info'] or swag["info"]["description"] is None:
swag['info']['description'] = ""
for warn in warnings:
swag['info']['description'] += swagger.get_warning_div(warn)

Expand Down Expand Up @@ -116,6 +116,12 @@ def dispatch_query(user, repo, query_name, subdir=None, spec_url=None, sha=None,
def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, content,
formData, requestUrl):
"""Executes the specified SPARQL query."""
if "limit" in requestArgs:
raw_sparql_query = raw_sparql_query.replace("?_limit", str(requestArgs["limit"]))
raw_sparql_query += f"LIMIT {requestArgs['limit']} "
if "offset" in requestArgs:
raw_sparql_query = raw_sparql_query.replace("?_offset", str(requestArgs["offset"]))
raw_sparql_query += f"OFFSET {requestArgs['offset']} "
endpoint, auth = gquery.guess_endpoint_uri(raw_sparql_query, loader)
if endpoint == '':
return 'No SPARQL endpoint indicated', 407, {}
Expand All @@ -140,9 +146,9 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con
rewritten_query = gquery.rewrite_query(query_metadata['original_query'], query_metadata['parameters'], requestArgs)

# Rewrite query using pagination
if query_metadata['type'] == 'SelectQuery' and 'pagination' in query_metadata:
rewritten_query = gquery.paginate_query(rewritten_query, query_metadata['pagination'], requestArgs)

if "limit" not in requestArgs and "offset" not in requestArgs:
if query_metadata['type'] == 'SelectQuery' and 'pagination' in query_metadata:
rewritten_query = gquery.paginate_query(rewritten_query, query_metadata['pagination'], requestArgs)
resp = None
headers = {}

Expand Down Expand Up @@ -213,7 +219,7 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con
headers['Content-Type'] = response.headers['Content-Type']

# If the query is paginated, set link HTTP headers
if pagination:
if pagination and "limit" not in requestArgs and "offset" not in requestArgs:
# Get number of total results
count = gquery.count_query_results(rewritten_query, endpoint)
pageArg = requestArgs.get('page', None)
Expand All @@ -229,6 +235,11 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con
resp = SPARQLTransformer.post_process(json.loads(resp), query_metadata['transform'], opt)

headers['Server'] = 'grlc/' + grlc_version
if isinstance(resp, list) and len(resp) == 1:
resp = resp[0]
if isinstance(resp, dict):
resp = json.dumps(resp)

return resp, 200, headers


Expand Down

0 comments on commit 4191cc1

Please sign in to comment.