From 4dc5f41d439149b0ba5d4185e20bebcd2df62ccf Mon Sep 17 00:00:00 2001
From: Benjamin Bertrand <benjamin.bertrand@esss.se>
Date: Wed, 3 Apr 2019 16:25:24 +0200
Subject: [PATCH] Limit the max number of elements returned per page

- requesting too many elements in one query has an impact on the
database and the web application
- max limit set to 100

JIRA INFRA-942 #action In Progress
---
 app/api/utils.py             |  5 ++-
 app/settings.py              |  3 ++
 docs/api.rst                 |  3 +-
 tests/functional/conftest.py |  1 +
 tests/functional/test_api.py | 67 ++++++++++++++++++++++++++++++++++++
 5 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/app/api/utils.py b/app/api/utils.py
index b9e6964..9a2269c 100644
--- a/app/api/utils.py
+++ b/app/api/utils.py
@@ -79,7 +79,9 @@ def get_generic_model(model, order_by=None, query=None):
         if order_by is None:
             order_by = getattr(model, "name")
         query = query.order_by(order_by)
-    pagination = query.paginate(page, per_page)
+    pagination = query.paginate(
+        page, per_page, max_per_page=current_app.config["MAX_PER_PAGE"]
+    )
     data = [item.to_dict(recursive=recursive) for item in pagination.items]
     # Re-add recursive to kwargs so that it's part of the pagination url
     kwargs["recursive"] = recursive
@@ -96,6 +98,7 @@ def search_generic_model(model):
     kwargs = request.args.to_dict()
     page = int(kwargs.pop("page", 1))
     per_page = int(kwargs.pop("per_page", 20))
+    per_page = min(per_page, current_app.config["MAX_PER_PAGE"])
     search = kwargs.get("q", "*")
     instances, nb_filtered = model.search(search, page=page, per_page=per_page)
     current_app.logger.debug(
diff --git a/app/settings.py b/app/settings.py
index 37e804c..5c1ab33 100644
--- a/app/settings.py
+++ b/app/settings.py
@@ -89,6 +89,9 @@ DOCUMENTATION_URL = "http://ics-infrastructure.pages.esss.lu.se/csentry/index.ht
 # Shall be set to staging|production|development
 CSENTRY_ENVIRONMENT = "staging"
 
+# Maximum number of elements returned per page by an API call
+MAX_PER_PAGE = 100
+
 AWX_URL = "https://torn.tn.esss.lu.se"
 # AWX dynamic inventory source to update
 # Use the id because resource.update requires a number
diff --git a/docs/api.rst b/docs/api.rst
index 0b0a7bc..300d082 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -51,7 +51,8 @@ Pagination
 ----------
 
 When returning an array, the result is limited to 20 elements per page by default.
-You can pass the `page` (default to 1) and `per_page` (default to 20) parameters to access other elements.
+You can pass the `page` (default: 1) and `per_page` (default:20, max: 100) parameters to access other elements.
+Note that you can't request more than 100 elements per page. You should follow the link headers to get more results.
 
 To list the 30 elements of the second page::
 
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index 7d7cc00..9976b99 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -60,6 +60,7 @@ def app(request):
         },
         "AWX_URL": "https://awx.example.org",
         "ALLOWED_VM_CREATION_DOMAINS": ["lab.example.org"],
+        "MAX_PER_PAGE": 25,
     }
     app = create_app(config=config)
     ctx = app.app_context()
diff --git a/tests/functional/test_api.py b/tests/functional/test_api.py
index 23ca0ed..2b0da71 100644
--- a/tests/functional/test_api.py
+++ b/tests/functional/test_api.py
@@ -1700,3 +1700,70 @@ def test_search_hosts(client, host_factory, readonly_token):
     assert HOST_KEYS == set(r[0].keys())
     assert r[0]["name"] == host1.name
     assert r[0]["description"] == host1.description
+
+
+@pytest.mark.parametrize("endpoint", ["network/hosts", "network/hosts/search"])
+def test_pagination(endpoint, client, host_factory, readonly_token):
+    # MAX_PER_PAGE set to 25 for testing
+    # Create 30 hosts
+    for i in range(30):
+        host_factory()
+    if endpoint == "network/hosts":
+        extra_args = "&recursive=False"
+    else:
+        extra_args = ""
+    # By default 20 hosts per page shall be returned
+    response = get(client, f"{API_URL}/{endpoint}", token=readonly_token)
+    assert response.status_code == 200
+    assert len(response.get_json()) == 20
+    assert response.headers["x-total-count"] == "30"
+    assert (
+        f'{API_URL}/{endpoint}?per_page=20&page=2{extra_args}>; rel="next",'
+        in response.headers["link"]
+    )
+    assert 'rel="prev"' not in response.headers["link"]
+    assert 'rel="first"' not in response.headers["link"]
+    # Get second page (which is last)
+    response = get(
+        client,
+        f"{API_URL}/{endpoint}?per_page=20&page=2{extra_args}",
+        token=readonly_token,
+    )
+    assert response.status_code == 200
+    assert len(response.get_json()) == 10
+    assert (
+        f'{API_URL}/{endpoint}?per_page=20&page=1{extra_args}>; rel="first",'
+        in response.headers["link"]
+    )
+    assert (
+        f'{API_URL}/{endpoint}?per_page=20&page=1{extra_args}>; rel="prev"'
+        in response.headers["link"]
+    )
+    assert 'rel="next"' not in response.headers["link"]
+    assert 'rel="last"' not in response.headers["link"]
+    # Request 10 elements per_page
+    response = get(client, f"{API_URL}/{endpoint}?per_page=10", token=readonly_token)
+    assert response.status_code == 200
+    assert len(response.get_json()) == 10
+    assert response.headers["x-total-count"] == "30"
+    assert (
+        f'{API_URL}/{endpoint}?per_page=10&page=2{extra_args}>; rel="next",'
+        in response.headers["link"]
+    )
+    assert (
+        f'{API_URL}/{endpoint}?per_page=10&page=3{extra_args}>; rel="last"'
+        in response.headers["link"]
+    )
+    # You can't request more than MAX_PER_PAGE elements
+    response = get(client, f"{API_URL}/{endpoint}?per_page=50", token=readonly_token)
+    assert response.status_code == 200
+    assert len(response.get_json()) == 25
+    assert response.headers["x-total-count"] == "30"
+    assert (
+        f'{API_URL}/{endpoint}?per_page=25&page=2{extra_args}>; rel="next",'
+        in response.headers["link"]
+    )
+    assert (
+        f'{API_URL}/{endpoint}?per_page=25&page=2{extra_args}>; rel="last"'
+        in response.headers["link"]
+    )
-- 
GitLab