From 67b2f81578d77e58c8bfc3b61a53ead4f3c43da8 Mon Sep 17 00:00:00 2001
From: Benjamin Bertrand <benjamin.bertrand@esss.se>
Date: Fri, 25 Jan 2019 14:05:52 +0100
Subject: [PATCH] Fix case insensitive search

Searching on fields of type "keyword" can be made case-insensitive by
using a normalizer. But some fields of type "keyword" contain several
words, and they are only matched when the whole value is entered.
This is not what most users expect.

It's better to use the "text" type by default (for text fields) and add
an extra "keyword" sub-field of type "keyword" for sorting.

JIRA INFRA-770
---
 app/models.py                | 63 +++++++++++++++++++-----------------
 app/utils.py                 |  8 +++--
 tests/functional/test_web.py | 30 +++++++++++++++++
 3 files changed, 68 insertions(+), 33 deletions(-)

diff --git a/app/models.py b/app/models.py
index dc89da1..bdbd11f 100644
--- a/app/models.py
+++ b/app/models.py
@@ -607,19 +607,19 @@ class Item(CreatedMixin, SearchableMixin, db.Model):
     __mapping__ = {
         "created_at": {"type": "date", "format": "yyyy-MM-dd HH:mm"},
         "updated_at": {"type": "date", "format": "yyyy-MM-dd HH:mm"},
-        "user": {"type": "keyword"},
-        "ics_id": {"type": "keyword"},
-        "serial_number": {"type": "keyword"},
+        "user": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "ics_id": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "serial_number": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
         "quantity": {"type": "long"},
-        "manufacturer": {"type": "keyword"},
-        "model": {"type": "keyword"},
-        "location": {"type": "keyword"},
-        "status": {"type": "keyword"},
-        "parent": {"type": "keyword"},
-        "children": {"type": "keyword"},
-        "macs": {"type": "keyword"},
-        "host": {"type": "keyword"},
-        "stack_member": {"type": "keyword"},
+        "manufacturer": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "model": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "location": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "status": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "parent": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "children": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "macs": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "host": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "stack_member": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
         "history": {"enabled": False},
         "comments": {"type": "text"},
     }
@@ -1090,36 +1090,39 @@ class Host(CreatedMixin, SearchableMixin, db.Model):
     __mapping__ = {
         "created_at": {"type": "date", "format": "yyyy-MM-dd HH:mm"},
         "updated_at": {"type": "date", "format": "yyyy-MM-dd HH:mm"},
-        "user": {"type": "keyword"},
-        "name": {"type": "keyword"},
-        "fqdn": {"type": "keyword"},
+        "user": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "name": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "fqdn": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
         "is_ioc": {"type": "boolean"},
-        "device_type": {"type": "keyword"},
-        "model": {"type": "keyword"},
-        "description": {"type": "text"},
-        "items": {"type": "keyword"},
+        "device_type": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "model": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "description": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+        "items": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
         "interfaces": {
             "properties": {
                 "id": {"enabled": False},
                 "created_at": {"type": "date", "format": "yyyy-MM-dd HH:mm"},
                 "updated_at": {"type": "date", "format": "yyyy-MM-dd HH:mm"},
-                "user": {"type": "keyword"},
+                "user": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
                 "is_main": {"type": "boolean"},
-                "network": {"type": "keyword"},
+                "network": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
                 "ip": {"type": "ip"},
                 "netmask": {"enabled": False},
-                "name": {"type": "keyword"},
-                "mac": {"type": "keyword"},
-                "host": {"type": "keyword"},
-                "cnames": {"type": "keyword"},
-                "domain": {"type": "keyword"},
-                "tags": {"type": "keyword"},
-                "device_type": {"type": "keyword"},
-                "model": {"type": "keyword"},
+                "name": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+                "mac": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+                "host": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+                "cnames": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+                "domain": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+                "tags": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
+                "device_type": {
+                    "type": "text",
+                    "fields": {"keyword": {"type": "keyword"}},
+                },
+                "model": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
             }
         },
         "ansible_vars": {"enabled": False},
-        "ansible_groups": {"type": "keyword"},
+        "ansible_groups": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
     }
 
     # id shall be defined here to be used by SQLAlchemy-Continuum
diff --git a/app/utils.py b/app/utils.py
index 7362ca1..b05c883 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -311,9 +311,11 @@ def retrieve_data_for_datatables(values, model):
         order_dir = values.get("order[0][dir]", "asc")
         # Sorting can be done directly on all fields of type
         # keyword/date/long
-        # If we want to sort on fields of type text, we should
-        # add an extra .keyword field!
-        sort = f"{name}:{order_dir}"
+        # To sort on fields of type text, we use the extra .keyword field
+        if name in ("created_at", "updated_at", "quantity"):
+            sort = f"{name}:{order_dir}"
+        else:
+            sort = f"{name}.keyword:{order_dir}"
     instances, nb_filtered = model.search(
         search, page=page, per_page=per_page, sort=sort
     )
diff --git a/tests/functional/test_web.py b/tests/functional/test_web.py
index 1343c61..757e5e9 100644
--- a/tests/functional/test_web.py
+++ b/tests/functional/test_web.py
@@ -180,6 +180,36 @@ def test_retrieve_items_sort(logged_client, item_factory):
     ]
 
 
+def test_retrieve_items_case_insensitive(logged_client, model_factory, item_factory):
+    juniper_model = model_factory(name="Juniper")
+    item_factory(serial_number="BBB001", model=juniper_model)
+    item_factory(serial_number="ABB042")
+    response = logged_client.post(
+        "/inventory/_retrieve_items",
+        data={"draw": "50", "length": 20, "start": 0, "search[value]": "juniper"},
+    )
+    r = response.get_json()
+    assert r["recordsTotal"] == 2
+    assert r["recordsFiltered"] == 1
+    assert len(r["data"]) == 1
+    assert r["data"][0]["model"] == "Juniper"
+
+
+def test_retrieve_items_one_word(logged_client, manufacturer_factory, item_factory):
+    manufacturer = manufacturer_factory(name="Concurrent Technologies")
+    item_factory(serial_number="AAA001", manufacturer=manufacturer)
+    item_factory(serial_number="ABB042")
+    response = logged_client.post(
+        "/inventory/_retrieve_items",
+        data={"draw": "50", "length": 20, "start": 0, "search[value]": "concurrent"},
+    )
+    r = response.get_json()
+    assert r["recordsTotal"] == 2
+    assert r["recordsFiltered"] == 1
+    assert len(r["data"]) == 1
+    assert r["data"][0]["manufacturer"] == "Concurrent Technologies"
+
+
 def test_generate_random_mac(logged_client):
     response = logged_client.get("/network/_generate_random_mac")
     mac = response.get_json()["data"]["mac"]
-- 
GitLab