Source code for redis.commands.search.commands

import itertools
import time

from ..helpers import parse_to_dict
from ._util import to_string
from .aggregation import AggregateRequest, AggregateResult, Cursor
from .document import Document
from .query import Query
from .result import Result
from .suggestion import SuggestionParser

NUMERIC = "NUMERIC"

CREATE_CMD = "FT.CREATE"
ALTER_CMD = "FT.ALTER"
SEARCH_CMD = "FT.SEARCH"
ADD_CMD = "FT.ADD"
ADDHASH_CMD = "FT.ADDHASH"
DROP_CMD = "FT.DROP"
EXPLAIN_CMD = "FT.EXPLAIN"
EXPLAINCLI_CMD = "FT.EXPLAINCLI"
DEL_CMD = "FT.DEL"
AGGREGATE_CMD = "FT.AGGREGATE"
PROFILE_CMD = "FT.PROFILE"
CURSOR_CMD = "FT.CURSOR"
SPELLCHECK_CMD = "FT.SPELLCHECK"
DICT_ADD_CMD = "FT.DICTADD"
DICT_DEL_CMD = "FT.DICTDEL"
DICT_DUMP_CMD = "FT.DICTDUMP"
GET_CMD = "FT.GET"
MGET_CMD = "FT.MGET"
CONFIG_CMD = "FT.CONFIG"
TAGVALS_CMD = "FT.TAGVALS"
ALIAS_ADD_CMD = "FT.ALIASADD"
ALIAS_UPDATE_CMD = "FT.ALIASUPDATE"
ALIAS_DEL_CMD = "FT.ALIASDEL"
INFO_CMD = "FT.INFO"
SUGADD_COMMAND = "FT.SUGADD"
SUGDEL_COMMAND = "FT.SUGDEL"
SUGLEN_COMMAND = "FT.SUGLEN"
SUGGET_COMMAND = "FT.SUGGET"
SYNUPDATE_CMD = "FT.SYNUPDATE"
SYNDUMP_CMD = "FT.SYNDUMP"

NOOFFSETS = "NOOFFSETS"
NOFIELDS = "NOFIELDS"
NOHL = "NOHL"
NOFREQS = "NOFREQS"
MAXTEXTFIELDS = "MAXTEXTFIELDS"
TEMPORARY = "TEMPORARY"
STOPWORDS = "STOPWORDS"
SKIPINITIALSCAN = "SKIPINITIALSCAN"
WITHSCORES = "WITHSCORES"
FUZZY = "FUZZY"
WITHPAYLOADS = "WITHPAYLOADS"


[docs]class SearchCommands: """Search commands."""
[docs] def batch_indexer(self, chunk_size=100): """ Create a new batch indexer from the client with a given chunk size """ return self.BatchIndexer(self, chunk_size=chunk_size)
[docs] def create_index( self, fields, no_term_offsets=False, no_field_flags=False, stopwords=None, definition=None, max_text_fields=False, temporary=None, no_highlight=False, no_term_frequencies=False, skip_initial_scan=False, ): """ Create the search index. The index must not already exist. ### Parameters: - **fields**: a list of TextField or NumericField objects - **no_term_offsets**: If true, we will not save term offsets in the index - **no_field_flags**: If true, we will not save field flags that allow searching in specific fields - **stopwords**: If not None, we create the index with this custom stopword list. The list can be empty - **max_text_fields**: If true, we will encode indexes as if there were more than 32 text fields which allows you to add additional fields (beyond 32). - **temporary**: Create a lightweight temporary index which will expire after the specified period of inactivity (in seconds). The internal idle timer is reset whenever the index is searched or added to. - **no_highlight**: If true, disabling highlighting support. Also implied by no_term_offsets. - **no_term_frequencies**: If true, we avoid saving the term frequencies in the index. - **skip_initial_scan**: If true, we do not scan and index. For more information: https://oss.redis.com/redisearch/Commands/#ftcreate """ # noqa args = [CREATE_CMD, self.index_name] if definition is not None: args += definition.args if max_text_fields: args.append(MAXTEXTFIELDS) if temporary is not None and isinstance(temporary, int): args.append(TEMPORARY) args.append(temporary) if no_term_offsets: args.append(NOOFFSETS) if no_highlight: args.append(NOHL) if no_field_flags: args.append(NOFIELDS) if no_term_frequencies: args.append(NOFREQS) if skip_initial_scan: args.append(SKIPINITIALSCAN) if stopwords is not None and isinstance(stopwords, (list, tuple, set)): args += [STOPWORDS, len(stopwords)] if len(stopwords) > 0: args += list(stopwords) args.append("SCHEMA") try: args += list(itertools.chain(*(f.redis_args() for f in fields))) except TypeError: args += fields.redis_args() return self.execute_command(*args)
[docs] def alter_schema_add(self, fields): """ Alter the existing search index by adding new fields. The index must already exist. ### Parameters: - **fields**: a list of Field objects to add for the index For more information: https://oss.redis.com/redisearch/Commands/#ftalter_schema_add """ # noqa args = [ALTER_CMD, self.index_name, "SCHEMA", "ADD"] try: args += list(itertools.chain(*(f.redis_args() for f in fields))) except TypeError: args += fields.redis_args() return self.execute_command(*args)
[docs] def dropindex(self, delete_documents=False): """ Drop the index if it exists. Replaced `drop_index` in RediSearch 2.0. Default behavior was changed to not delete the indexed documents. ### Parameters: - **delete_documents**: If `True`, all documents will be deleted. For more information: https://oss.redis.com/redisearch/Commands/#ftdropindex """ # noqa keep_str = "" if delete_documents else "KEEPDOCS" return self.execute_command(DROP_CMD, self.index_name, keep_str)
def _add_document( self, doc_id, conn=None, nosave=False, score=1.0, payload=None, replace=False, partial=False, language=None, no_create=False, **fields, ): """ Internal add_document used for both batch and single doc indexing """ if conn is None: conn = self.client if partial or no_create: replace = True args = [ADD_CMD, self.index_name, doc_id, score] if nosave: args.append("NOSAVE") if payload is not None: args.append("PAYLOAD") args.append(payload) if replace: args.append("REPLACE") if partial: args.append("PARTIAL") if no_create: args.append("NOCREATE") if language: args += ["LANGUAGE", language] args.append("FIELDS") args += list(itertools.chain(*fields.items())) return conn.execute_command(*args) def _add_document_hash( self, doc_id, conn=None, score=1.0, language=None, replace=False, ): """ Internal add_document_hash used for both batch and single doc indexing """ if conn is None: conn = self.client args = [ADDHASH_CMD, self.index_name, doc_id, score] if replace: args.append("REPLACE") if language: args += ["LANGUAGE", language] return conn.execute_command(*args)
[docs] def add_document( self, doc_id, nosave=False, score=1.0, payload=None, replace=False, partial=False, language=None, no_create=False, **fields, ): """ Add a single document to the index. ### Parameters - **doc_id**: the id of the saved document. - **nosave**: if set to true, we just index the document, and don't save a copy of it. This means that searches will just return ids. - **score**: the document ranking, between 0.0 and 1.0 - **payload**: optional inner-index payload we can save for fast i access in scoring functions - **replace**: if True, and the document already is in the index, we perform an update and reindex the document - **partial**: if True, the fields specified will be added to the existing document. This has the added benefit that any fields specified with `no_index` will not be reindexed again. Implies `replace` - **language**: Specify the language used for document tokenization. - **no_create**: if True, the document is only updated and reindexed if it already exists. If the document does not exist, an error will be returned. Implies `replace` - **fields** kwargs dictionary of the document fields to be saved and/or indexed. NOTE: Geo points shoule be encoded as strings of "lon,lat" For more information: https://oss.redis.com/redisearch/Commands/#ftadd """ # noqa return self._add_document( doc_id, conn=None, nosave=nosave, score=score, payload=payload, replace=replace, partial=partial, language=language, no_create=no_create, **fields, )
[docs] def add_document_hash( self, doc_id, score=1.0, language=None, replace=False, ): """ Add a hash document to the index. ### Parameters - **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs. - **score**: the document ranking, between 0.0 and 1.0 - **replace**: if True, and the document already is in the index, we perform an update and reindex the document - **language**: Specify the language used for document tokenization. For more information: https://oss.redis.com/redisearch/Commands/#ftaddhash """ # noqa return self._add_document_hash( doc_id, conn=None, score=score, language=language, replace=replace, )
[docs] def delete_document(self, doc_id, conn=None, delete_actual_document=False): """ Delete a document from index Returns 1 if the document was deleted, 0 if not ### Parameters - **delete_actual_document**: if set to True, RediSearch also delete the actual document if it is in the index For more information: https://oss.redis.com/redisearch/Commands/#ftdel """ # noqa args = [DEL_CMD, self.index_name, doc_id] if conn is None: conn = self.client if delete_actual_document: args.append("DD") return conn.execute_command(*args)
[docs] def load_document(self, id): """ Load a single document by id """ fields = self.client.hgetall(id) f2 = {to_string(k): to_string(v) for k, v in fields.items()} fields = f2 try: del fields["id"] except KeyError: pass return Document(id=id, **fields)
[docs] def get(self, *ids): """ Returns the full contents of multiple documents. ### Parameters - **ids**: the ids of the saved documents. For more information https://oss.redis.com/redisearch/Commands/#ftget """ return self.client.execute_command(MGET_CMD, self.index_name, *ids)
[docs] def info(self): """ Get info an stats about the the current index, including the number of documents, memory consumption, etc For more information https://oss.redis.com/redisearch/Commands/#ftinfo """ res = self.client.execute_command(INFO_CMD, self.index_name) it = map(to_string, res) return dict(zip(it, it))
def _mk_query_args(self, query): args = [self.index_name] if isinstance(query, str): # convert the query from a text to a query object query = Query(query) if not isinstance(query, Query): raise ValueError(f"Bad query type {type(query)}") args += query.get_args() return args, query
[docs] def search(self, query): """ Search the index for a given query, and return a result of documents ### Parameters - **query**: the search query. Either a text for simple queries with default parameters, or a Query object for complex queries. See RediSearch's documentation on query format For more information: https://oss.redis.com/redisearch/Commands/#ftsearch """ # noqa args, query = self._mk_query_args(query) st = time.time() res = self.execute_command(SEARCH_CMD, *args) return Result( res, not query._no_content, duration=(time.time() - st) * 1000.0, has_payload=query._with_payloads, with_scores=query._with_scores, )
[docs] def explain(self, query): """Returns the execution plan for a complex query. For more information: https://oss.redis.com/redisearch/Commands/#ftexplain """ # noqa args, query_text = self._mk_query_args(query) return self.execute_command(EXPLAIN_CMD, *args)
def explain_cli(self, query): # noqa raise NotImplementedError("EXPLAINCLI will not be implemented.")
[docs] def aggregate(self, query): """ Issue an aggregation query. ### Parameters **query**: This can be either an `AggregateRequest`, or a `Cursor` An `AggregateResult` object is returned. You can access the rows from its `rows` property, which will always yield the rows of the result. Fpr more information: https://oss.redis.com/redisearch/Commands/#ftaggregate """ # noqa if isinstance(query, AggregateRequest): has_cursor = bool(query._cursor) cmd = [AGGREGATE_CMD, self.index_name] + query.build_args() elif isinstance(query, Cursor): has_cursor = True cmd = [CURSOR_CMD, "READ", self.index_name] + query.build_args() else: raise ValueError("Bad query", query) raw = self.execute_command(*cmd) return self._get_AggregateResult(raw, query, has_cursor)
def _get_AggregateResult(self, raw, query, has_cursor): if has_cursor: if isinstance(query, Cursor): query.cid = raw[1] cursor = query else: cursor = Cursor(raw[1]) raw = raw[0] else: cursor = None if isinstance(query, AggregateRequest) and query._with_schema: schema = raw[0] rows = raw[2:] else: schema = None rows = raw[1:] return AggregateResult(rows, cursor, schema)
[docs] def profile(self, query, limited=False): """ Performs a search or aggregate command and collects performance information. ### Parameters **query**: This can be either an `AggregateRequest`, `Query` or string. **limited**: If set to True, removes details of reader iterator. """ st = time.time() cmd = [PROFILE_CMD, self.index_name, ""] if limited: cmd.append("LIMITED") cmd.append("QUERY") if isinstance(query, AggregateRequest): cmd[2] = "AGGREGATE" cmd += query.build_args() elif isinstance(query, Query): cmd[2] = "SEARCH" cmd += query.get_args() else: raise ValueError("Must provide AggregateRequest object or " "Query object.") res = self.execute_command(*cmd) if isinstance(query, AggregateRequest): result = self._get_AggregateResult(res[0], query, query._cursor) else: result = Result( res[0], not query._no_content, duration=(time.time() - st) * 1000.0, has_payload=query._with_payloads, with_scores=query._with_scores, ) return result, parse_to_dict(res[1])
[docs] def spellcheck(self, query, distance=None, include=None, exclude=None): """ Issue a spellcheck query ### Parameters **query**: search query. **distance***: the maximal Levenshtein distance for spelling suggestions (default: 1, max: 4). **include**: specifies an inclusion custom dictionary. **exclude**: specifies an exclusion custom dictionary. For more information: https://oss.redis.com/redisearch/Commands/#ftspellcheck """ # noqa cmd = [SPELLCHECK_CMD, self.index_name, query] if distance: cmd.extend(["DISTANCE", distance]) if include: cmd.extend(["TERMS", "INCLUDE", include]) if exclude: cmd.extend(["TERMS", "EXCLUDE", exclude]) raw = self.execute_command(*cmd) corrections = {} if raw == 0: return corrections for _correction in raw: if isinstance(_correction, int) and _correction == 0: continue if len(_correction) != 3: continue if not _correction[2]: continue if not _correction[2][0]: continue # For spellcheck output # 1) 1) "TERM" # 2) "{term1}" # 3) 1) 1) "{score1}" # 2) "{suggestion1}" # 2) 1) "{score2}" # 2) "{suggestion2}" # # Following dictionary will be made # corrections = { # '{term1}': [ # {'score': '{score1}', 'suggestion': '{suggestion1}'}, # {'score': '{score2}', 'suggestion': '{suggestion2}'} # ] # } corrections[_correction[1]] = [ {"score": _item[0], "suggestion": _item[1]} for _item in _correction[2] ] return corrections
[docs] def dict_add(self, name, *terms): """Adds terms to a dictionary. ### Parameters - **name**: Dictionary name. - **terms**: List of items for adding to the dictionary. For more information: https://oss.redis.com/redisearch/Commands/#ftdictadd """ # noqa cmd = [DICT_ADD_CMD, name] cmd.extend(terms) return self.execute_command(*cmd)
[docs] def dict_del(self, name, *terms): """Deletes terms from a dictionary. ### Parameters - **name**: Dictionary name. - **terms**: List of items for removing from the dictionary. For more information: https://oss.redis.com/redisearch/Commands/#ftdictdel """ # noqa cmd = [DICT_DEL_CMD, name] cmd.extend(terms) return self.execute_command(*cmd)
[docs] def dict_dump(self, name): """Dumps all terms in the given dictionary. ### Parameters - **name**: Dictionary name. For more information: https://oss.redis.com/redisearch/Commands/#ftdictdump """ # noqa cmd = [DICT_DUMP_CMD, name] return self.execute_command(*cmd)
[docs] def config_set(self, option, value): """Set runtime configuration option. ### Parameters - **option**: the name of the configuration option. - **value**: a value for the configuration option. For more information: https://oss.redis.com/redisearch/Commands/#ftconfig """ # noqa cmd = [CONFIG_CMD, "SET", option, value] raw = self.execute_command(*cmd) return raw == "OK"
[docs] def config_get(self, option): """Get runtime configuration option value. ### Parameters - **option**: the name of the configuration option. For more information: https://oss.redis.com/redisearch/Commands/#ftconfig """ # noqa cmd = [CONFIG_CMD, "GET", option] res = {} raw = self.execute_command(*cmd) if raw: for kvs in raw: res[kvs[0]] = kvs[1] return res
[docs] def tagvals(self, tagfield): """ Return a list of all possible tag values ### Parameters - **tagfield**: Tag field name For more information: https://oss.redis.com/redisearch/Commands/#fttagvals """ # noqa return self.execute_command(TAGVALS_CMD, self.index_name, tagfield)
[docs] def aliasadd(self, alias): """ Alias a search index - will fail if alias already exists ### Parameters - **alias**: Name of the alias to create For more information: https://oss.redis.com/redisearch/Commands/#ftaliasadd """ # noqa return self.execute_command(ALIAS_ADD_CMD, alias, self.index_name)
[docs] def aliasupdate(self, alias): """ Updates an alias - will fail if alias does not already exist ### Parameters - **alias**: Name of the alias to create For more information: https://oss.redis.com/redisearch/Commands/#ftaliasupdate """ # noqa return self.execute_command(ALIAS_UPDATE_CMD, alias, self.index_name)
[docs] def aliasdel(self, alias): """ Removes an alias to a search index ### Parameters - **alias**: Name of the alias to delete For more information: https://oss.redis.com/redisearch/Commands/#ftaliasdel """ # noqa return self.execute_command(ALIAS_DEL_CMD, alias)
[docs] def sugadd(self, key, *suggestions, **kwargs): """ Add suggestion terms to the AutoCompleter engine. Each suggestion has a score and string. If kwargs["increment"] is true and the terms are already in the server's dictionary, we increment their scores. For more information: https://oss.redis.com/redisearch/master/Commands/#ftsugadd """ # noqa # If Transaction is not False it will MULTI/EXEC which will error pipe = self.pipeline(transaction=False) for sug in suggestions: args = [SUGADD_COMMAND, key, sug.string, sug.score] if kwargs.get("increment"): args.append("INCR") if sug.payload: args.append("PAYLOAD") args.append(sug.payload) pipe.execute_command(*args) return pipe.execute()[-1]
[docs] def suglen(self, key): """ Return the number of entries in the AutoCompleter index. For more information https://oss.redis.com/redisearch/master/Commands/#ftsuglen """ # noqa return self.execute_command(SUGLEN_COMMAND, key)
[docs] def sugdel(self, key, string): """ Delete a string from the AutoCompleter index. Returns 1 if the string was found and deleted, 0 otherwise. For more information: https://oss.redis.com/redisearch/master/Commands/#ftsugdel """ # noqa return self.execute_command(SUGDEL_COMMAND, key, string)
[docs] def sugget( self, key, prefix, fuzzy=False, num=10, with_scores=False, with_payloads=False ): """ Get a list of suggestions from the AutoCompleter, for a given prefix. Parameters: prefix : str The prefix we are searching. **Must be valid ascii or utf-8** fuzzy : bool If set to true, the prefix search is done in fuzzy mode. **NOTE**: Running fuzzy searches on short (<3 letters) prefixes can be very slow, and even scan the entire index. with_scores : bool If set to true, we also return the (refactored) score of each suggestion. This is normally not needed, and is NOT the original score inserted into the index. with_payloads : bool Return suggestion payloads num : int The maximum number of results we return. Note that we might return less. The algorithm trims irrelevant suggestions. Returns: list: A list of Suggestion objects. If with_scores was False, the score of all suggestions is 1. For more information: https://oss.redis.com/redisearch/master/Commands/#ftsugget """ # noqa args = [SUGGET_COMMAND, key, prefix, "MAX", num] if fuzzy: args.append(FUZZY) if with_scores: args.append(WITHSCORES) if with_payloads: args.append(WITHPAYLOADS) ret = self.execute_command(*args) results = [] if not ret: return results parser = SuggestionParser(with_scores, with_payloads, ret) return [s for s in parser]
[docs] def synupdate(self, groupid, skipinitial=False, *terms): """ Updates a synonym group. The command is used to create or update a synonym group with additional terms. Only documents which were indexed after the update will be affected. Parameters: groupid : Synonym group id. skipinitial : bool If set to true, we do not scan and index. terms : The terms. For more information: https://oss.redis.com/redisearch/Commands/#ftsynupdate """ # noqa cmd = [SYNUPDATE_CMD, self.index_name, groupid] if skipinitial: cmd.extend(["SKIPINITIALSCAN"]) cmd.extend(terms) return self.execute_command(*cmd)
[docs] def syndump(self): """ Dumps the contents of a synonym group. The command is used to dump the synonyms data structure. Returns a list of synonym terms and their synonym group ids. For more information: https://oss.redis.com/redisearch/Commands/#ftsyndump """ # noqa raw = self.execute_command(SYNDUMP_CMD, self.index_name) return {raw[i]: raw[i + 1] for i in range(0, len(raw), 2)}