search_query package

Subpackages

Submodules

search_query.cli module

CLI for search-query.

search_query.cli.lint() None

Main entrypoint for the query linter hook

search_query.cli.translate() None

Main entrypoint for the query translation CLI

search_query.constants module

Constants for search-query

class search_query.constants.Colors

Bases: object

Colors for CLI printing

BLUE = '\x1b[94m'
END = '\x1b[0m'
GREEN = '\x1b[92m'
GREY = '\x1b[90m'
ORANGE = '\x1b[93m'
RED = '\x1b[91m'
class search_query.constants.ExitCodes

Bases: object

Exit codes

FAIL = 1
SUCCESS = 0
class search_query.constants.Fields

Bases: object

Search fields

ABSTRACT = 'abstract'
ACCESSION_NUMBER = 'accession-nr'
ADDRESS = 'address'
AFFILIATION = 'affiliation'
ALL = 'all-fields'
AUTHOR = 'author'
AUTHOR_IDENTIFIERS = 'author-identifiers'
AUTHOR_KEYWORDS = 'keywords-author'
CITY = 'city'
CONFERENCE = 'conference'
COUNTRY_REGION = 'country-region'
DESCRIPTORS = 'descriptors'
DOI = 'doi'
EDITOR = 'editor'
FILTER = 'sb'
FUNDING_AGENCY = 'funding-agency'
FUNDING_TEXT = 'funding-text'
GRANT_NUMBER = 'grant-nr'
GROUP_AUTHOR = 'group-author'
ISBN = 'isbn'
ISSN = 'issn'
ISSN_ISBN = 'issn-isbn'
JOURNAL = 'journal'
KEYWORDS = 'keywords'
KEYWORDS_PLUS = 'keywords-plus'
LANGUAGE = 'language'
MESH_TERM = 'mesh-term'
ORGANIZATION = 'organization'
ORGANIZATION_ENHANCED = 'organization-enhanced'
PROVINCE_STATE = 'province-state'
PUBLICATION_NAME = 'publication-name'
PUBLICATION_TYPE = 'publication-type'
PUBLISHER = 'publisher'
PUBMED_ID = 'pmid'
RESEARCH_AREA = 'research-area'
SOURCE = 'source'
STREET_ADDRESS = 'street-address'
SUBJECT_TERMS = 'subject-terms'
SUBORGANIZATION = 'suborganization'
TEXT_WORD = 'text-word'
TITLE = 'title'
TOPIC = 'topic'
WEB_OF_SCIENCE_CATEGORY = 'wos-category'
YEAR_PUBLICATION = 'year-publication'
ZIP_POSTAL_CODE = 'zip'
classmethod all() list

Return all fields as a list.

class search_query.constants.LinterMode

Bases: object

Linter mode

NONSTRICT = 'non-strict'
STRICT = 'strict'
class search_query.constants.ListToken(value: str, type: OperatorNodeTokenTypes, level: int, position: Tuple[int, int])

Bases: object

Token class

level: int
position: Tuple[int, int]
type: OperatorNodeTokenTypes
value: str
class search_query.constants.ListTokenTypes(value)

Bases: Enum

List token types

OPERATOR_NODE = 'OPERATOR_NODE'
QUERY_NODE = 'QUERY_NODE'
class search_query.constants.OperatorNodeTokenTypes(value)

Bases: Enum

Operator node token types (list queries)

LIST_ITEM_REFERENCE = 'LIST_ITEM_REFERENCE'
LOGIC_OPERATOR = 'LOGIC_OPERATOR'
UNKNOWN = 'UNKNOWN'
class search_query.constants.Operators

Bases: object

AND = 'AND'
NEAR = 'NEAR'
NOT = 'NOT'
OR = 'OR'
RANGE = 'RANGE'
WITHIN = 'WITHIN'
class search_query.constants.PLATFORM(value)

Bases: Enum

Database identifier

EBSCO = 'ebscohost'
GENERIC = 'generic'
PRE_NOTATION = 'pre_notation'
PUBMED = 'pubmed'
STRUCTURED = 'structured'
WOS = 'wos'
class search_query.constants.QueryErrorCode(value)

Bases: Enum

Error codes for the query parser

BOOLEAN_OPERATOR_READABILITY = (['all'], 'W0009', 'boolean-operator-readability', 'Boolean operator readability', '')
CHARACTER_REPLACEMENT = ([PLATFORM.PUBMED], 'W0010', 'character-replacement', 'Character replacement', '')
DATE_FILTER_IN_SUBQUERY = ([PLATFORM.PUBMED], 'W0011', 'date-filter-in-subquery', 'Date filter in subquery', '')
DOI_FORMAT_INVALID = ([PLATFORM.WOS], 'F2009', 'doi-format-invalid', 'Invalid DOI format.', '')
EMPTY_PARENTHESES = ([PLATFORM.PUBMED], 'F1009', 'empty-parentheses', 'Query contains empty parentheses', '')
IMPLICIT_NEAR_VALUE = ([PLATFORM.WOS], 'W0006', 'implicit-near-value', 'The value of NEAR operator is implicit', '**Typical fix**: The parser automatically sets NEAR values to 15 (default).\n\n**Problematic query**:\n\n.. code-block:: python\n\n    A NEAR B\n\n**Correct query**:\n\n.. code-block:: python\n\n    A NEAR/15 B')
IMPLICIT_OPERATOR = ([PLATFORM.PUBMED], 'W0012', 'implicit-operator', 'Implicit operator', '')
IMPLICIT_PRECEDENCE = (['all', PLATFORM.PUBMED], 'W0007', 'implicit-precedence', 'Operator changed at the same level (explicit parentheses are recommended)', '')
INVALID_CHARACTER = ([PLATFORM.PUBMED], 'E0004', 'invalid-character', 'Search term contains invalid character', '')
INVALID_LIST_REFERENCE = ([PLATFORM.WOS, PLATFORM.PUBMED], 'F3003', 'invalid-list-reference', 'Invalid list reference in list query', '')
INVALID_PROXIMITY_USE = ([PLATFORM.PUBMED, PLATFORM.EBSCO], 'E0005', 'invalid-proximity-use', 'Invalid use of the proximity operator', '')
INVALID_SYNTAX = (['all'], 'F1010', 'invalid-syntax', 'Query contains invalid syntax', '')
INVALID_TOKEN_SEQUENCE = ([PLATFORM.EBSCO], 'F1004', 'invalid-token-sequence', 'The sequence of tokens is invalid.', '')
INVALID_WILDCARD_USE = ([PLATFORM.PUBMED], 'E0006', 'invalid-wildcard-use', 'Invalid use of the wildcard operator *', '')
ISBN_FORMAT_INVALID = ([PLATFORM.WOS], 'F2008', 'isbn-format-invalid', 'Invalid ISBN format.', '')
JOURNAL_FILTER_IN_SUBQUERY = ([PLATFORM.PUBMED], 'W0014', 'journal-filter-in-subquery', 'Journal (or publication name) filter in subquery', '')
MISSING_OPERATOR_NODES = ([PLATFORM.WOS], 'F3002', 'missing-operator-nodes', 'List format query without operator nodes', '')
MISSING_ROOT_NODE = ([PLATFORM.WOS], 'F3001', 'missing-root-node', 'List format query without root node (typically containing operators)', '')
NEAR_DISTANCE_TOO_LARGE = ([PLATFORM.WOS], 'F2007', 'near-distance-too-large', 'NEAR distance is too large (max: 15).', '')
NESTED_QUERY_WITH_SEARCH_FIELD = ([PLATFORM.PUBMED], 'F2013', 'nested-query-with-search-field', 'A Nested query cannot have a search field.', '')
NON_STANDARD_QUOTES = (['all'], 'W0013', 'non-standard-quotes', 'Non-standard quotes', '')
OPERATOR_CAPITALIZATION = (['all'], 'W0005', 'operator-capitalization', 'Operators should be capitalized', '**Typical fix**: Capitalize the operator\n\n\n**Problematic query**:\n\n.. code-block:: python\n\n    a and b or c\n\n**Correct query**:\n\n.. code-block:: python\n\n    a AND b OR c')
QUERY_IN_QUOTES = ([PLATFORM.WOS], 'E0008', 'query-in-quotes', 'The whole Search string is in quotes.', '')
QUERY_STARTS_WITH_PLATFORM_IDENTIFIER = ([PLATFORM.WOS], 'E0007', 'query-starts-with-platform-identifier', 'Query starts with platform identifier', '')
QUERY_STRUCTURE_COMPLEX = (['all'], 'W0004', 'query-structure-unnecessarily-complex', 'Query structure is more complex than necessary', '')
SEARCH_FIELD_CONTRADICTION = (['all'], 'E0002', 'search-field-contradiction', 'Contradictory search fields specified', '')
SEARCH_FIELD_EXTRACTED = (['all'], 'W0002', 'search-field-extracted', 'Recommend explicitly specifying the search field in the string', '')
SEARCH_FIELD_MISSING = (['all'], 'E0001', 'search-field-missing', 'Expected search field is missing', '')
SEARCH_FIELD_REDUNDANT = (['all'], 'W0001', 'search-field-redundant', 'Recommend specifying search field only once in the search string', '')
SEARCH_FIELD_UNSUPPORTED = (['all', PLATFORM.WOS], 'F2011', 'search-field-unsupported', 'Search field is not supported for this database', '')
TOKENIZING_FAILED = (['all'], 'F0001', 'tokenizing-failed', 'Fatal error during tokenization', '')
TOKEN_AMBIGUITY = (['all'], 'W0008', 'token-ambiguity', 'Token ambiguity', '')
TOO_MANY_OPERATORS = ([PLATFORM.WOS], 'F1011', 'too-many-operators', 'Too many operators in the query', '')
TOO_MANY_SEARCH_TERMS = ([PLATFORM.WOS], 'F1012', 'too-many-search-terms', 'Too many search terms in the query', '')
UNBALANCED_PARENTHESES = (['all'], 'F1001', 'unbalanced-parentheses', 'Parentheses are unbalanced in the query', '**Typical fix**: Check the parentheses in the query\n\n**Problematic query**:\n\n.. code-block:: python\n\n    (a AND b OR c\n\n**Correct query**:\n\n.. code-block:: python\n\n    (a AND b) OR c')
UNBALANCED_QUOTES = (['all'], 'F1002', 'unbalanced-quotes', 'Quotes are unbalanced in the query', '')
UNSUPPORTED_PREFIX = ([PLATFORM.PUBMED], 'W0015', 'unsupported-prefix', 'Unsupported prefix in search query', '')
UNSUPPORTED_SUFFIX = ([PLATFORM.PUBMED], 'W0016', 'unsupported-suffix', 'Unsupported suffix in search query', '')
WILDCARD_AFTER_SPECIAL_CHAR = ([PLATFORM.WOS], 'F2005', 'wildcard-after-special-char', 'Wildcard cannot be preceded by special characters.', '')
WILDCARD_IN_YEAR = ([PLATFORM.WOS], 'F2002', 'wildcard-in-year', 'Wildcard characters (*, ?, $) not supported in year search.', '**Typical fix**: Replace with year range.\n\n**Problematic query**:\n\n.. code-block:: python\n\n    A AND year=201*\n\n**Correct query**:\n\n.. code-block:: python\n\n    A AND (year >= 2010 AND year < 2020)')
WILDCARD_LEFT_SHORT_LENGTH = ([PLATFORM.WOS], 'F2004', 'wildcard-left-short-length', 'Left-hand wildcard must be preceded by at least three characters.', '')
WILDCARD_RIGHT_SHORT_LENGTH = ([PLATFORM.WOS], 'F2003', 'wildcard-right-short-length', 'Right-hand wildcard must preceded by at least three characters.', '')
WILDCARD_STANDALONE = ([PLATFORM.WOS], 'F2006', 'wildcard-standalone', 'Wildcard cannot be standalone.', '')
WILDCARD_UNSUPPORTED = ([PLATFORM.WOS], 'F2001', 'wildcard-unsupported', 'Unsupported wildcard in search string.', '')
YEAR_FORMAT_INVALID = ([PLATFORM.WOS], 'F2014', 'year-format-invalid', 'Invalid year format.', '')
YEAR_SPAN_VIOLATION = ([PLATFORM.WOS], 'F2010', 'year-span-violation', 'Year span must be five or less.', '**Typical fix**: The parser automatically sets the year span to 5.\n\n**Problematic query**:\n\n.. code-block:: python\n\n    A AND PY=2000-2020\n\n**Correct query**:\n\n.. code-block:: python\n\n    A AND PY=2015-2020')
YEAR_WITHOUT_SEARCH_TERMS = ([PLATFORM.WOS], 'F2012', 'year-without-search-terms', 'A search for publication years must include at least another search term.', '**Typical fix**: A search for publication years must include at least another search term.\n\n**Problematic query**:\n\n.. code-block:: python\n\n    PY=2000\n\n**Correct query**:\n\n.. code-block:: python\n\n    PY=2000 AND TI=eHealth')
is_error() bool

Check if the error code has the severity level "error" (as opposed to a warning or a fatal error)

is_fatal() bool

Check if error is fatal

is_warning() bool

Check if error is a warning

class search_query.constants.SearchField(value: str, *, position: Tuple[int, int] | None = None)

Bases: object

SearchField class.

copy() SearchField

Return a copy of the SearchField instance.

class search_query.constants.Token(value: str, type: TokenTypes, position: Tuple[int, int])

Bases: object

Token class

is_operator() bool

Check if token is an operator

position: Tuple[int, int]
type: TokenTypes
value: str
class search_query.constants.TokenTypes(value)

Bases: Enum

Token types

FIELD = 'FIELD'
LOGIC_OPERATOR = 'LOGIC_OPERATOR'
PARENTHESIS_CLOSED = 'PARENTHESIS_CLOSED'
PARENTHESIS_OPEN = 'PARENTHESIS_OPEN'
PROXIMITY_OPERATOR = 'PROXIMITY_OPERATOR'
RANGE_OPERATOR = 'RANGE_OPERATOR'
SEARCH_TERM = 'SEARCH_TERM'
UNKNOWN = 'UNKNOWN'

search_query.database module

Database and filters.

search_query.database.list_queries() List[str]

List all available predefined query identifiers (without .json).

search_query.database.list_queries_with_details() dict

List all available queries.

search_query.database.load_query(name: str) Query

Load a query object from JSON by name.

search_query.database_queries module

Database and filters.

search_query.exception module

Exceptions of SearchQuery.

exception search_query.exception.ListQuerySyntaxError(linter: QueryListLinter)

Bases: SearchQueryException

ListQuerySyntaxError Exception

exception search_query.exception.QuerySyntaxError(linter: QueryStringLinter)

Bases: SearchQueryException

QuerySyntaxError Exception

exception search_query.exception.SearchQueryException

Bases: Exception

Base class for all exceptions raised by this package

search_query.linter module

Query linter hook.

search_query.linter.get_parser(search_string: str, *, platform: str, search_field_general: str) QueryStringParser

Run the linter on the search string

search_query.linter.pre_commit_hook(file_path: str) int

Entrypoint for the query linter hook

search_query.linter_base module

Validator for search queries.

class search_query.linter_base.QueryListLinter(parser: QueryListParser, string_parser_class: Type[QueryStringParser])

Bases: object

Class for Query List Validation

add_linter_message(error: QueryErrorCode, *, list_position: int, positions: List[tuple[int, int]], details: str = '') None

Add a linter message.

check_status() None

Check the output of the linter and report errors to the user

has_fatal_errors() bool

Check if there are any fatal errors.

print_messages() None

Print the latest linter messages.

class search_query.linter_base.QueryStringLinter(query_str: str)

Bases: object

Class for Query String Validation

FAULTY_OPERATOR_REGEX = '\\b(?:[aA][nN][dD]|[oO][rR]|[nN][oO][tT])\\b'
OPERATOR_PRECEDENCE = {'AND': 1, 'NEAR': 3, 'NOT': 2, 'OR': 0, 'WITHIN': 3}
PARENTHESIS_REGEX = '[\\(\\)]'
PLATFORM: PLATFORM = 'generic'
VALID_FIELDS_REGEX: re.Pattern
add_artificial_parentheses_for_operator_precedence(index: int = 0, output: list | None = None) tuple[int, list[Token]]

Adds artificial parentheses with position (-1, -1) to enforce operator precedence.

add_higher_value(output: list[Token], previous_value: int, value: int, art_par: int) tuple[list[Token], int]

Adds open parenthesis to higher value operators

add_linter_message(error: QueryErrorCode, *, positions: Sequence[tuple], details: str = '') None

Add a linter message.

check_boolean_operator_readability(*, faulty_operators: str = '|&') None

Check for readability of boolean operators.

check_invalid_characters_in_search_term(invalid_characters: str) None

Check a search term for invalid characters

check_invalid_characters_in_search_term_query(query: Query, invalid_characters: str) None

Check a search term for invalid characters

check_missing_tokens() None

Check missing tokens

check_near_distance_in_range(*, max_value: int) None

Check for NEAR with a specified distance out of range.

check_operator_capitalization() None

Check if operators are capitalized.

check_operators_with_fields(query: Query) None

Check for operators with fields

check_status() None

Check the output of the linter and report errors to the user

check_unbalanced_parentheses() None

Check query for unbalanced parentheses.

check_unbalanced_quotes_in_terms(query: Query) None

Recursively check for unbalanced quotes in quoted search terms.

check_unknown_token_types() None

Check for unknown token types.

check_unsupported_search_fields_in_query(query: Query) None

Check for the correct format of fields.

Note: compile valid_field_regex with or without flags=re.IGNORECASE

flatten_redundant_artificial_nesting(tokens: list[Token]) None

Flattens redundant artificial nesting: If two artificial open parens are followed eventually by two artificial close parens at the same level, removes the outer ones.

get_precedence(token: str) int

Returns operator precedence for logical and proximity operators.

get_query_with_fields_at_terms(query: Query) Query

Move the search field from the operator to the terms.

Note: utility function for validating search terms with efficient access to search fields (at the level of terms).

handle_fully_quoted_query_str(query_str: str) str

Handle fully quoted query string.

handle_nonstandard_quotes_in_query_str(query_str: str) str

Handle non-standard quotes in query string.

handle_prefix_in_query_str(query_str: str) str

Handle prefix in query string.

Removes tokens before a fully quoted query if they are not connected with a valid operator.

Only applies if quotes are balanced (even number of quotes).

handle_suffix_in_query_str(query_str: str) str

Handle suffix in query string.

Removes tokens after a fully quoted query if they are not connected with a valid operator.

Only applies if quotes are balanced (even number of quotes).

has_fatal_errors() bool

Check if there are any fatal errors.

messages: List[dict]
print_messages() None

Print the latest linter messages.

query: Optional[Query]
abstract syntax_str_to_generic_search_field_set(field_value: str) set[Fields]

Translate a search field

tokens: List[Token]
abstract validate_query_tree(query: Query) None

Validate query tree

abstract validate_tokens(*, tokens: List[Token], query_str: str, search_field_general: str = '') List[Token]

Validate tokens

search_query.parser module

Query parser.

search_query.parser.get_platform(platform_str: str) str

Get the platform from the platform string

search_query.parser.parse(query_str: str, *, search_field_general: str = '', platform: str = 'wos', mode: str = 'strict') Query

Parse a query string.

search_query.parser_base module

Base query parser.

class search_query.parser_base.QueryListParser(query_list: str, *, parser_class: type[QueryStringParser], search_field_general: str, mode: str = 'strict')

Bases: object

LIST_ITEM_REGEX: Pattern = re.compile('^(\\d+).\\s+(.*)$')
abstract parse() Query

Parse the query in list format.

query_dict: dict
tokenize_list() None

Tokenize the query_list.

class search_query.parser_base.QueryStringParser(query_str: str, *, search_field_general: str = '', mode: str = 'strict')

Bases: ABC

Abstract base class for query string parsers

OPERATOR_REGEX: re.Pattern = re.compile('^(AND|OR|NOT)$', re.IGNORECASE)
combine_subsequent_terms() None

Combine all consecutive SEARCH_TERM tokens into one.

linter: QueryStringLinter
abstract parse() Query

Parse the query.

print_tokens() None

Print the tokens in a formatted table.

split_operators_with_missing_whitespace() None

Split operators that are not separated by whitespace.

tokens: list

search_query.query module

Query class.

class search_query.query.Query(value: str, *, operator: bool = True, search_field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: object

Query class.

add_child(child: str | Query) Query

Add a child Query node and set its parent pointer.

property children: List[Query]

Children property.

copy() Query

Return a deep copy of the Query instance without parent references.

classmethod create(value: str, *, operator: bool = True, search_field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic', distance: int = 0) Query

Factory method for query creation.

evaluate(records_dict: dict) dict

Evaluate the query against records using colrev_status labels.

  • rev_included: relevant

  • rev_excluded / rev_prescreen_excluded: irrelevant

  • others: ignored

get_nr_leaves() int

Returns the number of leaves in the query tree

get_parent() Query | None

Return the parent Query node, or None if this node is the root.

get_root() Query

Return the root of the query tree by climbing up parent pointers.

is_term() bool

Check whether the SearchQuery is a term.

property operator: bool

Operator property.

property platform: str

Platform property.

replace(new_query) None
property search_field: SearchField | None

Search field property.

selects(*, record_dict: dict) bool

Indicates whether the query selects a given record.

selects_record(record_dict: dict) bool

Abstract method to be implemented by subclasses to select records.

set_platform_unchecked(platform: str, silent: bool = False) None

Set the platform for this query node without validation. This is an optional utility for parsers.

to_generic_string() str

Prints the query in generic syntax

to_string() str

Prints the query as a string

to_structured_string() str

Prints the query in structured syntax

translate(target_syntax: str) Query

Translate the query to the target syntax using the provided translator.

property value: str

Value property.

search_query.query_and module

AND Query

class search_query.query_and.AndQuery(children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

AND Query

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the AND query.

search_query.query_near module

NEAR Query

class search_query.query_near.NEARQuery(value: str, children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, distance: int, platform: str = 'generic')

Bases: Query

NEAR Query

property children: List[Query]

Children property.

property distance: int | None

Distance property.

selects_record(record_dict: dict) bool

Check if the record matches the NEAR query.

search_query.query_not module

NOT Query

class search_query.query_not.NotQuery(children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

NOT Query

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the NOT query.

search_query.query_or module

OR Query

class search_query.query_or.OrQuery(children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

OR Query Class

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the OR query.

search_query.search_file module

SearchFile parser.

class search_query.search_file.SearchFile(search_string: str, platform: str, authors: list[dict] | None = None, record_info: dict | None = None, date: dict | None = None, filepath: str | Path | None = None, **kwargs: dict)

Bases: object

SearchFile class.

save(filepath: str | Path | None = None) None

Save the search file to a JSON file.

to_dict() dict

Convert the search file to a dictionary.

search_query.search_file.load_search_file(filepath: str | Path) SearchFile

Load a search file from a JSON file.

search_query.serializer_base module

Base serializer.

class search_query.serializer_base.StringSerializer

Bases: ABC

Base class for query serializers.

abstract to_string(query: Query) str

Convert the query to a string.

Args:

query: The query to convert.

Returns:

The string representation of the query.

search_query.serializer_generic module

search_query.serializer_structured module

Structured serializer.

search_query.serializer_structured.to_string_structured(query: Query, *, level: int = 0) str

Convert the query to a string.

search_query.translator_base module

Base query translator.

class search_query.translator_base.QueryTranslator

Bases: object

Translator for queries.

classmethod flatten_nested_operators(query: Query) None

Check if there are double nested operators.

classmethod move_fields_to_operator(query: Query) None

Move the search fields to the operator query.

classmethod move_fields_to_terms(query: Query) None

Move the search field from the operator to the terms.

abstract classmethod to_generic_syntax(query: Query) Query

Convert the query to a generic syntax.

abstract classmethod to_specific_syntax(query: Query) Query

Convert the query to a specific syntax.

search_query.utils module

Utilities for SearchQuery.

search_query.utils.format_query_string_positions(query_str: str, positions: List[Tuple[int, int]], color: str = '\x1b[93m') str

Format the query string with multiple positions marked in color.

Module contents

Top-level package for SearchQuery.

class search_query.AndQuery(children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

AND Query

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the AND query.

class search_query.NEARQuery(value: str, children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, distance: int, platform: str = 'generic')

Bases: Query

NEAR Query

property children: List[Query]

Children property.

property distance: int | None

Distance property.

selects_record(record_dict: dict) bool

Check if the record matches the NEAR query.

class search_query.OrQuery(children: List[str | Query], *, search_field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

OR Query Class

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the OR query.

class search_query.Query(value: str, *, operator: bool = True, search_field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: object

Query class.

add_child(child: str | Query) Query

Add a child Query node and set its parent pointer.

property children: List[Query]

Children property.

copy() Query

Return a deep copy of the Query instance without parent references.

classmethod create(value: str, *, operator: bool = True, search_field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic', distance: int = 0) Query

Factory method for query creation.

evaluate(records_dict: dict) dict

Evaluate the query against records using colrev_status labels.

  • rev_included: relevant

  • rev_excluded / rev_prescreen_excluded: irrelevant

  • others: ignored

get_nr_leaves() int

Returns the number of leaves in the query tree

get_parent() Query | None

Return the parent Query node, or None if this node is the root.

get_root() Query

Return the root of the query tree by climbing up parent pointers.

is_term() bool

Check whether the SearchQuery is a term.

property operator: bool

Operator property.

property platform: str

Platform property.

replace(new_query) None
property search_field: SearchField | None

Search field property.

selects(*, record_dict: dict) bool

Indicates whether the query selects a given record.

selects_record(record_dict: dict) bool

Abstract method to be implemented by subclasses to select records.

set_platform_unchecked(platform: str, silent: bool = False) None

Set the platform for this query node without validation. This is an optional utility for parsers.

to_generic_string() str

Prints the query in generic syntax

to_string() str

Prints the query as a string

to_structured_string() str

Prints the query in structured syntax

translate(target_syntax: str) Query

Translate the query to the target syntax using the provided translator.

property value: str

Value property.

class search_query.SearchFile(search_string: str, platform: str, authors: list[dict] | None = None, record_info: dict | None = None, date: dict | None = None, filepath: str | Path | None = None, **kwargs: dict)

Bases: object

SearchFile class.

save(filepath: str | Path | None = None) None

Save the search file to a JSON file.

to_dict() dict

Convert the search file to a dictionary.

search_query.load_search_file(filepath: str | Path) SearchFile

Load a search file from a JSON file.