search_query package

Subpackages

Submodules

search_query.cli module

CLI for search-query.

search_query.cli.lint() None

Main entrypoint for the query linter hook

search_query.cli.translate() None

Main entrypoint for the query translation CLI

search_query.constants module

Constants for search-query

class search_query.constants.Colors

Bases: object

Colors for CLI printing

BLUE = '\x1b[94m'
END = '\x1b[0m'
GREEN = '\x1b[92m'
GREY = '\x1b[90m'
ORANGE = '\x1b[93m'
RED = '\x1b[91m'
class search_query.constants.ExitCodes

Bases: object

Exit codes

FAIL = 1
SUCCESS = 0
class search_query.constants.Fields

Bases: object

Search fields

ABSTRACT = 'abstract'
ACCESSION_NUMBER = 'accession-nr'
ADDRESS = 'address'
AFFILIATION = 'affiliation'
ALL = 'all-fields'
AUTHOR = 'author'
AUTHOR_IDENTIFIERS = 'author-identifiers'
AUTHOR_KEYWORDS = 'keywords-author'
CITY = 'city'
CONFERENCE = 'conference'
COUNTRY_REGION = 'country-region'
DESCRIPTORS = 'descriptors'
DOI = 'doi'
EDITOR = 'editor'
FILTER = 'sb'
FUNDING_AGENCY = 'funding-agency'
FUNDING_TEXT = 'funding-text'
GRANT_NUMBER = 'grant-nr'
GROUP_AUTHOR = 'group-author'
ISBN = 'isbn'
ISSN = 'issn'
ISSN_ISBN = 'issn-isbn'
JOURNAL = 'journal'
KEYWORDS = 'keywords'
KEYWORDS_PLUS = 'keywords-plus'
LANGUAGE = 'language'
MESH_TERM = 'mesh-term'
ORGANIZATION = 'organization'
ORGANIZATION_ENHANCED = 'organization-enhanced'
PROVINCE_STATE = 'province-state'
PUBLICATION_NAME = 'publication-name'
PUBLICATION_TYPE = 'publication-type'
PUBLISHER = 'publisher'
PUBMED_ID = 'pmid'
RESEARCH_AREA = 'research-area'
SOURCE = 'source'
STREET_ADDRESS = 'street-address'
SUBJECT_TERMS = 'subject-terms'
SUBORGANIZATION = 'suborganization'
TEXT_WORD = 'text-word'
TITLE = 'title'
TOPIC = 'topic'
WEB_OF_SCIENCE_CATEGORY = 'wos-category'
YEAR_PUBLICATION = 'year-publication'
ZIP_POSTAL_CODE = 'zip'
classmethod all() list

Return all fields as a list.

class search_query.constants.ListToken(value: str, type: OperatorNodeTokenTypes, level: int, position: Tuple[int, int])

Bases: object

Token class

level: int
position: Tuple[int, int]
type: OperatorNodeTokenTypes
value: str
class search_query.constants.ListTokenTypes(value)

Bases: Enum

List token types

OPERATOR_NODE = 'OPERATOR_NODE'
QUERY_NODE = 'QUERY_NODE'
class search_query.constants.OperatorNodeTokenTypes(value)

Bases: Enum

Operator node token types (list queries)

LIST_ITEM_REFERENCE = 'LIST_ITEM_REFERENCE'
NON_LIST_ITEM_REFERENCE = 'NON_LIST_ITEM_REFERENCE'
class search_query.constants.Operators

Bases: object

AND = 'AND'
NEAR = 'NEAR'
NOT = 'NOT'
OR = 'OR'
RANGE = 'RANGE'
WITHIN = 'WITHIN'
class search_query.constants.PLATFORM(value)

Bases: Enum

Database identifier

EBSCO = 'ebscohost'
GENERIC = 'generic'
PRE_NOTATION = 'pre_notation'
PUBMED = 'pubmed'
STRUCTURED = 'structured'
WOS = 'wos'
class search_query.constants.QueryErrorCode(value)

Bases: Enum

Error codes for the query parser

BOOLEAN_OPERATOR_READABILITY = ('STRUCT_0003', 'boolean-operator-readability', 'Boolean operator readability', '\n**Problematic query**:\n\n.. code-block:: text\n\n    eHealth[ti] | mHealth[ti]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    eHealth[ti] OR mHealth[ti]\n')
CHARACTER_REPLACEMENT = ('PUBMED_0002', 'character-replacement', 'Character replacement', '\n**Problematic query**:\n\n.. code-block:: text\n\n    "healthcare" AND "Industry 4.0"\n\n**Recommended query**:\n\n.. code-block:: text\n\n    "healthcare" AND "Industry 4 0"\n\n**Typical fix**: Be aware that certain characters like . in search terms will be replaced with whitespace due to platform-specific conversions. Specify search fields explicitly within the query instead of relying on general settings.\n')
DATE_FILTER_IN_SUBQUERY = ('QUALITY_0002', 'date-filter-in-subquery', 'Date filter in subquery', '\n**Problematic query**:\n\n.. code-block:: text\n\n    (("digital health"[Title/Abstract] AND "privacy"[Title/Abstract]) AND 2019/01/01:2019/12/01[publication date]) OR ("ehealth"[Title/Abstract])\n    device[ti] OR (wearable[ti] AND 2000:2010[dp])\n\n**Recommended query**:\n\n.. code-block:: text\n\n    (("digital health"[Title/Abstract] AND "privacy"[Title/Abstract]) OR ("ehealth"[Title/Abstract])) AND 2019/01/01:2019/12/01[publication date]\n    (device[ti] OR wearable[ti]) AND 2000:2010[dp]\n\n**Typical fix**: Apply date filters at the top-level of the query instead of inside subqueries to ensure the date restriction applies as intended.\n')
DOI_FORMAT_INVALID = ('TERM_0003', 'doi-format-invalid', 'Invalid DOI format.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    DO=12.1000/xyz\n\n**Recommended query**:\n\n.. code-block:: text\n\n    DO=10.1000/xyz\n\n**Typical fix**: Use a valid DOI format (e.g., starts with 10. followed by a numeric string and suffix).\n')
EBSCO_INVALID_CHARACTER = ('EBSCO_0002', 'invalid-character', 'Search term contains invalid character', '')
EBSCO_WILDCARD_UNSUPPORTED = ('EBSCO_0001', 'wildcard-unsupported', 'Unsupported wildcard in search string.', '\n**Problematic query**:\n\n.. code-block:: text\n\n   # Leading wildcard\n   TI=*Health\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=Health*\n\n**Typical fix**:  Remove unsupported wildcard characters from the query.')
FIELD_EXTRACTED = ('FIELD_0003', 'field-extracted', 'Recommend explicitly specifying the search field in the string', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # EBSCO search with general search field = "Title"\n    Artificial Intelligence AND Future\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # EBSCO search without general search field\n    TI Artificial Intelligence AND TI Future\n\n**Typical fix**: Explicitly specify the search fields in the query string rather than relying on a general search field setting. (EBSCO)\n\n**Rationale**: Researchers may copy the search_string and miss the general_field, incorrectly reproducing the query.\n')
FIELD_IMPLICIT = ('FIELD_0004', 'field-implicit', 'Search field is implicitly specified', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.PUBMED:\n\n    "eHealth" OR "digital health"\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.PUBMED:\n\n    "eHealth"[all] OR "digital health"[all]\n\n**Typical fix**: Explicitly specify the search field in the query string instead of relying on a general search field setting.\n')
FIELD_MISSING = ('FIELD_0002', 'field-missing', 'Search field is missing', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.PUBMED:\n    "eHealth" OR "digital health"\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.PUBMED:\n    "eHealth"[all] OR "digital health"[all]\n')
FIELD_UNSUPPORTED = ('FIELD_0001', 'field-unsupported', 'Search field is not supported for this database', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=term1 AND IY=2020\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=term1 AND PY=2020\n\n**Typical fix**: Replace the unsupported field with a supported one for the selected database.\n')
IMPLICIT_NEAR_VALUE = ('WOS_0004', 'implicit-near-value', 'The value of NEAR operator is implicit', '\n**Problematic query**:\n\n.. code-block:: text\n\n    A NEAR B\n\n**Recommended query**:\n\n.. code-block:: text\n\n    A NEAR/15 B\n\n**Typical fix**: The parser automatically sets NEAR values to 15 (default).\n')
IMPLICIT_PRECEDENCE = ('STRUCT_0001', 'implicit-precedence', 'Operator changed at the same level (explicit parentheses are recommended)', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.PUBMED\n    "health tracking" OR ("remote" AND "monitoring") AND ("mobile application" OR "wearable device")\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.PUBMED\n    ("health tracking" OR ("remote" AND "monitoring")) AND ("mobile application" OR "wearable device")\n\n**Typical fix**: Use explicit parentheses to clarify operator precedence and avoid ambiguity in mixed AND/OR queries.\n')
INVALID_PROXIMITY_USE = ('STRUCT_0004', 'invalid-proximity-use', 'Invalid use of the proximity operator', '\nProximity operators must have a non-negative integer as the distance.\n\n**Problematic query**:\n\n.. code-block:: text\n\n    "digital health"[tiab:~0.5]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    "digital health"[tiab:~5]\n')
INVALID_SYNTAX = ('PARSE_0006', 'invalid-syntax', 'Query contains invalid syntax', '**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    eHealth[ti]\n\n    # PLATFORM.PUBMED\n    TI=eHealth\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    TI=eHealth\n\n    # PLATFORM.PUBMED\n    eHealth[ti]\n')
INVALID_TOKEN_SEQUENCE = ('PARSE_0004', 'invalid-token-sequence', 'The sequence of tokens is invalid.', '**Problematic query**:\n\n.. code-block:: text\n\n    # Example: Two operators in a row\n    eHealth AND OR digital health\n\n**Recommended query**:\n\n.. code-block:: text\n\n    eHealth OR digital health\n\n**Typical fix**: Check the sequence of operators and terms in the query\n')
INVALID_WILDCARD_USE = ('PUBMED_0003', 'invalid-wildcard-use', 'Invalid use of the wildcard operator *', '\n**Problematic query**:\n\n.. code-block:: text\n\n    "health tracking" AND AI*\n\n**Recommended query**:\n\n.. code-block:: text\n\n    "health tracking" AND AIDS*\n\n**Typical fix**: Avoid using wildcards (*) with short strings (less than 4 characters). Specify search fields directly in the query instead of relying on general search field settings.\n')
ISBN_FORMAT_INVALID = ('TERM_0004', 'isbn-format-invalid', 'Invalid ISBN format.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    IS=978-3-16-14841-0\n\n**Recommended query**:\n\n.. code-block:: text\n\n    IS=978-3-16-148410-0\n\n**Typical fix**: Use a valid ISBN-10 or ISBN-13 format (e.g., 10 or 13 digits, optionally with hyphens in correct positions).\n')
JOURNAL_FILTER_IN_SUBQUERY = ('QUALITY_0003', 'journal-filter-in-subquery', 'Journal (or publication name) filter in subquery', '\n**Problematic query**:\n\n.. code-block:: text\n\n    "activity"[Title/Abstract] OR ("cancer"[Title/Abstract] AND "Lancet"[Journal])\n\n**Recommended query**:\n\n.. code-block:: text\n\n    ("activity"[Title/Abstract] OR "cancer"[Title/Abstract]) AND "Lancet"[Journal]\n\n**Typical fix**: Apply journal (publication name) filters at the top level of the query instead of inside subqueries to ensure the filter applies to the entire result set.\n')
LIST_QUERY_INVALID_REFERENCE = ('PARSE_1002', 'list-query-invalid-reference', 'Invalid list reference in list query', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS:\n    1. TS=("Peer leader*" OR "Shared leader*")\n    2. TS=("acrobatics" OR "acrobat")\n    3. #1 AND #5\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS:\n    1. TS=("Peer leader*" OR "Shared leader*")\n    2. TS=("acrobatics" OR "acrobat")\n    3. #1 AND #2\n\n**Typical fix**: Reference only existing list items in your query.\n')
LIST_QUERY_MISSING_ROOT_NODE = ('PARSE_1001', 'list-query-missing-root-node', 'List format query without root node (typically containing operators)', '\n**Problematic query**:\n\n.. code-block:: text\n\n    1. TS=("Peer leader*" OR "Shared leader*")\n    2. TS=("acrobatics" OR "acrobat" OR "acrobats")\n\n**Recommended query**:\n\n.. code-block:: text\n\n    1. TS=("Peer leader*" OR "Shared leader*")\n    2. TS=("acrobatics" OR "acrobat" OR "acrobats")\n    3. #1 AND #2\n\n**Typical fix**: Add a root-level operator to combine the list items into a single query.\n')
NEAR_DISTANCE_TOO_LARGE = ('WOS_0002', 'near-distance-too-large', 'NEAR distance is too large (max: 15).', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=term1 NEAR/20 term2\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=term1 NEAR/15 term2\n\n**Typical fix**: Reduce the NEAR distance to 15 or less.\n')
NESTED_QUERY_WITH_FIELD = ('PUBMED_0001', 'nested-query-with-field', 'A nested query cannot have a search field.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    eHealth[ti] AND ("health tracking" OR "remote monitoring")[tiab]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    eHealth[ti] AND ("health tracking"[tiab] OR "remote monitoring"[tiab])\n\n**Typical fix**: Remove the search field from the nested query (operator) since nested queries cannot have search fields.\n')
NON_STANDARD_QUOTES = ('TERM_0001', 'non-standard-quotes', 'Non-standard quotes', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TS=“carbon”\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TS="carbon"\n\n**Typical fix**: Replace non-standard quotes (e.g., ”) with standard ASCII quotes (").\n')
OPERATOR_CAPITALIZATION = ('STRUCT_0002', 'operator-capitalization', 'Operators should be capitalized', '\n**Problematic query**:\n\n.. code-block:: text\n\n    dHealth and mHealth\n\n**Recommended query**:\n\n.. code-block:: text\n\n    dHealth AND mHealth\n\n**Typical fix**: Capitalize the operator\n')
POTENTIAL_WILDCARD_USE = ('QUALITY_0006', 'potential-wildcard-use', 'Potential wildcard use', '\n**Problematic query**:\n\n.. code-block:: text\n\n    computation OR computational OR computer OR computer science\n\n**Recommended query**:\n\n.. code-block:: text\n\n    comput*')
QUERY_IN_QUOTES = ('PARSE_0007', 'query-in-quotes', 'The whole search string is in quotes.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    "eHealth[ti] AND digital health[ti]"\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    eHealth[ti] AND digital health[ti]\n')
QUERY_STRUCTURE_COMPLEX = ('QUALITY_0001', 'query-structure-unnecessarily-complex', 'Query structure is more complex than necessary', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.EBSCO\n    TI "sleep" OR TI "sleep disorders"\n\n    # PLATFORM.EBSCO\n    TI "sleep" AND TI "sleep disorders"\n\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.EBSCO\n    TI "sleep"\n\n    # PLATFORM.EBSCO\n    TI "sleep disorders"\n\n\n**Typical fix**: Remove redundant terms when one term is already covered by a broader (OR) or encompassing (AND) term in the query.')
REDUNDANT_TERM = ('QUALITY_0005', 'redundant-term', 'Redundant term in the query', '\n**Problematic query (AND)**:\n\n.. code-block:: text\n\n    "digital health" AND "health"\n\n**Recommended query (AND)**:\n\n.. code-block:: text\n\n    "digital health"\n\n.. note::\n\n    The term "digital health" is more specific than "health".\n    The AND query will not retrieve results that match "health" but not "digital health".\n    Therefore, the more specific term ("digital health") is sufficient.\n\n**Problematic query (OR)**:\n\n.. code-block:: text\n\n    "digital health" OR "health"\n\n**Recommended query (OR)**:\n\n.. code-block:: text\n\n    "health"\n\n.. note::\n\n    The term "health" is broader than "digital health".\n    In the OR query, all results that match "digital health" will also match "health".\n    Therefore, the broader term ("health") is sufficient.\n\n**Typical fix**: Remove redundant terms that do not add value to the query.')
TOKENIZING_FAILED = ('PARSE_0001', 'tokenizing-failed', 'Fatal error during tokenization', '**Typical fix**: Check the query syntax and ensure it is correctly formatted.')
TOO_MANY_TERMS = ('WOS_0001', 'too-many-terms', 'Too many search terms in the query', '\n**Explanation:** The query contains too many search terms, which may lead to performance issues or exceed platform limits.\n\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    # too many terms\n    TI=(eHealth OR digital health OR telemedicine OR mHealth OR ...)\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    # split into multiple queries\n    TI=(eHealth OR digital health OR telemedicine OR ...)\n    TI=(mHealth OR telehealth OR ...)\n')
UNBALANCED_PARENTHESES = ('PARSE_0002', 'unbalanced-parentheses', 'Parentheses are unbalanced in the query', '\n**Problematic query**:\n\n.. code-block:: text\n\n    (a AND b OR c\n\n**Recommended query**:\n\n.. code-block:: text\n\n    (a AND b) OR c\n\n**Typical fix**: Check the parentheses in the query\n')
UNBALANCED_QUOTES = ('PARSE_0003', 'unbalanced-quotes', 'Quotes are unbalanced in the query', '\n**Problematic query**:\n\n.. code-block:: text\n\n    "eHealth[ti]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    "eHealth"[ti]\n\n**Typical fix**: Add the missing closing quote to balance the quotation marks.')
UNNECESSARY_PARENTHESES = ('QUALITY_0004', 'unnecessary-parentheses', 'Unnecessary parentheses in queries', '\n\n**Problematic query**:\n\n.. code-block:: text\n\n    ("digital health" OR "eHealth") OR ("remote monitoring" OR "telehealth")\n\n**Recommended query**:\n\n.. code-block:: text\n\n    "digital health" OR "eHealth" OR "remote monitoring" OR "telehealth"\n\n**Explanation**: Parentheses are unnecessary when all operators used are **associative and have equal precedence** (like a series of ORs or a series of ANDs). In such cases, the grouping does not influence the evaluation result and adds unnecessary complexity.')
UNSUPPORTED_PREFIX = ('PARSE_0008', 'unsupported-prefix', 'Unsupported prefix in search query', '\n**Problematic query**:\n\n.. code-block:: text\n\n   Pubmed with no restrictions: (eHealth[ti])\n\n**Recommended query**:\n\n.. code-block:: text\n\n    eHealth[ti]\n\n**Typical fix**: Remove unsupported prefixes or introductory text from the search query to ensure it runs correctly.\n')
UNSUPPORTED_PREFIX_PLATFORM_IDENTIFIER = ('PARSE_0010', 'unsupported-prefix-platform-identifier', 'Query starts with platform identifier', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    WOS: eHealth[ti]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    eHealth[ti]\n')
UNSUPPORTED_SUFFIX = ('PARSE_0009', 'unsupported-suffix', 'Unsupported suffix in search query', '\n**Problematic query**:\n\n.. code-block:: text\n\n   (eHealth[ti]) Sort by: Publication Date\n\n**Recommended query**:\n\n.. code-block:: text\n\n    (eHealth[ti])\n\n**Typical fix**: Remove unsupported suffixes or trailing text from the search query to avoid errors.\n')
WILDCARD_AFTER_SPECIAL_CHAR = ('WOS_0009', 'wildcard-after-special-char', 'Wildcard cannot be preceded by special characters.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=(term1 OR term2!*)\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=(term1 OR term2*)\n\n**Typical fix**: Remove the special character before the wildcard or rephrase the query to avoid combining them.\n')
WILDCARD_IN_YEAR = ('WOS_0006', 'wildcard-in-year', 'Wildcard characters (*, ?, $) not supported in year search.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    dHealth[ti] AND 200*[dp]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    dHealth[ti] AND 2000:2010[dp]\n\n**Typical fix**: Replace with year range.\n')
WILDCARD_LEFT_SHORT_LENGTH = ('WOS_0007', 'wildcard-left-short-length', 'Left-hand wildcard must be followed by at least three characters.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=*te\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=*tech\n\n**Typical fix**: Ensure the term after a left-hand wildcard (*) has at least three characters.\n')
WILDCARD_RIGHT_SHORT_LENGTH = ('WOS_0008', 'wildcard-right-short-length', 'Right-hand wildcard must be preceded by at least three characters.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=te*\n    TS=ca*\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=tech*\n    TS=cat*\n\n**Typical fix**: Replace short wildcard prefix with at least three characters or use a more specific term.\n')
WILDCARD_STANDALONE = ('WOS_0010', 'wildcard-standalone', 'Wildcard cannot be standalone.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=term1 AND "?"\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=term1\n\n**Typical fix**: Replace the standalone wildcard with a complete search term or remove it entirely.\n')
WOS_INVALID_CHARACTER = ('WOS_0012', 'invalid-character', 'Search term contains invalid character', '\n**Problematic query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    "@digital-native"[ti]\n\n**Recommended query**:\n\n.. code-block:: text\n\n    # PLATFORM.WOS\n    "digital-native"[ti]\n\n')
WOS_WILDCARD_UNSUPPORTED = ('WOS_0011', 'wildcard-unsupported', 'Unsupported wildcard in search string.', '\n**Problematic query**:\n\n.. code-block:: text\n\n   dHealth!\n\n**Recommended query**:\n\n.. code-block:: text\n\n    dHealth\n\n**Typical fix**:  Remove unsupported wildcard characters from the query.')
YEAR_FORMAT_INVALID = ('TERM_0002', 'year-format-invalid', 'Invalid year format.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    TI=term1 AND PY=20xy\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=term1 AND PY=2020\n\n**Typical fix**: Use a valid numeric year format (e.g., 4-digit year).\n')
YEAR_SPAN_VIOLATION = ('WOS_0005', 'year-span-violation', 'Year span must be five or less.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    A AND PY=2000-2020\n\n**Recommended query**:\n\n.. code-block:: text\n\n    A AND PY=2015-2020\n\n**Typical fix**: The parser automatically adjusts the year span to 5 years.\n')
YEAR_WITHOUT_TERMS = ('WOS_0003', 'year-without-terms', 'A search for publication years must include at least one other search term.', '\n**Problematic query**:\n\n.. code-block:: text\n\n    PY=2000\n\n**Recommended query**:\n\n.. code-block:: text\n\n    TI=term AND PY=2000\n\n**Typical fix**: Combine the year filter with at least one other search term.\n')
class search_query.constants.SearchField(value: str, *, position: Tuple[int, int] | None = None)

Bases: object

SearchField class.

copy() SearchField

Return a copy of the SearchField instance.

class search_query.constants.Token(value: str, type: TokenTypes, position: Tuple[int, int])

Bases: object

Token class

is_operator() bool

Check if token is an operator

position: Tuple[int, int]
type: TokenTypes
value: str
class search_query.constants.TokenTypes(value)

Bases: Enum

Token types

FIELD = 'FIELD'
LOGIC_OPERATOR = 'LOGIC_OPERATOR'
PARENTHESIS_CLOSED = 'PARENTHESIS_CLOSED'
PARENTHESIS_OPEN = 'PARENTHESIS_OPEN'
PROXIMITY_OPERATOR = 'PROXIMITY_OPERATOR'
RANGE_OPERATOR = 'RANGE_OPERATOR'
TERM = 'TERM'
UNKNOWN = 'UNKNOWN'

search_query.database module

Database and filters.

search_query.database.list_queries() List[str]

List all available predefined query identifiers (without .json).

search_query.database.list_queries_with_details() dict

List all available queries.

search_query.database.load_query(name: str) Query

Load a query object from JSON by name.

search_query.database_queries module

Database and filters.

search_query.exception module

Exceptions of SearchQuery.

exception search_query.exception.ListQuerySyntaxError(linter: QueryListLinter)

Bases: SearchQueryException

ListQuerySyntaxError Exception

exception search_query.exception.QuerySyntaxError(linter: QueryStringLinter)

Bases: SearchQueryException

QuerySyntaxError Exception

exception search_query.exception.SearchQueryException

Bases: Exception

Base class for all exceptions raised by this package

search_query.linter module

Query linter hook.

search_query.linter.lint_file(search_file: SearchFile) dict

Lint a search file and return the messages.

search_query.linter.lint_query_string(search_string: str, *, platform: str, field_general: str = '') dict

Lint a query string and return the messages.

search_query.linter.pre_commit_hook(file_path: str) int

Entrypoint for the query linter hook

search_query.linter_base module

Validator for search queries.

class search_query.linter_base.QueryListLinter(parser: QueryListParser, string_parser_class: Type[QueryStringParser], original_query_str: str = '')

Bases: object

Class for Query List Validation

add_message(error: QueryErrorCode, *, list_position: int, positions: List[tuple[int, int]] | None = None, details: str = '', fatal: bool = False) None

Add a linter message.

check_status() None

Check the output of the linter and report errors to the user

has_fatal_errors() bool

Check if there are any fatal errors.

print_messages() None

Print the latest linter messages.

class search_query.linter_base.QueryStringLinter(query_str: str, *, original_str: str | None = None, silent: bool = False)

Bases: object

Class for Query String Validation

FAULTY_OPERATOR_REGEX = '\\b(?:[aA][nN][dD]|[oO][rR]|[nN][oO][tT])\\b'
OPERATOR_PRECEDENCE = {'AND': 1, 'NEAR': 3, 'NOT': 2, 'OR': 0, 'WITHIN': 3}
PARENTHESIS_REGEX = '[\\(\\)]'
PLATFORM: PLATFORM = 'generic'
VALID_fieldS_REGEX: re.Pattern
add_artificial_parentheses_for_operator_precedence(index: int = 0, output: list | None = None) tuple[int, list[Token]]

Adds artificial parentheses with position (-1, -1) to enforce operator precedence.

add_higher_value(output: list[Token], previous_value: int, value: int, art_par: int) tuple[list[Token], int]

Adds open parenthesis to higher value operators

add_message(error: QueryErrorCode, *, positions: Sequence[Tuple[int, int]] | None = None, details: str = '', fatal: bool = False) None

Add a linter message.

check_boolean_operator_readability(*, faulty_operators: str = '|&') None

Check for readability of boolean operators.

check_general_field() None

Check the general search field

check_invalid_characters_in_term(invalid_characters: str, error: QueryErrorCode) None

Check a search term for invalid characters

check_invalid_characters_in_term_query(query: Query, invalid_characters: str, error: QueryErrorCode) None

Check the search terms of a query for invalid characters

check_missing_tokens() None

Check missing tokens

check_near_distance_in_range(*, max_value: int) None

Check for NEAR with a specified distance out of range.

check_operator_capitalization() None

Check if operators are capitalized.

check_operators_with_fields(query: Query) None

Check for operators with fields

check_status() None

Check the output of the linter and report errors to the user

check_unbalanced_parentheses() None

Check query for unbalanced parentheses.

check_unbalanced_quotes_in_terms(query: Query) None

Recursively check for unbalanced quotes in quoted search terms.

check_unknown_token_types() None

Check for unknown token types.

check_unsupported_fields_in_query(query: Query) None

Check for the correct format of fields.

Note: compile VALID_fieldS_REGEX with or without flags=re.IGNORECASE

flatten_redundant_artificial_nesting(tokens: list[Token]) None

Flattens redundant artificial nesting: If two artificial open parens are followed eventually by two artificial close parens at the same level, removes the outer ones.

get_precedence(token: str) int

Returns operator precedence for logical and proximity operators.

get_query_with_fields_at_terms(query: Query) Query

Move the search field from the operator to the terms.

Note: utility function for validating search terms with efficient access to search fields (at the level of terms).

handle_fully_quoted_query_str(parser: QueryStringParser) None

Handle fully quoted query string.

handle_nonstandard_quotes_in_query_str(parser: QueryStringParser) None

Handle non-standard quotes in query string.

handle_prefix_in_query_str(parser: QueryStringParser, *, prefix_regex: re.Pattern) None

Handle prefixes in query string.

handle_suffix_in_query_str(parser: QueryStringParser) None

Handle suffix in query string.

Removes tokens after a fully quoted query if they are not connected with a valid operator.

Only applies if quotes are balanced (even number of quotes).

has_fatal_errors() bool

Check if there are any fatal errors.

messages: List[dict]
print_messages() None

Print the latest linter messages.

query: Optional[Query]
abstract syntax_str_to_generic_field_set(field_value: str) set[Fields]

Translate a search field

tokens: List[Token]
abstract validate_query_tree(query: Query) None

Validate query tree

abstract validate_tokens(*, tokens: List[Token], query_str: str, field_general: str = '') List[Token]

Validate tokens

search_query.parser module

Query parser.

search_query.parser.get_platform(platform_str: str) str

Get the platform from the platform string

search_query.parser.parse(query_str: str, *, field_general: str = '', platform: str = 'wos') Query

Parse a query string.

search_query.parser_base module

Base query parser.

class search_query.parser_base.QueryListParser(query_list: str, *, parser_class: type[QueryStringParser], field_general: str)

Bases: object

LIST_ITEM_REFERENCE = re.compile('#\\d+')
LIST_QUERY_LINE_REGEX: Pattern = re.compile('^(\\d+).\\s+(.*)$')
assign_linter_messages(parser_messages, linter) None

Assign linter messages to the appropriate query nodes.

build_query_str() Tuple[str, dict]

Build the query string from the list format.

abstract parse() Query

Parse the query in list format.

query_dict: dict
tokenize_list() None

Tokenize the query_list.

tokenize_operator_node(query_str: str, node_nr: int) list

Tokenize the query string into list-references and logic operator tokens.

class search_query.parser_base.QueryStringParser(query_str: str, *, field_general: str = '', offset: dict | None = None, original_str: str | None = None, silent: bool = False)

Bases: ABC

Abstract base class for query string parsers

LOGIC_OPERATOR_REGEX = re.compile('\\b(AND|OR|NOT)\\b', re.IGNORECASE)
OPERATOR_REGEX: re.Pattern = re.compile('^(AND|OR|NOT)$', re.IGNORECASE)
adjust_token_positions() None

Adjust virtual positions of tokens using offset mapping.

combine_subsequent_terms() None

Combine all consecutive TERM tokens into one.

linter: QueryStringLinter
abstract parse() Query

Parse the query.

print_tokens() None

Print the tokens in a formatted table.

split_operators_with_missing_whitespace() None

Split operators that are not separated by whitespace.

tokens: list

search_query.query module

Query class.

class search_query.query.Query(value: str, *, operator: bool = True, field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: object

Query class.

add_child(child: str | Query) Query

Add a child Query node and set its parent pointer.

property children: List[Query]

Children property.

copy() Query

Return a deep copy of the Query instance without parent references.

classmethod create(value: str, *, operator: bool = True, field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic', distance: int = 0) Query

Factory method for query creation.

evaluate(records_dict: dict) dict

Evaluate the query against records using colrev_status labels.

  • rev_included: relevant

  • rev_excluded / rev_prescreen_excluded: irrelevant

  • others: ignored

property field: SearchField | None

Search field property.

get_nr_leaves() int

Returns the number of leaves in the query tree

get_parent() Query | None

Return the parent Query node, or None if this node is the root.

get_root() Query

Return the root of the query tree by climbing up parent pointers.

is_term() bool

Check whether the SearchQuery is a term.

property operator: bool

Operator property.

property platform: str

Platform property.

replace(new_query: Query) None

Replace this query with a new query in the parent’s children list.

selects(*, record_dict: dict) bool

Indicates whether the query selects a given record.

selects_record(record_dict: dict) bool

Abstract method to be implemented by subclasses to select records.

set_platform_unchecked(platform: str, silent: bool = False) None

Set the platform for this query node without validation. This is an optional utility for parsers.

to_generic_string() str

Prints the query in generic syntax

to_string() str

Prints the query as a string

to_string_structured_2() str

Prints the query in a structured expression format.

to_structured_string() str

Prints the query in structured syntax

translate(target_syntax: str) Query

Translate the query to the target syntax using the provided translator.

property value: str

Value property.

search_query.query_and module

AND Query

class search_query.query_and.AndQuery(children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

AND Query

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the AND query.

search_query.query_near module

NEAR Query

class search_query.query_near.NEARQuery(value: str, children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, distance: int, platform: str = 'generic')

Bases: Query

NEAR Query

property children: List[Query]

Children property.

property distance: int | None

Distance property.

selects_record(record_dict: dict) bool

Check if the record matches the NEAR query.

search_query.query_not module

NOT Query

class search_query.query_not.NotQuery(children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

NOT Query

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the NOT query.

search_query.query_or module

OR Query

class search_query.query_or.OrQuery(children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

OR Query Class

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the OR query.

search_query.search_file module

SearchFile parser.

class search_query.search_file.SearchFile(search_string: str, platform: str, authors: list[dict] | None = None, record_info: dict | None = None, date: dict | None = None, filepath: str | Path | None = None, **kwargs: dict)

Bases: object

SearchFile class.

save(filepath: str | Path | None = None) None

Save the search file to a JSON file.

to_dict() dict

Convert the search file to a dictionary.

search_query.search_file.load_search_file(filepath: str | Path) SearchFile

Load a search file from a JSON file.

search_query.serializer_base module

Base serializer.

class search_query.serializer_base.StringSerializer

Bases: ABC

Base class for query serializers.

abstract to_string(query: Query) str

Convert the query to a string.

Args:

query: The query to convert.

Returns:

The string representation of the query.

search_query.serializer_generic module

search_query.serializer_structured module

Structured serializer.

search_query.serializer_structured.to_string_structured(query: Query, *, level: int = 0) str

Convert the query to a string.

search_query.serializer_structured.to_string_structured_2(query: Query, level: int = 0) str

Convert the query into a multiline, indented Boolean-style expression.

search_query.translator_base module

Base query translator.

class search_query.translator_base.QueryTranslator

Bases: object

Translator for queries.

classmethod flatten_nested_operators(query: Query) None

Flatten double-nested operators in the query.

classmethod move_fields_to_operator(query: Query) None

Move the search fields from the terms to the operator query.

classmethod move_fields_to_terms(query: Query) None

Move the search field from the operator to the terms.

abstract classmethod to_generic_syntax(query: Query) Query

Convert the query to a generic syntax.

abstract classmethod to_specific_syntax(query: Query) Query

Convert the query to a specific syntax.

search_query.utils module

Utilities for SearchQuery.

search_query.utils.format_query_string_positions(query_str: str, positions: List[Tuple[int, int]], color: str = '\x1b[93m') str

Format the query string with multiple positions marked in color.

Module contents

Top-level package for SearchQuery.

class search_query.AndQuery(children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

AND Query

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the AND query.

class search_query.NEARQuery(value: str, children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, distance: int, platform: str = 'generic')

Bases: Query

NEAR Query

property children: List[Query]

Children property.

property distance: int | None

Distance property.

selects_record(record_dict: dict) bool

Check if the record matches the NEAR query.

class search_query.OrQuery(children: List[str | Query], *, field: SearchField | str | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: Query

OR Query Class

property children: List[Query]

Children property.

selects_record(record_dict: dict) bool

Check if the record matches the OR query.

class search_query.Query(value: str, *, operator: bool = True, field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic')

Bases: object

Query class.

add_child(child: str | Query) Query

Add a child Query node and set its parent pointer.

property children: List[Query]

Children property.

copy() Query

Return a deep copy of the Query instance without parent references.

classmethod create(value: str, *, operator: bool = True, field: SearchField | None = None, children: List[str | Query] | None = None, position: Tuple[int, int] | None = None, platform: str = 'generic', distance: int = 0) Query

Factory method for query creation.

evaluate(records_dict: dict) dict

Evaluate the query against records using colrev_status labels.

  • rev_included: relevant

  • rev_excluded / rev_prescreen_excluded: irrelevant

  • others: ignored

property field: SearchField | None

Search field property.

get_nr_leaves() int

Return the number of leaves in the query tree.

get_parent() Query | None

Return the parent Query node, or None if this node is the root.

get_root() Query

Return the root of the query tree by climbing up parent pointers.

is_term() bool

Check whether the SearchQuery is a term.

property operator: bool

Operator property.

property platform: str

Platform property.

replace(new_query: Query) None

Replace this query with a new query in the parent’s children list.

selects(*, record_dict: dict) bool

Indicates whether the query selects a given record.

selects_record(record_dict: dict) bool

Abstract method to be implemented by subclasses to select records.

set_platform_unchecked(platform: str, silent: bool = False) None

Set the platform for this query node without validation. This is an optional utility for parsers.

to_generic_string() str

Return the query as a string in generic syntax.

to_string() str

Return the query as a string.

to_string_structured_2() str

Return the query as a string in a structured expression format.

to_structured_string() str

Return the query as a string in structured syntax.

translate(target_syntax: str) Query

Translate the query to the target syntax using the provided translator.

property value: str

Value property.

class search_query.SearchFile(search_string: str, platform: str, authors: list[dict] | None = None, record_info: dict | None = None, date: dict | None = None, filepath: str | Path | None = None, **kwargs: dict)

Bases: object

SearchFile class.

save(filepath: str | Path | None = None) None

Save the search file to a JSON file.

to_dict() dict

Convert the search file to a dictionary.

search_query.load_search_file(filepath: str | Path) SearchFile

Load a search file from a JSON file.