search_query.wos package

Submodules

search_query.wos.constants module

Constants for Web-of-Science.

search_query.wos.constants.generic_search_field_to_syntax_field(generic_search_field: str) str

Convert a generic search field to the corresponding WOS syntax string.

search_query.wos.constants.map_to_standard(syntax_str: str) str

Normalize search field string to a standard WOS field syntax.

search_query.wos.constants.search_field_general_to_syntax(search_field: str) str

Map the general search field to the standard syntax of WOS.

search_query.wos.constants.syntax_str_to_generic_search_field_set(field_value: str) set

Translate a search field string to a generic set of Fields.

search_query.wos.linter module

Web-of-Science query linter.

class search_query.wos.linter.WOSQueryListLinter(parser: search_query.wos.parser.WOSListParser, string_parser_class: Type[search_query.wos.parser.WOSParser])

Bases: QueryListLinter

messages: dict
parser: search_query.wos.parser.WOSListParser
validate_list_tokens() None

Lint the list parser.

class search_query.wos.linter.WOSQueryStringLinter(query_str: str = '')

Bases: QueryStringLinter

Linter for WOS query strings.

DOI_VALUE_REGEX = re.compile('^10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$', re.IGNORECASE)
ISBN_VALUE_REGEX = re.compile('^(?:\\d{1,5}-\\d{1,7}-\\d{1,7}-[\\dX]|\\d{3}-\\d{1,5}-\\d{1,7}-\\d{1,7}-\\d{1})$', re.IGNORECASE)
ISSN_VALUE_REGEX = re.compile('^\\d{4}-\\d{3}[\\dX]$', re.IGNORECASE)
PLATFORM: <PLATFORM.WOS: 'wos'> = 'wos'
VALID_FIELDS_REGEX: re.Pattern = re.compile('ab=|abstract=|la=|language=|ad=|address=|all=|all fields=|ai=|author identifiers=|ak=|author keywords=|au=|author=|cf=|conference=|ci=|city=|cu=|country/region=|do=|doi=|ed=|editor=|fg=|grant number=', re.IGNORECASE)
VALID_TOKEN_SEQUENCES = {TokenTypes.FIELD: [TokenTypes.SEARCH_TERM, TokenTypes.PARENTHESIS_OPEN], TokenTypes.LOGIC_OPERATOR: [TokenTypes.SEARCH_TERM, TokenTypes.FIELD, TokenTypes.PARENTHESIS_OPEN], TokenTypes.PARENTHESIS_CLOSED: [TokenTypes.PARENTHESIS_CLOSED, TokenTypes.LOGIC_OPERATOR, TokenTypes.PROXIMITY_OPERATOR], TokenTypes.PARENTHESIS_OPEN: [TokenTypes.FIELD, TokenTypes.SEARCH_TERM, TokenTypes.PARENTHESIS_OPEN], TokenTypes.PROXIMITY_OPERATOR: [TokenTypes.SEARCH_TERM, TokenTypes.PARENTHESIS_OPEN, TokenTypes.FIELD], TokenTypes.SEARCH_TERM: [TokenTypes.SEARCH_TERM, TokenTypes.LOGIC_OPERATOR, TokenTypes.PROXIMITY_OPERATOR, TokenTypes.PARENTHESIS_CLOSED]}
WILDCARD_CHARS = ['?', '$', '*']
YEAR_VALUE_REGEX = re.compile('^\\d{4}(-\\d{4})?$')
check_doi_format(query: Query) None

Check for the correct format of DOI.

check_format_left_hand_wildcards(query: Query) None

Check for incorrect usage of left-hand wildcards in the search string.

check_implicit_near() None

Check for implicit NEAR operator.

check_invalid_syntax() None

Check for invalid syntax in the query string.

check_invalid_token_sequences() None

Check for the correct order of tokens in the query.

check_issn_isbn_format(query: Query) None

Check for the correct format of ISSN and ISBN.

check_nr_search_terms(query: Query) None

Check the number of search terms in the query.

check_search_fields_general() None

Check the general search field (from JSON).

check_unsupported_right_hand_wildcards(query: Query, index: int) None

Check for unsupported right-hand wildcards in the search string.

check_unsupported_wildcards(query: Query) None

Check for unsupported characters in the search string.

check_wildcards(query: Query) None

Check for the usage of wildcards in the search string.

check_year_format(query: Query) None

Check for the correct format of year.

check_year_without_search_terms(query: Query) None

Check if the year is used without any search terms.

get_nr_terms_all(query: Query) int

Get the number of terms in the query.

messages: List[dict]
query: Optional[Query]
syntax_str_to_generic_search_field_set(field_value: str) set

Translate a search field string to a generic set of Fields.

tokens: List[Token]
validate_query_tree(query: Query) None

Validate the query tree. This method is called after the query tree has been built.

validate_tokens(*, tokens: List[Token], query_str: str, search_field_general: str = '') List[Token]

Perform pre-linting on the tokens.

search_query.wos.parser module

Web-of-Science query parser.

class search_query.wos.parser.WOSListParser(query_list: str, search_field_general: str, mode: str)

Bases: QueryListParser

Parser for Web-of-Science (list format) queries.

LIST_ITEM_REFERENCE = re.compile('#\\d+')
LIST_ITEM_REGEX: Pattern = re.compile('^(\\d+).\\s+(.*)$')
OPERATOR_NODE_REGEX = re.compile('#\\d+|AND|OR')
parse() Query

Parse the list of queries.

query_dict: dict
tokenize_operator_node(query_str: str, node_nr: int) list

Tokenize the query_list.

class search_query.wos.parser.WOSParser(query_str: str, *, search_field_general: str = '', mode: str = 'strict')

Bases: QueryStringParser

Parser for Web-of-Science queries.

LOGIC_OPERATOR_REGEX = re.compile('\\b(AND|OR|NOT)\\b', re.IGNORECASE)
OPERATOR_REGEX: re.Pattern = re.compile('\\b(AND|OR|NOT)\\b|\\b(NEAR/\\d{1,2}|NEAR)\\b')
PARENTHESIS_REGEX = re.compile('[\\(\\)]')
PROXIMITY_OPERATOR_REGEX = re.compile('\\b(NEAR/\\d{1,2}|NEAR)\\b', re.IGNORECASE)
SEARCH_FIELDS_REGEX = re.compile('\\b(?!and\\b)[a-zA-Z]+(?:\\s(?!and\\b)[a-zA-Z]+)*')
SEARCH_FIELD_REGEX = re.compile('\\b\\w{2}=|\\b\\w{3}=')
SEARCH_TERM_REGEX = re.compile('\\*?[\\w\\-/\\.\\!\\*,&\\\\]+(?:[\\*\\$\\?][\\w\\-/\\.\\!\\*,&\\\\]*)*|"[^"]+"')
combine_subsequent_terms() None

Combine subsequent terms in the list of tokens.

linter: QueryStringLinter
parse() Query

Parse a query string.

parse_query_tree(index: int = 0, search_field: SearchField | None = None) Tuple[Query, int]

Parse tokens starting at the given index, handling parentheses, operators, search fields and terms recursively.

pattern = re.compile('\\b\\w{2}=|\\b\\w{3}=|\\b(AND|OR|NOT)\\b|\\b(NEAR/\\d{1,2}|NEAR)\\b|\\*?[\\w\\-/\\.\\!\\*,&\\\\]+(?:[\\*\\$\\?][\\w\\-/\\.\\!\\*,&\\\\]*)*|"[^"]+"|[\\(\\)]')
tokenize() None

Tokenize the query_str.

tokens: list

search_query.wos.serializer module

WOS serializer.

search_query.wos.serializer.to_string_wos(query: Query) str

Serialize the Query tree into a Web of Science (WoS) search string.

search_query.wos.translator module

WOS query translator.

class search_query.wos.translator.WOSTranslator

Bases: QueryTranslator

Translator for WOS queries.

classmethod combine_equal_search_fields(query: Query) None

Combine queries with the same search field into an OR query.

classmethod to_generic_syntax(query: Query) Query

Convert the query to a generic syntax.

classmethod to_specific_syntax(query: Query) Query

Convert the query to a specific syntax.

classmethod translate_search_fields_to_generic(query: Query) None

Translate search fields to generic search fields.

Module contents

Top-level package for WoS.