Improve

Modification

 1from search_query import AndQuery, OrQuery
 2
 3def add_plural_wildcards(query):
 4   """Recursively add wildcards for plural forms in term values."""
 5   if query.operator:
 6      for child in query.children:
 7            add_plural_wildcards(child)
 8   else:
 9      val = query.value
10      if not val.endswith("*") and not val.endswith("s"):
11            query.value = val + "*"
12
13
# British English spelling -> American English spelling.
BE_AE_MAP = {
    "labour": "labor",
    "organisation": "organization",
    "analyse": "analyze",
    "optimisation": "optimization",
    "behaviour": "behavior",
}

# Reverse lookup so that a term written in American English is expanded too.
_AE_BE_MAP = {ae: be for be, ae in BE_AE_MAP.items()}


def expand_spelling_variants(query):
    """Recursively expand BE/AE spelling variations in term values.

    Operator nodes (truthy ``query.operator``) are traversed through
    ``query.children``.  For any other node, ``query.value`` is lowercased
    and looked up in ``BE_AE_MAP`` in either direction; on a match the value
    is replaced in place by ``(<british> OR <american>)``.  Values that match
    neither spelling, and empty values, are left unchanged.

    Args:
        query: Node exposing ``operator``, ``children`` and ``value``.

    Returns:
        None. The tree is modified in place.
    """
    if query.operator:
        for child in query.children:
            expand_spelling_variants(child)
        return

    if not query.value:
        return

    val_lower = query.value.lower()
    if val_lower in BE_AE_MAP:
        be, ae = val_lower, BE_AE_MAP[val_lower]
    elif val_lower in _AE_BE_MAP:
        be, ae = _AE_BE_MAP[val_lower], val_lower
    else:
        return

    # NOTE(review): the OR group is written into the term's text rather than
    # added as a separate node in the query tree -- confirm the target
    # serializer accepts this.
    query.value = f"({be} OR {ae})"
33
34
# Example query: (labour OR employment) AND (robot OR algorith), all on title.
# NOTE(review): "algorith" looks like a deliberate stem that relies on the
# wildcard added below -- confirm.
query = AndQuery([
    OrQuery(["labour", "employment"], field="title"),
    OrQuery(["robot", "algorith"], field="title"),
], field="title")

# Expand spelling variants first: the lookup in BE_AE_MAP is an exact match on
# the lowercased term, so a term that already carries a trailing "*" would no
# longer be found.
expand_spelling_variants(query)
add_plural_wildcards(query)

print(query.to_string(platform="pubmed"))

Evaluation

See Cooper et al. (2018) for metrics used to assess literature search effectiveness.

 1from search_query import AndQuery, OrQuery
 2records_dict = {
 3   "r1": {
 4      "title": "Microsourcing platforms for online labor",
 5      "colrev_status": "rev_included"
 6   },
 7   "r2": {
 8      "title": "Online work and the future of microsourcing",
 9      "colrev_status": "rev_included"
10   },
11   "r3": {
12      "title": "Microsourcing case studies",
13      "colrev_status": "rev_excluded"
14   },
15   "r4": {
16      "title": "Freelancing and online job platforms",
17      "colrev_status": "rev_excluded"
18   },
19}
20
21query = AndQuery([
22   OrQuery(["microsourcing"], field="title"),
23   OrQuery(["online"], field="title")
24], field="title")
25
26# Evaluate the search
27results = query.evaluate(records_dict)
28print(f"Recall: {results['recall']}")
29print(f"Precision: {results['precision']}")
30print(f"F1 Score: {results['f1_score']}")
31# Output:
32# Recall: 1.0
33# Precision: 1.0
34# F1 Score: 1.0

References

Cooper C, Varley-Campbell J, Booth A, et al. (2018) Systematic review identifies six metrics and one method for assessing
   literature search effectiveness but no consensus on appropriate use. Journal of Clinical Epidemiology 99: 53–63.
   DOI: 10.1016/J.JCLINEPI.2018.02.025.