#!/usr/bin/env python3
"""The CoLRev review manager (main entrypoint)."""
from __future__ import annotations
import logging
import os
import pprint
import typing
from datetime import timedelta
from pathlib import Path
import git
import requests_cache
import yaml
import colrev.dataset
import colrev.exceptions as colrev_exceptions
import colrev.logger
import colrev.ops.check
import colrev.ops.checker
import colrev.process.operation
import colrev.record.qm.quality_model
import colrev.settings
from colrev.constants import Colors
from colrev.constants import Filepaths
from colrev.constants import OperationsType
from colrev.paths import PathManager
class ReviewManager:
    """Class for managing individual CoLRev review project (repositories)"""

    # pylint: disable=import-outside-toplevel
    # pylint: disable=redefined-outer-name
    # pylint: disable=too-many-instance-attributes
    # pylint: disable=too-many-public-methods
    # pylint: disable=too-many-arguments

    notified_next_operation = None
    """ReviewManager was notified for the upcoming process and
    will provide access to the Dataset"""

    dataset: colrev.dataset.Dataset
    """The review dataset object"""

    path: Path
    """Path of the project repository"""

    shell_mode = False

    def __init__(
        self,
        *,
        path_str: typing.Optional[str] = None,
        force_mode: bool = False,
        verbose_mode: bool = False,
        high_level_operation: bool = False,
        navigate_to_home_dir: bool = True,
        exact_call: str = "",
        skip_upgrade: bool = True,
    ) -> None:
        """Set up paths, loggers, settings and the dataset of a project.

        Args:
            path_str: Directory from which the search for the project root
                starts (only used when ``navigate_to_home_dir`` is True);
                defaults to the current working directory.
            force_mode: If True, exceptions raised during setup are
                suppressed (except the gh-pages ``RepoSetupError``).
            verbose_mode: If True, the loggers are created at DEBUG level.
            high_level_operation: Stored on the instance.
            navigate_to_home_dir: If True, walk up to the directory that
                contains ``.git``; otherwise use the current directory.
            exact_call: Stored on the instance.
            skip_upgrade: If False, the upgrade operation runs before
                settings and dataset are loaded.

        Raises:
            RepoSetupError: If no ``.git`` directory can be located, or if
                setup fails while the gh-pages branch is checked out.
        """
        self.force_mode = force_mode
        """Force mode variable (bool)"""
        self.verbose_mode = verbose_mode
        """Verbose mode variable (bool)"""
        self.high_level_operation = high_level_operation
        """A high-level operation was called (bool)"""
        # Note : mostly for formatting output

        if navigate_to_home_dir:
            self.path = self._get_project_home_dir(path_str=path_str)
        else:
            self.path = Path.cwd()
        self.paths = PathManager(self.path)
        self.exact_call = exact_call

        try:
            # Only create the project directories once a settings file exists.
            if self.paths.settings.is_file():
                for project_dir in (
                    self.paths.data,
                    self.paths.search,
                    self.paths.pdf,
                    self.paths.output,
                ):
                    project_dir.mkdir(parents=True, exist_ok=True)

            self.report_logger, self.logger = self.get_loggers()
            self.environment_manager = self.get_environment_manager()
            self.p_printer = pprint.PrettyPrinter(indent=4, width=140, compact=False)

            # run update before settings/data (which may require changes/fail without update)
            if not skip_upgrade:  # pragma: no cover
                self._check_update()

            self.settings = self.load_settings()
            self.dataset = colrev.dataset.Dataset(review_manager=self)

        except Exception as exc:  # pylint: disable=broad-except
            if (self.path / Path(".git")).is_dir():
                try:
                    # Inspect the project repository (not whatever repository
                    # the process happens to be running in).
                    branch_name = git.Repo(self.path).active_branch.name
                except (TypeError, git.exc.GitError):
                    # Detached HEAD or unreadable repository: the branch hint
                    # does not apply and must not mask the original error.
                    branch_name = ""
                if branch_name == "gh-pages":  # pragma: no cover
                    raise colrev_exceptions.RepoSetupError(
                        msg="Currently on gh-pages branch. Switch to main: "
                        + f"{Colors.ORANGE}git switch main{Colors.END}"
                    ) from exc
            if not force_mode:
                raise

    # pylint: disable=too-many-arguments
    def update_config(
        self,
        *,
        force_mode: bool = False,
        verbose_mode: bool = False,
        high_level_operation: bool = False,
        exact_call: str = "",
    ) -> None:
        """Update review_manager's state

        All four values are overwritten (omitted arguments reset to their
        defaults) and the loggers are re-created afterwards.
        """
        self.force_mode = force_mode
        self.verbose_mode = verbose_mode
        self.high_level_operation = high_level_operation
        self.exact_call = exact_call
        self.report_logger, self.logger = self.get_loggers()

    def get_loggers(self) -> typing.Tuple[logging.Logger, logging.Logger]:
        """Return the (report logger, logger) pair.

        Both loggers use DEBUG level in verbose mode and INFO otherwise.
        """
        level = logging.DEBUG if self.verbose_mode else logging.INFO
        return (
            colrev.logger.setup_report_logger(review_manager=self, level=level),
            colrev.logger.setup_logger(review_manager=self, level=level),
        )

    def _check_update(self) -> None:
        """Run the upgrade operation."""
        # Once the following has run for all repositories,
        # it should only be called when the versions differ.
        # last_version, current_version = self.get_colrev_versions()
        # if last_version != current_version:
        upgrade_operation = self.get_upgrade()
        upgrade_operation.main()

    def get_committer(self) -> typing.Tuple[str, str]:
        """Get the committer name and email"""
        return self.environment_manager.get_name_mail_from_git()

    def _get_project_home_dir(self, *, path_str: typing.Optional[str] = None) -> Path:
        """Return the closest directory (start or ancestor) containing ``.git``.

        Args:
            path_str: Directory to start from; the current working directory
                is used when it is empty or None.

        Raises:
            FileNotFoundError: If the start path does not exist.
            NotADirectoryError: If the start path is not a directory.
            RepoSetupError: If neither the start directory nor any of its
                ancestors contains a ``.git`` directory.
        """
        start_dir = Path(path_str) if path_str else Path.cwd()

        if not start_dir.is_dir():
            error_cls = NotADirectoryError if start_dir.exists() else FileNotFoundError
            raise error_cls(f"Not a directory: {start_dir}")

        # Returning on the first hit keeps "found in the top-most directory"
        # (e.g. path_str="." inside a repository) distinct from "not found".
        for candidate in (start_dir, *start_dir.parents):
            if (candidate / ".git").is_dir():
                return candidate

        raise colrev_exceptions.RepoSetupError(
            "Failed to locate a .git directory. "
            "Ensure you are within a Git repository, "
            "or set navigate_to_home_dir=False for init."
        )

    def load_settings(self) -> colrev.settings.Settings:
        """Load the settings (also stored in ``self.settings``)"""
        self.settings = colrev.settings.load_settings(settings_path=self.paths.settings)
        return self.settings

    def save_settings(self) -> None:
        """Save the settings"""
        colrev.settings.save_settings(review_manager=self)

    def reset_report_logger(self) -> None:
        """Reset the report logger"""
        colrev.logger.reset_report_logger(review_manager=self)

    def check_repo(self) -> dict:
        """Check the repository"""
        checker = colrev.ops.checker.Checker(review_manager=self)
        return checker.check_repo()

    def in_virtualenv(self) -> bool:  # pragma: no cover
        """Check whether CoLRev operates in a virtual environment"""
        return colrev.ops.checker.Checker.in_virtualenv()

    def check_repository_setup(self) -> None:
        """Check the repository setup"""
        checker = colrev.ops.checker.Checker(review_manager=self)
        checker.check_repository_setup()

    def get_colrev_versions(self) -> list[str]:
        """Get the CoLRev versions"""
        checker = colrev.ops.checker.Checker(review_manager=self)
        return checker.get_colrev_versions()

    def report(self, *, msg_file: Path) -> None:
        """Append commit-message report if not already available

        (Entrypoint for pre-commit hooks)
        """
        import colrev.ops.commit
        import colrev.ops.correct

        with open(msg_file, encoding="utf8") as file:
            available_contents = file.read()

        # The message is written back as read before the report is updated.
        with open(msg_file, "w", encoding="utf8") as file:
            file.write(available_contents)

        # Don't append if it's already there
        # update = False
        # if "Command" not in available_contents:
        #     update = True
        # if "Properties" in available_contents:
        #     update = False
        # if update:
        commit = colrev.ops.commit.Commit(
            review_manager=self,
            msg=available_contents,
            manual_author=True,
            script_name="MANUAL",
        )
        commit.update_report(msg_file=msg_file)

        if (
            not self.settings.is_curated_masterdata_repo()
            and self.dataset.records_changed()
        ):  # pragma: no cover
            colrev.ops.check.CheckOperation(self)  # to notify
            corrections_operation = colrev.ops.correct.Corrections(review_manager=self)
            corrections_operation.check_corrections_of_records()

    def sharing(self) -> dict:
        """Check whether sharing requirements are met

        (Entrypoint for pre-commit hooks)
        """
        self.notified_next_operation = OperationsType.check
        advisor = self.get_advisor()
        return advisor.get_sharing_instructions()

    def update_status_yaml(
        self, *, add_to_git: bool = True, records: typing.Optional[dict] = None
    ) -> None:
        """Update the STATUS_FILE

        Args:
            add_to_git: If True, the status file is added to the dataset's
                changes after it has been written.
            records: Records to compute the statistics from; passed through
                to ``get_status_stats``.
        """
        status_stats = self.get_status_stats(records=records)
        exported_dict = status_stats.model_dump()
        # These entries are not written to the status file.
        for excluded_key in (
            "origin_states_dict",
            "perc_curated",
            "screening_statistics",
            "nr_origins",
        ):
            exported_dict.pop(excluded_key)

        with open(self.paths.status, "w", encoding="utf8") as file:
            yaml.dump(exported_dict, file, allow_unicode=True)

        if add_to_git:
            # NOTE(review): written via self.paths.status but added via
            # self.paths.STATUS_FILE - confirm both exist on PathManager.
            self.dataset.add_changes(self.paths.STATUS_FILE)

    def get_upgrade(self) -> colrev.ops.upgrade.Upgrade:  # pragma: no cover
        """Get an upgrade object"""
        import colrev.ops.upgrade

        return colrev.ops.upgrade.Upgrade(review_manager=self)

    def get_repare(self) -> colrev.ops.repare.Repare:  # pragma: no cover
        """Get a repare object"""
        import colrev.ops.repare

        return colrev.ops.repare.Repare(review_manager=self)

    def get_remove_operation(self) -> colrev.ops.remove.Remove:  # pragma: no cover
        """Get a remove object"""
        import colrev.ops.remove

        return colrev.ops.remove.Remove(review_manager=self)

    def get_merge_operation(self) -> colrev.ops.merge.Merge:  # pragma: no cover
        """Get a merge object"""
        import colrev.ops.merge

        return colrev.ops.merge.Merge(review_manager=self)

    def get_advisor(self) -> colrev.ops.advisor.Advisor:  # pragma: no cover
        """Get an advisor object"""
        import colrev.ops.advisor

        return colrev.ops.advisor.Advisor(review_manager=self)

    def get_checker(self) -> colrev.ops.checker.Checker:  # pragma: no cover
        """Get a checker object"""
        return colrev.ops.checker.Checker(review_manager=self)

    def get_qm(self) -> colrev.record.qm.quality_model.QualityModel:  # pragma: no cover
        """Get the quality model"""
        return colrev.record.qm.quality_model.QualityModel(
            defects_to_ignore=self.settings.prep.defects_to_ignore
        )

    def get_pdf_qm(
        self,
    ) -> colrev.record.qm.quality_model.QualityModel:  # pragma: no cover
        """Get the PDF quality model"""
        return colrev.record.qm.quality_model.QualityModel(
            defects_to_ignore=self.settings.pdf_get.defects_to_ignore,
            pdf_mode=True,
            path=self.path,
        )

    def get_status_stats(
        self, *, records: typing.Optional[dict] = None
    ) -> colrev.process.status.StatusStats:  # pragma: no cover
        """Get a status stats object

        The records are loaded from the dataset when ``records`` is None.
        """
        import colrev.process.status

        colrev.ops.check.CheckOperation(self)
        if records is None:
            records = self.dataset.load_records_dict()

        return colrev.process.status.get_status_stats(
            review_manager=self, records=records
        )

    def get_completeness_condition(self) -> bool:
        """Get the completeness condition"""
        status_stats = self.get_status_stats()
        return status_stats.completeness_condition

    @classmethod
    def get_package_manager(
        cls,
    ) -> colrev.package_manager.package_manager.PackageManager:  # pragma: no cover
        """Get a package manager object"""
        import colrev.package_manager.package_manager

        return colrev.package_manager.package_manager.PackageManager()

    @classmethod
    def get_grobid_service(
        cls,
    ) -> colrev.env.grobid_service.GrobidService:  # pragma: no cover
        """Get a grobid service object"""
        import colrev.env.grobid_service

        environment_manager = cls.get_environment_manager()
        return colrev.env.grobid_service.GrobidService(
            environment_manager=environment_manager
        )

    def get_tei(
        self,
        *,
        pdf_path: typing.Optional[Path] = None,
        tei_path: typing.Optional[Path] = None,
    ) -> colrev.env.tei_parser.TEIParser:  # type: ignore # pragma: no cover
        """Get a tei object

        ``pdf_path`` and ``tei_path`` are joined onto the project path when
        given and passed as None otherwise.
        """
        import colrev.env.tei_parser

        return colrev.env.tei_parser.TEIParser(
            environment_manager=self.environment_manager,
            pdf_path=self.path / pdf_path if pdf_path else None,
            tei_path=self.path / tei_path if tei_path else None,
        )

    @classmethod
    def get_environment_manager(
        cls,
    ) -> colrev.env.environment_manager.EnvironmentManager:  # pragma: no cover
        """Get an environment manager"""
        import colrev.env.environment_manager

        return colrev.env.environment_manager.EnvironmentManager()

    @classmethod
    def get_cached_session(cls) -> requests_cache.CachedSession:  # pragma: no cover
        """Get a cached session (sqlite backend, entries expire after 30 days)"""
        return requests_cache.CachedSession(
            str(Filepaths.PREP_REQUESTS_CACHE_FILE),
            backend="sqlite",
            expire_after=timedelta(days=30),
        )

    @classmethod
    def get_resources(cls) -> colrev.env.resources.Resources:  # pragma: no cover
        """Get a resources object"""
        import colrev.env.resources

        return colrev.env.resources.Resources()

    def get_search_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.search.Search:  # pragma: no cover
        """Get a search operation object"""
        import colrev.ops.search

        return colrev.ops.search.Search(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_load_operation(
        self,
        notify_state_transition_operation: bool = True,
        hide_load_explanation: bool = False,
    ) -> colrev.ops.load.Load:  # pragma: no cover
        """Get a load operation object"""
        import colrev.ops.load

        return colrev.ops.load.Load(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
            hide_load_explanation=hide_load_explanation,
        )

    def get_prep_operation(
        self,
        *,
        notify_state_transition_operation: bool = True,
        polish: bool = False,
        cpu: int = 4,
        debug: bool = False,
    ) -> colrev.ops.prep.Prep:  # pragma: no cover
        """Get a prep operation object

        With ``debug=True`` a ``PrepDebug`` object is returned instead; it is
        created without the ``cpu`` argument.
        """
        if debug:
            import colrev.ops.prep_debug

            return colrev.ops.prep_debug.PrepDebug(
                review_manager=self,
                notify_state_transition_operation=notify_state_transition_operation,
                polish=polish,
            )

        import colrev.ops.prep

        return colrev.ops.prep.Prep(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
            polish=polish,
            cpu=cpu,
        )

    def get_prep_man_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.prep_man.PrepMan:  # pragma: no cover
        """Get a prep-man operation object"""
        import colrev.ops.prep_man

        return colrev.ops.prep_man.PrepMan(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_dedupe_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.dedupe.Dedupe:  # pragma: no cover
        """Get a dedupe operation object"""
        import colrev.ops.dedupe

        return colrev.ops.dedupe.Dedupe(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_prescreen_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.prescreen.Prescreen:  # pragma: no cover
        """Get a prescreen operation object"""
        import colrev.ops.prescreen

        return colrev.ops.prescreen.Prescreen(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_pdf_get_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.pdf_get.PDFGet:  # pragma: no cover
        """Get a pdf-get operation object"""
        import colrev.ops.pdf_get

        return colrev.ops.pdf_get.PDFGet(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_pdf_get_man_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.pdf_get_man.PDFGetMan:  # pragma: no cover
        """Get a pdf-get-man operation object"""
        import colrev.ops.pdf_get_man

        return colrev.ops.pdf_get_man.PDFGetMan(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_pdf_prep_operation(
        self, *, reprocess: bool = False, notify_state_transition_operation: bool = True
    ) -> colrev.ops.pdf_prep.PDFPrep:  # pragma: no cover
        """Get a pdfprep operation object"""
        import colrev.ops.pdf_prep

        return colrev.ops.pdf_prep.PDFPrep(
            review_manager=self,
            reprocess=reprocess,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_pdf_prep_man_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.pdf_prep_man.PDFPrepMan:  # pragma: no cover
        """Get a pdf-prep-man operation object"""
        import colrev.ops.pdf_prep_man

        return colrev.ops.pdf_prep_man.PDFPrepMan(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_screen_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.screen.Screen:  # pragma: no cover
        """Get a screen operation object"""
        import colrev.ops.screen

        return colrev.ops.screen.Screen(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_data_operation(
        self, *, notify_state_transition_operation: bool = True
    ) -> colrev.ops.data.Data:  # pragma: no cover
        """Get a data operation object"""
        import colrev.ops.data

        return colrev.ops.data.Data(
            review_manager=self,
            notify_state_transition_operation=notify_state_transition_operation,
        )

    def get_status_operation(self) -> colrev.ops.status.Status:  # pragma: no cover
        """Get a status operation object"""
        import colrev.ops.status

        return colrev.ops.status.Status(review_manager=self)

    def get_validate_operation(
        self,
    ) -> colrev.ops.validate.Validate:  # pragma: no cover
        """Get a validate operation object"""
        import colrev.ops.validate

        return colrev.ops.validate.Validate(review_manager=self)

    def get_trace_operation(self) -> colrev.ops.trace.Trace:  # pragma: no cover
        """Get a trace operation object"""
        import colrev.ops.trace

        return colrev.ops.trace.Trace(review_manager=self)

    def get_distribute_operation(
        self,
    ) -> colrev.ops.distribute.Distribute:  # pragma: no cover
        """Get a distribute operation object"""
        import colrev.ops.distribute

        return colrev.ops.distribute.Distribute(review_manager=self)

    # pylint: disable=line-too-long
    def get_push_operation(self, **kwargs) -> colrev.ops.push.Push:  # type: ignore # pragma: no cover
        """Get a push operation object (``kwargs`` are forwarded to ``Push``)"""
        import colrev.ops.push

        return colrev.ops.push.Push(review_manager=self, **kwargs)

    def get_pull_operation(self) -> colrev.ops.pull.Pull:  # pragma: no cover
        """Get a pull operation object"""
        import colrev.ops.pull

        return colrev.ops.pull.Pull(review_manager=self)

    def get_connecting_review_manager(
        self,
        *,
        path_str: typing.Optional[str] = None,
        force_mode: bool = False,
        verbose_mode: bool = False,
    ) -> ReviewManager:  # pragma: no cover
        """Get a (connecting) ReviewManager object for another CoLRev repository"""
        # type(self) keeps subclasses intact when connecting to another project.
        return type(self)(
            path_str=path_str, force_mode=force_mode, verbose_mode=verbose_mode
        )

    @classmethod
    def in_test_environment(cls) -> bool:
        """Check whether CoLRev runs in a test environment

        The check is whether "pytest" occurs in the current working directory.
        """
        return "pytest" in os.getcwd()

    @classmethod
    def in_ci_environment(
        cls,
    ) -> bool:
        """Check whether CoLRev runs in a continuous-integration environment

        True when at least one of the known CI environment variables is set
        to exactly "true".
        """
        identifier_list = [
            "GITHUB_ACTIONS",
            "CIRCLECI",
            "TRAVIS",
            "GITLAB_CI",
        ]
        return any(os.getenv(identifier) == "true" for identifier in identifier_list)