Source code for qurry.qurrium.experiment.export

"""The module of exporting the experiment data. (:mod:`qurry.qurrium.experiment.export`)"""

import os
from typing import Optional, NamedTuple, Union, Any
from collections.abc import Hashable
from pathlib import Path
import warnings
import gc
import tqdm

from .arguments import CommonparamsDict, REQUIRED_FOLDER
from ...tools import ParallelManager, set_pbar_description
from ...capsule import quickJSON, DEFAULT_ENCODING, DEFAULT_INDENT, DEFAULT_MODE


[docs] class Export(NamedTuple): """Data-stored namedtuple with all experiments data which is jsonable.""" exp_id: str """ID of experiment, which will be packed into `.args.json`.""" exp_name: str """Name of the experiment, which will be packed into `.args.json`. If this experiment is called by multimanager, then this name will never apply as filename.""" # Arguments for multi-experiment serial: Optional[int] """Index of experiment in :class:`~qurry.qurrium.multimanager.multimanager.MultiManager`, which will be packed into `.args.json`.""" summoner_id: Optional[str] """ID of experiment of the :class:`~qurry.qurrium.multimanager.multimanager.MultiManager`, which will be packed into `.args.json`.""" summoner_name: Optional[str] """Name of experiment of the :class:`~qurry.qurrium.multimanager.multimanager.MultiManager`, which will be packed into `.args.json`.""" filename: str """The name of file to be exported, it will be decided by :meth:`~qurry.qurrium.experiment.experiment.ExperimentPrototype.export` when it's called. """ files: dict[str, str] """The list of file to be exported. ### Single experiment: For the :meth:`write` function actually exports 4 different files respecting to `adventure`, `legacy`, `tales`, and `reports` like: .. code-block:: python files = { 'folder': './bla_exp/', 'qurryinfo': './bla_exp/qurryinfo.json', 'args': './bla_exp/args/bla_exp.id={exp_id}.args.json', 'advent': './bla_exp/advent/bla_exp.id={exp_id}.advent.json', 'legacy': './bla_exp/legacy/bla_exp.id={exp_id}.legacy.json', 'tales.dummyx1': './bla_exp/tales/bla_exp.id={exp_id}.dummyx1.json', 'tales.dummyx2': './bla_exp/tales/bla_exp.id={exp_id}.dummyx2.json', ... 'tales.dummyxn': './bla_exp/tales/bla_exp.id={exp_id}.dummyxn.json', 'reports': './bla_exp/reports/bla_exp.id={exp_id}.reports.json', 'reports.tales.dummyz1': './bla_exp/tales/bla_exp.id={exp_id}.dummyz1.reports.json', 'reports.tales.dummyz2': './bla_exp/tales/bla_exp.id={exp_id}.dummyz2.reports.json', ... 'reports.tales.dummyzm': './bla_exp/tales/bla_exp.id={exp_id}.dummyzm.reports.json', } which `bla_exp` is the example filename. ### Multi-experiment: If this experiment is called by :class:`~qurry.qurrium.multimanager.multimanager.MultiManager`, then the it will be named after `summoner_name` as known as the name of :class:`~qurry.qurrium.multimanager.multimanager.MultiManager`. .. code-block:: python files = { 'folder': './BLABLA_project/', 'qurryinfo': './BLABLA_project/qurryinfo.json', 'args': './BLABLA_project/args/index={serial}.id={exp_id}.args.json', 'advent': './BLABLA_project/advent/index={serial}.id={exp_id}.advent.json', 'legacy': './BLABLA_project/legacy/index={serial}.id={exp_id}.legacy.json', 'tales.dummyx1': './BLABLA_project/tales/index={serial}.id={exp_id}.dummyx1.json', 'tales.dummyx2': './BLABLA_project/tales/index={serial}.id={exp_id}.dummyx2.json', ... 'tales.dummyxn': './BLABLA_project/tales/index={serial}.id={exp_id}.dummyxn.json', 'reports': './BLABLA_project/reports/index={serial}.id={exp_id}.reports.json', 'reports.tales.dummyz1': './BLABLA_project/tales/index={serial}.id={exp_id}.dummyz1.reports.json', 'reports.tales.dummyz2': './BLABLA_project/tales/index={serial}.id={exp_id}.dummyz2.reports.json', ... 'reports.tales.dummyzm': './BLABLA_project/tales/index={serial}.id={exp_id}.dummyzm.reports.json', } which `BLBLA_project` is the example :class:`~qurry.qurrium.multimanager.multimanager.MultiManager` name stored at `summoner_name` in :class:`~qurry.qurrium.experiment.arguments.Commonparams.summoner_name`. At this senerio, the `exp_name` will never apply as filename. """ args: dict[str, Any] """Construct the experiment's parameters, which will be packed into `.args.json`.""" commons: CommonparamsDict """Construct the experiment's common parameters, which will be packed into `.args.json`.""" outfields: dict[str, Any] """Recording the data of other unused arguments, which will be packed into `.args.json`.""" adventures: dict[str, Any] """Recording the data of 'beforeward', which will be packed into `.advent.json`. *~ A Great Adventure begins ~*""" legacy: dict[str, Any] """Recording the data of 'afterward', which will be packed into `.legacy.json`. *~ The Legacy remains from the achievement of ancestors ~*""" tales: dict[str, Any] """Recording the data of 'side_product' in 'afterward' and 'beforewards' for API, which will be packed into `.*.tales.json`. *~ Tales of braves circulate ~*""" reports: dict[Hashable, dict[str, Any]] """Recording the data of 'reports', which will be packed into `.reports.json`. ### Reports format: .. code-block:: python reports = { 1: { ...quantities, 'input': { ... }, 'header': { ... }, }, 2: { ...quantities, 'input': { ... }, 'header': { ... }, }, ... {serial}: { ...quantities, 'input': { ... }, 'header': { ... }, }, } *~ The guild concludes the results. ~*""" tales_reports: dict[str, dict[Hashable, dict[str, Any]]] """Recording the data of 'side_product' in 'reports' for API, which will be packed into `.*.reprts.json`. ### Tales Reports format: .. code-block:: python tales_reports = { 'dummyz1': { 1: { ... }, 2: { ... }, ... {serial}: { ... }, }, 'dummyz2': { 1: { ... }, 2: { ... }, ... {serial}: { ... }, }, ... 'dummyz': { 1: { ... }, 2: { ... }, ... {serial}: { ... }, }, } *~ Tales of braves circulate ~*"""
[docs] def write( self, multiprocess: bool = False, pbar: Optional[tqdm.tqdm] = None, ) -> tuple[str, dict[str, str]]: """Export the experiment data, if there is a previous export, then will overwrite. Hint: This function will traversal all objects in the export_set, so it will ensure the jsonable of all objects. And this will reduce the performance of exporting. Args: multiprocess (bool, optional): Whether to use multiprocess to export, Defaults to False. It's dangerous to use multiprocess to export. It may cause memory leak. pbar (Optional[tqdm.tqdm], optional): The progress bar for exporting. Defaults to None. Returns: tuple[str, dict[str, str]]: The first element is the id of experiment, the second element is the dictionary of files of experiment. """ export_set: dict[ str, Union[ dict[str, Any], list[Any], tuple[Any, ...], dict[Hashable, dict[str, Any]], ], ] = {} # args ............... # arguments, commonparams, outfields, files export_set["args"] = { "arguments": self.args, "commonparams": self.commons, "outfields": self.outfields, "files": self.files, } # advent ............. # adventures export_set["advent"] = { "files": self.files, "adventures": self.adventures, } # legacy ............. # legacy export_set["legacy"] = { "files": self.files, "legacy": self.legacy, } # tales .............. # tales for tk, tv in self.tales.items(): export_set[f"tales.{tk}"] = tv if isinstance(tv, (dict, list, tuple)) else [tv] if f"tales.{tk}" not in self.files: warnings.warn(f"tales.{tk} is not in export_names, it's not exported.") # reports ............ # reports export_set["reports"] = { "files": self.files, "reports": self.reports, } # reports.tales ...... # tales_reports for tk, tv in self.tales_reports.items(): export_set[f"reports.tales.{tk}"] = tv if isinstance(tv, (dict, list, tuple)) else [tv] if f"reports.tales.{tk}" not in self.files: warnings.warn(f"reports.tales.{tk} is not in export_names, it's not exported.") # Exportation set_pbar_description( pbar, ( "Exporting " + (f"{self.summoner_name}/" if self.summoner_name else "") + f"{self.exp_name}..." ), ) folder = Path(self.commons["save_location"]) / Path(self.files["folder"]) if not os.path.exists(folder): os.mkdir(folder) for k in REQUIRED_FOLDER: if not os.path.exists(folder / k): os.mkdir(folder / k) if multiprocess: pool = ParallelManager() pool.starmap( quickJSON, [ ( content, str(Path(self.commons["save_location"]) / self.files[filekey]), DEFAULT_MODE, DEFAULT_INDENT, DEFAULT_ENCODING, True, # although it reduces the performance for it will traversal all object, # but it will ensure the jsonable # since all objects are not jsonable by default. Path("./"), ) for filekey, content in export_set.items() ], ) else: for filekey, content in export_set.items(): quickJSON( content=content, filename=str(Path(self.commons["save_location"]) / self.files[filekey]), mode=DEFAULT_MODE, indent=DEFAULT_INDENT, encoding=DEFAULT_ENCODING, jsonable=True, # although it reduces the performance for it will traversal all object, # but it will ensure the jsonable since all objects are not jsonable by default. save_location=Path("./"), ) del export_set gc.collect() return self.exp_id, self.files