Optim

vmc/optim/optimizer/VMCOptimizer

 1class VMCOptimizer(BaseVMCOptimizer):
 2
 3    def __init__(
 4        self,
 5        nqs: DDP,
 6        sampler_param: dict,
 7        electron_info: ElectronInfo,
 8        opt: Optimizer,
 9        lr_scheduler: Union[List[LRScheduler], LRScheduler] = None,
10        max_iter: int = 2000,
11        dtype: Dtype = None,
12        HF_init: int = 0,
13        external_model: any = None,
14        check_point: str = None,
15        read_model_only: bool = False,
16        only_sample: bool = False,
17        pre_CI: CIWavefunction = None,
18        pre_train_info: dict = None,
19        clean_opt_state: bool = False,
20        noise_lambda: float = 0.05,
21        method_grad: str = "AD",
22        sr: bool = False,
23        method_jacobian: str = "vector",
24        interval: int = 100,
25        prefix: str = "VMC",
26        MAX_AD_DIM: int = -1,
27        kfac: KFACPreconditioner = None,  # type: ignore
28        use_clip_grad: bool = False,
29        max_grad_norm: float = 1.0,
30        max_grad_value: float = 1.0,
31        start_clip_grad: int = None,
32        clip_grad_method: str = "l2",
33        clip_grad_scheduler: Optional[Callable[[int], float]] = None,
34        use_3sigma: bool = False,
35        k_step_clip: int = 100,
36        use_spin_raising: bool = False,
37        spin_raising_coeff: float = 1.0,
38        only_output_spin_raising: bool = False,
39        spin_raising_scheduler: Optional[Callable[[int], float]] = None,
 40    ):

opt-params

 1from utils import ElectronInfo, Dtype
 2
 3opt_type = optim.AdamW
 4opt_params = {"lr": 0.001, "betas": (0.9, 0.999)}
 5opt = opt_type(model.parameters(), **opt_params)
 6
 7prefix = "vmc"
 8def clip_grad_scheduler(step):
 9   if step <= 4000:
10      max_grad = 1.0
11   elif step <= 8000:
12      max_grad = 0.1
13   else:
14      max_grad = 0.01
15   return max_grad
16
17vmc_opt_params = {
18    "nqs": model,
19    "opt": opt,
20    # "lr_scheduler": lr_scheduler,
21    # "read_model_only": True,
22    "dtype": dtype,
23    "sampler_param": sampler_param,
24    # "only_sample": True,
25    "electron_info": electron_info,
26    # "use_spin_raising": True,
27    # "spin_raising_coeff": 1.0,
28    # "only_output_spin_raising": True,
29    "max_iter": 5000,
30    "interval": 100,
31    "MAX_AD_DIM": 80000,
32    # "check_point": f"./h50/focus-init/checkpoint/H50-2.00-oao-mps-rnn-dcut-30-222-focus-20w-checkpoint.pth",
33    "prefix": prefix,
34    "use_clip_grad": True,
35    "max_grad_norm": 1,
36    "start_clip_grad": -1,
37    "clip_grad_scheduler": clip_grad_scheduler,
38}
  • nqs: the wavefunction ansatz (e.g., Transformer, MPS-RNN, Graph-MPS-RNN).

  • opt: the optimizer (e.g., Adam, AdamW, SGD).

  • lr_scheduler: LRScheduler, Default: None.

  • read_model_only: read only the model weights from the checkpoint file (not the optimizer/lr_scheduler state), Default: False.

  • dtype: the data type and device, e.g., Dtype(dtype=torch.complex128, device="cuda").

  • sampler_param: see sample-param

  • only_sample: sample without calculating gradients; used to compute the energy only, Default: False.

  • max_iter: the maximum number of optimization iterations, Default: 2000.

  • interval: the number of iterations between checkpoint saves, Default: 100.

  • MAX_AD_DIM: the batch size (nbatch) used in the backward pass, Default: -1.

  • check_point: Read model/optimizer/lr_scheduler from the checkpoint file, Default: None.

  • prefix: the prefix of the checkpoint file, e.g., vmc-checkpoint.pth.

  • use_clip_grad: whether to clip gradients, Default: False.

  • max_grad_norm: the maximum L2 norm used when clipping gradients, Default: 1.0.

  • start_clip_grad: start clipping gradients from the k-th iteration, Default: None.

  • clip_grad_scheduler: the gradient-clipping scheduler, a Callable[[int], float] mapping the iteration number to the clipping threshold, Default: None.