Bitsandbytes 文档

AdEMAMix

Hugging Face's logo
加入 Hugging Face 社区

并获取增强的文档体验

开始使用

AdEMAMix

AdEMAMix 是 Adam 优化器的一种变体。

bitsandbytes 还支持分页优化器，这些优化器利用 CUDA 的统一内存，在 GPU 内存耗尽时将数据从 GPU 转移到 CPU。

AdEMAMix

class bitsandbytes.optim.AdEMAMix

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 optim_bits: typing.Literal[8, 32] = 32 min_8bit_size: int = 4096 is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 optim_bits: typing.Literal[8, 32] = 32 min_8bit_size: int = 4096 is_paged: bool = False )

AdEMAMix8bit

class bitsandbytes.optim.AdEMAMix8bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 is_paged: bool = False )

AdEMAMix32bit

class bitsandbytes.optim.AdEMAMix32bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 is_paged: bool = False )

PagedAdEMAMix

class bitsandbytes.optim.PagedAdEMAMix

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 optim_bits: typing.Literal[8, 32] = 32 min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 optim_bits: typing.Literal[8, 32] = 32 min_8bit_size: int = 4096 )

PagedAdEMAMix8bit

class bitsandbytes.optim.PagedAdEMAMix8bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 )

PagedAdEMAMix32bit

class bitsandbytes.optim.PagedAdEMAMix32bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter] lr: float = 0.001 betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999) alpha: float = 5.0 t_alpha: typing.Optional[int] = None t_beta3: typing.Optional[int] = None eps: float = 1e-08 weight_decay: float = 0.01 min_8bit_size: int = 4096 )

< > 在 GitHub 上更新