Lighteval Documentation

Using the Python API

Lighteval can be used from within your own Python scripts. To evaluate a model, you need to set up an EvaluationTracker, PipelineParameters, a model or a model_config, and a Pipeline.

After that, simply run the pipeline and save the results.

import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.utils import EnvConfig
from lighteval.utils.imports import is_accelerate_available

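# When accelerate is installed, initialize it up front (with a generous
# process-group timeout) so the same script can also be launched with
# `accelerate launch` for multi-GPU runs.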
if is_accelerate_available():
    from datetime import timedelta
    from accelerate import Accelerator, InitProcessGroupKwargs
    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
else:
    accelerator = None

def main():
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=True,
        hub_results_org="your user name",
    )

    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        env_config=EnvConfig(cache_dir="tmp/"),
        custom_tasks_directory=None, # if using a custom task
        # Remove the 2 parameters below once your configuration is tested
        override_batch_size=1,
        max_samples=10
    )

    model_config = VLLMModelConfig(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        dtype="float16",
        use_chat_template=True,
    )

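    # Task string format: "suite|task|num_fewshot|truncate_fewshot", i.e. 5-shot
    # MMLU from the helm suite; the trailing 1 lets Lighteval automatically reduce
    # the number of few-shot examples if the prompt would exceed the context length.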
    task = "helm|mmlu|5|1"

    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    pipeline.evaluate()
    pipeline.save_and_push_results()
    pipeline.show_results()

if __name__ == "__main__":
    main()
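
Once the run finishes, the EvaluationTracker writes the aggregated metrics as JSON files under output_dir (and, with save_details=True, per-sample details alongside them). The snippet below is a minimal sketch for reading those metrics back; the exact directory layout and file naming are an assumption here, so adjust the glob pattern to whatever you find under ./results after a run.

import glob
import json

# Collect the result files written by the EvaluationTracker under output_dir.
# The "results_*.json" naming is an assumption; inspect ./results if it differs.
for path in sorted(glob.glob("./results/**/results_*.json", recursive=True)):
    with open(path) as f:
        data = json.load(f)
    print(path)
    # "results" maps each evaluated task to its aggregated metrics.
    for task_name, metrics in data.get("results", {}).items():
        print(f"  {task_name}: {metrics}")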