model (str, optional) — The model to use for chat completion. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for chat-based text generation will be used. See https://huggingface.co/tasks/text-generation for more details. If model is a model ID, it is passed to the server as the model parameter. If you want to define a custom URL while setting model in the request payload, you must set base_url when initializing InferenceClient.
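For instance, a minimal sketch of the base_url pattern described above (the endpoint URL and model name are hypothetical placeholders):

>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(base_url="http://localhost:8080")  # hypothetical custom endpoint
>>> client.chat_completion(
...     messages=[{"role": "user", "content": "Hello!"}],
...     model="my-custom-model",  # sent as-is in the request payload
... )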
>>> from huggingface_hub import InferenceClient
>>> messages = [{"role": "user", "content": "What is the capital of France?"}]
>>> client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
>>> client.chat_completion(messages, max_tokens=100)
ChatCompletionOutput(
choices=[
ChatCompletionOutputComplete(
finish_reason='eos_token',
index=0,
message=ChatCompletionOutputMessage(
role='assistant',
content='The capital of France is Paris.',
name=None,
tool_calls=None
),
logprobs=None
)
],
created=1719907176,
id='',
model='meta-llama/Meta-Llama-3-8B-Instruct',
object='text_completion',
system_fingerprint='2.0.4-sha-f426a33',
usage=ChatCompletionOutputUsage(
completion_tokens=8,
prompt_tokens=17,
total_tokens=25
)
)
Example using streaming:
>>> from huggingface_hub import InferenceClient
>>> messages = [{"role": "user", "content": "What is the capital of France?"}]
>>> client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
>>> for token in client.chat_completion(messages, max_tokens=10, stream=True):
... print(token)
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content='The', role='assistant'), index=0, finish_reason=None)], created=1710498504)
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' capital', role='assistant'), index=0, finish_reason=None)], created=1710498504)
(...)
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' may', role='assistant'), index=0, finish_reason=None)], created=1710498504)
Example using OpenAI's syntax:
# instead of `from openai import OpenAI`
from huggingface_hub import InferenceClient
# instead of `client = OpenAI(...)`
client = InferenceClient(
base_url=...,
api_key=...,
)
output = client.chat.completions.create(
model="meta-llama/Meta-Llama-3-8B-Instruct",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Count to 10"},
],
stream=True,
max_tokens=1024,
)
for chunk in output:
print(chunk.choices[0].delta.content)
Example of using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="together", # Use Together AI provider... api_key="<together_api_key>", # Pass your Together API key directly... )
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
... )
Example of using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="sambanova", # Use Sambanova provider... api_key="hf_...", # Pass your HF token... )
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
... )
Example using an image + text as input:
>>> from huggingface_hub import InferenceClient
# provide a remote URL
>>> image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"

# or a base64-encoded image
>>> import base64
>>> image_path = "/path/to/image.jpeg"
>>> with open(image_path, "rb") as f:
... base64_image = base64.b64encode(f.read()).decode("utf-8")
>>> image_url = f"data:image/jpeg;base64,{base64_image}">>> client = InferenceClient("meta-llama/Llama-3.2-11B-Vision-Instruct")
>>> output = client.chat.completions.create(
... messages=[
... {
... "role": "user",
... "content": [
... {
... "type": "image_url",
... "image_url": {"url": image_url},
... },
... {
... "type": "text",
... "text": "Describe this image in one sentence.",
... },
... ],
... },
... ],
... )
>>> output
The image depicts the iconic Statue of Liberty situated in New York Harbor, New York, on a clear day.
Example using tools:
>>> client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
>>> messages = [
... {
... "role": "system",
... "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
... },
... {
... "role": "user",
... "content": "What's the weather like the next 3 days in San Francisco, CA?",
... },
... ]
>>> tools = [
... {
... "type": "function",
... "function": {
... "name": "get_current_weather",
... "description": "Get the current weather",
... "parameters": {
... "type": "object",
... "properties": {
... "location": {
... "type": "string",
... "description": "The city and state, e.g. San Francisco, CA",
... },
... "format": {
... "type": "string",
... "enum": ["celsius", "fahrenheit"],
... "description": "The temperature unit to use. Infer this from the users location.",
... },
... },
... "required": ["location", "format"],
... },
... },
... },
... {
... "type": "function",
... "function": {
... "name": "get_n_day_weather_forecast",
... "description": "Get an N-day weather forecast",
... "parameters": {
... "type": "object",
... "properties": {
... "location": {
... "type": "string",
... "description": "The city and state, e.g. San Francisco, CA",
... },
... "format": {
... "type": "string",
... "enum": ["celsius", "fahrenheit"],
... "description": "The temperature unit to use. Infer this from the users location.",
... },
... "num_days": {
... "type": "integer",
... "description": "The number of days to forecast",
... },
... },
... "required": ["location", "format", "num_days"],
... },
... },
... },
... ]
>>> response = client.chat_completion(
... model="meta-llama/Meta-Llama-3-70B-Instruct",
... messages=messages,
... tools=tools,
... tool_choice="auto",
... max_tokens=500,
... )
>>> response.choices[0].message.tool_calls[0].function
ChatCompletionOutputFunctionDefinition(
arguments={
'location': 'San Francisco, CA',
'format': 'fahrenheit',
'num_days': 3
},
name='get_n_day_weather_forecast',
description=None
)
Example using response_format:
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
>>> messages = [
... {
... "role": "user",
... "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I saw and when?",
... },
... ]
>>> response_format = {
... "type": "json",
... "value": {
... "properties": {
... "location": {"type": "string"},
... "activity": {"type": "string"},
... "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5},
... "animals": {"type": "array", "items": {"type": "string"}},
... },
... "required": ["location", "activity", "animals_seen", "animals"],
... },
... }
>>> response = client.chat_completion(
... messages=messages,
... response_format=response_format,
... max_tokens=500,
... )
>>> response.choices[0].message.content
'{"activity": "bike ride", "animals": ["puppy", "cat", "raccoon"], "animals_seen": 3, "location": "park"}'
prompt_name (str, optional) — The name of the prompt that should be used for encoding. If not set, no prompt is applied. Must be a key in the prompts dictionary of the Sentence Transformers configuration. For example, if prompt_name is "query" and prompts is {"query": "query: ", ...}, then the sentence "What is the capital of France?" is encoded as "query: What is the capital of France?" because the prompt text is prepended to any text to encode.
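A hedged illustration of this behavior (assuming the served Sentence Transformers model defines a "query" key in its prompts configuration):

>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> embedding = client.feature_extraction(
...     "What is the capital of France?",
...     prompt_name="query",  # the server prepends "query: " before encoding
... )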
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.fill_mask("The goal of life is <mask>.")
[
FillMaskOutputElement(score=0.06897063553333282, token=11098, token_str=' happiness', sequence='The goal of life is happiness.'),
FillMaskOutputElement(score=0.06554922461509705, token=45075, token_str=' immortality', sequence='The goal of life is immortality.')
]
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.image_to_text("cat.jpg")
'a cat standing in a grassy field '

>>> client.image_to_text("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
'a dog laying on the grass next to a flower pot '
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.question_answering(question="What's my name?", context="My name is Clara and I live in Berkeley.")
QuestionAnsweringOutputElement(answer='Clara', end=16, score=0.9326565265655518, start=11)
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.sentence_similarity(
... "Machine learning is so easy.",
... other_sentences=[
... "Deep learning is so straightforward.",
... "This is so difficult, like rocket science.",
... "I can't believe how much I struggled with this.",
... ],
... )
[0.7785726189613342, 0.45876261591911316, 0.2906220555305481]
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.summarization("The Eiffel tower...")
SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
eta_cutoff (float, optional) — Eta sampling: a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next-token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See Truncation Sampling as Language Model Desmoothing for more details.
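To make the cutoff concrete, here is a small illustrative sketch (not the server implementation) of how the eta threshold combines the two terms above:

import numpy as np

def eta_threshold(next_token_probs: np.ndarray, eta_cutoff: float) -> float:
    # entropy of the next-token distribution (softmax already applied)
    entropy = -np.sum(next_token_probs * np.log(next_token_probs + 1e-12))
    # a token is only considered if its probability exceeds this value
    return min(eta_cutoff, np.sqrt(eta_cutoff) * np.exp(-entropy))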
>>> from huggingface_hub import InferenceClient
>>> lyrics = '''
... [verse]
... In the town where I was born
... Lived a man who sailed to sea
... And he told us of his life
... In the land of submarines
... So we sailed on to the sun
... 'Til we found a sea of green
... And we lived beneath the waves
... In our yellow submarine
... [chorus]
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... '''
>>> genres = "pavarotti-style tenor voice"
>>> client = InferenceClient(
... provider="fal-ai",
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
>>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
>>> withopen("output.mp3", "wb") as f:
... f.write(audio)
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="fal-ai", # Using fal.ai provider... api_key="fal-ai-api-key", # Pass your fal.ai API key... )
>>> video = client.text_to_video(
... "A majestic lion running in a fantasy forest",
... model="tencent/HunyuanVideo",
... )
>>> withopen("lion.mp4", "wb") as file:
... file.write(video)
Example of using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="replicate", # Using replicate provider... api_key="hf_...", # Pass your HF token... )
>>> video = client.text_to_video(
... "A cat running in a park",
... model="genmo/mochi-1-preview",
... )
>>> withopen("cat.mp4", "wb") as file:
... file.write(video)
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.token_classification("My name is Sarah Jessica Parker but you can call me Jessica")
[
TokenClassificationOutputElement(
entity_group='PER',
score=0.9971321225166321,
word='Sarah Jessica Parker',
start=11,
end=31,
),
TokenClassificationOutputElement(
entity_group='PER',
score=0.9773476123809814,
word='Jessica',
start=52,
end=59,
)
]
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.translation("My name is Wolfgang and I live in Berlin")
'Mein Name ist Wolfgang und ich lebe in Berlin.'

>>> client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
TranslationOutput(translation_text='Je m\'appelle Wolfgang et je vis à Berlin.')
Specifying languages:
>>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
"Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> text = (
... "A new model offers an explanation for how the Galilean satellites formed around the solar system's"... "largest world. Konstantin Batygin did not set out to solve one of the solar system's most puzzling"... " mysteries when he went for a run up a hill in Nice, France."... )
>>> labels = ["space & cosmos", "scientific discovery", "microbiology", "robots", "archeology"]
>>> client.zero_shot_classification(text, labels)
[
ZeroShotClassificationOutputElement(label='scientific discovery', score=0.7961668968200684),
ZeroShotClassificationOutputElement(label='space & cosmos', score=0.18570658564567566),
ZeroShotClassificationOutputElement(label='microbiology', score=0.00730885099619627),
ZeroShotClassificationOutputElement(label='archeology', score=0.006258360575884581),
ZeroShotClassificationOutputElement(label='robots', score=0.004559356719255447),
]
>>> client.zero_shot_classification(text, labels, multi_label=True)
[
ZeroShotClassificationOutputElement(label='scientific discovery', score=0.9829297661781311),
ZeroShotClassificationOutputElement(label='space & cosmos', score=0.755190908908844),
ZeroShotClassificationOutputElement(label='microbiology', score=0.0005462635890580714),
ZeroShotClassificationOutputElement(label='archeology', score=0.00047131875180639327),
ZeroShotClassificationOutputElement(label='robots', score=0.00030448526376858354),
]
Example with multi_label=True and a custom hypothesis_template:
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.zero_shot_classification(
... text="I really like our dinner and I'm very happy. I don't like the weather though.",
... labels=["positive", "negative", "pessimistic", "optimistic"],
... multi_label=True,
...     hypothesis_template="This text is {} towards the weather",
... )
[
ZeroShotClassificationOutputElement(label='negative', score=0.9231801629066467),
ZeroShotClassificationOutputElement(label='pessimistic', score=0.8760990500450134),
ZeroShotClassificationOutputElement(label='optimistic', score=0.0008674879791215062),
ZeroShotClassificationOutputElement(label='positive', score=0.0005250611575320363)
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.audio_classification("audio.flac")
[
AudioClassificationOutputElement(score=0.4976358711719513, label='hap'),
AudioClassificationOutputElement(score=0.3677836060523987, label='neu'),
...
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> audio_output = await client.audio_to_audio("audio.flac")
>>> for i, item in enumerate(audio_output):
...     with open(f"output_{i}.flac", "wb") as f:
...         f.write(item.blob)
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> output = await client.automatic_speech_recognition("hello_world.flac")
>>> output.text
"hello world"
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> messages = [{"role": "user", "content": "What is the capital of France?"}]
>>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
>>> await client.chat_completion(messages, max_tokens=100)
ChatCompletionOutput(
choices=[
ChatCompletionOutputComplete(
finish_reason='eos_token',
index=0,
message=ChatCompletionOutputMessage(
role='assistant',
content='The capital of France is Paris.',
name=None,
tool_calls=None
),
logprobs=None
)
],
created=1719907176,
id='',
model='meta-llama/Meta-Llama-3-8B-Instruct',
object='text_completion',
system_fingerprint='2.0.4-sha-f426a33',
usage=ChatCompletionOutputUsage(
completion_tokens=8,
prompt_tokens=17,
total_tokens=25
)
)
Example using streaming:
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> messages = [{"role": "user", "content": "What is the capital of France?"}]
>>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
>>> async for token in await client.chat_completion(messages, max_tokens=10, stream=True):
... print(token)
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content='The', role='assistant'), index=0, finish_reason=None)], created=1710498504)
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' capital', role='assistant'), index=0, finish_reason=None)], created=1710498504)
(...)
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' may', role='assistant'), index=0, finish_reason=None)], created=1710498504)
Example using OpenAI's syntax:
# Must be run in an async context
# instead of `from openai import OpenAI`
from huggingface_hub import AsyncInferenceClient
# instead of `client = OpenAI(...)`
client = AsyncInferenceClient(
base_url=...,
api_key=...,
)
output = await client.chat.completions.create(
model="meta-llama/Meta-Llama-3-8B-Instruct",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Count to 10"},
],
stream=True,
max_tokens=1024,
)
async for chunk in output:
print(chunk.choices[0].delta.content)
Example of using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="together", # Use Together AI provider... api_key="<together_api_key>", # Pass your Together API key directly... )
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
... )
Example of using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="sambanova", # Use Sambanova provider... api_key="hf_...", # Pass your HF token... )
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
... )
Example using an image + text as input:
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
# provide a remote URL
>>> image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"

# or a base64-encoded image
>>> import base64
>>> image_path = "/path/to/image.jpeg"
>>> with open(image_path, "rb") as f:
... base64_image = base64.b64encode(f.read()).decode("utf-8")
>>> image_url = f"data:image/jpeg;base64,{base64_image}">>> client = AsyncInferenceClient("meta-llama/Llama-3.2-11B-Vision-Instruct")
>>> output = await client.chat.completions.create(
... messages=[
... {
... "role": "user",
... "content": [
... {
... "type": "image_url",
... "image_url": {"url": image_url},
... },
... {
... "type": "text",
... "text": "Describe this image in one sentence.",
... },
... ],
... },
... ],
... )
>>> output
The image depicts the iconic Statue of Liberty situated in New York Harbor, New York, on a clear day.
Example using tools:
# Must be run in an async context
>>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
>>> messages = [
... {
... "role": "system",
... "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
... },
... {
... "role": "user",
... "content": "What's the weather like the next 3 days in San Francisco, CA?",
... },
... ]
>>> tools = [
... {
... "type": "function",
... "function": {
... "name": "get_current_weather",
... "description": "Get the current weather",
... "parameters": {
... "type": "object",
... "properties": {
... "location": {
... "type": "string",
... "description": "The city and state, e.g. San Francisco, CA",
... },
... "format": {
... "type": "string",
... "enum": ["celsius", "fahrenheit"],
... "description": "The temperature unit to use. Infer this from the users location.",
... },
... },
... "required": ["location", "format"],
... },
... },
... },
... {
... "type": "function",
... "function": {
... "name": "get_n_day_weather_forecast",
... "description": "Get an N-day weather forecast",
... "parameters": {
... "type": "object",
... "properties": {
... "location": {
... "type": "string",
... "description": "The city and state, e.g. San Francisco, CA",
... },
... "format": {
... "type": "string",
... "enum": ["celsius", "fahrenheit"],
... "description": "The temperature unit to use. Infer this from the users location.",
... },
... "num_days": {
... "type": "integer",
... "description": "The number of days to forecast",
... },
... },
... "required": ["location", "format", "num_days"],
... },
... },
... },
... ]
>>> response = await client.chat_completion(
... model="meta-llama/Meta-Llama-3-70B-Instruct",
... messages=messages,
... tools=tools,
... tool_choice="auto",
... max_tokens=500,
... )
>>> response.choices[0].message.tool_calls[0].function
ChatCompletionOutputFunctionDefinition(
arguments={
'location': 'San Francisco, CA',
'format': 'fahrenheit',
'num_days': 3
},
name='get_n_day_weather_forecast',
description=None
)
Example using response_format:
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
>>> messages = [
... {
... "role": "user",
... "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I saw and when?",
... },
... ]
>>> response_format = {
... "type": "json",
... "value": {
... "properties": {
... "location": {"type": "string"},
... "activity": {"type": "string"},
... "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5},
... "animals": {"type": "array", "items": {"type": "string"}},
... },
... "required": ["location", "activity", "animals_seen", "animals"],
... },
... }
>>> response = await client.chat_completion(
... messages=messages,
... response_format=response_format,
... max_tokens=500,
... )
>>> response.choices[0].message.content
'{"activity": "bike ride", "animals": ["puppy", "cat", "raccoon"], "animals_seen": 3, "location": "park"}'
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
prompt_name (str, optional) — The name of the prompt that should be used for encoding. If not set, no prompt is applied. Must be a key in the prompts dictionary of the Sentence Transformers configuration. For example, if prompt_name is "query" and prompts is {"query": "query: ", ...}, then the sentence "What is the capital of France?" is encoded as "query: What is the capital of France?" because the prompt text is prepended to any text to encode.
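A hedged illustration of this behavior (assuming the served Sentence Transformers model defines a "query" key in its prompts configuration):

# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> embedding = await client.feature_extraction(
...     "What is the capital of France?",
...     prompt_name="query",  # the server prepends "query: " before encoding
... )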
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.fill_mask("The goal of life is <mask>.")
[
FillMaskOutputElement(score=0.06897063553333282, token=11098, token_str=' happiness', sequence='The goal of life is happiness.'),
FillMaskOutputElement(score=0.06554922461509705, token=45075, token_str=' immortality', sequence='The goal of life is immortality.')
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient("https://jzgu0buei5.us-east-1.aws.endpoints.huggingface.cloud")
>>> await client.health_check()
True
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.image_classification("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
[ImageClassificationOutputElement(label='Blenheim spaniel', score=0.9779096841812134), ...]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.image_segmentation("cat.jpg")
[ImageSegmentationOutputElement(score=0.989008, label='LABEL_184', mask=<PIL.PngImagePlugin.PngImageFile image mode=L size=400x300 at 0x7FDD2B129CC0>), ...]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> image = await client.image_to_image("cat.jpg", prompt="turn the cat into a tiger")
>>> image.save("tiger.jpg")
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.image_to_text("cat.jpg")
'a cat standing in a grassy field '

>>> await client.image_to_text("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
'a dog laying on the grass next to a flower pot '
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
# Discover zero-shot-classification models currently deployed
>>> models = await client.list_deployed_models()
>>> models["zero-shot-classification"]
['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
# List from only 1 framework
>>> await client.list_deployed_models("text-generation-inference")
{'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.object_detection("people.jpg")
[ObjectDetectionOutputElement(score=0.9486683011054993, label='person', box=ObjectDetectionBoundingBox(xmin=59, ymin=39, xmax=420, ymax=510)), ...]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.question_answering(question="What's my name?", context="My name is Clara and I live in Berkeley.")
QuestionAnsweringOutputElement(answer='Clara', end=16, score=0.9326565265655518, start=11)
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.sentence_similarity(
... "Machine learning is so easy.",
... other_sentences=[
... "Deep learning is so straightforward.",
... "This is so difficult, like rocket science.",
... "I can't believe how much I struggled with this.",
... ],
... )
[0.7785726189613342, 0.45876261591911316, 0.2906220555305481]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.summarization("The Eiffel tower...")
SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.text_classification("I like you")
[
TextClassificationOutputElement(label='POSITIVE', score=0.9998695850372314),
TextClassificationOutputElement(label='NEGATIVE', score=0.0001304351753788069),
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> image = await client.text_to_image("An astronaut riding a horse on the moon.")
>>> image.save("astronaut.png")
>>> image = await client.text_to_image(
... "An astronaut riding a horse on the moon.",
... negative_prompt="low resolution, blurry",
... model="stabilityai/stable-diffusion-2-1",
... )
>>> image.save("better_astronaut.png")
Example of using a third-party provider directly. Usage will be billed on your fal.ai account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="fal-ai", # Use fal.ai provider... api_key="fal-ai-api-key", # Pass your fal.ai API key... )
>>> image = client.text_to_image(
... "A majestic lion in a fantasy forest",
... model="black-forest-labs/FLUX.1-schnell",
... )
>>> image.save("lion.png")
Example of using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="replicate", # Use replicate provider... api_key="hf_...", # Pass your HF token... )
>>> image = client.text_to_image(
... "An astronaut riding a horse on the moon.",
... model="black-forest-labs/FLUX.1-dev",
... )
>>> image.save("astronaut.png")
Example using the Replicate provider with extra parameters:
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="replicate", # Use replicate provider... api_key="hf_...", # Pass your HF token... )
>>> image = client.text_to_image(
... "An astronaut riding a horse on the moon.",
... model="black-forest-labs/FLUX.1-schnell",
... extra_body={"output_quality": 100},
... )
>>> image.save("astronaut.png")
# Must be run in an async context
>>> from pathlib import Path
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> audio = await client.text_to_speech("Hello world")
>>> Path("hello_world.flac").write_bytes(audio)
>>> from huggingface_hub import InferenceClient
>>> lyrics = '''
... [verse]
... In the town where I was born
... Lived a man who sailed to sea
... And he told us of his life
... In the land of submarines
... So we sailed on to the sun
... 'Til we found a sea of green
... And we lived beneath the waves
... In our yellow submarine
... [chorus]
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... '''
>>> genres = "pavarotti-style tenor voice"
>>> client = InferenceClient(
... provider="fal-ai",
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
>>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
>>> withopen("output.mp3", "wb") as f:
... f.write(audio)
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="fal-ai", # Using fal.ai provider... api_key="fal-ai-api-key", # Pass your fal.ai API key... )
>>> video = client.text_to_video(
... "A majestic lion running in a fantasy forest",
... model="tencent/HunyuanVideo",
... )
>>> withopen("lion.mp4", "wb") as file:
... file.write(video)
Example of using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
... provider="replicate", # Using replicate provider... api_key="hf_...", # Pass your HF token... )
>>> video = client.text_to_video(
... "A cat running in a park",
... model="genmo/mochi-1-preview",
... )
>>> withopen("cat.mp4", "wb") as file:
... file.write(video)
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.token_classification("My name is Sarah Jessica Parker but you can call me Jessica")
[
TokenClassificationOutputElement(
entity_group='PER',
score=0.9971321225166321,
word='Sarah Jessica Parker',
start=11,
end=31,
),
TokenClassificationOutputElement(
entity_group='PER',
score=0.9773476123809814,
word='Jessica',
start=52,
end=59,
)
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.translation("My name is Wolfgang and I live in Berlin")
'Mein Name ist Wolfgang und ich lebe in Berlin.'

>>> await client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
TranslationOutput(translation_text='Je m\'appelle Wolfgang et je vis à Berlin.')
Specifying languages:
>>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
"Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.visual_question_answering(
... image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
...     question="What is the animal doing?",
... )
[
VisualQuestionAnsweringOutputElement(score=0.778609573841095, answer='laying down'),
VisualQuestionAnsweringOutputElement(score=0.6957435607910156, answer='sitting'),
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> text = (
... "A new model offers an explanation for how the Galilean satellites formed around the solar system's"... "largest world. Konstantin Batygin did not set out to solve one of the solar system's most puzzling"... " mysteries when he went for a run up a hill in Nice, France."... )
>>> labels = ["space & cosmos", "scientific discovery", "microbiology", "robots", "archeology"]
>>> await client.zero_shot_classification(text, labels)
[
ZeroShotClassificationOutputElement(label='scientific discovery', score=0.7961668968200684),
ZeroShotClassificationOutputElement(label='space & cosmos', score=0.18570658564567566),
ZeroShotClassificationOutputElement(label='microbiology', score=0.00730885099619627),
ZeroShotClassificationOutputElement(label='archeology', score=0.006258360575884581),
ZeroShotClassificationOutputElement(label='robots', score=0.004559356719255447),
]
>>> await client.zero_shot_classification(text, labels, multi_label=True)
[
ZeroShotClassificationOutputElement(label='scientific discovery', score=0.9829297661781311),
ZeroShotClassificationOutputElement(label='space & cosmos', score=0.755190908908844),
ZeroShotClassificationOutputElement(label='microbiology', score=0.0005462635890580714),
ZeroShotClassificationOutputElement(label='archeology', score=0.00047131875180639327),
ZeroShotClassificationOutputElement(label='robots', score=0.00030448526376858354),
]
Example with multi_label=True and a custom hypothesis_template:
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.zero_shot_classification(
... text="I really like our dinner and I'm very happy. I don't like the weather though.",
... labels=["positive", "negative", "pessimistic", "optimistic"],
... multi_label=True,
...     hypothesis_template="This text is {} towards the weather",
... )
[
ZeroShotClassificationOutputElement(label='negative', score=0.9231801629066467),
ZeroShotClassificationOutputElement(label='pessimistic', score=0.8760990500450134),
ZeroShotClassificationOutputElement(label='optimistic', score=0.0008674879791215062),
ZeroShotClassificationOutputElement(label='positive', score=0.0005250611575320363)
]
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.zero_shot_image_classification(
... "https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg",
... labels=["dog", "cat", "horse"],
... )
[ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...]
loaded (bool) — Whether the model is currently loaded into HF's Inference API. Models are loaded on demand, which makes a user's first request take longer. If a model is loaded, you can be assured that it is in a healthy state.
framework (str) — The name of the framework that the model was built with, such as ‘transformers’ or ‘text-generation-inference’.
This Dataclass represents the model status in the HF Inference API.
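For reference, a minimal sketch of fetching and reading this dataclass with the synchronous client (output values are illustrative):

>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> status = client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
>>> status.loaded, status.framework
(True, 'text-generation-inference')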
InferenceAPI
InferenceAPI is the legacy way to call the Inference API. The interface is more basic and requires knowing the input parameters and output format for each task. It also cannot connect to other services such as Inference Endpoints or AWS SageMaker. InferenceAPI will soon be deprecated, so we recommend using InferenceClient whenever possible. Check out this guide to learn how to switch from InferenceAPI to InferenceClient in your scripts; a minimal migration sketch also follows the mask-fill example below.
>>> from huggingface_hub.inference_api import InferenceApi
>>> # Mask-fill example
>>> inference = InferenceApi("bert-base-uncased")
>>> inference(inputs="The goal of life is [MASK].")
[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}]
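For comparison, the same mask-fill call with the recommended InferenceClient (a minimal migration sketch):

>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.fill_mask("The goal of life is [MASK].", model="bert-base-uncased")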
>>> # Question Answering example
>>> inference = InferenceApi("deepset/roberta-base-squad2")
>>> inputs = {
... "question": "What's my name?",
... "context": "My name is Clara and I live in Berkeley.",
... }
>>> inference(inputs)
{'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'}
>>> # Zero-shot example
>>> inference = InferenceApi("typeform/distilbert-base-uncased-mnli")
>>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!">>> params = {"candidate_labels": ["refund", "legal", "faq"]}
>>> inference(inputs, params)
{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]}
>>> # Overriding configured task
>>> inference = InferenceApi("bert-base-uncased", task="feature-extraction")
>>> # Text-to-image
>>> inference = InferenceApi("stabilityai/stable-diffusion-2-1")
>>> inference("cat")
<PIL.PngImagePlugin.PngImageFile image (...)>
>>> # Return as raw response to parse the output yourself
>>> inference = InferenceApi("mio/amadeus")
>>> response = inference("hello world", raw_response=True)
>>> response.headers
{"Content-Type": "audio/flac", ...}
>>> response.content  # raw bytes from server
b'(...)'