import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def chat_greeter(msg, history):
    # Append the new user message to the prior history (openai-style message dicts).
    messages = history + [{"role": "user", "content": msg}]

    # Stream the assistant reply, yielding the growing message so the UI updates live.
    response = {"role": "assistant", "content": ""}
    for message in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = message.choices[0].delta.content
        if token is not None:  # the final streamed chunk may carry no content
            response["content"] += token
        yield response


# with gr.Blocks() as demo:
#     chatbot = gr.Chatbot(type="messages")
#     msg = gr.Textbox()
#     clear = gr.ClearButton([msg, chatbot])
#
#     msg.submit(chat_greeter, [msg, chatbot], [chatbot])

demo = gr.ChatInterface(chat_greeter, type="messages")

demo.launch()