import gradio as gr
from huggingface_hub import InferenceClient

# Hosted inference endpoint for the Zephyr-7B chat model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def chat_greeter(msg, history):
    """Stream an assistant reply for *msg*, given the prior chat *history*.

    Parameters
    ----------
    msg : str
        The user's latest message.
    history : list[dict]
        Prior turns as OpenAI-style ``{"role": ..., "content": ...}`` dicts
        (the ``type="messages"`` format used by ``gr.ChatInterface``).

    Yields
    ------
    dict
        The same assistant message dict, re-yielded with progressively
        longer ``content`` so the UI renders the reply as it streams.
    """
    messages = history + [{"role": "user", "content": msg}]
    response = {"role": "assistant", "content": ""}
    for chunk in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = chunk.choices[0].delta.content
        # The final streamed chunk can carry a None delta; skip it so we
        # never concatenate None into the accumulated reply (TypeError).
        if token:
            response["content"] += token
            yield response


demo = gr.ChatInterface(chat_greeter, type="messages")

# NOTE(security): a commented-out deploy_discord() call that embedded a live
# Discord bot token was removed here. That token is compromised and must be
# regenerated; load any replacement from an environment variable or secret
# store, never from source control.

if __name__ == "__main__":
    demo.launch()