import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def chat_greeter(msg, history):
    # Append the new user message to the prior history (openai-style message dicts).
    messages = history + [{"role": "user", "content": msg}]

    # Stream the assistant reply, yielding the growing message so the UI updates live.
    response = {"role": "assistant", "content": ""}
    for message in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = message.choices[0].delta.content
        if token is not None:  # the final streamed chunk may carry no content
            response["content"] += token
        yield response


# with gr.Blocks() as demo:
#     chatbot = gr.Chatbot(type="messages")
#     msg = gr.Textbox()
#     clear = gr.ClearButton([msg, chatbot])
#
#     msg.submit(chat_greeter, [msg, chatbot], [chatbot])

demo = gr.ChatInterface(chat_greeter, type="messages")

demo.launch()