Spaces:

gokaygokay
/

Gemma-2-llamacpp

Running on Zero

App Files Files Community

Gemma-2-llamacpp / app.py

alex16052G

app.py

86e0b44 verified about 2 months ago

raw

history blame

5.71 kB

	import gradio as gr

	import os

	from huggingface_hub.file_download import http_get
	from llama_cpp import Llama

	# System prompt (Spanish text). obtener_tokens_sistema() turns it into the
	# opening "system" turn, and the UI shows it in a read-only textbox.
	SYSTEM_PROMPT = "Tú eres ABI, un asistente automático de habla española. Hablas con las personas y las ayudas."

	def obtener_tokens_mensaje(modelo, rol, contenido):
	    """Tokenize a single chat turn.

	    The turn is rendered as ``<rol>\\n<contenido>\\n</s>``, encoded as UTF-8
	    and handed to ``modelo.tokenize`` with ``special=True``.

	    Args:
	        modelo: Object exposing ``tokenize(bytes, special=...)``.
	        rol: Role label written on the first line of the turn.
	        contenido: Message text of the turn.

	    Returns:
	        Whatever ``modelo.tokenize`` returns for the rendered turn
	        (callers in this file treat it as a list of tokens).
	    """
	    turno = f"{rol}\n{contenido}\n</s>"
	    return modelo.tokenize(turno.encode("utf-8"), special=True)

	def obtener_tokens_sistema(modelo):
	    """Tokenize the fixed system prompt as a ``system`` turn.

	    Args:
	        modelo: Object exposing ``tokenize(bytes, special=...)``; it is
	            forwarded unchanged to ``obtener_tokens_mensaje``.

	    Returns:
	        The tokens produced by ``obtener_tokens_mensaje`` for
	        ``SYSTEM_PROMPT`` with the role label ``"system"``.
	    """
	    # The helper's parameters are named ``rol`` and ``contenido``. Unpacking
	    # a dict keyed "role"/"content" into it raised TypeError on every call,
	    # so the arguments are passed under their real names.
	    return obtener_tokens_mensaje(modelo, rol="system", contenido=SYSTEM_PROMPT)

	def cargar_modelo(
	    directorio: str = ".",
	    nombre_modelo: str = "ecastera/eva-mistral-7b-spanish-GGUF",
	    url_modelo: str = "https://huggingface.co/ecastera/eva-mistral-7b-spanish-GGUF/resolve/main/Turdus-trained-20-int4.gguf",
	):
	    """Download the GGUF model file if it is missing and load it.

	    Args:
	        directorio: Directory under which the model file is stored.
	        nombre_modelo: Path of the model file relative to ``directorio``.
	            It may contain ``/`` (the default does); missing parent
	            directories are created.
	        url_modelo: URL the file is fetched from when it is not on disk.

	    Returns:
	        A ``Llama`` instance created from the local file with ``n_ctx=2048``.

	    Raises:
	        OSError: If the target directory or file cannot be created.
	        Any exception raised by ``http_get`` or ``Llama`` propagates.
	    """
	    ruta_modelo_final = os.path.join(directorio, nombre_modelo)

	    print("Descargando todos los archivos...")
	    if not os.path.exists(ruta_modelo_final):
	        # The default name has a "/" in it, so the parent directory must
	        # exist before the file can be opened for writing.
	        os.makedirs(os.path.dirname(ruta_modelo_final) or ".", exist_ok=True)

	        # Download next to the final path and move into place only after
	        # the transfer finished. An interrupted download then never leaves
	        # a truncated file that would pass the exists() check next start.
	        ruta_parcial = ruta_modelo_final + ".part"
	        with open(ruta_parcial, "wb") as f:
	            http_get(url_modelo, f)
	        os.replace(ruta_parcial, ruta_modelo_final)
	    os.chmod(ruta_modelo_final, 0o777)
	    print("¡Archivos descargados!")

	    modelo = Llama(
	        model_path=ruta_modelo_final,
	        n_ctx=2048,
	    )

	    print("¡Modelo cargado!")
	    return modelo

	# Load the model once, when the module is executed; bot() reads it through
	# this module-level name.
	MODELO = cargar_modelo()

	def usuario(mensaje, historial):
	    """Append the submitted message to the chat history as an open turn.

	    Args:
	        mensaje: Text the user just submitted.
	        historial: Existing list of ``[user_message, bot_message]`` pairs.
	            ``None`` is treated as an empty history.

	    Returns:
	        A tuple ``("", nuevo_historial)``: an empty string and a new list
	        holding the previous pairs plus ``[mensaje, None]``. The list
	        passed in is not modified.
	    """
	    # ``historial or []`` guards against a None history, where the former
	    # ``historial + [...]`` raised TypeError.
	    nuevo_historial = [*(historial or []), [mensaje, None]]
	    return "", nuevo_historial

	def bot(
	    historial,
	    prompt_sistema,
	    top_p,
	    top_k,
	    temp
	):
	    """Stream the model's reply to the newest user message.

	    Generator. It builds the prompt tokens from the system turn and the
	    chat history, asks ``MODELO`` to generate, and after every generated
	    token yields ``historial`` with the reply text so far stored in the
	    last pair.

	    Args:
	        historial: List of ``[user_message, bot_message]`` pairs. The last
	            pair carries the message to answer; its second slot is
	            overwritten in place while streaming.
	        prompt_sistema: Accepted because the UI passes it, but not read;
	            the system turn comes from ``obtener_tokens_sistema``.
	        top_p: Forwarded to ``modelo.generate`` as ``top_p``.
	        top_k: Forwarded to ``modelo.generate`` as ``top_k``.
	        temp: Forwarded to ``modelo.generate`` as ``temp``.

	    Yields:
	        ``historial`` after each generated token, until the end-of-sequence
	        token is produced.
	    """
	    if not historial:
	        # No turn to answer: hand the history back unchanged instead of
	        # failing on historial[-1].
	        yield historial
	        return

	    modelo = MODELO
	    # Copy so the extend() calls below never touch a list owned elsewhere.
	    tokens = list(obtener_tokens_sistema(modelo))

	    # Replay every completed exchange that precedes the newest message.
	    for mensaje_usuario, mensaje_bot in historial[:-1]:
	        tokens.extend(
	            obtener_tokens_mensaje(modelo=modelo, rol="usuario", contenido=mensaje_usuario)
	        )
	        if mensaje_bot:
	            tokens.extend(
	                obtener_tokens_mensaje(modelo=modelo, rol="bot", contenido=mensaje_bot)
	            )

	    ultimo_mensaje_usuario = historial[-1][0]
	    tokens.extend(
	        obtener_tokens_mensaje(modelo=modelo, rol="usuario", contenido=ultimo_mensaje_usuario)
	    )

	    # Open the bot turn so that generation continues it.
	    tokens.extend(modelo.tokenize("bot\n".encode("utf-8"), special=True))

	    # NOTE(review): nothing here checks that the prompt fits the context
	    # size the model was created with - confirm long chats are handled.
	    generador = modelo.generate(
	        tokens,
	        top_k=top_k,
	        top_p=top_p,
	        temp=temp
	    )

	    token_eos = modelo.token_eos()
	    # Accumulate raw bytes and decode the whole reply on every step. A
	    # multi-byte UTF-8 character can be split across tokens; decoding each
	    # token alone with "ignore" dropped such characters for good, whereas
	    # here they show up as soon as their last byte has arrived.
	    bytes_respuesta = bytearray()
	    for token in generador:
	        if token == token_eos:
	            break
	        bytes_respuesta.extend(modelo.detokenize([token]))
	        historial[-1][1] = bytes_respuesta.decode("utf-8", "ignore")
	        yield historial

	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    # Page header: title (with an inline image tag) and a one-line blurb.
	    favicon = '<img src="" width="48px" style="display: inline">'
	    gr.Markdown(
	        f"""<h1><center>{favicon}Saiga Mistral 7B GGUF Q4_K</center></h1>
	Esta es una demo de un modelo basado en Mistral que habla español
	"""
	    )

	    # Top row: conversation on the left, sampling controls on the right.
	    with gr.Row():
	        with gr.Column(scale=5):
	            prompt_sistema = gr.Textbox(
	                label="Prompt del sistema",
	                placeholder="",
	                value=SYSTEM_PROMPT,
	                interactive=False,
	            )
	            chatbot = gr.Chatbot(label="Diálogo", height=400)
	        with gr.Column(min_width=80, scale=1):
	            with gr.Tab(label="Parámetros de generación"):
	                top_p = gr.Slider(
	                    label="Top-p",
	                    minimum=0.0, maximum=1.0, value=0.9, step=0.05,
	                    interactive=True,
	                )
	                top_k = gr.Slider(
	                    label="Top-k",
	                    minimum=10, maximum=100, value=30, step=5,
	                    interactive=True,
	                )
	                temp = gr.Slider(
	                    label="Temperatura",
	                    minimum=0.0, maximum=2.0, value=0.01, step=0.01,
	                    interactive=True,
	                )

	    # Second row: message box plus the three action buttons.
	    with gr.Row():
	        with gr.Column():
	            msg = gr.Textbox(
	                label="Enviar mensaje",
	                placeholder="Enviar mensaje",
	                show_label=False,
	            )
	        with gr.Column():
	            with gr.Row():
	                submit = gr.Button("Enviar")
	                stop = gr.Button("Detener")
	                clear = gr.Button("Limpiar")

	    # Disclaimer shown under the controls.
	    with gr.Row():
	        gr.Markdown(
	            """ADVERTENCIA: El modelo puede generar textos que sean incorrectos fácticamente o inapropiados éticamente. No nos hacemos responsables de esto."""
	        )

	    def _conectar_envio(disparador):
	        """Wire one trigger: store the user turn, then chain the streamed reply."""
	        return disparador(
	            fn=usuario,
	            inputs=[msg, chatbot],
	            outputs=[msg, chatbot],
	            queue=False,
	        ).success(
	            fn=bot,
	            inputs=[chatbot, prompt_sistema, top_p, top_k, temp],
	            outputs=chatbot,
	            queue=True,
	        )

	    # Pressing Enter in the message box.
	    evento_enviar = _conectar_envio(msg.submit)

	    # Pressing the send button.
	    evento_click_enviar = _conectar_envio(submit.click)

	    # Stop generation: no function of its own, it only cancels both events.
	    stop.click(
	        fn=None,
	        inputs=None,
	        outputs=None,
	        cancels=[evento_enviar, evento_click_enviar],
	        queue=False,
	    )

	    # Clear history: the chatbot receives None.
	    clear.click(lambda: None, None, chatbot, queue=False)

	demo.queue(max_size=128)
	demo.launch()