binqiangliu committed
Commit 498ddeb
Parent(s): a837a98

Update app.py

Files changed (1): app.py (+7 -7)
app.py CHANGED
@@ -1,11 +1,9 @@
 # import dependencies
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
-
 import os
 import gradio as gr
 #from google.colab import drive
-
 import chromadb
 from langchain.llms import HuggingFacePipeline
 from langchain.document_loaders import TextLoader
@@ -17,6 +15,9 @@ from langchain.document_loaders import PyPDFDirectoryLoader
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 
+#import locale
+#locale.getpreferredencoding = lambda: "UTF-8"
+
 # specify model huggingface mode name
 model_name = "anakin87/zephyr-7b-alpha-sharded"
 #https://huggingface.co/anakin87/zephyr-7b-alpha-sharded
@@ -31,10 +32,11 @@ def load_quantized_model(model_name: str):
     :return: Loaded quantized model.
     """
     bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
+        #load_in_4bit=True,
+        load_in_4bit=False,
         #bnb_4bit_use_double_quant=True,
         bnb_4bit_use_double_quant=False,
-        bnb_4bit_quant_type="nf4",
+        bnb_4bit_quant_type="nf4"
         #bnb_4bit_compute_dtype=torch.bfloat16
     )
 
@@ -42,7 +44,6 @@ def load_quantized_model(model_name: str):
         model_name,
         load_in_4bit=True,
         #torch_dtype=torch.bfloat16,
-        #torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         quantization_config=bnb_config
     )
     return model
@@ -57,7 +58,7 @@ def initialize_tokenizer(model_name: str):
     """
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     tokenizer.bos_token_id = 1 # Set beginning of sentence token id
-    return tokenizer
+    return tokenizer
 
 # load model
 model = load_quantized_model(model_name)
@@ -125,7 +126,6 @@ def create_conversation(query: str, chat_history: list) -> tuple:
         chat_history.append((query, result['answer']))
         return '', chat_history
 
-
     except Exception as e:
         chat_history.append((query, e))
         return '', chat_history
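
As a readability aid (not part of the commit), here is a sketch of how the two functions touched above read after this change, reconstructed from the hunks. The docstring bodies, the AutoModelForCausalLM.from_pretrained( wrapper line, and the exact indentation fall outside the diff context and are assumed. Note that BitsAndBytesConfig now carries load_in_4bit=False while from_pretrained is still called with load_in_4bit=True, so the two settings disagree after this commit.

# Sketch reconstructed from the diff above; anything outside the hunk context is assumed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def load_quantized_model(model_name: str):
    """
    Load the model with a bitsandbytes quantization config (docstring body assumed).
    :return: Loaded quantized model.
    """
    bnb_config = BitsAndBytesConfig(
        #load_in_4bit=True,
        load_in_4bit=False,        # changed by this commit (was load_in_4bit=True)
        #bnb_4bit_use_double_quant=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4"  # trailing comma dropped by this commit
        #bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(  # wrapper line assumed from context
        model_name,
        load_in_4bit=True,         # unchanged: still requests 4-bit loading here
        #torch_dtype=torch.bfloat16,
        quantization_config=bnb_config
    )
    return model

def initialize_tokenizer(model_name: str):
    """
    Initialize the tokenizer (docstring body assumed).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.bos_token_id = 1 # Set beginning of sentence token id
    return tokenizer  # this line is replaced by the commit (removed and re-added in the diff)

If the intent was to turn 4-bit loading off, the load_in_4bit=True argument in from_pretrained would presumably need the same change; as committed, the config and the call ask for different things.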