Using Llama-3.2-1B as an example
Llama-3.2-1B · ModelScope model library
Install the library
pip install modelscope
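The inference script further down also imports torch and transformers, so install those as well if they are not already present (the exact command may vary with your environment, for example a CUDA build of torch):
pip install torch transformers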
Run the program
# Download the model
from modelscope import snapshot_download
model_dir = snapshot_download('LLM-Research/Llama-3.2-1B')
The downloaded model is stored at C:\Users\Administrator\.cache\modelscope\hub\models\LLM-Research\Llama-3___2-1B
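Note that snapshot_download returns this local directory, so if the download and the inference run in the same script there is no need to hard-code the Windows path. A minimal sketch (the variable names are illustrative, not from the original script):

from modelscope import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

# snapshot_download returns the local path of the downloaded model,
# which can be passed straight to transformers
model_dir = snapshot_download('LLM-Research/Llama-3.2-1B')
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)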
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Directory containing the model
model_path = r"C:\Users\Administrator\.cache\modelscope\hub\models\LLM-Research\Llama-3___2-1B"

# Load the tokenizer and the model
try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, use_safetensors=True)
except Exception as e:
    print(f"Error while loading the model: {e}")
    import sys
    sys.exit(1)

# Move the model to the GPU if one is available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

while True:
    # Read input text
    input_text = input("Please enter some text: ")
    # Tokenize the input text
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    # Generate output
    try:
        output = model.generate(input_ids, max_length=150, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
    except Exception as e:
        print(f"Error while generating output: {e}")
        import sys
        sys.exit(1)
    # Convert the output IDs back into text
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # Print the input and the output
    print("Input:", input_text)
    print("Output:", output_text)
Run result (the quality is poor because the model has only 1B parameters): a local "artificial unintelligence" successfully deployed.
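If the beam-search output looks repetitive, switching to sampling-based decoding is a common tweak. This is only a hedged sketch; the temperature and top_p values are illustrative and will not fix the fundamental limits of a 1B base model:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = r"C:\Users\Administrator\.cache\modelscope\hub\models\LLM-Research\Llama-3___2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, use_safetensors=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

input_ids = tokenizer.encode("Once upon a time", return_tensors="pt").to(device)
# Sampling instead of beam search: more varied, less repetitive continuations
output = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))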