Deploying DeepSeek locally with Ollama
# Install Ollama
curl -fsSL https://ollama.com/install.sh | sh   # Linux & macOS
brew install ollama                             # macOS

# Run the service
ollama serve                   # run in the foreground
sudo systemctl start ollama    # run as a persistent service

# Stop the service
sudo systemctl stop ollama.service

# Run a model (model library: https://ollama.com/library)
ollama run qwen2.5:7b          # Alibaba Qwen
ollama run deepseek-r1:8b      # DeepSeek reasoning model

# Stop a running model
ollama stop qwen2.5:7b
ollama stop deepseek-r1:8b

# Remove a model
ollama rm qwen2.5:7b

# Call via the REST API
curl -X POST http://localhost:11434/api/generate -d '{
  "model": "deepseek-r1:8b",
  "prompt": "Introduce the deepseek-r1:8b model"
}'

# Call via the Python API
import ollama
response = ollama.chat(model='deepseek-r1:8b', messages=[
    {'role': 'user', 'content': 'Introduce the deepseek-r1:8b model'}
])

# Web UI: https://github.com/rtcfirefly/ollama-ui
# Tuning & customization
Create a Modelfile with the following content:

FROM deepseek-r1:8b
# add custom parameters here, e.g. tokenizer or sampling settings

Then build the custom model:

ollama create my-deepseek-r1 -f Modelfile

After that you can run it:

ollama run my-deepseek-r1
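For reference, a minimal Modelfile sketch with a few commonly adjusted parameters (the values are illustrative, not tuned recommendations):

FROM deepseek-r1:8b
# sampling temperature: lower is more deterministic
PARAMETER temperature 0.6
# context window size in tokens
PARAMETER num_ctx 4096
# system prompt baked into the custom model
SYSTEM "You are a concise technical assistant."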
# Other optimizations
Running deepseek-r1:8b locally needs at least 8 GB of GPU memory; if the GPU is insufficient, Ollama automatically falls back to the CPU, but inference becomes noticeably slower.
Ollama uses the GPU automatically; to force the backend at launch: OLLAMA_BACKEND="cuda" ollama run deepseek-r1:8b
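To check whether a loaded model actually landed on the GPU or fell back to the CPU, ollama ps lists the running models together with the processor split:

ollama ps    # the PROCESSOR column shows e.g. "100% GPU" or "100% CPU"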
If you run into OOM problems on Linux, try raising the ulimit values:
ulimit -n 65535
ulimit -u 4096
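Note that ulimit only affects the current shell; when Ollama runs as a systemd service (as above), the limits have to be raised on the unit itself. A sketch, assuming the service is named ollama:

sudo systemctl edit ollama
# add to the override file:
# [Service]
# LimitNOFILE=65535
# LimitNPROC=4096
sudo systemctl daemon-reload
sudo systemctl restart ollama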
Calling the locally deployed DeepSeek from Python
from langchain_ollama.llms import OllamaLLM
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
template = "Write a story about {name}"
prompt_template = ChatPromptTemplate.from_template(template)
embeddings = OllamaEmbeddings()  # not used in this example; kept for reference
llm = OllamaLLM(base_url="http://127.0.0.1:11434", model="deepseek-r1:7b")
chain = prompt_template | llm | StrOutputParser()
res = chain.invoke({"name": "dog"})
print(res)
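If you want output as it is generated (DeepSeek-R1 emits a long reasoning trace before the final answer), the same chain can also be streamed; a minimal sketch:

# stream chunks as they arrive instead of waiting for the full response
for chunk in chain.stream({"name": "dog"}):
    print(chunk, end="", flush=True)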
Calling local tools with DeepSeek-V3
from langchain_openai import ChatOpenAI
import os
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import SecretStr
from langchain.tools import StructuredTool
api_key = os.getenv("DeepSeekR1Key")
if not api_key:
    raise ValueError("DeepSeekR1Key environment variable is not set")
chat_model = ChatOpenAI(
    api_key=SecretStr(api_key),  # replace with your own key
    base_url="https://api.siliconflow.cn/v1",
    model="deepseek-ai/DeepSeek-V3",
)
your_prompt = ChatPromptTemplate.from_messages([
    ("system", "Hello"),
    ("user", "{input}"),
    ("assistant", "Greet someone"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])
def your_function():
    print("your tool")
    return "email sent"  # the agent sees this string as the tool's result
your_tool = StructuredTool.from_function(
    func=your_function,
    name="your_function",
    description="Send an email to someone"
)
agent = create_openai_tools_agent(chat_model, [your_tool], your_prompt)  # pass the tools here too, so the model knows it can call them
agent_executor = AgentExecutor(agent=agent, tools=[your_tool], max_iterations=3, verbose=True)
response = agent_executor.invoke(
    {"input": "Help me send an email to someone"}
)
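AgentExecutor.invoke returns a dict; the model's final answer is under the "output" key:

print(response["output"])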
Implementing a chat session with deepseek-chat
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage,HumanMessage,SystemMessage
model = ChatOpenAI(
    base_url="https://api.deepseek.com/v1",
    model="deepseek-chat",
    api_key="your api key"  # type: ignore
)
chat_history = []
system_message = SystemMessage(content="You are a helpful AI assistant")
chat_history.append(system_message)
while True:
    query = input("You: ")
    if query.lower() == "exit":
        break
    chat_history.append(HumanMessage(content=query))
    result = model.invoke(chat_history)
    response = result.content
    chat_history.append(AIMessage(content=response))
    print(f"AI: {response}")
RAG with DeepSeek and ChromaDB
1. Prepare the knowledge-base data
2. Chunk the data, embed it, and persist the vectors to ChromaDB
import os
from dotenv import load_dotenv
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma

# Local embedding model served by Ollama
embeddings = OllamaEmbeddings(
    base_url="http://127.0.0.1:11434",
    model="bge-m3",
)

load_dotenv()
api_key = os.getenv("DeepSeekR1Key")
if not api_key:
    raise ValueError("DeepSeekR1Key environment variable is not set")

current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(current_dir, "books", "pfdsj.txt")
persistent_directory = os.path.join(current_dir, "db", "chroma_db")

if not os.path.exists(persistent_directory):
    print("Persistent directory does not exist")
    if not os.path.exists(file_path):
        raise FileNotFoundError(
            f"The file {file_path} does not exist. Please check the path"
        )

    # Text loader for reading the source file
    loader = TextLoader(file_path, encoding="utf-8")
    documents = loader.load()

    # Split the documents into smaller chunks
    # chunk_size: size of each chunk; chunk_overlap: overlap between adjacent chunks
    text_splitter = CharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100, length_function=len
    )
    docs = text_splitter.split_documents(documents)

    # Embed the chunks and persist them to the Chroma vector store
    db = Chroma.from_documents(
        docs, embeddings, persist_directory=persistent_directory
    )
else:
    print("Vector store already exists. No need to initialize.")
3. Query the data
import os
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

load_dotenv()

# Use the same embedding model that was used to build the store
embeddings = OllamaEmbeddings(
    base_url="http://127.0.0.1:11434",
    model="bge-m3"
)

current_dir = os.path.dirname(os.path.abspath(__file__))
persistent_directory = os.path.join(current_dir, "db", "chroma_db")

# Open the existing Chroma store
db = Chroma(persist_directory=persistent_directory, embedding_function=embeddings)

query = "孙少平住在哪里?"  # "Where does Sun Shaoping live?" (a question about the source text)

# Retrieve the 3 most similar chunks above the score threshold
retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.3},
)
retriever_docs = retriever.invoke(query)
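To sanity-check what came back, you can print the retrieved chunks before handing them to the LLM:

# inspect the retrieved chunks
for doc in retriever_docs:
    print(doc.page_content[:200])
    print("-" * 40)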
4. Have the LLM integrate the retrieval results
To be continued...
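A minimal sketch of how step 4 could look, reusing retriever_docs and query from step 3 and the deepseek-chat setup from the session example above; the prompt wording here is illustrative:

from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

llm = ChatOpenAI(
    base_url="https://api.deepseek.com/v1",
    model="deepseek-chat",
    api_key="your api key",  # type: ignore
)

# Stuff the retrieved chunks into the prompt as context
context = "\n\n".join(doc.page_content for doc in retriever_docs)
messages = [
    SystemMessage(content="Answer the question using only the provided context."),
    HumanMessage(content=f"Context:\n{context}\n\nQuestion: {query}"),
]
answer = llm.invoke(messages)
print(answer.content)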