Replies: 3 comments 1 reply
-
The ollama model I linked.
-
I forgot to mention my system.
-
Try for Mac <3: https://github.com/ggml-org/LlamaBarn
For how to run llama.cpp/llama-server, see the guide: https://unsloth.ai/docs/zh/mo-xing/gemma-4#llama.cpp-zhi-nan
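For reference, once a GGUF file is on disk, a minimal llama-server launch only needs a model path and a port; the model filename below is just a placeholder:

```bash
# Minimal launch (sketch); replace the .gguf path with your own model file
./llama-server -m models/gemma-3-4b-it-Q4_K_M.gguf --host 127.0.0.1 --port 8080
```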
-
Yesterday I heard that ollama and LM Studio are wrappers around llama.cpp, so I downloaded the source here, built it, and started it up. I didn't expect it to ship a web UI, but it's all in English, which I can't read (I really hope the maintainer adds a Chinese localization). So I pulled the source onto my Windows laptop, opened it in VS Code, copied strings from the page, and searched for them in the code. It was exhausting; I was up all night. I translated the strings, recompiled, and restarted, then clicked through every button; whenever I found more English I repeated the process, until the UI was mostly localized.
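For anyone localizing the web UI the same way, the practical workflow is to grep the UI sources for the visible string, edit it, and rebuild. A sketch, assuming the web UI lives under tools/server/webui (the path is an assumption; older trees used examples/server/webui):

```bash
# Locate a visible UI string in the web UI sources (path is an assumption)
grep -rn "New conversation" tools/server/webui/src

# After editing the strings, rebuild so the bundled UI is regenerated
cmake --build build --target llama-server
```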
For models, I wanted to reuse what ollama had already downloaded: run `ollama list` to see the models, find the actual file behind each one, and symlink it into llama.cpp/models. But llama.cpp seems unable to load Google's Gemma models, and I don't know why; I downloaded three or four with ollama and none of them load in llama.cpp. Hope the maintainer can take a look.
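A sketch of the symlink step for anyone trying the same, assuming the default ollama store and that `ollama show --modelfile` prints a FROM line pointing at the GGUF blob (the model tag is a placeholder):

```bash
#!/bin/bash
# Link an ollama-managed GGUF blob into llama.cpp/models (sketch)
MODEL="qwen2.5:7b"   # placeholder tag; pick one from `ollama list`
BLOB=$(ollama show --modelfile "$MODEL" | awk '/^FROM / {print $2; exit}')
SAFE_NAME=$(echo "$MODEL" | tr ':/' '__')
ln -s "$BLOB" "$HOME/llama.cpp/models/${SAFE_NAME}.gguf"
```

One caveat: a blob that ollama loads is not guaranteed to load in a given llama.cpp build, since model support moves between versions, so an outdated build is one plausible cause of the Gemma failures.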
Also, starting the server always means typing a long command, so I asked deepseek to write me a launcher script.
chen@mm llama.cpp % cat runllama.sh
#!/bin/bash

# Path configuration
LLAMA_SERVER="$HOME/llama.cpp/bin/llama-server"
MODELS_DIR="$HOME/llama.cpp/models"
DEFAULT_HOST="0.0.0.0"
DEFAULT_PORT="8080"
DEFAULT_N_GPU_LAYERS="auto"

# Color definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# List available models (macOS-compatible, no mapfile)
get_models() {
    local models=()
    for f in "$MODELS_DIR"/*.gguf; do
        if [[ -f "$f" ]]; then
            models+=("$(basename "$f" .gguf)")
        fi
    done
    printf '%s\n' "${models[@]}"
}
# Show help
show_help() {
    echo "Usage: $0 [options]"
    echo ""
    echo "Options:"
    echo "  -h, --help         Show this help"
    echo "  -m, --model MODEL  Specify a model name directly (skip the menu)"
    echo "  --host HOST        Server host address (default: $DEFAULT_HOST)"
    echo "  -p, --port PORT    Server port (default: $DEFAULT_PORT)"
    echo ""
    echo "Examples:"
    echo "  $0                                 # interactive selection"
    echo "  $0 -m qwen3.5_9b                   # start the given model directly"
    echo "  $0 --host 127.0.0.1 --port 8080    # local access only"
}
# Check that llama-server exists
if [[ ! -f "$LLAMA_SERVER" ]]; then
    echo -e "${RED}Error: llama-server not found at $LLAMA_SERVER${NC}"
    exit 1
fi

# Check the models directory
if [[ ! -d "$MODELS_DIR" ]]; then
    echo -e "${RED}Error: models directory $MODELS_DIR does not exist${NC}"
    exit 1
fi

# Collect the model list (macOS-compatible, no mapfile)
models=()
for f in "$MODELS_DIR"/*.gguf; do
    if [[ -f "$f" ]]; then
        models+=("$(basename "$f" .gguf)")
    fi
done
if [[ ${#models[@]} -eq 0 ]]; then
    echo -e "${RED}Error: no model files found in $MODELS_DIR${NC}"
    echo "Please create the model symlinks first"
    exit 1
fi
# Parse command-line arguments
HOST="$DEFAULT_HOST"
PORT="$DEFAULT_PORT"
MODEL_NAME=""
while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
            show_help
            exit 0
            ;;
        -m|--model)
            MODEL_NAME="$2"
            shift 2
            ;;
        --host)
            HOST="$2"
            shift 2
            ;;
        -p|--port)
            PORT="$2"
            shift 2
            ;;
        *)
            echo -e "${RED}Unknown argument: $1${NC}"
            show_help
            exit 1
            ;;
    esac
done
# If no model was specified, show the selection menu
if [[ -z "$MODEL_NAME" ]]; then
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN}   llama-server launcher${NC}"
    echo -e "${GREEN}========================================${NC}"
    echo ""
    for i in "${!models[@]}"; do
        echo "  $((i + 1)). ${models[$i]}"
    done
    echo ""
    read -p "Select a model [1-${#models[@]}]: " choice
    MODEL_NAME="${models[$((choice - 1))]}"
fi
# Validate the model file
MODEL_FILE="$MODELS_DIR/${MODEL_NAME}.gguf"
if [[ ! -f "$MODEL_FILE" ]]; then
    echo -e "${RED}Error: model file not found: $MODEL_FILE${NC}"
    exit 1
fi
echo ""
echo -e "${GREEN}Selected model: ${MODEL_NAME}${NC}"
echo ""

# Read the model's native context size from the GGUF metadata
echo -e "${YELLOW}Reading model information...${NC}"
MODEL_CTX=""
if [[ -f "$HOME/llama.cpp/build/bin/llama-gguf" ]]; then
    MODEL_CTX=$("$HOME/llama.cpp/build/bin/llama-gguf" -f "$MODEL_FILE" 2>/dev/null | grep "llama.context_length" | awk '{print $2}')
fi
# Default parameters
DEFAULT_TEMP="0.80"
DEFAULT_TOP_K="40"
DEFAULT_TOP_P="0.95"
DEFAULT_PRESENCE_PENALTY="0.00"
DEFAULT_FREQUENCY_PENALTY="0.00"
DEFAULT_CTX_SIZE="${MODEL_CTX:-4096}"
DEFAULT_N_PREDICT="512"
DEFAULT_BATCH_SIZE="512"
DEFAULT_UBATCH_SIZE="512"
echo -e "${GREEN}Model native context: $DEFAULT_CTX_SIZE${NC}"
echo ""
# Show the defaults and ask for overrides
echo -e "${YELLOW}========================================${NC}"
echo -e "${YELLOW}Server configuration${NC}"
echo -e "${YELLOW}========================================${NC}"
echo -e "${BLUE}Hint: press Enter to accept the default${NC}"
echo ""

# Host address
read -p "Host address [default: $HOST]: " input
HOST=${input:-$HOST}

# Port
read -p "Port [default: $PORT]: " input
PORT=${input:-$PORT}

echo ""
echo -e "${YELLOW}========================================${NC}"
echo -e "${YELLOW}Model parameters${NC}"
echo -e "${YELLOW}========================================${NC}"
echo ""

# Temperature
read -p "Temperature [default: $DEFAULT_TEMP]: " input
TEMP=${input:-$DEFAULT_TEMP}

# Top K
read -p "Top K [default: $DEFAULT_TOP_K]: " input
TOP_K=${input:-$DEFAULT_TOP_K}

# Top P
read -p "Top P [default: $DEFAULT_TOP_P]: " input
TOP_P=${input:-$DEFAULT_TOP_P}

# Presence penalty
read -p "Presence Penalty [default: $DEFAULT_PRESENCE_PENALTY]: " input
PRESENCE_PENALTY=${input:-$DEFAULT_PRESENCE_PENALTY}

# Frequency penalty
read -p "Frequency Penalty [default: $DEFAULT_FREQUENCY_PENALTY]: " input
FREQUENCY_PENALTY=${input:-$DEFAULT_FREQUENCY_PENALTY}

# Context size
read -p "Context size [default: $DEFAULT_CTX_SIZE]: " input
CTX_SIZE=${input:-$DEFAULT_CTX_SIZE}

# Maximum tokens to predict
read -p "Max predicted tokens [default: $DEFAULT_N_PREDICT, -1 = unlimited]: " input
N_PREDICT=${input:-$DEFAULT_N_PREDICT}

# Batch size
read -p "Batch Size [default: $DEFAULT_BATCH_SIZE]: " input
BATCH_SIZE=${input:-$DEFAULT_BATCH_SIZE}

# GPU layers
read -p "GPU layers [default: $DEFAULT_N_GPU_LAYERS, auto = automatic, all = everything, number = that many layers]: " input
N_GPU_LAYERS=${input:-$DEFAULT_N_GPU_LAYERS}
echo ""
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Configuration summary${NC}"
echo -e "${GREEN}========================================${NC}"
echo -e "Model:             ${BLUE}$MODEL_NAME${NC}"
echo -e "Host:              ${BLUE}http://$HOST:$PORT${NC}"
echo -e "Temperature:       ${BLUE}$TEMP${NC}"
echo -e "Top K:             ${BLUE}$TOP_K${NC}"
echo -e "Top P:             ${BLUE}$TOP_P${NC}"
echo -e "Presence Penalty:  ${BLUE}$PRESENCE_PENALTY${NC}"
echo -e "Frequency Penalty: ${BLUE}$FREQUENCY_PENALTY${NC}"
echo -e "Context size:      ${BLUE}$CTX_SIZE${NC}"
echo -e "Max tokens:        ${BLUE}$N_PREDICT${NC}"
echo -e "Batch Size:        ${BLUE}$BATCH_SIZE${NC}"
echo -e "GPU layers:        ${BLUE}$N_GPU_LAYERS${NC}"
echo ""
read -p "Start the server? (y/n): " confirm
if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
    echo "Cancelled"
    exit 0
fi
echo ""
echo -e "${GREEN}Starting llama-server...${NC}"
echo "================================================"
# Build the command as an array so paths with spaces stay intact
CMD=("$LLAMA_SERVER"
    -m "$MODEL_FILE"
    --host "$HOST"
    --port "$PORT"
    --temp "$TEMP"
    --top-k "$TOP_K"
    --top-p "$TOP_P"
    --presence-penalty "$PRESENCE_PENALTY"
    --frequency-penalty "$FREQUENCY_PENALTY"
    --ctx-size "$CTX_SIZE"
    --n-predict "$N_PREDICT"
    --batch-size "$BATCH_SIZE")

# Append the GPU layers flag
if [[ "$N_GPU_LAYERS" != "auto" ]]; then
    if [[ "$N_GPU_LAYERS" == "all" ]]; then
        CMD+=(-ngl 999)
    else
        CMD+=(-ngl "$N_GPU_LAYERS")
    fi
fi

echo -e "${BLUE}Command:${NC}"
echo "${CMD[*]}"
echo "================================================"
echo ""

# Run it
"${CMD[@]}"
chen@mm llama.cpp %
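To try the script: `chmod +x runllama.sh`, then run it with no arguments for the interactive menu, or pass everything up front, e.g. `./runllama.sh -m qwen3.5_9b --host 127.0.0.1 -p 8080`.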
Finally, I hope llama.cpp keeps getting better and better.