雄安工作总结；WDD-AI服务器

构建内容prompt
2026-06-08 09:28:55 +08:00 · 2026-05-27 17:44:02 +08:00
18 changed files with 1646 additions and 0 deletions
--- a/13-构建专家-SHELL/shell脚本/shell-prompt-2.md
+++ b/13-构建专家-SHELL/shell脚本/shell-prompt-2.md
--- a/13-构建专家-SHELL/shell脚本/shell-prompt.md
+++ b/13-构建专家-SHELL/shell脚本/shell-prompt.md
--- a/13-构建专家-SHELL/构建专家/build-release.ps1
+++ b/13-构建专家-SHELL/构建专家/build-release.ps1
@@ -0,0 +1,420 @@
+#!/usr/bin/env pwsh
+#Requires -Version 7.5
+
+[CmdletBinding()]
+param(
+    # Which phase(s) to run; dispatched by the switch at the end of the script.
+    [ValidateSet("sync", "build", "all", "clean")]
+    [string]$Action = "all",
+
+    # Build target, forwarded as the first argument of the remote ./scripts/build-release.sh.
+    [ValidateSet("linux-x86_64", "linux-aarch64", "all")]
+    [string]$Target = "all",
+
+    # Build profile; "release" is rejected right after this param block.
+    [ValidateSet("dev", "release")]
+    [string]$BuildProfile = "dev",
+
+    # Runtime host IP(s); in dev profile they are validated, de-duplicated and exported
+    # to the remote build as BUILD_RUNTIME_HOST_IP.
+    [Alias("RunnableHostIp", "TargetHostIp")]
+    [string[]]$RuntimeHostIp = @(),
+
+    # Output directory, forwarded as the second argument of the remote build script.
+    [string]$OutputDir = "build/release",
+
+    # SSH login user on the Linux build host.
+    [Parameter(Mandatory = $true)]
+    [string]$LinuxHostUser,
+
+    # Address of the Linux build host (used verbatim in the ssh/rsync destination).
+    [Parameter(Mandatory = $true)]
+    [string]$LinuxHostIp,
+
+    # Remote workspace directory; must start with "/".
+    [Parameter(Mandatory = $true)]
+    [string]$LinuxRemoteWorkspaceDir,
+
+    # Local private key passed to ssh via -i; must be an absolute path that exists.
+    [Parameter(Mandatory = $true)]
+    [string]$WindowsSshKeyPath,
+
+    # Local rsync executable; must be an absolute path that exists. An ssh.exe in the
+    # same directory is preferred over the one found on PATH.
+    [Parameter(Mandatory = $true)]
+    [string]$WindowsRsyncExe,
+
+    # Patterns passed to rsync as --exclude.
+    [string[]]$RsyncExcludes = @(
+        ".git/",
+        ".idea/",
+        ".vscode/",
+        "build/",
+        "bin/"
+    ),
+
+    # Build switches exported to the remote build script as environment variables.
+    # ObfuscateBuild and UpxBuild are overwritten later in the script according to BuildProfile.
+    [bool]$ObfuscateBuild = $true,
+    [bool]$UpxBuild = $true,
+    [bool]$EmbedRkeBinaries = $true,
+    [string]$RkeVersion = "v1.8.13",
+    [string]$GarbleSeed = "",
+    [bool]$GarbleLiterals = $false,
+    [string]$GarbleMatch = "",
+    [bool]$AllowK8sBreakingGarble = $false,
+    [string]$UpxArgs = "--best --lzma"
+)
+
+# Strict mode plus "Stop" makes uninitialised variables and non-terminating errors abort the script.
+Set-StrictMode -Version Latest
+$ErrorActionPreference = "Stop"
+
+# Release builds are refused by this script; only the dev profile continues past this point.
+if ($BuildProfile -eq "release") {
+    throw "本地 PowerShell 构建禁止 release 模式。release 构建仅允许在受控 Runner 中执行，并由超级管理员凭据授权。"
+}
+
+# The module root is the parent of the script directory; the workspace root is the parent of the module root.
+$ModuleName = "rmdc-watchdog"
+$ModuleRoot = Split-Path -Parent $PSScriptRoot
+$WorkspaceRoot = Split-Path -Parent $ModuleRoot
+
+# Writes one log line to the host in the form "[yyyy-MM-dd HH:mm:ss] [LEVEL] message".
+#   -Level   : INFO, WARN or SUCCESS.
+#   -Message : text to print.
+function Write-Log {
+    param(
+        [Parameter(Mandatory = $true)][ValidateSet("INFO", "WARN", "SUCCESS")][string]$Level,
+        [Parameter(Mandatory = $true)][string]$Message
+    )
+    $stamp = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
+    Write-Host ("[{0}] [{1}] {2}" -f $stamp, $Level, $Message)
+}
+
+# Returns the resolved form of an existing path; throws when the path does not exist.
+function Resolve-FullPath {
+    param([Parameter(Mandatory = $true)][string]$Path)
+    if (Test-Path -LiteralPath $Path) {
+        return (Resolve-Path -LiteralPath $Path).Path
+    }
+    throw "路径不存在：$Path"
+}
+
+# Throws unless $Path is a fully qualified (absolute) path.
+#   -Path : path to check.
+#   -Name : parameter name used in the error message.
+function Assert-AbsoluteWindowsPath {
+    param(
+        [Parameter(Mandatory = $true)][string]$Path,
+        [Parameter(Mandatory = $true)][string]$Name
+    )
+    # IsPathRooted also accepts drive-relative ("C:key") and root-relative ("\key") paths,
+    # which still depend on the current directory or drive. IsPathFullyQualified rejects them.
+    if (-not [System.IO.Path]::IsPathFullyQualified($Path)) {
+        throw "$Name 必须是 Windows 绝对路径：$Path"
+    }
+}
+
+# Throws unless $Path starts with "/".
+#   -Path : remote path to check.
+#   -Name : parameter name used in the error message.
+function Assert-AbsoluteLinuxPath {
+    param(
+        [Parameter(Mandatory = $true)][string]$Path,
+        [Parameter(Mandatory = $true)][string]$Name
+    )
+    if ($Path.StartsWith("/")) {
+        return
+    }
+    throw "$Name 必须是 Linux 绝对路径：$Path"
+}
+
+# Throws unless $Value is a well-formed IP address.
+#   -Value : candidate address (IPv4 dotted-quad or IPv6).
+#   -Name  : parameter name used in the error message.
+function Assert-IPAddress {
+    param(
+        [Parameter(Mandatory = $true)][string]$Value,
+        [Parameter(Mandatory = $true)][string]$Name
+    )
+    $parsed = [System.Net.IPAddress]::None
+    $isValid = [System.Net.IPAddress]::TryParse($Value, [ref]$parsed)
+    # TryParse accepts shorthand IPv4 forms with fewer than four parts (e.g. "1" or "192.168.1"),
+    # so IPv4 input must additionally be a plain decimal dotted-quad without leading zeros.
+    if ($isValid -and $parsed.AddressFamily -eq [System.Net.Sockets.AddressFamily]::InterNetwork) {
+        $isValid = $Value -match '^(0|[1-9][0-9]{0,2})(\.(0|[1-9][0-9]{0,2})){3}$'
+    }
+    if (-not $isValid) {
+        throw "$Name 必须是有效 IP 地址：$Value"
+    }
+}
+
+# Splits, validates and de-duplicates a list of IP addresses.
+#   -Values : raw entries; each may hold several addresses separated by commas, semicolons or whitespace.
+#   -Name   : parameter name used in error messages.
+# Returns the distinct addresses in first-seen order.
+# Throws when an entry is not a valid IP address or when no address remains.
+function Normalize-IPAddressList {
+    param(
+        # A Mandatory [string[]] rejects an empty array and empty elements during parameter binding.
+        # Allow both so that blank input reaches the explicit check below and gets its message.
+        [Parameter(Mandatory = $true)][AllowEmptyCollection()][AllowEmptyString()][string[]]$Values,
+        [Parameter(Mandatory = $true)][string]$Name
+    )
+    $result = [System.Collections.Generic.List[string]]::new()
+    foreach ($value in $Values) {
+        if ([string]::IsNullOrWhiteSpace($value)) {
+            continue
+        }
+        $parts = $value -split '[,;\s]+'
+        foreach ($part in $parts) {
+            if ([string]::IsNullOrWhiteSpace($part)) {
+                continue
+            }
+            $candidate = $part.Trim()
+            Assert-IPAddress -Value $candidate -Name $Name
+            if (-not $result.Contains($candidate)) {
+                [void]$result.Add($candidate)
+            }
+        }
+    }
+    if ($result.Count -eq 0) {
+        throw "$Name 必须至少包含一个有效 IP 地址。"
+    }
+    return @($result)
+}
+
+# Locates the ssh client: an ssh.exe next to the resolved rsync executable wins,
+# otherwise "ssh.exe" and then "ssh" are looked up as commands. Throws when none is found.
+function Get-SshExe {
+    if ($null -ne $script:ResolvedRsyncExe) {
+        $bundledSsh = Join-Path (Split-Path -Parent $script:ResolvedRsyncExe) "ssh.exe"
+        if (Test-Path -LiteralPath $bundledSsh) {
+            return $bundledSsh
+        }
+    }
+
+    foreach ($commandName in @("ssh.exe", "ssh")) {
+        $found = Get-Command $commandName -ErrorAction SilentlyContinue
+        if ($null -ne $found) {
+            return $found.Source
+        }
+    }
+    throw "未找到 ssh/ssh.exe，请安装 OpenSSH 客户端或使用 cwRsync 自带 ssh.exe。"
+}
+
+# Converts a local path into the form rsync expects.
+#   "X:\a\b" becomes "/cygdrive/x/a/b"; a path that already starts with "/" is returned as is.
+# Throws for any other shape.
+function Convert-ToRsyncPath {
+    param([Parameter(Mandatory = $true)][string]$WindowsPath)
+
+    $absolute = [System.IO.Path]::GetFullPath($WindowsPath)
+    if ($absolute.StartsWith("/")) {
+        return $absolute
+    }
+    if ($absolute -match '^([A-Za-z]):\\') {
+        $driveLetter = $Matches[1].ToLowerInvariant()
+        # Drop the "X:" prefix and switch to forward slashes.
+        $tail = $absolute.Substring(2).Replace('\', '/')
+        return "/cygdrive/$driveLetter$tail"
+    }
+    throw "无法转换为 rsync 可识别路径：$WindowsPath"
+}
+
+# Runs an external program and throws when it exits with a non-zero code.
+#   -Exe          : executable path or command name.
+#   -Arguments    : argument vector.
+#   -StdinContent : optional text fed to the process on standard input. Line endings are
+#                   normalised to LF and the text is written as UTF-8 without BOM to a
+#                   temporary file, which is removed afterwards.
+function Invoke-External {
+    param(
+        [Parameter(Mandatory = $true)][string]$Exe,
+        [Parameter(Mandatory = $true)][string[]]$Arguments,
+        [Parameter()][string]$StdinContent
+    )
+
+    Write-Log -Level INFO -Message ("执行命令: {0} {1}" -f $Exe, ($Arguments -join " "))
+
+    if ($PSBoundParameters.ContainsKey("StdinContent")) {
+        # Start-Process joins -ArgumentList elements with spaces and does not quote them, so an
+        # element containing whitespace or a double quote (for example a key path under
+        # "C:\Users\First Last") would be split. Quote such elements using the Windows
+        # command-line convention: double the backslashes in front of a quote or at the end,
+        # escape embedded quotes, then wrap in quotes.
+        $quotedArguments = foreach ($argument in $Arguments) {
+            if ($argument -notmatch '[\s"]') {
+                $argument
+            }
+            else {
+                $escaped = $argument -replace '(\\*)"', '$1$1\"'
+                $escaped = $escaped -replace '(\\+)$', '$1$1'
+                '"' + $escaped + '"'
+            }
+        }
+
+        $tmp = [System.IO.Path]::GetTempFileName()
+        try {
+            $content = $StdinContent -replace "`r`n", "`n" -replace "`r", "`n"
+            $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
+            [System.IO.File]::WriteAllText($tmp, $content, $utf8NoBom)
+            $proc = Start-Process -FilePath $Exe -ArgumentList $quotedArguments -RedirectStandardInput $tmp -Wait -NoNewWindow -PassThru
+            if ($null -eq $proc -or $proc.ExitCode -ne 0) {
+                $exitCode = if ($null -eq $proc) { -1 } else { $proc.ExitCode }
+                throw "命令执行失败，退出码：$exitCode"
+            }
+        }
+        finally {
+            # Best-effort cleanup; -LiteralPath keeps wildcard characters in the temp path literal.
+            Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue
+        }
+        return
+    }
+
+    # Without stdin content the call operator passes the argument vector directly.
+    & $Exe @Arguments
+    if ($LASTEXITCODE -ne 0) {
+        throw "命令执行失败，退出码：$LASTEXITCODE"
+    }
+}
+
+# Builds the text of a remote bash script: a fixed prologue ("set -Eeuo pipefail" and a
+# log() helper that prefixes messages with a timestamp and "[REMOTE]") followed by $Body.
+# The backtick-escaped "$" signs keep $(date ...) and $* literal so that bash, not
+# PowerShell, expands them; $Body is inserted as is.
+function New-RemoteShellScript {
+    param([Parameter(Mandatory = $true)][string]$Body)
+    @"
+set -Eeuo pipefail
+
+log() {
+  printf '[%s] [REMOTE] %s\n' "`$(date '+%F %T')" "`$*"
+}
+
+$Body
+"@
+}
+
+# Runs $ScriptContent on the Linux host by piping it into "bash -s --" over ssh.
+# Host key checking is disabled and no known_hosts file is used.
+function Invoke-RemoteBash {
+    param([Parameter(Mandatory = $true)][string]$ScriptContent)
+    $connectionOptions = @(
+        "-i", $script:ResolvedSshKeyPath,
+        "-o", "StrictHostKeyChecking=no",
+        "-o", "UserKnownHostsFile=/dev/null"
+    )
+    $destination = "$script:LinuxHostUser@$script:LinuxHostIp"
+    $remoteCommand = @("bash", "-s", "--")
+    $sshArgs = $connectionOptions + $destination + $remoteCommand
+    Invoke-External -Exe $script:SshExe -Arguments $sshArgs -StdinContent $ScriptContent
+}
+
+# Wraps $Value in single quotes for a POSIX shell; each embedded single quote is
+# replaced by the sequence '"'"' (close quote, double-quoted quote, reopen quote).
+function Convert-ToShellSingleQuoted {
+    param([Parameter(Mandatory = $true)][AllowEmptyString()][string]$Value)
+    $escaped = $Value.Replace("'", '''"''"''')
+    return "'{0}'" -f $escaped
+}
+
+# Returns the current branch name of the repository at $RepoPath. When HEAD is not a
+# symbolic ref the short commit hash is used instead, and "detached" when git yields nothing.
+function Get-LocalBranch {
+    param([Parameter(Mandatory = $true)][string]$RepoPath)
+    $name = (git -C $RepoPath symbolic-ref --quiet --short HEAD 2>$null)
+    if ([string]::IsNullOrWhiteSpace($name)) {
+        $name = (git -C $RepoPath rev-parse --short HEAD 2>$null)
+    }
+    if (-not [string]::IsNullOrWhiteSpace($name)) {
+        return $name.Trim()
+    }
+    return "detached"
+}
+
+# Returns the most recent tag reachable from HEAD in $RepoPath, or "v0.0.0" when git yields nothing.
+function Get-LocalGitTag {
+    param([Parameter(Mandatory = $true)][string]$RepoPath)
+    $latestTag = (git -C $RepoPath describe --tags --abbrev=0 2>$null)
+    if (-not [string]::IsNullOrWhiteSpace($latestTag)) {
+        return $latestTag.Trim()
+    }
+    return "v0.0.0"
+}
+
+# Returns the short hash of HEAD in $RepoPath, or "unknown" when git yields nothing.
+function Get-LocalCommit {
+    param([Parameter(Mandatory = $true)][string]$RepoPath)
+    $shortHash = (git -C $RepoPath rev-parse --short HEAD 2>$null)
+    if (-not [string]::IsNullOrWhiteSpace($shortHash)) {
+        return $shortHash.Trim()
+    }
+    return "unknown"
+}
+
+# Validate the shape of the path parameters before touching the file system.
+Assert-AbsoluteLinuxPath -Path $LinuxRemoteWorkspaceDir -Name "LinuxRemoteWorkspaceDir"
+Assert-AbsoluteWindowsPath -Path $WindowsSshKeyPath -Name "WindowsSshKeyPath"
+Assert-AbsoluteWindowsPath -Path $WindowsRsyncExe -Name "WindowsRsyncExe"
+
+# Resolve local paths (each must exist). Get-SshExe reads $ResolvedRsyncExe, so it must run after it is set.
+$ResolvedWorkspaceRoot = Resolve-FullPath -Path $WorkspaceRoot
+$ResolvedSshKeyPath = Resolve-FullPath -Path $WindowsSshKeyPath
+$ResolvedRsyncExe = Resolve-FullPath -Path $WindowsRsyncExe
+$SshExe = Get-SshExe
+
+# The workspace root must contain a go.work file.
+$GoWorkPath = Join-Path $ResolvedWorkspaceRoot "go.work"
+if (-not (Test-Path -LiteralPath $GoWorkPath)) {
+    throw "workspace 根目录缺少 go.work：$ResolvedWorkspaceRoot"
+}
+
+# Git metadata of the local module, exported to the remote build later.
+$LocalBranch = Get-LocalBranch -RepoPath $ModuleRoot
+$LocalGitTag = Get-LocalGitTag -RepoPath $ModuleRoot
+$LocalCommit = Get-LocalCommit -RepoPath $ModuleRoot
+$RemoteModuleDir = "$LinuxRemoteWorkspaceDir/$ModuleName"
+
+# dev profile: at least one valid RuntimeHostIp is required, and obfuscation and UPX are forced off
+# regardless of the values passed in.
+# NOTE(review): this check runs for every Action, so "clean" and "sync" also require RuntimeHostIp — confirm intended.
+$EffectiveRuntimeHostIps = ""
+if ($BuildProfile -eq "dev") {
+    $normalizedRuntimeHostIps = Normalize-IPAddressList -Values $RuntimeHostIp -Name "RuntimeHostIp"
+    $EffectiveRuntimeHostIps = ($normalizedRuntimeHostIps -join ",")
+    $ObfuscateBuild = $false
+    $UpxBuild = $false
+}
+else {
+    # Not reachable as the script stands: the "release" profile is rejected near the top of the script.
+    $ObfuscateBuild = $true
+    $UpxBuild = $true
+}
+
+Write-Log -Level INFO -Message "workspace=$ResolvedWorkspaceRoot"
+Write-Log -Level INFO -Message "module=$ModuleName branch=$LocalBranch tag=$LocalGitTag commit=$LocalCommit target=$Target profile=$BuildProfile runtime_host_ip=$EffectiveRuntimeHostIps"
+Write-Log -Level INFO -Message "remote=${LinuxHostUser}@${LinuxHostIp}:${LinuxRemoteWorkspaceDir}"
+
+# Creates the remote workspace directory (mkdir -p) on the Linux host.
+function Invoke-RemotePrepareDir {
+    $workspaceQ = Convert-ToShellSingleQuoted -Value $LinuxRemoteWorkspaceDir
+    # The path is passed to log as a separate single-quoted word: inside a double-quoted bash
+    # string a "$", backtick or double quote in the path would be expanded by the remote shell.
+    # log joins its arguments with a space, so the printed text is unchanged.
+    $remoteScript = New-RemoteShellScript -Body @"
+log 'prepare workspace:' $workspaceQ
+mkdir -p $workspaceQ
+"@
+    Invoke-RemoteBash -ScriptContent $remoteScript
+}
+
+# Mirrors the local workspace root to the remote workspace directory with rsync over ssh.
+# The transfer deletes remote files that no longer exist locally, skips every pattern in
+# $RsyncExcludes, and does not preserve permissions, owner, group or directory times.
+function Invoke-RsyncSync {
+    $sourceRoot = Convert-ToRsyncPath -WindowsPath $ResolvedWorkspaceRoot
+    $keyForRsync = Convert-ToRsyncPath -WindowsPath $ResolvedSshKeyPath
+    $sshForRsync = Convert-ToRsyncPath -WindowsPath $SshExe
+    $destination = "${LinuxHostUser}@${LinuxHostIp}:${LinuxRemoteWorkspaceDir}/"
+    # Remote shell command handed to rsync via -e; both paths are wrapped in double quotes.
+    $remoteShell = "`"$sshForRsync`" -i `"$keyForRsync`" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
+
+    $argList = [System.Collections.Generic.List[string]]::new()
+    $argList.AddRange([string[]]@(
+            "-az",
+            "--delete",
+            "--force",
+            "--omit-dir-times",
+            "--no-perms",
+            "--no-owner",
+            "--no-group"
+        ))
+    foreach ($pattern in $RsyncExcludes) {
+        $argList.Add("--exclude")
+        $argList.Add($pattern)
+    }
+    # The trailing slash on the source copies the directory contents rather than the directory itself.
+    $argList.AddRange([string[]]@("-e", $remoteShell, "$sourceRoot/", $destination))
+
+    Invoke-External -Exe $ResolvedRsyncExe -Arguments $argList.ToArray()
+}
+
+# Deletes the remote workspace directory (rm -rf) on the Linux host.
+# Throws instead of running when the path consists only of slashes.
+function Invoke-RemoteClean {
+    # A workspace path of "/" (or "//", ...) would make rm -rf target the file system root.
+    if ([string]::IsNullOrWhiteSpace($LinuxRemoteWorkspaceDir.TrimEnd('/'))) {
+        throw "拒绝清理根目录：$LinuxRemoteWorkspaceDir"
+    }
+    $workspaceQ = Convert-ToShellSingleQuoted -Value $LinuxRemoteWorkspaceDir
+    # The path is passed to log as a separate single-quoted word: inside a double-quoted bash
+    # string a "$", backtick or double quote in the path would be expanded by the remote shell.
+    # log joins its arguments with a space, so the printed text is unchanged.
+    $remoteScript = New-RemoteShellScript -Body @"
+log 'cleanup workspace:' $workspaceQ
+rm -rf $workspaceQ
+"@
+    Invoke-RemoteBash -ScriptContent $remoteScript
+}
+
+# Runs ./scripts/build-release.sh <Target> <OutputDir> inside the remote module directory.
+# Build options and git metadata are exported as environment variables first. When the remote
+# directory has a .git folder, a checkout of the local branch is attempted and failures are ignored.
+function Invoke-RemoteBuild {
+    # Every value embedded in the remote script is single-quoted for the shell.
+    $moduleNameQ = Convert-ToShellSingleQuoted -Value $ModuleName
+    $targetQ = Convert-ToShellSingleQuoted -Value $Target
+    $outputQ = Convert-ToShellSingleQuoted -Value $OutputDir
+    $moduleDirQ = Convert-ToShellSingleQuoted -Value $RemoteModuleDir
+    $branchQ = Convert-ToShellSingleQuoted -Value $LocalBranch
+    $obfuscateBuildQ = Convert-ToShellSingleQuoted -Value $(if ($ObfuscateBuild) { "true" } else { "false" })
+    $upxBuildQ = Convert-ToShellSingleQuoted -Value $(if ($UpxBuild) { "true" } else { "false" })
+    $embedRkeQ = Convert-ToShellSingleQuoted -Value $(if ($EmbedRkeBinaries) { "true" } else { "false" })
+    $rkeVersionQ = Convert-ToShellSingleQuoted -Value $RkeVersion
+    $garbleSeedQ = Convert-ToShellSingleQuoted -Value $GarbleSeed
+    $garbleLiteralsQ = Convert-ToShellSingleQuoted -Value $(if ($GarbleLiterals) { "true" } else { "false" })
+    $garbleMatchQ = Convert-ToShellSingleQuoted -Value $GarbleMatch
+    $allowK8sBreakingGarbleQ = Convert-ToShellSingleQuoted -Value $(if ($AllowK8sBreakingGarble) { "true" } else { "false" })
+    $upxArgsQ = Convert-ToShellSingleQuoted -Value $UpxArgs
+    $gitTagQ = Convert-ToShellSingleQuoted -Value $LocalGitTag
+    $gitCommitQ = Convert-ToShellSingleQuoted -Value $LocalCommit
+    $buildProfileQ = Convert-ToShellSingleQuoted -Value $BuildProfile
+    $runtimeHostIpQ = Convert-ToShellSingleQuoted -Value $EffectiveRuntimeHostIps
+
+    # The log lines use the quoted values as separate words. A git branch name may contain "$",
+    # backticks or double quotes, which the remote shell would expand inside a double-quoted
+    # string. log joins its arguments with a space, so the printed text is unchanged.
+    $remoteScript = New-RemoteShellScript -Body @"
+log 'build' module=$moduleNameQ branch=$branchQ target=$targetQ profile=$buildProfileQ
+cd $moduleDirQ
+if [ -d .git ]; then
+  git checkout $branchQ >/dev/null 2>&1 || true
+fi
+export BUILD_PROFILE=$buildProfileQ
+export BUILD_RUNTIME_HOST_IP=$runtimeHostIpQ
+export OBFUSCATE_BUILD=$obfuscateBuildQ
+export UPX_BUILD=$upxBuildQ
+export EMBED_RKE_BINARIES=$embedRkeQ
+export RKE_VERSION=$rkeVersionQ
+export STRICT_SECURITY=1
+export GARBLE_SEED=$garbleSeedQ
+export GARBLE_LITERALS=$garbleLiteralsQ
+export GARBLE_MATCH=$garbleMatchQ
+export ALLOW_K8S_BREAKING_GARBLE=$allowK8sBreakingGarbleQ
+export UPX_ARGS=$upxArgsQ
+export BUILD_GIT_TAG=$gitTagQ
+export BUILD_GIT_BRANCH=$branchQ
+export BUILD_GIT_COMMIT=$gitCommitQ
+./scripts/build-release.sh $targetQ $outputQ
+log 'build done:' module=$moduleNameQ
+"@
+    Invoke-RemoteBash -ScriptContent $remoteScript
+}
+
+# Run the phase(s) selected by -Action and report success.
+if ($Action -eq "clean") {
+    Invoke-RemoteClean
+    Write-Log -Level SUCCESS -Message "远端清理完成"
+}
+elseif ($Action -eq "sync") {
+    Invoke-RemotePrepareDir
+    Invoke-RsyncSync
+    Write-Log -Level SUCCESS -Message "rsync 同步完成"
+}
+elseif ($Action -eq "build") {
+    Invoke-RemoteBuild
+    Write-Log -Level SUCCESS -Message "远端构建完成"
+}
+elseif ($Action -eq "all") {
+    Invoke-RemotePrepareDir
+    Invoke-RsyncSync
+    Invoke-RemoteBuild
+    Write-Log -Level SUCCESS -Message "rsync 同步 + 远端构建完成"
+}
--- a/13-构建专家-SHELL/构建专家/最终构建-prompt.md
+++ b/13-构建专家-SHELL/构建专家/最终构建-prompt.md
@@ -0,0 +1,236 @@
+你是一名资深 DevOps、Docker、Docker Compose、PowerShell、Linux 与 Go 构建专家，负责设计、编写、审查并优化生产级构建、同步、发布与远程执行流程。
+
+你的输出应专业、严谨、可落地，优先提供可直接执行的脚本、配置文件和操作步骤。所有脚本必须具备明确参数、严格校验、清晰日志、稳定错误处理和良好的跨平台路径兼容性。
+
+一、核心能力要求
+
+1. Docker 与 Docker Compose
+   - 能够编写生产级 Dockerfile。
+   - 能够设计 docker-compose.yml 与相关环境配置。
+   - 能够处理多阶段构建、构建缓存、镜像体积优化、基础镜像选择、权限控制等问题。
+   - 能够为不同 CPU 架构构建镜像，包括 linux/amd64 与 linux/arm64。
+   - 能够使用 Docker Buildx 创建并推送多架构镜像。
+   - 能够处理 Docker 构建过程中的网络、权限、依赖、平台架构不匹配等问题。
+
+2. Go 构建
+   - 熟悉 Go module、go.work、交叉编译、CGO、ldflags、版本信息注入等构建机制。
+   - 能够处理 Go 项目在不同 Linux 架构下的构建问题。
+   - 能够区分 dev 与 release 构建模式。
+   - 能够根据构建目标生成 linux-x86_64、linux-aarch64 或 all 构建产物。
+   - 能够处理混淆、压缩、内嵌二进制资源、版本号、Git 分支、Git Tag、Git Commit 等构建元信息。
+
+3. 中国大陆服务器环境适配
+   - 当目标服务器位于中国大陆境内时，必须配置必要的加速源。
+   - 应覆盖 Docker registry mirror、Go module proxy、Linux 包管理器镜像源等。
+   - 加速配置应集中管理，避免在脚本中分散硬编码。
+   - 应优先保证构建过程在网络不稳定环境下可重复执行。
+
+二、Windows 远程操作 Linux 服务器要求
+
+1. 本地环境
+   - 本地控制端为 Windows。
+   - 远程执行入口使用 PowerShell 7.5 或更高版本。
+   - PowerShell 脚本文件头应使用：
+     #!/usr/bin/env pwsh
+     #Requires -Version 7.5
+   - PowerShell 脚本必须启用：
+     Set-StrictMode -Version Latest
+     $ErrorActionPreference = "Stop"
+
+2. rsync 与 ssh
+   - 文件同步必须使用 Windows 上的 rsync.exe。
+   - rsync.exe 路径必须通过参数传入，并且必须是 Windows 绝对路径。
+   - ssh.exe 应使用 rsync.exe 同目录下的 ssh.exe。
+   - 不应依赖隐式 PATH 查找 rsync。
+   - 调用 rsync 时应显式指定远程 shell：
+     -e "<ssh.exe 绝对路径> -i <私钥路径> -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
+   - Windows 路径传递给 rsync 前应转换为 /cygdrive/<drive>/... 格式。
+   - Windows 私钥路径、rsync.exe 路径、ssh.exe 路径均应解析为绝对路径后再使用。
+
+3. SSH 跳板机
+   - 脚本应支持 SSH 跳板机模式。
+   - 跳板机参数应集中定义在脚本参数区，包括：
+     - JumpHost
+     - JumpUser
+     - JumpPort
+     - JumpSshKeyPath
+     - EnableJumpHost
+   - ssh 与 rsync 的调用均应支持 ProxyJump 或 ProxyCommand。
+   - 跳板机私钥路径必须是 Windows 绝对路径。
+   - 当直连目标服务器网络质量较差时，应允许通过中间服务器转发连接。
+
+三、PowerShell 脚本设计规范
+
+1. 参数结构
+   PowerShell 脚本应使用 [CmdletBinding()] 与 param 块集中定义参数。
+
+   操作类型参数应包含：
+   - Action，允许值：
+     - sync
+     - build
+     - all
+     - clean
+   - 默认值为 all。
+
+   构建目标参数应包含：
+   - Target，允许值：
+     - linux-x86_64
+     - linux-aarch64
+     - all
+   - 默认值为 all。
+
+   构建模式参数应包含：
+   - BuildProfile，允许值：
+     - dev
+     - release
+   - 默认值为 dev。
+
+   远程服务器参数应包含：
+   - LinuxHostUser，必填。
+   - LinuxHostIp，必填。
+   - LinuxRemoteWorkspaceDir，必填，必须是 Linux 绝对路径。
+   - WindowsSshKeyPath，必填，必须是 Windows 绝对路径。
+   - WindowsRsyncExe，必填，必须是 Windows 绝对路径。
+
+   运行时主机参数应包含：
+   - RuntimeHostIp，支持字符串数组。
+   - 支持别名 RunnableHostIp 与 TargetHostIp。
+   - dev 模式下必须至少包含一个有效 IP 地址。
+   - 支持用逗号、分号、空白字符分隔多个 IP。
+   - 应去重并校验 IP 合法性。
+
+   输出目录参数应包含：
+   - OutputDir，默认值为 build/release。
+
+2. 默认排除项
+   rsync 同步时应默认排除以下目录：
+   - .git/
+   - .idea/
+   - .vscode/
+   - build/
+   - bin/
+
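+3. 脚本应包含构建控制参数（例如 ObfuscateBuild、UpxBuild、EmbedRkeBinaries、RkeVersion、GarbleSeed、GarbleLiterals、GarbleMatch、AllowK8sBreakingGarble、UpxArgs）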
+
+4. 应区分 dev 与 release 模式：dev 模式关闭混淆与 UPX 压缩，并要求提供 RuntimeHostIp；release 模式禁止在本地 PowerShell 中执行
+
+5. 路径与参数校验
+   脚本应实现并使用以下校验能力：
+   - 校验 Windows 路径是否为绝对路径。
+   - 校验 Linux 路径是否以 / 开头。
+   - 校验文件或目录是否存在。
+   - 校验 IP 地址格式。
+   - 对 IP 列表进行拆分、去空、去重、校验。
+   - 对传入 shell 的字符串进行单引号安全转义。
+
+6. 日志规范
+   脚本应提供统一日志函数。
+   日志格式应包含时间、级别和消息，例如：
+   [yyyy-MM-dd HH:mm:ss] [INFO] message
+   [yyyy-MM-dd HH:mm:ss] [WARN] message
+   [yyyy-MM-dd HH:mm:ss] [SUCCESS] message
+
+7. 外部命令调用
+   - 所有外部命令调用必须检查退出码。
+   - 执行失败时应抛出包含退出码的错误。
+   - 支持通过临时文件向远程 bash 标准输入传递脚本内容。
+   - 临时文件应使用 UTF-8 无 BOM 写入。
+   - 临时文件使用后必须清理。
+   - 输出执行命令日志时应打印可排查的命令与参数。
+
+8. Git 元信息
+   脚本应自动获取以下信息：
+   - 当前 Git 分支。
+   - 如果处于 detached 状态，则使用短 Commit。
+   - 最近 Git Tag；若不存在则使用 v0.0.0。
+   - 当前短 Commit；若获取失败则使用 unknown。
+
+9. 工作目录约定
+   - 应从当前脚本目录向上推导模块根目录与 workspace 根目录。
+   - workspace 根目录必须包含 go.work。
+   - 远程模块目录应由 LinuxRemoteWorkspaceDir 与模块名拼接生成。
+   - 远程构建前应进入远程模块目录。
+
+四、远程 Linux Shell 执行规范
+
+1. 远程脚本模板
+   远程 shell 脚本必须使用：
+   set -Eeuo pipefail
+
+   必须定义日志函数，格式示例：
+   log() {
+     printf '[%s] [REMOTE] %s\n' "$(date '+%F %T')" "$*"
+   }
+
+2. 远程执行方式
+   - PowerShell 应通过 ssh 执行：
+     bash -s --
+   - shell 内容通过标准输入传递。
+   - SSH 参数必须包含：
+     -i <私钥路径>
+     -o StrictHostKeyChecking=no
+     -o UserKnownHostsFile=/dev/null
+   - 启用跳板机时应附加 ProxyJump 或等价 ProxyCommand 配置。
+
+3. 远程目录准备
+   sync 或 all 执行前应远程创建工作目录：
+   mkdir -p <LinuxRemoteWorkspaceDir>
+
+4. 远程清理
+   clean 操作应删除远程工作目录：
+   rm -rf <LinuxRemoteWorkspaceDir>
+
+5. 文件同步
+   rsync 应使用以下基础参数：
+   -az
+   --delete
+   --force
+   --omit-dir-times
+   --no-perms
+   --no-owner
+   --no-group
+
+   同步源为本地 workspace 根目录。
+   同步目标为：
+   <LinuxHostUser>@<LinuxHostIp>:<LinuxRemoteWorkspaceDir>/
+
+6. 远程构建
+   远程构建前应进入远程模块目录。
+   如果远程目录存在 .git，可尝试切换到本地分支，失败不应中断构建。
+
+   构建命令应采用：
+   ./scripts/build-release.sh <Target> <OutputDir>
+
+五、Action 行为定义
+
+1. clean
+   - 删除远程工作目录。
+   - 成功后输出远端清理完成。
+
+2. sync
+   - 创建远程工作目录。
+   - 使用 rsync 同步本地 workspace 到远程工作目录。
+   - 成功后输出 rsync 同步完成。
+
+3. build
+   - 在远程模块目录执行构建脚本。
+   - 成功后输出远端构建完成。
+
+4. all
+   - 创建远程工作目录。
+   - 使用 rsync 同步本地 workspace 到远程工作目录。
+   - 执行远程构建。
+   - 成功后输出 rsync 同步 + 远端构建完成。
+
+六、输出要求
+
+当用户要求生成脚本、Dockerfile、docker-compose.yml、构建方案或排错方案时，你应：
+
+1. 直接给出完整、可执行、可维护的实现。
+2. 所有路径、可执行文件、私钥、远程目录必须通过参数配置。
+3. 不使用隐式相对路径调用关键工具。
+4. 不省略错误处理、参数校验、日志输出和退出码检查。
+5. 对 PowerShell 与 shell 的字符串转义进行安全处理。
+6. 对 Windows 到 rsync/cygwin 风格路径转换进行处理。
+7. 对 SSH 跳板机、国内网络加速、多架构构建、Go 构建参数进行完整覆盖。
+8. 如存在权限、安全、网络、架构、路径或构建模式风险，应主动说明并给出修正方案。
--- a/13-构建专家-SHELL/构建专家/构建内容.md
+++ b/13-构建专家-SHELL/构建专家/构建内容.md
@@ -0,0 +1,14 @@
+你是一名精通 Docker、Docker Compose、PowerShell 和 Linux 的专家
+
+你精通Dockerfile的创建，不同CPU架构下的镜像构建，以及如何创建多架构镜像
+
+你精通Go的构建流程，能够处理解决构建过程中的各种问题
+
+如果目标服务器是中国大陆境内的服务器，你需要设置加速镜像
+
+你非常善于利用 Windows 远程操作 Linux 服务器，通过 PowerShell 脚本触发远程服务器上的构建过程
+
+注意事项：
+1. 你应该使用windows上面的rsync.exe工具 ssh也是使用rsync自带的
+2. powershell和shell脚本都应该全路径才可以，都需要作为参数配置在脚本的前方，请参考附件中的写法
+3. 最好能够支持 ssh 跳板机的形式，因为日本的服务器直连质量比较差，需要能够通过中间服务器进行跳转
--- a/31-雄安工作总结/0-基本要求-prompt.md
+++ b/31-雄安工作总结/0-基本要求-prompt.md
@@ -0,0 +1,15 @@
+你是一名优秀的产品经理及工作总结的专家
+
+你作为中国移动成都产业研究院于2025年6月派驻雄安为期一年的“专家”，领域是低空经济方向
+
+核心的工作总结要求如下，请你严格遵守：
+需聚焦个人交流期间（2025年6月至2026年5月）参与的重点工作，可参考维度如下：
+·个人工作概述：简要介绍本人职责分工、工作定位、承担的具体任务及重点工作等。
+·交流期间工作成效（此项为重点撰写内容）：坚持量化成果、写实实绩，量化或具象化呈现各项重点工作成果成效及创新性做法等；
+·个人成长与收获：总结交流期间积累的经验或在工作中形成的有效工作方法等。
+·问题与反思：分析工作中遇到的困难、对自身工作的反思等。
+
+
+请你参考我给出的工作总结汇报，重点是 领导发言V2.docx中的内容，你可以理解为在低空联合专班中，成研院的工作都可以算在我的工作量里面，具体量化请忽略
+
+
--- a/31-雄安工作总结/派驻雄安重点工作汇报20250830.docx
+++ b/31-雄安工作总结/派驻雄安重点工作汇报20250830.docx
--- a/31-雄安工作总结/雄安工作情况汇报-2025总结-王子文.docx
+++ b/31-雄安工作总结/雄安工作情况汇报-2025总结-王子文.docx
--- a/31-雄安工作总结/雄安工作情况汇报-三月报-王子文-2510-2602.docx
+++ b/31-雄安工作总结/雄安工作情况汇报-三月报-王子文-2510-2602.docx
--- a/31-雄安工作总结/领导发言V2.docx
+++ b/31-雄安工作总结/领导发言V2.docx
--- a/32-WDD-AI服务器/0-AI服务器初始化.md
+++ b/32-WDD-AI服务器/0-AI服务器初始化.md
@@ -0,0 +1,158 @@
+服务器的基本信息如下：
+
+root@wdd-ai-server:~# lscpu
+Architecture:            x86_64
+  CPU op-mode(s):        32-bit, 64-bit
+  Address sizes:         48 bits physical, 48 bits virtual
+  Byte Order:            Little Endian
+CPU(s):                  16
+  On-line CPU(s) list:   0-15
+Vendor ID:               AuthenticAMD
+  Model name:            AMD Ryzen 7 5700X 8-Core Processor
+    CPU family:          25
+    Model:               33
+    Thread(s) per core:  2
+    Core(s) per socket:  8
+    Socket(s):           1
+    Stepping:            2
+    Frequency boost:     enabled
+    CPU max MHz:         3400.0000
+    CPU min MHz:         2200.0000
+    BogoMIPS:            6800.35
+    Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse
+                         36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rd
+                         tscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aper
+                         fmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic
+                          movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapi
+                         c cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt
+                         tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_
+                         l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 av
+                         x2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb s
+                         ha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_to
+                         tal cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lb
+                         rv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists
+                         pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pk
+                         u ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
+Virtualization features:
+  Virtualization:        AMD-V
+Caches (sum of all):
+  L1d:                   256 KiB (8 instances)
+  L1i:                   256 KiB (8 instances)
+  L2:                    4 MiB (8 instances)
+  L3:                    32 MiB (1 instance)
+NUMA:
+  NUMA node(s):          1
+  NUMA node0 CPU(s):     0-15
+Vulnerabilities:
+  Gather data sampling:  Not affected
+  Itlb multihit:         Not affected
+  L1tf:                  Not affected
+  Mds:                   Not affected
+  Meltdown:              Not affected
+  Mmio stale data:       Not affected
+  Retbleed:              Not affected
+  Spec rstack overflow:  Mitigation; safe RET, no microcode
+  Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
+  Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+  Spectre v2:            Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP always-on, R
+                         SB filling, PBRSB-eIBRS Not affected
+  Srbds:                 Not affected
+  Tsx async abort:       Not affected
+root@wdd-ai-server:~# lsmem
+RANGE                                 SIZE  STATE REMOVABLE  BLOCK
+0x0000000000000000-0x00000000bfffffff   3G online       yes   0-23
+0x0000000100000000-0x000000043fffffff  13G online       yes 32-135
+
+Memory block size:       128M
+Total online memory:      16G
+Total offline memory:      0B
+root@wdd-ai-server:~# lspci
+00:00.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse Root Complex
+00:00.2 IOMMU: Advanced Micro Devices, Inc. [AMD] Starship/Matisse IOMMU
+00:01.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:01.1 PCI bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse GPP Bridge
+00:01.3 PCI bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse GPP Bridge
+00:02.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:03.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:03.1 PCI bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse GPP Bridge
+00:04.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:05.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:07.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:07.1 PCI bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse Internal PCIe GPP Bridge 0 to bus[E:B]
+00:08.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Host Bridge
+00:08.1 PCI bridge: Advanced Micro Devices, Inc. [AMD] Starship/Matisse Internal PCIe GPP Bridge 0 to bus[E:B]
+00:14.0 SMBus: Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller (rev 61)
+00:14.3 ISA bridge: Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge (rev 51)
+00:18.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 0
+00:18.1 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 1
+00:18.2 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 2
+00:18.3 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 3
+00:18.4 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 4
+00:18.5 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 5
+00:18.6 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 6
+00:18.7 Host bridge: Advanced Micro Devices, Inc. [AMD] Matisse/Vermeer Data Fabric: Device 18h; Function 7
+01:00.0 Non-Volatile memory controller: MAXIO Technology (Hangzhou) Ltd. NVMe SSD Controller MAP1001 (rev 01)
+03:00.0 USB controller: Advanced Micro Devices, Inc. [AMD] 400 Series Chipset USB 3.1 XHCI Controller (rev 01)
+03:00.1 SATA controller: Advanced Micro Devices, Inc. [AMD] 400 Series Chipset SATA Controller (rev 01)
+03:00.2 PCI bridge: Advanced Micro Devices, Inc. [AMD] 400 Series Chipset PCIe Bridge (rev 01)
+20:00.0 PCI bridge: Advanced Micro Devices, Inc. [AMD] 400 Series Chipset PCIe Port (rev 01)
+20:01.0 PCI bridge: Advanced Micro Devices, Inc. [AMD] 400 Series Chipset PCIe Port (rev 01)
+20:04.0 PCI bridge: Advanced Micro Devices, Inc. [AMD] 400 Series Chipset PCIe Port (rev 01)
+22:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller (rev 15)
+26:00.0 VGA compatible controller: NVIDIA Corporation GA104 [GeForce RTX 3070] (rev a1)
+26:00.1 Audio device: NVIDIA Corporation GA104 High Definition Audio Controller (rev a1)
+27:00.0 Non-Essential Instrumentation [1300]: Advanced Micro Devices, Inc. [AMD] Starship/Matisse PCIe Dummy Function
+28:00.0 Non-Essential Instrumentation [1300]: Advanced Micro Devices, Inc. [AMD] Starship/Matisse Reserved SPP
+28:00.1 Encryption controller: Advanced Micro Devices, Inc. [AMD] Starship/Matisse Cryptographic Coprocessor PSPCPP
+28:00.3 USB controller: Advanced Micro Devices, Inc. [AMD] Matisse USB 3.0 Host Controller
+28:00.4 Audio device: Advanced Micro Devices, Inc. [AMD] Starship/Matisse HD Audio Controller
+root@wdd-ai-server:~# cat /etc/os-release
+PRETTY_NAME="Ubuntu 22.04.4 LTS"
+NAME="Ubuntu"
+VERSION_ID="22.04"
+VERSION="22.04.4 LTS (Jammy Jellyfish)"
+VERSION_CODENAME=jammy
+ID=ubuntu
+ID_LIKE=debian
+HOME_URL="https://www.ubuntu.com/"
+SUPPORT_URL="https://help.ubuntu.com/"
+BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+UBUNTU_CODENAME=jammy
+root@wdd-ai-server:~# lsblk
+NAME                      MAJ:MIN RM   SIZE RO TYPE MOUNTPOINTS
+loop0                       7:0    0  63.9M  1 loop /snap/core20/2105
+loop1                       7:1    0    87M  1 loop /snap/lxd/27037
+loop2                       7:2    0  40.4M  1 loop /snap/snapd/20671
+nvme0n1                   259:0    0 476.9G  0 disk
+├─nvme0n1p1               259:1    0     1G  0 part /boot/efi
+├─nvme0n1p2               259:2    0     2G  0 part /boot
+└─nvme0n1p3               259:3    0 473.9G  0 part
+  └─ubuntu--vg-ubuntu--lv 253:0    0 473.9G  0 lvm  /
+root@wdd-ai-server:~# df -TH
+Filesystem                        Type   Size  Used Avail Use% Mounted on
+tmpfs                             tmpfs  1.7G  1.6M  1.7G   1% /run
+/dev/mapper/ubuntu--vg-ubuntu--lv ext4   500G  7.5G  467G   2% /
+tmpfs                             tmpfs  8.4G     0  8.4G   0% /dev/shm
+tmpfs                             tmpfs  5.3M     0  5.3M   0% /run/lock
+/dev/nvme0n1p2                    ext4   2.1G  137M  1.8G   8% /boot
+/dev/nvme0n1p1                    vfat   1.2G  6.4M  1.2G   1% /boot/efi
+tmpfs
+
+GPU显卡的具体型号为 Nvidia Geforce RTX 3070 已经被扩容到16GB的显存
+
+此服务器处于中国大陆境内，如果涉及到无法直连的情况，需要考虑通过局域网内的代理实现连接
+
+代理地址为 192.168.233.206:7899
+
+
+请你给出完整的操作方案，实现如下的内容
+
+请分阶段实现，每次只给出一个阶段的具体步骤，给出提示以后再输出第二段
+
+1. 需要实现 nvidia 驱动的安装，实现 nvidia-smi 的使用
+2. 需要使用vllm的方式运行 Qwen3.6-35B-A3B的模型，需要考虑当前物理服务器的限制，在16GB内存和16GB显存，运行尽可能高精度的模型
+3. 需要给出调用vllm模型的方式
+
+
+这张3070在windows环境经常出现黑屏闪屏的情况，需要nvidia控制面板设置最高性能模式才可以，现在linux环境下如何设置呢，我现在无法ssh上去了，看着像卡死了是否是同种原因造成的呢
--- a/32-WDD-AI服务器/1-nvidia驱动安装.md
+++ b/32-WDD-AI服务器/1-nvidia驱动安装.md
@@ -0,0 +1,119 @@
+## 国内源可用性分析
+
+NVIDIA 驱动在 Ubuntu 上有两个安装途径，国内加速能力差异较大：
+
+| 安装来源 | 能否用国内源 | 说明 |
+|---|---|---|
+| Ubuntu 官方 apt 源（`main/restricted`）中的 nvidia-driver | ✅ **完全可用** | 清华/阿里源均已同步，速度极快 |
+| `ppa:graphics-drivers/ppa`（Launchpad PPA） | ❌ **无法加速** | Launchpad PPA 没有国内镜像，只能走代理 |
+| NVIDIA 官方 `.run` 文件直接下载 | ⚠️ 部分可用 | nvidia.cn 可下，但驱动版本较老 |
+
+***
+
+## 推荐方案：完全走国内源（不依赖代理）
+
+Ubuntu 22.04 官方 `restricted` 仓库已内置较新的 nvidia-driver，换清华源后直接安装即可，**无需添加 PPA** ： [cnblogs](https://www.cnblogs.com/lshan/p/18902838)
+
+### 第一步：替换为清华 Ubuntu 镜像源
+
+```bash
+sudo cp /etc/apt/sources.list /etc/apt/sources.list.bak
+
+sudo tee /etc/apt/sources.list <<'EOF'
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-backports main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-security main restricted universe multiverse
+EOF
+
+sudo apt update
+```
+
+### 第二步：直接安装驱动（无需 PPA）
+
+```bash
+# 查看系统推荐的驱动版本
+ubuntu-drivers devices
+
+# 安装（版本以上一条 ubuntu-drivers devices 的推荐为准；此处以 570-server 为例，清华源内有同步）
+sudo apt install -y nvidia-driver-570-server
+```
+
+
+
+
+CUDA 13.0 是目前最新版本，以下是对应你的 Ubuntu 22.04 + 驱动 580 的完整安装方式 ： [blog.csdn](https://blog.csdn.net/u014451778/article/details/150699513)
+
+***
+
+## 方法一：runfile 本地安装（推荐，无需 apt 源配置）
+
+这种方式只安装 toolkit，**不替换已有的 580 驱动**：
+
+```bash
+# 设置代理
+export http_proxy=http://192.168.233.206:7899
+export https_proxy=http://192.168.233.206:7899
+
+# 下载 CUDA 13.0 run 安装包（约 3GB）
+wget https://developer.download.nvidia.com/compute/cuda/13.0.1/local_installers/cuda_13.0.1_580.82.07_linux.run
+
+# 添加执行权限并安装（只选 toolkit，不安装驱动）
+chmod +x cuda_13.0.1_580.82.07_linux.run
+sudo ./cuda_13.0.1_580.82.07_linux.run --toolkit --silent --override
+```
+
+> 上面的命令带有 `--toolkit --silent`，会静默安装且只安装 CUDA Toolkit，不会出现安装界面；如果去掉 `--silent` 改用交互界面安装，则需要在界面中**取消勾选 Driver**，只保留 CUDA Toolkit 打勾，避免覆盖已安装的 580 驱动 。 [cloud.tencent](https://cloud.tencent.com/developer/article/2573227)
+
+***
+
+## 方法二：apt 源安装（需要代理）
+
+```bash
+export http_proxy=http://192.168.233.206:7899
+export https_proxy=http://192.168.233.206:7899
+
+# 添加 NVIDIA CUDA 源
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt update
+
+# 安装 CUDA 13.0 toolkit（不带驱动）
+sudo apt install -y cuda-toolkit-13-0
+```
+
+***
+
+## 第三步：配置环境变量（两种方法通用）
+
+```bash
+cat >> ~/.bashrc <<'EOF'
+export CUDA_HOME=/usr/local/cuda-13.0
+export PATH=/usr/local/cuda-13.0/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH
+EOF
+
+source ~/.bashrc
+```
+
+***
+
+## 第四步：验证安装
+
+```bash
+# 验证 CUDA 编译器版本
+nvcc --version
+# 应显示 release 13.0
+
+# 确认 libcufile 已存在
+ls /usr/local/cuda-13.0/lib64/libcufile.so*
+
+# 验证 vllm 是否正常
+conda activate vllm
+python -c "import torch; print(torch.cuda.is_available())"
+python -c "import vllm; print(vllm.__version__)"
+```
+
+***
+
+> **注意**：如果 runfile 安装时下载太慢，也可以先走上一条回复中的**备用方案**（pip 安装自带 CUDA 运行库的 torch），那个方案不需要单独安装系统级 CUDA Toolkit，速度更快。
--- a/32-WDD-AI服务器/2-nvidia-高性能模式.md
+++ b/32-WDD-AI服务器/2-nvidia-高性能模式.md
@@ -0,0 +1,114 @@
+**极大可能是同一个原因**，而且在无头（headless）服务器上这个问题比 Windows 更常见。以下是分析和完整解决方案：
+
+***
+
+## 原因分析
+
+Windows 下的"黑屏/闪屏需要设为最高性能"，根本原因是 NVIDIA 驱动的**电源管理自动降频**——显卡在空闲时降到低功率 P 状态，在被突然调用时来不及升频，导致信号丢失或卡死。Linux 上与之对应、首先要检查的设置是 **Persistence Mode（持久化模式）**：开启后即使没有任何程序在使用 GPU，驱动也会保持加载状态，不会反复卸载和重新初始化；该模式默认是关闭的 。 [reddit](https://www.reddit.com/r/nvidia/comments/lyh69k/linux_nvidiasmi_just_wondering_what_is_the_off/gpsoykv/)
+
+你现在 SSH 卡死，**很可能是**：
+1. 显卡在空闲时进入省电睡眠状态，驱动连接断开
+2. 由于服务器没有显示器，驱动没有持续活跃的理由，更容易触发
+3. 系统也可能因此 hang 住，导致 SSH 连接超时 [forum.livepeer](https://forum.livepeer.org/t/guide-run-headless-linux-server-with-nvidia-gpus/1879)
+
+***
+
+## 第一步：物理重启恢复 SSH
+
+当前 SSH 连不上，需要先让服务器恢复：
+
+- **直接按主机电源键短按**（触发正常关机），或长按强制重启
+- 重启后立即 SSH 进去，在系统空闲时执行下面的设置
+
+***
+
+## 第二步：开机后立即设置（SSH 进去后）
+
+```bash
+# 开启持久化模式（相当于 Windows 的"最高性能"）
+sudo nvidia-smi -pm 1
+
+# 验证是否生效（Persistence-M 应显示 On）
+nvidia-smi
+```
+
+但 `-pm 1` **重启后会失效**，需要永久配置。
+
+***
+
+## 第三步：永久开启持久化模式（开机自启）
+
+```bash
+# 启用 nvidia-persistenced 守护进程服务
+sudo systemctl enable nvidia-persistenced
+sudo systemctl start nvidia-persistenced
+
+# 验证服务状态
+sudo systemctl status nvidia-persistenced
+```
+
+如果服务起来后持久化仍然是 Off，编辑服务文件修复：
+
+```bash
+sudo nano /lib/systemd/system/nvidia-persistenced.service
+```
+
+找到这一行：
+```
+ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --no-persistence-mode --verbose
+```
+
+改为：
+```
+ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --persistence-mode --verbose
+```
+
+```bash
+sudo systemctl daemon-reload
+sudo systemctl restart nvidia-persistenced
+sudo nvidia-smi  # 确认 Persistence-M: On
+```
+
+***
+
+## 第四步：额外防护——禁用显卡自动省电
+
+对于 AI 推理服务器，建议还要锁定电源上限，防止动态调频 ： [wikipedia.jakami](https://wikipedia.jakami.de/content/askubuntu.com_en_all_2025-12/questions/1164934/how-to-turn-off-nvidia-gpu-on-a-headless-server)
+
+```bash
+# 设置最大性能模式（等同 Windows 控制面板"最高性能优先"）
+sudo nvidia-smi --auto-boost-default=0
+sudo nvidia-smi -ac 6251,1695   # RTX 3070 的 Memory,Graphics 最大频率
+
+# 写入开机启动（加到 /etc/rc.local 或 systemd 服务中）
+sudo tee /etc/systemd/system/nvidia-perf.service <<'EOF'
+[Unit]
+Description=NVIDIA Performance Mode
+After=nvidia-persistenced.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/bin/nvidia-smi -pm 1
+ExecStart=/usr/bin/nvidia-smi --auto-boost-default=0
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
+EOF
+
+sudo systemctl enable nvidia-perf.service
+sudo systemctl start nvidia-perf.service
+```
+
+***
+
+## 设置完成后的预期状态
+
+```
+nvidia-smi 输出中：
+Persistence-M: On     ← 持久化开启
+Power: 46W / 220W     ← 功率正常
+GPU-Util: 0%          ← 空闲但驱动不会断连
+```
+
+完成以上设置后服务器应当稳定，不再出现 SSH 卡死的情况，之后可以继续 vLLM 的部署。
--- a/32-WDD-AI服务器/2-vllm安装.md
+++ b/32-WDD-AI服务器/2-vllm安装.md
@@ -0,0 +1,214 @@
+好的，以下是完整的**阶段二：vLLM + Qwen3-30B-A3B 量化部署方案**。
+
+***
+
+# 阶段二：vLLM 部署 Qwen3-30B-A3B
+
+## 内存约束分析
+
+你的服务器限制：16GB 显存 + 16GB 内存。各量化版本的显存需求：
+
+| 量化格式 | 模型大小 | 显存需求（含KV Cache） | 能否单卡运行 |
+|---|---|---|---|
+| BF16 原版 | ~60GB | ~65GB | ❌ |
+| FP8 | ~30GB | ~32GB | ❌ |
+| INT8 (GPTQ-Int8) | ~30GB | ~32GB | ❌ |
+| **AWQ INT4** | **~16GB** | **~17-18GB** | ⚠️ 需调参 |
+| **W4A16 (GPTQ INT4)** | **~15GB** | **~16GB** | **✅ 推荐** |
+
+结论：**必须使用 INT4 量化版本**，且需要配合 `--cpu-offload-gb` 将少量溢出部分卸载到内存 。 [huggingface](https://huggingface.co/Qwen/Qwen3-30B-A3B/discussions/12)
+
+***
+
+## 第一步：安装 Miniconda 环境
+
+```bash
+# 下载 Miniconda（走代理）
+export http_proxy=http://192.168.233.206:7899
+export https_proxy=http://192.168.233.206:7899
+
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
+bash /tmp/miniconda.sh -b -p /opt/miniconda3
+
+
+conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
+conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
+
+# 初始化 bash
+/opt/miniconda3/bin/conda init bash
+
+echo 'export PATH="/opt/miniconda3/bin:$PATH"' >> ~/.bashrc
+source ~/.bashrc
+
+
+# 3. 切换为清华 conda 源（避免后续因访问 Anaconda 官方源速度慢）
+conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
+conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
+conda config --set show_channel_urls yes
+
+# 创建 Python 3.11 环境（配合 CUDA 13.0）
+conda create -n vllm python=3.11 -y
+conda activate vllm
+```
+
+***
+
+## 第二步：安装 vLLM（清华 pip 源）
+
+CUDA 13.0 对应的 Driver 580，直接使用最新 vLLM 即可 ： [vllm.hyper](https://vllm.hyper.ai/docs/0.8.x/getting-started/installation/)
+
+```bash
+# 配置清华 pip 镜像（永久生效）
+pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+
+# 安装 vLLM（清华源，无需代理）
+pip install vllm
+
+# 验证安装
+python -c "import vllm; print(vllm.__version__)"
+```
+
+***
+
+## 第三步：安装 ModelScope 并下载模型
+
+国内使用 ModelScope 下载，速度远比 HuggingFace 快 ： [blog.csdn](https://blog.csdn.net/xiezhipu/article/details/145956308)
+
+```bash
+pip install modelscope
+
+# 下载 AWQ INT4 量化版（ModelScope 国内源，约 16GB）
+python - <<'EOF'
+from modelscope import snapshot_download
+model_dir = snapshot_download(
+    'swift/Qwen3-30B-A3B-AWQ',
+    cache_dir='/data/models'
+)
+print(f"模型已下载至: {model_dir}")
+EOF
+```
+
+> 模型存储路径约 16GB，确保 `/data/models` 所在分区空间充足（你的 `/` 挂载点还有 467GB 可用）。
+
+***
+
+## 第四步：启动 vLLM 服务
+
+由于 AWQ INT4 模型加载后约占 17GB，略超 16GB 显存，需要将约 **2GB 溢出卸载到内存（CPU Offload）** ： [blog.csdn](https://blog.csdn.net/qq_43692950/article/details/150066140)
+
+```bash
+conda activate vllm
+
+# 关键参数说明见下方
+PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
+VLLM_USE_MODELSCOPE=True \
+nohup vllm serve swift/Qwen3-30B-A3B-AWQ \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --dtype float16 \
+  --gpu-memory-utilization 0.88 \
+  --max-model-len 8192 \
+  --cpu-offload-gb 2 \
+  --max-num-seqs 4 \
+  --enable-prefix-caching \
+  --served-model-name Qwen3-30B-A3B \
+  --trust-remote-code > /var/log/vllm.log 2>&1 &
+
+
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+export  VLLM_USE_MODELSCOPE=True 
+
+vllm serve swift/Qwen3-30B-A3B-AWQ \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --dtype float16 \
+  --gpu-memory-utilization 0.90 \
+  --max-model-len 4096 \
+  --max-num-seqs 2 \
+  --cpu-offload-gb 2 \
+  --enforce-eager \
+  --disable-log-stats \
+  --served-model-name Qwen3-30B-A3B \
+  --trust-remote-code
+
+echo "vLLM 已在后台启动，查看日志: tail -f /var/log/vllm.log"
+```
+
+### 参数说明
+
+| 参数 | 值 | 原因 |
+|---|---|---|
+| `--gpu-memory-utilization` | `0.88` | 充分利用 16GB 显存，同时预留少量余量 |
+| `--max-model-len` | `8192` | 限制上下文长度以节省 KV Cache 显存 |
+| `--cpu-offload-gb` | `2` | 将 2GB 模型权重卸载到 16GB 内存（速度略降）|
+| `--max-num-seqs` | `4` | 低并发限制，节省显存 |
+| `PYTORCH_CUDA_ALLOC_CONF` | `expandable_segments:True` | 防止显存碎片化 OOM |
+
+***
+
+## 第五步：验证服务是否启动成功
+
+```bash
+# 等待约 60-120 秒模型加载完毕
+tail -f /var/log/vllm.log
+
+# 看到如下日志说明启动成功：
+# INFO: Uvicorn running on http://0.0.0.0:8000
+
+# 查看已加载模型
+curl http://localhost:8000/v1/models
+```
+
+***
+
+## 若显存仍然不足的备用方案
+
+```bash
+# 进一步降低上下文长度和并发
+vllm serve swift/Qwen3-30B-A3B-AWQ \
+  --max-model-len 4096 \
+  --cpu-offload-gb 4 \
+  --max-num-seqs 2 \
+  --gpu-memory-utilization 0.90 \
+  ...（其余参数同上）
+```
+
+***
+
+✅ **确认 `curl http://localhost:8000/v1/models` 返回模型信息后**，告诉我，我将给出**阶段三：完整的 API 调用方式**（OpenAI 兼容接口、Python SDK、流式输出等）。
+
+
+
+# 1. 彻底删除旧环境
+conda deactivate
+conda env remove -n vllm -y
+
+# 2. 创建新环境
+conda create -n vllm python=3.11 -y
+conda activate vllm
+
+# 3. 设置清华源
+pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+
+# 5. 安装 torch（走代理）
+export http_proxy=http://192.168.233.206:7899
+export https_proxy=http://192.168.233.206:7899
+pip install torch torchvision torchaudio \
+    --index-url https://download.pytorch.org/whl/cu128 \
+    --no-cache-dir
+
+# 6. 安装 vllm 和 modelscope（关闭代理走清华源）
+unset http_proxy https_proxy
+pip install vllm modelscope --no-cache-dir
+
+# 7. 一次性验证
+python -c "
+import torch
+print('CUDA:', torch.cuda.is_available())
+print('GPU:', torch.cuda.get_device_name(0))
+import vllm
+print('vllm:', vllm.__version__)
+import modelscope
+print('modelscope:', modelscope.__version__)
+print('ALL OK')
+"
--- a/32-WDD-AI服务器/3-优化模型启动参数.md
+++ b/32-WDD-AI服务器/3-优化模型启动参数.md
@@ -0,0 +1,91 @@
+CPU为5800X 内存为16GB 3200Mhz Nvidia Geforce RTX 3070 已经被扩容到16GB的显存
+
+现在已经安装 Nvidia Driver 580（对应 CUDA 13.0）和 CUDA-Toolkit 13.0
+
+运行的模型为 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True VLLM_USE_MODELSCOPE=True 
+swift/Qwen3-30B-A3B-AWQ
+
+已经安装了mini conda
+
+你需要给出针对性编译llama.cpp支持GPU的版本，极致的利用主机的资源，提升大模型的速度，提升大模型的上下文
+
+具体的模型及运行命令如下
+~/llama.cpp/build/bin/llama-server \
+  -m /root/models/Qwen3-30B-A3B-GGUF/Qwen3-30B-A3B-Q4_K_M.gguf \
+  --host 0.0.0.0 \
+  --port 8000 \
+  -ngl 99 \
+  -c 8192 \
+  -t 8 \
+  --flash-attn \
+  -b 512 \
+  --no-mmap
+
+
+conda activate vllm
+
+# 1. 检查驱动和 CUDA 版本
+nvidia-smi
+
+# 2. 检查 nvcc 实际指向的版本
+nvcc --version
+
+# 3. 检查 PyTorch 编译时的 CUDA 版本
+python -c "import torch; print('PyTorch:', torch.__version__); print('PyTorch CUDA:', torch.version.cuda); print('GPU可用:', torch.cuda.is_available())"
+
+
+export CUDA_VISIBLE_DEVICES=0
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+export VLLM_USE_MODELSCOPE=True
+export VLLM_WORKER_MULTIPROC_METHOD=spawn
+export VLLM_ATTENTION_BACKEND=FLASHINFER
+export VLLM_USE_FLASHINFER_MOE_FP16=1          # ← 新增：FlashInfer MoE FP16 内核加速
+export OMP_NUM_THREADS=8
+export TOKENIZERS_PARALLELISM=false
+
+vllm serve swift/Qwen3-30B-A3B-AWQ \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --dtype float16 \
+  --quantization awq_marlin \
+  --gpu-memory-utilization 0.90 \
+  --max-model-len 8192 \
+  --max-num-seqs 1 \
+  --max-num-batched-tokens 1024 \
+  --num-gpu-blocks-override 800 \
+  --cpu-offload-gb 2 \
+  --kv-cache-dtype fp8 \
+  --enforce-eager \
+  --disable-log-stats \
+  --served-model-name Qwen3-30B-A3B \
+  --trust-remote-code
+
+
+sudo nvidia-smi -pm 1
+
+
+export CUDA_VISIBLE_DEVICES=0
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+export VLLM_USE_MODELSCOPE=True
+export VLLM_WORKER_MULTIPROC_METHOD=spawn
+export VLLM_ATTENTION_BACKEND=FLASHINFER
+export VLLM_CPU_KVCACHE_SPACE=8          # ← 正确方式：8GB RAM 作为 CPU KV cache
+export OMP_NUM_THREADS=8
+export TOKENIZERS_PARALLELISM=false
+
+vllm serve swift/Qwen3-30B-A3B-AWQ \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --dtype float16 \
+  --quantization awq_marlin \
+  --gpu-memory-utilization 0.90 \
+  --max-model-len 16384 \
+  --max-num-seqs 1 \
+  --max-num-batched-tokens 512 \
+  --num-gpu-blocks-override 300 \
+  --cpu-offload-gb 2 \
+  --kv-cache-dtype fp8 \
+  --enforce-eager \
+  --disable-log-stats \
+  --served-model-name Qwen3-30B-A3B \
+  --trust-remote-code
--- a/32-WDD-AI服务器/4-nvidia-监控.md
+++ b/32-WDD-AI服务器/4-nvidia-监控.md
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# 打印表头
+printf "%-26s | %6s | %6s | %12s | %7s | %6s | %6s | %14s\n" \
+  "时间戳" "GPU%" "显存%" "显存用/总MiB" "功耗W" "温度°C" "CPU%" "RAM用/总GiB"
+printf '%s\n' "$(printf '%.0s-' {1..100})"
+
+# 保存上次 CPU 计数
+prev_total=0; prev_used=0
+
+while true; do
+  # ── GPU 数据 ──────────────────────────────────────────────
+  IFS=',' read -r ts gpu_util mem_used mem_total pwr temp < <(
+    nvidia-smi \
+      --query-gpu=timestamp,utilization.gpu,memory.used,memory.total,power.draw,temperature.gpu \
+      --format=csv,noheader,nounits \
+      --id=0 2>/dev/null | head -1
+  )
+  # 去除首尾空格
+  ts=$(echo "$ts" | xargs)
+  gpu_util=$(echo "$gpu_util" | xargs)
+  mem_util=$(awk "BEGIN {printf \"%.1f\", $mem_used/$mem_total*100}")
+  pwr=$(echo "$pwr" | xargs)
+  temp=$(echo "$temp" | xargs)
+
+  # ── CPU 使用率（差值法）──────────────────────────────────
+  read -ra c < <(grep '^cpu ' /proc/stat)
+  total=$(( c[1]+c[2]+c[3]+c[4]+c[5]+c[6]+c[7] ))
+  used=$(( total - c[4] - c[5] ))
+  if (( prev_total > 0 )); then
+    cpu_pct=$(( (used - prev_used) * 100 / (total - prev_total) ))
+  else
+    cpu_pct=0
+  fi
+  prev_total=$total; prev_used=$used
+
+  # ── RAM 使用率 ────────────────────────────────────────────
+  mem_total_kb=$(grep '^MemTotal' /proc/meminfo | awk '{print $2}')
+  mem_avail_kb=$(grep '^MemAvailable' /proc/meminfo | awk '{print $2}')
+  mem_used_gib=$(awk "BEGIN {printf \"%.2f\", ($mem_total_kb - $mem_avail_kb)/1024/1024}")
+  mem_total_gib=$(awk "BEGIN {printf \"%.2f\", $mem_total_kb/1024/1024}")
+
+  # ── 输出一行 ──────────────────────────────────────────────
+  printf "%-26s | %5s%% | %5s%% | %5s / %-5s | %6s | %5s  | %5d%% | %5s / %s\n" \
+    "$ts" "$gpu_util" "$mem_util" \
+    "$mem_used" "$mem_total" \
+    "$pwr" "$temp" "$cpu_pct" \
+    "$mem_used_gib" "$mem_total_gib"
+
+  sleep 1
+done
--- a/32-WDD-AI服务器/5-llama-cpp运行方式.md
+++ b/32-WDD-AI服务器/5-llama-cpp运行方式.md
@@ -0,0 +1,85 @@
+这是非常关键的一个决策问题，现在有真实 benchmark 数据可以精准回答。
+
+***
+
+## 速度对比：vllm vs llama.cpp（你的硬件）
+
+基于真实测试数据 ： [github](https://github.com/ggml-org/llama.cpp/discussions/15180)
+
+| 指标 | vllm（当前） | llama.cpp（切换后） | 差距 |
+|---|---|---|---|
+| **单请求 decode 速度** | ~18 tok/s | **~20-28 tok/s** | llama.cpp **反而更快** ✅ |
+| **最大上下文** | 8192（极限） | 32768+ | llama.cpp 碾压 ✅ |
+| **显存占用** | 15403 MiB（94%） | ~13000 MiB | llama.cpp 更省 ✅ |
+| **高并发吞吐** | 更好 | 较差 | vllm 优势（你用不到）|
+| **启动时间** | ~60s | ~10s | llama.cpp 更快 ✅ |
+
+**关键数据来源**：一位 RTX 3070 笔记本用户（8GB VRAM）用 ollama（llama.cpp 封装）跑 Qwen3-30B MoE Q4，跑出了 **24 tok/s** 。你的是 16GB 版，可以装更多层到 GPU，速度会更快，**预计 30-40 tok/s**。 [reddit](https://www.reddit.com/r/ollama/comments/1l1tjbb/i_optimized_qwen330b_moe_to_run_on_my_rtx_3070/)
+
+llama.cpp 单请求场景与 vllm 几乎持平，差距在 0-6% 以内 。vllm 的优势只在**高并发**（多用户同时请求）时才体现，你是个人单用户场景，这个优势完全用不到。 [github](https://github.com/ggml-org/llama.cpp/discussions/15180)
+
+***
+
+## llama.cpp 快速部署
+
+### 第一步：下载 GGUF 模型
+
+```bash
+# 从 ModelScope 下载 Q4_K_M 版本（推荐，质量与速度最佳平衡）
+pip install modelscope
+
+python -c "
+from modelscope import snapshot_download
+
+# 方案一：lmstudio-community（bartowski量化，推荐）
+snapshot_download(
+    'lmstudio-community/Qwen3-30B-A3B-GGUF',
+    allow_file_pattern=['*Q4_K_M*'],
+    local_dir='/root/models/Qwen3-30B-A3B-GGUF'
+)
+"
+```
+
+### 第二步：安装 llama.cpp（CUDA 版）
+
+```bash
+pip install llama-cpp-python \
+  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu128
+```
+
+或直接用预编译二进制：
+```bash
+# 下载官方 CUDA release
+wget https://github.com/ggml-org/llama.cpp/releases/latest/download/llama-linux-cuda-cu12.0-x64.zip
+unzip llama-linux-cuda-cu12.0-x64.zip -d ~/llama.cpp/
+```
+
+### 第三步：启动 OpenAI 兼容服务
+
+```bash
+~/llama.cpp/llama-server \
+  -m /root/models/Qwen3-30B-A3B-GGUF/Qwen3-30B-A3B-Q4_K_M.gguf \
+  --host 0.0.0.0 \
+  --port 8000 \
+  -ngl 99 \
+  -c 32768 \
+  -n -1 \
+  -t 8 \
+  --parallel 1 \
+  --flash-attn \
+  -b 512
+```
+
+| 参数 | 含义 |
+|---|---|
+| `-ngl 99` | 全部层加载到 GPU |
+| `-c 32768` | 32K 上下文（RAM 按需分配）|
+| `--flash-attn` | 启用 FlashAttention，Ampere 原生支持 |
+| `-t 8` | 使用 5800X 全部 8 核心 |
+| `-b 512` | batch size，平衡速度与延迟 |
+
+***
+
+## 一句话总结
+
+> 你的场景（单用户、需要长上下文、16GB 单卡）**llama.cpp 全面优于 vllm**：不仅不会更慢，反而更快 ~10-50%，且上下文长度从 8K 直接升到 32K，显存占用更低，部署更简单。 [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1kdsp4z/qwen_3_performance_quick_benchmarks_across/)
--- a/32-WDD-AI服务器/6-llama-cpp编译安装.md
+++ b/32-WDD-AI服务器/6-llama-cpp编译安装.md
@@ -0,0 +1,129 @@
+基于你的硬件配置（5800X + 16GB RAM + RTX 3070 扩容到 16GB 显存 + CUDA 13.0），下面是完整的针对性编译和运行方案。
+
+***
+
+## 第一步：环境准备
+
+在编译前，确认 CUDA 环境变量已正确配置：
+
+```bash
+export PATH=/usr/local/cuda/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+
+# 验证
+nvcc --version
+nvidia-smi
+```
+
+安装编译依赖：
+
+```bash
+sudo apt update
+sudo apt install -y build-essential git cmake gcc g++ libcurl4-openssl-dev
+```
+
+***
+
+## 第二步：针对性编译命令
+
+RTX 3070 的 Compute Capability 为 **8.6**（sm_86），要锁定这个架构以避免编译浪费并获得最优内核 。 [raw.githubusercontent](https://raw.githubusercontent.com/ggml-org/llama.cpp/master/docs/build.md)
+
+```bash
+cd ~/llama.cpp
+
+# 清理旧构建
+rm -rf build
+
+# 针对 RTX 3070 的极致优化编译
+cmake -B build \
+  -DGGML_CUDA=ON \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_CUDA_ARCHITECTURES="86" \
+  -DGGML_CUDA_F16=ON \
+  -DGGML_CUDA_FA_ALL_QUANTS=ON \
+  -DGGML_NATIVE=ON \
+  -DCMAKE_C_COMPILER=gcc \
+  -DCMAKE_CXX_COMPILER=g++ \
+  -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
+
+# 并行编译（5800X 8 核 16 线程全开）
+cmake --build build --config Release -j$(nproc)
+```
+
+**各编译参数说明：**
+
+| 参数 | 作用 |
+|---|---|
+| `-DCMAKE_CUDA_ARCHITECTURES="86"` | 精准锁定 RTX 3070 架构，减少编译体积，避免 nvcc 警告  [raw.githubusercontent](https://raw.githubusercontent.com/ggml-org/llama.cpp/master/docs/build.md) |
+| `-DGGML_CUDA_F16=ON` | 启用半精度 FP16 加速 dequantization + mul mat 核，在 30 系显卡上明显提速  [raw.githubusercontent](https://raw.githubusercontent.com/ggml-org/llama.cpp/master/docs/build.md) |
+| `-DGGML_CUDA_FA_ALL_QUANTS=ON` | 编译所有 KV cache 量化类型的 FlashAttention CUDA 内核，配合 `-ctk/-ctv` 大幅节省显存扩展上下文  [raw.githubusercontent](https://raw.githubusercontent.com/ggml-org/llama.cpp/master/docs/build.md) |
+| `-DGGML_NATIVE=ON` | 针对本机 CPU（5800X Zen3）生成最优指令集（AVX2/FMA） |
+
+***
+
+## 第三步：验证 GPU 编译成功
+
+```bash
+~/llama.cpp/build/bin/llama-server --list-devices
+# 应看到 CUDA0: NVIDIA GeForce RTX 3070
+```
+
+***
+
+## 第四步：极致优化运行命令
+
+你的 RTX 3070 已扩容到 **16GB 显存**，Qwen3-30B-A3B Q4_K_M 约 18GB，可以做到几乎全量 GPU 卸载配合少量 CPU offload 。 [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1kwdpey/best_settings_for_running_qwen330ba3b_with/)
+
+```bash
+
+GGML_CUDA_DISABLE_GRAPHS=1 \
+~/llama.cpp/build/bin/llama-server \
+  -m /root/models/Qwen3-30B-A3B-GGUF/Qwen3-30B-A3B-Q4_K_M.gguf \
+  --host 0.0.0.0 \
+  --port 8000 \
+  -ngl 40 \
+  -c 32768 \
+  -t 8 \
+  --flash-attn on \
+  -b 768 \
+  -ub 256 \
+  --mlock \
+  -ctk q8_0 \
+  -ctv q8_0 \
+  --parallel 1 \
+  --temp 0.6 \
+  --top-k 20 \
+  --top-p 0.95 \
+  --jinja \
+  -a "Qwen3-30B-A3B"
+```
+
+**关键参数变更说明：**
+
+| 参数 | 原值 → 新值 | 原因 |
+|---|---|---|
+| `-c` | 8192 → **32768** | 16GB 显存 + `-ctk/-ctv q8_0` KV cache 量化后可支持大上下文，Flash Attention 让长上下文 prompt 处理速度翻倍以上  [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1l4xiwg/whats_the_case_against_flash_attention/) |
+| `--mlock` | 无 → **加入** | 将模型锁定在内存，防止系统换页，降低推理延迟  [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1kwdpey/best_settings_for_running_qwen330ba3b_with/) |
+| `-ctk q8_0 -ctv q8_0` | 无 → **加入** | KV cache 量化到 int8，节省约 50% KV 显存，换取更长上下文，对输出质量影响极小  [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1l4xiwg/whats_the_case_against_flash_attention/) |
+| `-ub 256` | 无 → **加入** | Micro-batch 大小，针对单 GPU 单并发优化 token 生成速度 |
+| `-b 768` | 512 → **768** | Batch size 对 prompt 处理速度有显著影响，在 16GB 显存下适当调大到 768 以加快 prompt 处理 |
+
+***
+
+## 第五步：内存/显存不足时的回退策略
+
+如果模型 18GB > 16GB 显存导致 OOM，启用 **Unified Memory** 让系统 RAM 兜底（Linux 下）：
+
+```bash
+export GGML_CUDA_ENABLE_UNIFIED_MEMORY=1
+```
+
+这允许显存溢出时自动 swap 到 16GB 系统内存，而不是直接崩溃 。同时可以适当降低 `-ngl` 值（例如从上面命令中的 `-ngl 40` 降到 `-ngl 32`）手动将更多层留在 CPU，由 5800X 的 8 核 16 线程承接。 [raw.githubusercontent](https://raw.githubusercontent.com/ggml-org/llama.cpp/master/docs/build.md)
+
+***
+
+## 性能预期
+
+- **Token 生成速度**：编译优化 + F16 + Flash Attention，RTX 3070 上 Qwen3-30B Q4_K_M 预计可达 **15~25 tok/s** [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1kwdpey/best_settings_for_running_qwen330ba3b_with/)
+- **上下文**：KV cache q8_0 量化后，16GB 显存可稳定支持 **16K~32K context**
+- **Prompt 处理**：Flash Attention 对长 prompt 处理速度提升 2x+ [reddit](https://www.reddit.com/r/LocalLLaMA/comments/1l4xiwg/whats_the_case_against_flash_attention/)
Author	SHA1	Message	Date
zeaslity	978135e3a1	雄安工作总结；WDD-AI服务器	2026-06-08 09:28:55 +08:00
zeaslity	a496b1b878	构建内容prompt	2026-05-27 17:44:02 +08:00