From 3a36fc34fe55cf75edc3c9a76e85e42a065abca5 Mon Sep 17 00:00:00 2001
From: root <15750543867@163.com>
Date: Thu, 16 Apr 2026 23:09:00 +0800
Subject: [PATCH 1/2] refactor deepspeed doc
---
.gitmodules | 3 ++
Makefile | 3 +-
_repos/deepspeed | 1 +
index.rst | 4 +-
sources/deepspeed/index.rst | 1 -
sources/deepspeed/install.rst | 73 -------------------------------
sources/deepspeed/quick_start.rst | 34 --------------
7 files changed, 8 insertions(+), 111 deletions(-)
create mode 160000 _repos/deepspeed
delete mode 100644 sources/deepspeed/install.rst
delete mode 100644 sources/deepspeed/quick_start.rst
diff --git a/.gitmodules b/.gitmodules
index e2cc60ff..876aa3d3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -19,3 +19,6 @@
path = _repos/triton-ascend
url = https://gitcode.com/Ascend/triton-ascend.git
branch = main
+[submodule "_repos/deepspeed"]
+ path = _repos/deepspeed
+ url = https://github.com/deepspeedai/DeepSpeed.git
diff --git a/Makefile b/Makefile
index 2e550ed4..433a7b8f 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,8 @@ PROJECT_CONFIGS = \
_repos/LLaMA-Factory/docs:sources/LLaMA-Factory \
_repos/ms-swift/docs:sources/ms-swift \
_repos/vllm-ascend/docs/source:sources/vllm-ascend \
- _repos/triton-ascend/docs/zh:sources/triton-ascend
+ _repos/triton-ascend/docs/zh:sources/triton-ascend \
+ _repos/deepspeed/docs/_tutorials/accelerate-setup-guide.md:sources/deepspeed/quick_start.rst \
# Configure all subprojects generated path
GENERATED_DOCS := sources/_generated
diff --git a/_repos/deepspeed b/_repos/deepspeed
new file mode 160000
index 00000000..dc0fd295
--- /dev/null
+++ b/_repos/deepspeed
@@ -0,0 +1 @@
+Subproject commit dc0fd2950b4cb0234ca36fa936e8d2a659b3ca04
diff --git a/index.rst b/index.rst
index fffc707a..84d21312 100644
--- a/index.rst
+++ b/index.rst
@@ -123,8 +123,8 @@
-
适用于 Pytorch 的多 GPUs/NPUs 训练工具链。
-
+
DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.
+
diff --git a/sources/deepspeed/index.rst b/sources/deepspeed/index.rst
index 29bff2a2..4b9f533a 100644
--- a/sources/deepspeed/index.rst
+++ b/sources/deepspeed/index.rst
@@ -4,5 +4,4 @@ DeepSpeed
.. toctree::
:maxdepth: 2
- install.rst
quick_start.rst
diff --git a/sources/deepspeed/install.rst b/sources/deepspeed/install.rst
deleted file mode 100644
index 54ed9b08..00000000
--- a/sources/deepspeed/install.rst
+++ /dev/null
@@ -1,73 +0,0 @@
-安装指南
-==============
-
-.. note::
- 在本示例之前,请确保已经安装了 `昇腾环境 <../ascend/quick_install.html>`_ 和 `PyTorch <../pytorch/install.html>`_ 环境。
-
-1. 安装DeepSpeed
------------------
-安装DeepSpeed最简单的方式是通过 ``pip`` 。
-
-.. code-block:: shell
- :linenos:
-
- pip install deepspeed
-
-
-2. 通过源码安装
-------------------
-从 `GitHub `_ 克隆DeepSpeed项目后,可以通过 ``pip`` 来通过源码编译。
-
-.. code-block:: shell
- :linenos:
-
- pip install .
-
-
-3. 预编译DeepSpeed算子(可选)
-----------------------------------
-如果不想使用JIT编译模式,而想要预编译DeepSpeed算子,可以通过设置环境变量的方式完成算子的预编译。
-
-.. code-block:: shell
- :linenos:
-
- DS_BUILD_OPS=1 pip install deepspeed
-
-4. 安装验证
------------
-
-安装完成后,可以通过 ``ds_report`` 命令查看安装结果
-
-.. code-block:: shell
- :linenos:
-
- --------------------------------------------------
- DeepSpeed C++/CUDA extension op report
- --------------------------------------------------
- NOTE: Ops not installed will be just-in-time (JIT) compiled at
- runtime if needed. Op compatibility means that your system
- meet the required dependencies to JIT install the op.
- --------------------------------------------------
- JIT compiled ops requires ninja
- ninja .................. [OKAY]
- --------------------------------------------------
- op name ................ installed .. compatible
- --------------------------------------------------
- deepspeed_not_implemented [NO] ....... [OKAY]
- async_io ............... [NO] ....... [OKAY]
- cpu_adagrad ............ [NO] ....... [OKAY]
- cpu_adam ............... [NO] ....... [OKAY]
- cpu_lion ............... [NO] ....... [OKAY]
- fused_adam ............. [NO] ....... [OKAY]
- transformer_inference .. [NO] ....... [OKAY]
- --------------------------------------------------
- DeepSpeed general environment info:
- torch install path ............... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/torch']
- torch version .................... 2.2.0
- deepspeed install path ........... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/deepspeed']
- deepspeed info ................... 0.14.4, unknown, unknown
- deepspeed wheel compiled w. ...... torch 2.2
- torch_npu install path ........... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/torch_npu']
- torch_npu version ................ 2.2.0
- ascend_cann version .............. 8.0.RC2.alpha002
- shared memory (/dev/shm) size .... 20.00 GB
diff --git a/sources/deepspeed/quick_start.rst b/sources/deepspeed/quick_start.rst
deleted file mode 100644
index 00baae35..00000000
--- a/sources/deepspeed/quick_start.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-快速开始
-==========
-
-.. note::
- 在本示例之前,请确保已经安装了 `DeepSpeed <./install.html>`_ 环境。 如果还未安装,可以执行 ``pip install deepspeed`` 完成安装。
-
-
-1. 使用DeepSpeed多卡并行训练
--------------------------------
-以下代码使用了cifar10数据集,使用DeepSpeed训练模型在多张NPU卡上进行模型训练(来自 `DeepSpeed Examples `_),自DeepSpeed v0.12.6之后,代码无需任何修改,即可自动检测NPU并进行训练。
-
-.. rli:: https://raw.githubusercontent.com/microsoft/DeepSpeedExamples/master/training/cifar/cifar10_deepspeed.py
- :language: python
- :linenos:
-
-2. 训练结果查看
-----------------
-训练完成后,会打印模型对图像识别的结果。
-
-.. code-block:: shell
- :linenos:
-
- Finished Training
- Accuracy of the network on the 10000 test images: 57 %
- Accuracy of plane : 65 %
- Accuracy of car : 67 %
- Accuracy of bird : 52 %
- Accuracy of cat : 34 %
- Accuracy of deer : 52 %
- Accuracy of dog : 49 %
- Accuracy of frog : 59 %
- Accuracy of horse : 66 %
- Accuracy of ship : 66 %
- Accuracy of truck : 56 %
From e06bc109ed7be6e0fa6ad9c4ed96f30759d8f7cf Mon Sep 17 00:00:00 2001
From: root <15750543867@163.com>
Date: Fri, 17 Apr 2026 09:59:54 +0800
Subject: [PATCH 2/2] add single file copy feature
---
Makefile | 37 ++++++++++++++++++++++++++++---------
conf.py | 4 +++-
index.rst | 10 +++++-----
sources/deepspeed/index.rst | 2 +-
4 files changed, 37 insertions(+), 16 deletions(-)
diff --git a/Makefile b/Makefile
index 433a7b8f..3e4a3c22 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,8 @@
# You can set these variables from the command line, and also
# from the environment for the first two.
-SPHINXOPTS ?=
+# Pin -j 1 (already sphinx-build's default) so builds stay serial: parallel workers multiply RSS and get OOM-killed (exit 137) under small cgroup limits (e.g. 2Gi).
+SPHINXOPTS ?= -j 1
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
@@ -16,7 +17,7 @@ PROJECT_CONFIGS = \
_repos/ms-swift/docs:sources/ms-swift \
_repos/vllm-ascend/docs/source:sources/vllm-ascend \
_repos/triton-ascend/docs/zh:sources/triton-ascend \
- _repos/deepspeed/docs/_tutorials/accelerate-setup-guide.md:sources/deepspeed/quick_start.rst \
+ _repos/deepspeed/docs/_tutorials/accelerator-setup-guide.md:sources/deepspeed/quick_start.md \
# Configure all subprojects generated path
GENERATED_DOCS := sources/_generated
@@ -64,7 +65,15 @@ sync-onnxruntime-doc:
# Initialize submodules (always run to handle empty dirs left by git clone)
init-submodules:
@git submodule sync --recursive
- @git submodule update --init --remote
+ @n=1; \
+ until git submodule update --init --remote; do \
+ if [ $$n -ge 3 ]; then \
+ echo "git submodule update failed after 3 attempts"; exit 1; \
+ fi; \
+ echo "git submodule update failed (attempt $$n/3), retrying in 8s..."; \
+ sleep 8; \
+ n=$$((n+1)); \
+ done
# Copy documentation from submodules
copy-docs: init-submodules
@@ -80,14 +89,24 @@ copy-docs: init-submodules
rel_dst=$$(echo $$config | cut -d: -f2); \
dst="$(GENERATED_DOCS)/$$rel_dst"; \
echo "Copying $$src -> $$dst"; \
- rm -rf $$dst; \
- mkdir -p $$dst; \
- echo "Copying $$src to $$dst"; \
- cp -r "$$src"/* "$$dst"/ 2>/dev/null || echo " [WARN] Source directory does not exist or is empty: $$src"; \
+ rm -rf "$$dst"; \
+ if [ -f "$$src" ]; then \
+ mkdir -p "$$(dirname "$$dst")"; \
+ echo "Copying $$src to $$dst"; \
+ cp "$$src" "$$dst" || echo " [WARN] Source file missing or copy failed: $$src"; \
+ elif [ -d "$$src" ]; then \
+ mkdir -p "$$dst"; \
+ echo "Copying $$src to $$dst"; \
+ cp -r "$$src"/* "$$dst"/ 2>/dev/null || echo " [WARN] Source directory does not exist or is empty: $$src"; \
+ else \
+ echo " [WARN] Source does not exist: $$src"; \
+ fi; \
if [ "$$rel_dst" = "sources/vllm-ascend" ] || [ "$$rel_dst" = "sources/triton-ascend" ]; then \
rm -f "$$dst/index.md" "$$dst/index.rst" "$$dst/index.html" 2>/dev/null || true; \
else \
- find "$$dst" -name 'index.*' -delete 2>/dev/null || true; \
+ if [ -d "$$dst" ]; then \
+ find "$$dst" -name 'index.*' -delete 2>/dev/null || true; \
+ fi; \
fi; \
done
@@ -103,4 +122,4 @@ html dirhtml singlehtml latex pdf: fetch-config copy-docs sync-onnxruntime-doc
# Catch-all target for other Sphinx targets (clean, help, etc.)
%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/conf.py b/conf.py
index 0adb6969..0ca0138a 100644
--- a/conf.py
+++ b/conf.py
@@ -71,7 +71,9 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '.venv', 'README.md']
+# _repos: submodule working trees duplicate content already copied to sources/_generated;
+# indexing both roughly doubles RSS and OOM-kills low-memory cgroup builds (e.g. 2Gi).
+exclude_patterns = ['_build', '_repos', 'Thumbs.db', '.DS_Store', '.venv', 'README.md']
# -- Options for HTML output -------------------------------------------------
diff --git a/index.rst b/index.rst
index 84d21312..7b2129c7 100644
--- a/index.rst
+++ b/index.rst
@@ -123,16 +123,16 @@
-
DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.
-
+
适用于 Pytorch 的多 GPUs/NPUs 训练工具链。
+
-
分布式训练优化库,V0.10.1 版本起支持昇腾。
-
+
DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.
+
diff --git a/sources/deepspeed/index.rst b/sources/deepspeed/index.rst
index 4b9f533a..5f8dc2e7 100644
--- a/sources/deepspeed/index.rst
+++ b/sources/deepspeed/index.rst
@@ -4,4 +4,4 @@ DeepSpeed
.. toctree::
:maxdepth: 2
- quick_start.rst
+ ../_generated/sources/deepspeed/quick_start