diff --git a/.github/workflows/docker-image b/.github/workflows/docker-image deleted file mode 100644 index 6284b47..0000000 --- a/.github/workflows/docker-image +++ /dev/null @@ -1,47 +0,0 @@ - -name: DockerHub CI - -on: - release: - types: [published] -env: - DOCKERHUB_REPO: ${{ secrets.DOCKERHUB_USERNAME }}/ktransformers -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Move Docker data directory - run: | - sudo systemctl stop docker - sudo mkdir -p /mnt/docker - sudo rsync -avz /var/lib/docker/ /mnt/docker - sudo rm -rf /var/lib/docker - sudo ln -s /mnt/docker /var/lib/docker - sudo systemctl start docker - - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push - uses: docker/build-push-action@v6 - with: - push: true - platforms: | - linux/amd64 - linux/arm64 - tags: | - ${{ env.DOCKERHUB_REPO }}:latest - ${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }} - diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000..60df01f --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,90 @@ +name: DockerHub CI + +on: + release: + types: [published] + # push: + # branches: + # - main +env: + DOCKERHUB_REPO: ${{ secrets.DOCKERHUB_USERNAME }}/ktransformers +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run tests + run: | + if [ -f docker-compose.test.yml ]; then + docker-compose --file docker-compose.test.yml build + docker-compose --file docker-compose.test.yml run sut + else + docker build . --file Dockerfile + fi + + docker_task: + needs: test + name: ${{ matrix.instruct}} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + # for amd64 + - {instruct: "FANCY", platform: "linux/amd64"} + - {instruct: "AVX512", platform: "linux/amd64"} + - {instruct: "AVX2", platform: "linux/amd64"} + - {instruct: "NATIVE", platform: "linux/amd64"} + # for arm64 + - {instruct: "NATIVE", platform: "linux/arm64"} + + steps: + - name: Move Docker data directory + run: | + sudo systemctl stop docker + sudo mkdir -p /mnt/docker + sudo rsync -avz /var/lib/docker/ /mnt/docker + sudo rm -rf /var/lib/docker + sudo ln -s /mnt/docker /var/lib/docker + sudo systemctl start docker + + - + name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - + name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - + name: Build and push for amd64 + if: matrix.platform == 'linux/amd64' + uses: docker/build-push-action@v6 + with: + push: true + platforms: | + linux/amd64 + tags: | + ${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }} + ${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }} + build-args: | + CPU_INSTRUCT=${{ matrix.instruct }} + - + name: Build and push for arm64 + if: matrix.platform == 'linux/arm64' + uses: docker/build-push-action@v6 + with: + push: true + platforms: | + linux/arm64 + tags: | + ${{ env.DOCKERHUB_REPO }}:latest-${{ matrix.instruct }} + ${{ env.DOCKERHUB_REPO }}:${{ github.event.release.tag_name }}-${{ matrix.instruct }} + build-args: | + CPU_INSTRUCT=${{ matrix.instruct }} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 6d4b214..1807150 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,7 @@ EOF FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server +ARG CPU_INSTRUCT=NATIVE WORKDIR /workspace ENV CUDA_HOME /usr/local/cuda COPY --from=web_compile /home/ktransformers /workspace/ktransformers @@ -28,8 +29,9 @@ git submodule init && git submodule update && pip install ninja pyproject numpy cpufeature && pip install flash-attn && -CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose && -pip cache purge +CPU_INSTRUCT=${CPU_INSTRUCT} KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose && +pip cache purge && +cp /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/ EOF ENTRYPOINT ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/ktransformers/tests/mmlu_pro_test.py b/ktransformers/tests/mmlu_pro_test.py index d44be2a..27eb9b2 100644 --- a/ktransformers/tests/mmlu_pro_test.py +++ b/ktransformers/tests/mmlu_pro_test.py @@ -176,7 +176,7 @@ if __name__ == "__main__": parser.add_argument("--result", type=str, default="./mmlu_pro.json", help="Path to save the result JSON file") parser.add_argument("--log", type=str, default="./mmlu_pro.log", help="Path to save the log file") parser.add_argument("--model", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model name or path") - parser.add_argument("--api_url", type=str, default="http://localhost:10002/v1/chat/completions", help="API URL") + parser.add_argument("--api_url", type=str, default="http://localhost:15488/v1/chat/completions", help="API URL") # parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL") args = parser.parse_args()