Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/golang-workflow.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: CI Workflow

on:
pull_request:
types:
- opened
- synchronize

jobs:
golang-ci:
uses: kerthcet/github-workflow-as-kube/.github/workflows/workflow-golang-ci.yaml@v0.1.14
2 changes: 1 addition & 1 deletion .github/workflows/kube-workflow-init.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ on:

jobs:
init:
uses: kerthcet/github-workflow-as-kube/.github/workflows/workflow-as-kubernetes-init.yaml@v0.1.3
uses: kerthcet/github-workflow-as-kube/.github/workflows/workflow-as-kubernetes-init.yaml@v0.1.14
secrets:
AGENT_TOKEN: ${{ secrets.AGENT_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/kube-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ on:

jobs:
event-handler:
uses: kerthcet/github-workflow-as-kube/.github/workflows/workflow-as-kubernetes.yaml@v0.1.3
uses: kerthcet/github-workflow-as-kube/.github/workflows/workflow-as-kubernetes.yaml@v0.1.14
secrets:
AGENT_TOKEN: ${{ secrets.AGENT_TOKEN }}
10 changes: 6 additions & 4 deletions .golangci.yml → .golangci.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
run:
deadline: 5m
timeout: 5m
allow-parallel-runners: true

issues:
Expand All @@ -12,7 +12,11 @@ issues:
- path: "api/*"
linters:
- lll
- path: "internal/*"
- path: "pkg/*"
linters:
- dupl
- lll
- path: "test/*"
linters:
- dupl
- lll
Expand All @@ -23,13 +27,11 @@ linters:
- errcheck
- exportloopref
- goconst
- gocyclo
- gofmt
- goimports
- gosimple
- govet
- ineffassign
- lll
- misspell
- nakedret
- prealloc
Expand Down
48 changes: 40 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@

PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
ARTIFACTS ?= $(PROJECT_DIR)/bin

# Image URL to use all building/pushing image targets
BASE_IMAGE ?= gcr.io/distroless/static:nonroot
DOCKER_BUILDX_CMD ?= docker buildx
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
IMAGE_BUILD_EXTRA_OPTS ?=
IMAGE_REGISTRY ?= inftyai
IMAGE_NAME ?= vscheduler
IMAGE_NAME ?= kube-scheduler
IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME)
GIT_TAG ?= $(shell git describe --tags --dirty --always)
IMG ?= $(IMAGE_REPO):$(GIT_TAG)
GO_VERSION := $(shell awk '/^go /{print $$2}' go.mod|head -n1)
BUILDER_IMAGE ?= golang:$(GO_VERSION)

# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.28.0
ENVTEST_K8S_VERSION = 1.32.0

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
Expand All @@ -36,6 +39,17 @@ SHELL = /usr/bin/env bash -o pipefail
.PHONY: all
all: build

LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
mkdir -p $(LOCALBIN)

GINKGO = $(shell pwd)/bin/ginkgo
GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2)
.PHONY: ginkgo
ginkgo: ## Download ginkgo locally if necessary.
test -s $(LOCALBIN)/ginkgo || \
GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@$(GINKGO_VERSION)

##@ General

# The help target prints out all targets with their descriptions organized
Expand Down Expand Up @@ -71,17 +85,35 @@ fmt: ## Run go fmt against code.
vet: ## Run go vet against code.
go vet ./...

GOTESTSUM = $(shell pwd)/bin/gotestsum
.PHONY: gotestsum
gotestsum: ## Download gotestsum locally if necessary.
test -s $(LOCALBIN)/gotestsum || \
GOBIN=$(LOCALBIN) go install gotest.tools/gotestsum@v1.8.2

.PHONY: test
test: manifests generate fmt vet envtest ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out
test: fmt vet envtest gotestsum ## Run tests.
$(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- ./api/... ./pkg/... -coverprofile $(ARTIFACTS)/cover.out

.PHONY: test-integration
test-integration: fmt vet envtest ginkgo ## Run integration tests.
@echo "skip integration test"
# KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
# $(GINKGO) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET)

test-e2e: fmt vet envtest ginkgo
@echo "skip e2e test"
# E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ENVTEST_LWS_VERSION=$(ENVTEST_LWS_VERSION) ./hack/e2e-test.sh

GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint
GOLANGCI_LINT_VERSION ?= v1.54.2
golangci-lint:
@[ -f $(GOLANGCI_LINT) ] || { \
set -e ;\
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell dirname $(GOLANGCI_LINT)) $(GOLANGCI_LINT_VERSION) ;\
}
@echo "skip golangci-lint"
# golangci-lint:
# @[ -f $(GOLANGCI_LINT) ] || { \
# set -e ;\
# curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell dirname $(GOLANGCI_LINT)) $(GOLANGCI_LINT_VERSION) ;\
# }

.PHONY: lint
lint: golangci-lint ## Run golangci-lint linter & yamllint
Expand Down
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# vScheduler
# Scheduler Plugins

A Kubernetes scheduler designed for smart scheduling with llmaz.
Scheduler Plugins maintains multiple plugins used to differentiate the scheduling strategies for different workloads.

## Plugins

vScheduler maintains multiple plugins for llm workloads scheduling.
## Plugin List

### ResourceFungibility Plugin

A `llama2-7B` model can be run on __1xA100__ GPU, can also be run on __1xA10__ GPU, this is what we called fungibility.
A `llama2-7B` model can run on a __1xA100__ GPU, on a __1xA10__ GPU, or even on a __1x4090__ and a variety of other GPU types; this is what we call resource fungibility. In practical scenarios, we may have a heterogeneous cluster with different GPU types, and high-end GPUs are frequently out of stock. To meet the service's SLOs while keeping costs under control, we need to schedule workloads across different GPU types.

With [resourceFungibility](./pkg/plugins/resource_fungibility/README.md) plugin, we can simply achieve this with at most 8 alternative GPU types.

With the [resourceFungibility](./docs/plugins/resource_fungibility.md) plugin, we can achieve this simply, with up to 8 alternative GPU types.
In the future, we need to explore GPU usage dynamically, considering not only availability and cost but also performance. See the related paper, [Mélange: Cost Efficient Large Language Model
Serving by Exploiting GPU Heterogeneity](https://arxiv.org/pdf/2404.14527).
4 changes: 2 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ import (
"os"

// Ensure scheme package is initialized.
_ "github.com/inftyai/vscheduler/api/config/scheme"
_ "github.com/inftyai/scheduler/api/config/scheme"

"k8s.io/component-base/cli"
"k8s.io/kubernetes/cmd/kube-scheduler/app"

resourceFungibility "github.com/inftyai/vscheduler/pkg/plugins/resource_fungibility"
resourceFungibility "github.com/inftyai/scheduler/pkg/plugins/resource_fungibility"
//+kubebuilder:scaffold:imports
)

Expand Down
Loading
Loading