Skip to content

Commit e2f0786

Browse files
Merge pull request #157 from kerthcet/cleanup/support-release-helm
Refactor backendRuntime
2 parents b236943 + e8fea37 commit e2f0786

35 files changed

+183
-155
lines changed

.github/workflows/publish-helm-chart.yaml

-12
This file was deleted.

Makefile

+1-2
Original file line numberDiff line numberDiff line change
@@ -295,15 +295,14 @@ $(HELMIFY): $(LOCALBIN)
295295

296296
.PHONY: helm
297297
helm: manifests kustomize helmify
298-
$(KUBECTL) create namespace llmaz-system --dry-run=client -o yaml | $(KUBECTL) apply -f -
299298
$(KUSTOMIZE) build config/default | $(HELMIFY) -crd-dir
300299

301300
.PHONY: helm-install
302301
helm-install: helm
303302
helm upgrade --install llmaz ./chart --namespace llmaz-system --create-namespace -f ./chart/values.global.yaml
304303

305304
.PHONY: helm-package
306-
helm-package:
305+
helm-package: helm
307306
# Make sure the appended values always start on a new line.
308307
printf "\n" >> ./chart/values.yaml
309308
cat ./chart/values.global.yaml >> ./chart/values.yaml

api/core/v1alpha1/model_types.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,14 @@ const (
131131
DraftRole ModelRole = "draft"
132132
)
133133

134-
type ModelRepresentative struct {
134+
// ModelRefer refers to a created Model with its role.
135+
type ModelRefer struct {
135136
// Name represents the model name.
136137
Name ModelName `json:"name"`
137138
// Role represents the model role once more than one model is required.
139+
// Such as a draft role, which means running with SpeculativeDecoding,
140+
// and default arguments for backend will be searched in backendRuntime
141+
// with the name of speculative-decoding.
138142
// +kubebuilder:validation:Enum={main,draft}
139143
// +kubebuilder:default=main
140144
// +optional
@@ -148,7 +152,7 @@ type ModelClaims struct {
148152
// speculative decoding, then one model is main(target) model, another one
149153
// is draft model.
150154
// +kubebuilder:validation:MinItems=1
151-
Models []ModelRepresentative `json:"models,omitempty"`
155+
Models []ModelRefer `json:"models,omitempty"`
152156
// InferenceFlavors represents a list of flavors with fungibility supported
153157
// to serve the model.
154158
// - If not set, always apply with the 0-index model by default.

api/core/v1alpha1/zz_generated.deepcopy.go

+5-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/inference/v1alpha1/backendruntime_types.go

+10-14
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,15 @@ import (
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2222
)
2323

24-
type InferenceMode string
25-
26-
const (
27-
DefaultInferenceMode InferenceMode = "Default"
28-
SpeculativeDecodingInferenceMode InferenceMode = "SpeculativeDecoding"
29-
)
30-
24+
// BackendRuntimeArg is a set of preset arguments provided for ease of use.
25+
// Do not edit the preset names unless you set the argument name explicitly
26+
// in Playground backendRuntimeConfig.
3127
type BackendRuntimeArg struct {
32-
Mode InferenceMode `json:"mode"`
33-
Flags []string `json:"flags,omitempty"`
28+
// Name represents the identifier of the backendRuntime argument.
29+
Name string `json:"name"`
30+
// Flags represents all the preset configurations.
31+
// A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
32+
Flags []string `json:"flags,omitempty"`
3433
}
3534

3635
// BackendRuntimeSpec defines the desired state of BackendRuntime
@@ -43,11 +42,8 @@ type BackendRuntimeSpec struct {
4342
// Version represents the default version of the backendRuntime.
4443
// It will be appended to the image as a tag.
4544
Version string `json:"version"`
46-
// Args represents the args of the backendRuntime.
47-
// They can be appended or overwritten by the Playground args.
48-
// The key is the inference option, like default one or advanced
49-
// speculativeDecoding, the values are the corresponding args.
50-
// Flag around with {{ .XXX }} is a flag waiting for render.
45+
// Args represents the preset arguments of the backendRuntime.
46+
// They can be appended or overwritten by the Playground backendRuntimeConfig.
5147
Args []BackendRuntimeArg `json:"args,omitempty"`
5248
// Envs represents the environments set to the container.
5349
// +optional

chart/Chart.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ type: application
1313
# This is the chart version. This version number should be incremented each time you make changes
1414
# to the chart and its templates, including the app version.
1515
# Versions are expected to follow Semantic Versioning (https://semver.org/)
16-
version: 0.0.2
16+
version: 0.0.3
1717
# This is the version number of the application being deployed. This version number should be
1818
# incremented each time you make changes to the application. Versions are not expected to
1919
# follow Semantic Versioning. They should reflect the version the application is using.
2020
# It is recommended to use it with quotes.
21-
appVersion: "0.0.6"
21+
appVersion: 0.0.7

chart/crds/backendruntime-crd.yaml

+13-7
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,27 @@ spec:
4242
properties:
4343
args:
4444
description: |-
45-
Args represents the args of the backendRuntime.
46-
They can be appended or overwritten by the Playground args.
47-
The key is the inference option, like default one or advanced
48-
speculativeDecoding, the values are the corresponding args.
49-
Flag around with {{ .XXX }} is a flag waiting for render.
45+
Args represents the preset arguments of the backendRuntime.
46+
They can be appended or overwritten by the Playground backendRuntimeConfig.
5047
items:
48+
description: |-
49+
BackendRuntimeArg is a set of preset arguments provided for ease of use.
50+
Do not edit the preset names unless you set the argument name explicitly
51+
in Playground backendRuntimeConfig.
5152
properties:
5253
flags:
54+
description: |-
55+
Flags represents all the preset configurations.
56+
A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
5357
items:
5458
type: string
5559
type: array
56-
mode:
60+
name:
61+
description: Name represents the identifier of the backendRuntime
62+
argument.
5763
type: string
5864
required:
59-
- mode
65+
- name
6066
type: object
6167
type: array
6268
commands:

chart/crds/playground-crd.yaml

+7-2
Original file line numberDiff line numberDiff line change
@@ -259,14 +259,19 @@ spec:
259259
speculative decoding, then one model is main(target) model, another one
260260
is draft model.
261261
items:
262+
description: ModelRefer refers to a created Model with its
263+
role.
262264
properties:
263265
name:
264266
description: Name represents the model name.
265267
type: string
266268
role:
267269
default: main
268-
description: Role represents the model role once more than
269-
one model is required.
270+
description: |-
271+
Role represents the model role once more than one model is required.
272+
Such as a draft role, which means running with SpeculativeDecoding,
273+
and default arguments for backend will be searched in backendRuntime
274+
with the name of speculative-decoding.
270275
enum:
271276
- main
272277
- draft

chart/crds/service-crd.yaml

+7-2
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,19 @@ spec:
8484
speculative decoding, then one model is main(target) model, another one
8585
is draft model.
8686
items:
87+
description: ModelRefer refers to a created Model with its
88+
role.
8789
properties:
8890
name:
8991
description: Name represents the model name.
9092
type: string
9193
role:
9294
default: main
93-
description: Role represents the model role once more than
94-
one model is required.
95+
description: |-
96+
Role represents the model role once more than one model is required.
97+
Such as a draft role, which means running with SpeculativeDecoding,
98+
and default arguments for backend will be searched in backendRuntime
99+
with the name of speculative-decoding.
95100
enum:
96101
- main
97102
- draft

chart/templates/backends/llamacpp.yaml

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,18 @@ spec:
1212
- ./llama-server
1313
image: ghcr.io/ggerganov/llama.cpp
1414
version: server
15+
# Do not edit the preset argument name unless you know what you're doing.
16+
# Feel free to add more arguments to suit your requirements.
1517
args:
16-
- mode: Default
18+
- name: default
1719
flags:
1820
- -m
1921
- "{{`{{ .ModelPath }}`}}"
2022
- --host
2123
- "0.0.0.0"
2224
- --port
2325
- "8080"
24-
- mode: SpeculativeDecoding
26+
- name: speculative-decoding
2527
flags:
2628
- -m
2729
- "{{`{{ .ModelPath }}`}}"

chart/templates/backends/sglang.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ spec:
1414
- sglang.launch_server
1515
image: lmsysorg/sglang
1616
version: v0.2.10-cu121
17+
# Do not edit the preset argument name unless you know what you're doing.
18+
# Feel free to add more arguments to suit your requirements.
1719
args:
18-
- mode: Default
20+
- name: default
1921
flags:
2022
- --model-path
2123
- "{{`{{ .ModelPath }}`}}"

chart/templates/backends/vllm.yaml

+4-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ spec:
1414
- vllm.entrypoints.openai.api_server
1515
image: vllm/vllm-openai
1616
version: v0.6.0
17+
# Do not edit the preset argument name unless you know what you're doing.
18+
# Feel free to add more arguments to suit your requirements.
1719
args:
18-
- mode: Default
20+
- name: default
1921
flags:
2022
- --model
2123
- "{{`{{ .ModelPath }}`}}"
@@ -25,7 +27,7 @@ spec:
2527
- "0.0.0.0"
2628
- --port
2729
- "8080"
28-
- mode: SpeculativeDecoding
30+
- name: speculative-decoding
2931
flags:
3032
- --model
3133
- "{{`{{ .ModelPath }}`}}"

client-go/applyconfiguration/core/v1alpha1/modelclaims.go

+3-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/core/v1alpha1/modelrepresentative.go renamed to client-go/applyconfiguration/core/v1alpha1/modelrefer.go

+7-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/utils.go

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/inference.llmaz.io_backendruntimes.yaml

+13-7
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,27 @@ spec:
4343
properties:
4444
args:
4545
description: |-
46-
Args represents the args of the backendRuntime.
47-
They can be appended or overwritten by the Playground args.
48-
The key is the inference option, like default one or advanced
49-
speculativeDecoding, the values are the corresponding args.
50-
Flag around with {{ .XXX }} is a flag waiting for render.
46+
Args represents the preset arguments of the backendRuntime.
47+
They can be appended or overwritten by the Playground backendRuntimeConfig.
5148
items:
49+
description: |-
50+
BackendRuntimeArg is a set of preset arguments provided for ease of use.
51+
Do not edit the preset names unless you set the argument name explicitly
52+
in Playground backendRuntimeConfig.
5253
properties:
5354
flags:
55+
description: |-
56+
Flags represents all the preset configurations.
57+
A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
5458
items:
5559
type: string
5660
type: array
57-
mode:
61+
name:
62+
description: Name represents the identifier of the backendRuntime
63+
argument.
5864
type: string
5965
required:
60-
- mode
66+
- name
6167
type: object
6268
type: array
6369
commands:

config/crd/bases/inference.llmaz.io_playgrounds.yaml

+7-2
Original file line numberDiff line numberDiff line change
@@ -260,14 +260,19 @@ spec:
260260
speculative decoding, then one model is main(target) model, another one
261261
is draft model.
262262
items:
263+
description: ModelRefer refers to a created Model with its
264+
role.
263265
properties:
264266
name:
265267
description: Name represents the model name.
266268
type: string
267269
role:
268270
default: main
269-
description: Role represents the model role once more than
270-
one model is required.
271+
description: |-
272+
Role represents the model role once more than one model is required.
273+
Such as a draft role, which means running with SpeculativeDecoding,
274+
and default arguments for backend will be searched in backendRuntime
275+
with the name of speculative-decoding.
271276
enum:
272277
- main
273278
- draft

0 commit comments

Comments
 (0)