|
# CustomResourceDefinition for the namespaced `ModelServer` kind (group
# intel.com, version v1alpha1). Each ModelServer object describes one
# OpenVINO Model Server deployment: the container image, deployment and
# service settings, the served model(s), server tuning and model storage
# access.
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1
metadata:
  creationTimestamp: null
  name: modelservers.intel.com
spec:
  group: intel.com
  names:
    kind: ModelServer
    listKind: ModelServerList
    plural: modelservers
    singular: modelserver
  scope: Namespaced
  versions:
    - name: v1alpha1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          description: >-
            ModelServer is the Schema for the modelserver API representing
            OpenVINO Model Server instances
          type: object
          properties:
            apiVersion:
              description: >-
                APIVersion defines the versioned schema of this representation
                of an object. Servers should convert recognized schemas to the
                latest internal value, and may reject unrecognized values. More
                info:
                https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
              type: string
            kind:
              description: >-
                Kind is a string value representing the REST resource this
                object represents. Servers may infer this from the endpoint the
                client submits requests to. Cannot be updated. In CamelCase.
                More info:
                https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
              type: string
            metadata:
              type: object
            spec:
              description: Spec defines the desired state of Ovms
              type: object
              required:
                - image_name
              properties:
                image_name:
                  description: Model Server docker image to be used
                  type: string
                  default: >-
                    registry.connect.redhat.com/intel/openvino-model-server:latest
                deployment_parameters:
                  description: Cluster deployment parameters to be applied to the Model Server
                  type: object
                  properties:
                    # Also the path used by the scale subresource below
                    # (specReplicasPath).
                    replicas:
                      type: integer
                      default: 1
                    # The next four objects are free-form: no inner schema is
                    # declared, so whatever the user supplies is kept as-is.
                    node_affinity:
                      type: object
                      x-kubernetes-preserve-unknown-fields: true
                    pod_affinity:
                      type: object
                      x-kubernetes-preserve-unknown-fields: true
                    pod_antiaffinity:
                      type: object
                      x-kubernetes-preserve-unknown-fields: true
                    update_strategy:
                      type: object
                      x-kubernetes-preserve-unknown-fields: true
                    openshift_service_mesh:
                      description: If selected deployed pods will get annotations for integration with Red Hat Service Mesh
                      type: boolean
                      default: false
                    # NOTE(review): nesting of `resources` under
                    # deployment_parameters was reconstructed from a flattened
                    # source — confirm against the operator's values schema.
                    resources:
                      description: Resources for Model Serving server
                      type: object
                      properties:
                        limits:
                          type: object
                          properties:
                            cpu:
                              description: >-
                                Example values: "1", "4", "12"
                              type: string
                            memory:
                              description: >-
                                Example values "500M", "1.5Gi", "12Gi"
                              type: string
                            # NOTE(review): the "Only integer values" sentence
                            # presumably refers to xpu_device_quantity — confirm.
                            xpu_device:
                              description: >-
                                The device must be supported by the device plugin. Learn more on https://github.com/intel/intel-device-plugins-for-kubernetes. Only integer values are allowed.
                              type: string
                            xpu_device_quantity:
                              type: string
                              default: "1"
                        requests:
                          type: object
                          properties:
                            cpu:
                              description: >-
                                Example values: "1", "4", "12"
                              type: string
                            memory:
                              description: >-
                                Example values "500M", "1.5Gi", "12Gi"
                              type: string
                            # NOTE(review): same remark as limits.xpu_device.
                            xpu_device:
                              description: >-
                                The device must be supported by the device plugin. Learn more on https://github.com/intel/intel-device-plugins-for-kubernetes. Only integer values are allowed.
                              type: string
                            xpu_device_quantity:
                              type: string
                              default: "1"
                service_parameters:
                  type: object
                  description: Fill service settings
                  properties:
                    grpc_port:
                      description: gRPC service port
                      type: integer
                      format: int32
                      default: 8080
                    rest_port:
                      description: REST service port
                      type: integer
                      format: int32
                      default: 8081
                    service_type:
                      description: Service type
                      type: string
                      default: ClusterIP
                      # Values are spelled exactly as Kubernetes Service types,
                      # which are case-sensitive ("LoadBalancer", not
                      # "Loadbalancer").
                      enum:
                        - ClusterIP
                        - NodePort
                        - LoadBalancer
                models_settings:
                  type: object
                  description: Fill model settings if model config is not used
                  properties:
                    single_model_mode:
                      description: >-
                        Select this option for serving a single model.
                        For multi model and DAG deployments, a configuration file `config.json` should be added to a config_map resource
                      type: boolean
                      default: true
                    config_configmap_name:
                      type: string
                    config_path:
                      type: string
                    model_name:
                      description: Name of the model loaded to Model Server
                      type: string
                      default: resnet
                    model_path:
                      description: Path to the model files
                      type: string
                      default: 'gs://ovms-public-eu/resnet50-binary'
                    shape:
                      description: Resets models shape (model must support reshaping). If set, batch_size parameter is ignored
                      type: string
                    batch_size:
                      description: Resets models batchsize, int value or auto. This parameter will be ignored if shape is set
                      type: string
                    target_device:
                      description: Target device to run the inference
                      type: string
                    # plugin_config and model_version_policy carry JSON
                    # documents encoded as strings, not nested objects.
                    plugin_config:
                      description: A dictionary of plugin configuration keys and their values
                      type: string
                      default: '{"CPU_THROUGHPUT_STREAMS":"1"}'
                    model_version_policy:
                      description: Model version policy
                      type: string
                      default: '{"latest": { "num_versions":1 }}'
                    layout:
                      description: Defines model input/output layouts
                      type: string
                    nireq:
                      description: Size of inference request queue for model executions
                      type: integer
                      format: int32
                    is_stateful:
                      type: boolean
                      default: false
                    idle_sequence_cleanup:
                      description: Flag indicating if model is subject to sequence cleaner scans
                      type: boolean
                      default: true
                    low_latency_transformation:
                      description: Flag indicating that Model Server should perform low latency transformation on that model
                      type: boolean
                      default: false
                    max_sequence_number:
                      description: Determines how many sequences can be processed concurrently by one model instance. When that value is reached, attempt to start a new sequence will result in error.
                      type: integer
                      format: int32
                server_settings:
                  type: object
                  properties:
                    file_system_poll_wait_seconds:
                      description: Time interval between config and model versions changes detections
                      type: integer
                      format: int32
                      default: 0
                    log_level:
                      description: Serving log level
                      type: string
                      default: INFO
                      enum:
                        - TRACE
                        - DEBUG
                        - INFO
                        - WARNING
                        - ERROR
                    grpc_workers:
                      description: Number of gRPC servers. Default 1. Increase for multi client, high throughput scenarios
                      type: integer
                      format: int32
                      default: 1
                    # No schema default here; the description states the
                    # effective default depends on the number of CPUs.
                    rest_workers:
                      description: Number of worker threads in REST server - has no effect if rest_port is not set. Default value depends on number of CPUs.
                      type: integer
                      format: int32
                    sequence_cleaner_poll_wait_minutes:
                      description: Time interval between two consecutive sequence cleaner scans. Default is 5. Zero value disables sequence cleaner.
                      type: integer
                      format: int32
                      default: 5
                models_repository:
                  type: object
                  description: Access definition for model storage
                  properties:
                    storage_type:
                      type: string
                      default: google
                      enum:
                        - S3
                        - google
                        - azure
                        - cluster
                    models_host_path:
                      type: string
                      description: Host path to be mounted inside the containers as /models dir
                    models_volume_claim:
                      type: string
                      description: Persistent volume claim to be mounted as /models dir
                    runAsUser:
                      description: >-
                        Set the account ID if access to the model repository is restricted. Model server will start with this security context.
                        In openshift, you might need to create Security Context Constraints to allow grant permissions for changing the context.
                      type: string
                    runAsGroup:
                      type: string
                      description: >-
                        Set the group ID if access to the model repository is restricted. Model server will start with this security context.
                        In openshift, you might need to create Security Context Constraints to allow grant permissions for changing the context.
                    aws_secret_access_key:
                      type: string
                    aws_access_key_id:
                      type: string
                    aws_region:
                      type: string
                    s3_compat_api_endpoint:
                      type: string
                      description: Optional for AWS s3 storage and mandatory for Minio and other s3 compatible storage types
                    gcp_creds_secret_name:
                      type: string
                      description: Secret name including Google Cloud Storage access token
                    azure_storage_connection_string:
                      type: string
                      description: Connection string to download the models from Azure Storage blob containers
                    https_proxy:
                      description: https proxy to connect to the cloud storage
                      type: string
                    http_proxy:
                      description: http proxy to connect to the cloud storage
                      type: string
              # NOTE(review): attached to `spec` (mirroring `status` below);
              # the flattened source does not show the original nesting —
              # confirm it was not meant for models_repository.
              x-kubernetes-preserve-unknown-fields: true
            status:
              description: Status defines the observed state of Ovms
              type: object
              x-kubernetes-preserve-unknown-fields: true
      subresources:
        # Enables the /status endpoint.
        status: {}
        # Enables the /scale endpoint, driven by
        # spec.deployment_parameters.replicas.
        scale:
          specReplicasPath: .spec.deployment_parameters.replicas
          statusReplicasPath: .status.replicas
          labelSelectorPath: .status.labelSelector
  conversion:
    strategy: None
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: null
  storedVersions: null
| 300 | + |
0 commit comments