Documentation
¶
Index ¶
- Variables
- func AiStudioDeploymentLink(tenantId string, subscriptionId string, resourceGroup string, ...) string
- func AiStudioWorkspaceLink(tenantId string, subscriptionId string, resourceGroup string, ...) string
- func IsFinetuneUsageName(usageName string) bool
- func ModelHasDefaultVersion(model AiModel) bool
- func ParseConfig[T comparable](config any) (*T, error)
- func ResolveCapacity(sku AiModelSku, preferred *int32) int32
- type AiModel
- type AiModelDeployment
- type AiModelService
- func (s *AiModelService) FilterModelsByQuotaAcrossLocations(ctx context.Context, subscriptionId string, models []AiModel, ...) ([]AiModel, error)
- func (s *AiModelService) ListFilteredModels(ctx context.Context, subscriptionId string, options *FilterOptions) ([]AiModel, error)
- func (s *AiModelService) ListLocations(ctx context.Context, subscriptionId string) ([]string, error)
- func (s *AiModelService) ListLocationsWithQuota(ctx context.Context, subscriptionId string, allowedLocations []string, ...) ([]string, error)
- func (s *AiModelService) ListModelLocationsWithQuota(ctx context.Context, subscriptionId string, modelName string, ...) ([]ModelLocationQuota, error)
- func (s *AiModelService) ListModelSkus(ctx context.Context, subscriptionId string, modelName string, location string, ...) ([]AiModelSku, error)
- func (s *AiModelService) ListModelVersions(ctx context.Context, subscriptionId string, modelName string, location string) ([]AiModelVersion, string, error)
- func (s *AiModelService) ListModels(ctx context.Context, subscriptionId string, locations []string) ([]AiModel, error)
- func (s *AiModelService) ListUsages(ctx context.Context, subscriptionId string, location string) ([]AiModelUsage, error)
- func (s *AiModelService) ResolveModelDeployments(ctx context.Context, subscriptionId string, modelName string, ...) ([]AiModelDeployment, error)
- func (s *AiModelService) ResolveModelDeploymentsWithQuota(ctx context.Context, subscriptionId string, modelName string, ...) ([]AiModelDeployment, error)
- type AiModelSku
- type AiModelUsage
- type AiModelVersion
- type ComponentConfig
- type DeploymentConfig
- type DeploymentOptions
- type EndpointDeploymentConfig
- type FilterOptions
- type Flow
- type ModelLocationQuota
- type PythonBridge
- type QuotaCheckOptions
- type QuotaRequirement
- type Scope
- type ScriptPath
Constants ¶
This section is empty.
Variables ¶
var (
	// ErrQuotaLocationRequired indicates quota checks were requested without exactly one location.
	ErrQuotaLocationRequired = errors.New("quota checking requires exactly one location")
	// ErrModelNotFound indicates the requested model was not found in the effective model catalog.
	ErrModelNotFound = errors.New("model not found")
	// ErrNoDeploymentMatch indicates no deployment candidate matched provided filters/constraints.
	ErrNoDeploymentMatch = errors.New("no deployment match")
)
Functions ¶
func AiStudioDeploymentLink ¶
func AiStudioDeploymentLink( tenantId string, subscriptionId string, resourceGroup string, workspaceName string, endpointName string, deploymentName string, ) string
AiStudioDeploymentLink returns a link to the Azure AI Studio deployment page.
func AiStudioWorkspaceLink ¶
func AiStudioWorkspaceLink(tenantId string, subscriptionId string, resourceGroup string, workspaceName string) string
AiStudioWorkspaceLink returns a link to the Azure AI Studio workspace page
func IsFinetuneUsageName ¶
func IsFinetuneUsageName(usageName string) bool
IsFinetuneUsageName reports whether the given usage name represents a fine-tune SKU. Fine-tune usage names end with "-finetune" (case-insensitive).
func ModelHasDefaultVersion ¶
func ModelHasDefaultVersion(model AiModel) bool
ModelHasDefaultVersion returns true if any version of the model is marked as default.
func ParseConfig ¶
func ParseConfig[T comparable](config any) (*T, error)
ParseConfig parses a config from a generic interface.
func ResolveCapacity ¶
func ResolveCapacity(sku AiModelSku, preferred *int32) int32
ResolveCapacity resolves the deployment capacity for a SKU. If preferred is set and valid within the SKU's min/max/step constraints, it is used. Otherwise, the SKU's default capacity is used.
Types ¶
type AiModel ¶
type AiModel struct {
// Name is the model name, e.g. "gpt-4o".
Name string
// Format is the model format, e.g. "OpenAI".
Format string
// LifecycleStatus is the model lifecycle status, e.g. "preview", "stable".
LifecycleStatus string
// Capabilities lists the model's capabilities, e.g. ["chat", "embeddings"].
Capabilities []string
// Versions lists the available versions of this model.
Versions []AiModelVersion
// Locations lists the Azure locations where this model is available.
Locations []string
}
AiModel represents an AI model available in the Azure Cognitive Services catalog. It is SDK-agnostic and decoupled from armcognitiveservices types.
func FilterModels ¶
func FilterModels(models []AiModel, options *FilterOptions) []AiModel
FilterModels applies FilterOptions to a list of models.
func FilterModelsByQuota ¶
func FilterModelsByQuota( models []AiModel, usages []AiModelUsage, minRemaining float64, ) []AiModel
FilterModelsByQuota cross-references models' SKU usage names against usage data to filter out models without sufficient remaining capacity.
type AiModelDeployment ¶
type AiModelDeployment struct {
// ModelName is the model name, e.g. "gpt-4o".
ModelName string
// Format is the model format, e.g. "OpenAI".
Format string
// Version is the model version, e.g. "2024-05-13".
Version string
// Location is the Azure location for this deployment.
Location string
// Sku is the selected SKU for this deployment.
Sku AiModelSku
// Capacity is the resolved deployment capacity in units.
// Resolved from: DeploymentOptions.Capacity → Sku.DefaultCapacity → 0 (caller must handle).
Capacity int32
// RemainingQuota is the subscription quota remaining at this location for this SKU.
// Only populated when a quota check is performed. nil means no quota check was done.
RemainingQuota *float64
}
AiModelDeployment is a fully resolved deployment configuration.
Capacity vs Quota:
- Capacity is deployment-level: how many units this specific deployment will consume.
- RemainingQuota is subscription-level: how much total capacity remains at this location for this SKU across all deployments (limit - current_value from usage API).
Constraint: Capacity must be <= RemainingQuota for the deployment to succeed.
type AiModelService ¶
type AiModelService struct {
// contains filtered or unexported fields
}
AiModelService provides operations for querying AI model availability, resolving deployments, and checking quota/usage from Azure Cognitive Services.
func NewAiModelService ¶
func NewAiModelService( azureClient *azapi.AzureClient, subManager *account.SubscriptionsManager, ) *AiModelService
NewAiModelService creates a new AiModelService.
func (*AiModelService) FilterModelsByQuotaAcrossLocations ¶
func (s *AiModelService) FilterModelsByQuotaAcrossLocations( ctx context.Context, subscriptionId string, models []AiModel, locations []string, minRemaining float64, ) ([]AiModel, error)
FilterModelsByQuotaAcrossLocations filters models to those having sufficient quota in at least one location. When locations is empty, model-declared locations are used.
func (*AiModelService) ListFilteredModels ¶
func (s *AiModelService) ListFilteredModels( ctx context.Context, subscriptionId string, options *FilterOptions, ) ([]AiModel, error)
ListFilteredModels fetches and filters AI models based on the provided criteria.
func (*AiModelService) ListLocations ¶
func (s *AiModelService) ListLocations( ctx context.Context, subscriptionId string, ) ([]string, error)
ListLocations returns subscription location names that can be used for model queries.
func (*AiModelService) ListLocationsWithQuota ¶
func (s *AiModelService) ListLocationsWithQuota( ctx context.Context, subscriptionId string, allowedLocations []string, requirements []QuotaRequirement, ) ([]string, error)
ListLocationsWithQuota returns locations with sufficient quota for all given requirements. When allowedLocations are provided, they are intersected with AI Services-supported locations to avoid querying locations where AI Services are not available.
func (*AiModelService) ListModelLocationsWithQuota ¶
func (s *AiModelService) ListModelLocationsWithQuota( ctx context.Context, subscriptionId string, modelName string, allowedLocations []string, minRemaining float64, ) ([]ModelLocationQuota, error)
ListModelLocationsWithQuota returns model locations that have sufficient remaining quota. MaxRemainingQuota is the max remaining quota across the model's SKU usage names in each location where usage data exists.
func (*AiModelService) ListModelSkus ¶
func (s *AiModelService) ListModelSkus( ctx context.Context, subscriptionId string, modelName string, location string, version string, ) ([]AiModelSku, error)
ListModelSkus returns available SKUs for a model+version at a location.
func (*AiModelService) ListModelVersions ¶
func (s *AiModelService) ListModelVersions( ctx context.Context, subscriptionId string, modelName string, location string, ) ([]AiModelVersion, string, error)
ListModelVersions returns available versions for a specific model at a location.
func (*AiModelService) ListModels ¶
func (s *AiModelService) ListModels( ctx context.Context, subscriptionId string, locations []string, ) ([]AiModel, error)
ListModels fetches AI models from the Azure Cognitive Services catalog. If locations is empty, fetches across all subscription locations in parallel.
func (*AiModelService) ListUsages ¶
func (s *AiModelService) ListUsages( ctx context.Context, subscriptionId string, location string, ) ([]AiModelUsage, error)
ListUsages returns quota/usage data for a location.
func (*AiModelService) ResolveModelDeployments ¶
func (s *AiModelService) ResolveModelDeployments( ctx context.Context, subscriptionId string, modelName string, options *DeploymentOptions, ) ([]AiModelDeployment, error)
ResolveModelDeployments returns all valid deployment configurations for the given model. Returns multiple candidates when multiple version/SKU/location combos are valid. Capacity resolution: options.Capacity → SKU default → 0 (caller must handle).
func (*AiModelService) ResolveModelDeploymentsWithQuota ¶
func (s *AiModelService) ResolveModelDeploymentsWithQuota( ctx context.Context, subscriptionId string, modelName string, options *DeploymentOptions, quotaOpts *QuotaCheckOptions, ) ([]AiModelDeployment, error)
ResolveModelDeploymentsWithQuota resolves deployments and filters by quota. Skips SKUs where resolved capacity exceeds remaining quota. Populates RemainingQuota on results.
type AiModelSku ¶
type AiModelSku struct {
// Name is the SKU name, e.g. "GlobalStandard", "Standard".
Name string
// UsageName is the quota usage name used to join with usage/quota data,
// e.g. "OpenAI.Standard.gpt-4o".
UsageName string
// DefaultCapacity is the suggested deployment capacity (0 if unavailable).
DefaultCapacity int32
// MinCapacity is the minimum allowed deployment capacity.
MinCapacity int32
// MaxCapacity is the maximum allowed deployment capacity.
MaxCapacity int32
// CapacityStep is the capacity increment granularity.
CapacityStep int32
}
AiModelSku represents a deployment SKU with its capacity constraints.
type AiModelUsage ¶
type AiModelUsage struct {
// Name is the quota usage name, e.g. "OpenAI.Standard.gpt-4o".
Name string
// CurrentValue is the amount of quota currently consumed.
CurrentValue float64
// Limit is the total quota limit for this usage name.
Limit float64
}
AiModelUsage represents a subscription-level quota/usage entry for a specific model SKU at a location.
type AiModelVersion ¶
type AiModelVersion struct {
// Version is the version string, e.g. "2024-05-13".
Version string
// IsDefault indicates whether this is the default version.
IsDefault bool
// Skus lists the available SKUs for this version.
Skus []AiModelSku
}
AiModelVersion represents a specific version of an AI model.
type ComponentConfig ¶
type ComponentConfig struct {
Name osutil.ExpandableString `yaml:"name,omitempty"`
Path string `yaml:"path,omitempty"`
Overrides map[string]osutil.ExpandableString `yaml:"overrides,omitempty"`
}
ComponentConfig is a base configuration structure used by multiple AI components
type DeploymentConfig ¶
type DeploymentConfig struct {
ComponentConfig `yaml:",inline"`
// A map of environment variables to set for the deployment
Environment osutil.ExpandableMap `yaml:"environment,omitempty"`
}
type DeploymentOptions ¶
type DeploymentOptions struct {
// Locations lists preferred locations. If empty, location is left unset on results.
Locations []string
// Versions lists preferred versions. If empty, all versions are included.
Versions []string
// Skus lists preferred SKU names, e.g. ["GlobalStandard", "Standard"]. If empty, all SKUs are included.
Skus []string
// Capacity is the preferred deployment capacity. If set and valid
// (within min/max, aligned to step), used directly. If nil, uses SKU default.
Capacity *int32
// IncludeFinetuneSkus controls whether fine-tune SKUs (usage names ending with
// "-finetune") are included. Defaults to false (excluded).
IncludeFinetuneSkus bool
}
DeploymentOptions specifies preferences for resolving a model deployment. All fields are optional filters. When empty, no filtering is applied for that dimension.
type EndpointDeploymentConfig ¶
type EndpointDeploymentConfig struct {
Workspace osutil.ExpandableString `yaml:"workspace,omitempty"`
Environment *ComponentConfig `yaml:"environment,omitempty"`
Model *ComponentConfig `yaml:"model,omitempty"`
Flow *ComponentConfig `yaml:"flow,omitempty"`
Deployment *DeploymentConfig `yaml:"deployment,omitempty"`
}
EndpointDeploymentConfig is a configuration structure for an ML online endpoint deployment
type FilterOptions ¶
type FilterOptions struct {
// Locations filters to models available at these locations.
Locations []string
// Capabilities filters by model capabilities, e.g. ["chat", "embeddings"].
Capabilities []string
// Formats filters by model format, e.g. ["OpenAI"].
Formats []string
// Statuses filters by lifecycle status, e.g. ["preview", "stable"].
Statuses []string
// ExcludeModelNames excludes models by name (for multi-model selection flows).
ExcludeModelNames []string
}
FilterOptions specifies criteria for filtering AI models.
type Flow ¶
type Flow struct {
Name string `json:"name"`
Description string `json:"description"`
Type string `json:"type"`
Path string `json:"path"`
DisplayName string `json:"display_name"`
Tags map[string]string `json:"tags"`
}
Flow is a configuration that defines a Prompt flow component.
type ModelLocationQuota ¶
type ModelLocationQuota struct {
// Location is the Azure location name.
Location string
// MaxRemainingQuota is the maximum remaining quota across model SKUs with usage entries.
MaxRemainingQuota float64
}
ModelLocationQuota represents model quota availability in a specific location.
type PythonBridge ¶
type PythonBridge interface {
Initialize(ctx context.Context) error
RequiredExternalTools(ctx context.Context) []tools.ExternalTool
Run(ctx context.Context, scriptName ScriptPath, args ...string) (*exec.RunResult, error)
}
PythonBridge is an interface to execute python components from the embedded AI resources project
func NewPythonBridge ¶
func NewPythonBridge( azdCtx *azdcontext.AzdContext, pythonCli *python.Cli, ) PythonBridge
NewPythonBridge creates a new PythonBridge instance
type QuotaCheckOptions ¶
type QuotaCheckOptions struct {
// MinRemainingCapacity is the minimum remaining quota required per SKU.
// Models/deployments where no SKU meets this threshold are excluded.
// 0 means "any remaining > 0" (i.e. not fully exhausted).
MinRemainingCapacity float64
}
QuotaCheckOptions enables quota-aware model/deployment selection. When provided, the service fetches usage data alongside the model catalog and cross-references via AiModelSku.UsageName == AiModelUsage.Name.
type QuotaRequirement ¶
type QuotaRequirement struct {
// UsageName is the quota usage name to check, e.g. "OpenAI.Standard.gpt-4o".
UsageName string
// MinCapacity is the minimum remaining capacity needed. If 0, defaults to 1.
MinCapacity float64
}
QuotaRequirement specifies a single quota check: the usage name to check and the minimum remaining capacity needed.
type Scope ¶
type Scope struct {
// contains filtered or unexported fields
}
Scope is a context-based structure that defines the Azure scope of an AI component.
func (*Scope) ResourceGroup ¶
ResourceGroup returns the resource group from the scope
func (*Scope) SubscriptionId ¶
SubscriptionId returns the subscription ID from the scope
type ScriptPath ¶
type ScriptPath string
ScriptPath is a type to represent the path of a Python script
const (
	// PromptFlowClient is the path to the PromptFlow Client Python script
	PromptFlowClient ScriptPath = "pf_client.py"
	// MLClient is the path to the ML Client Python script
	MLClient ScriptPath = "ml_client.py"
)