GPU Support and Go

Table of Contents

- Introduction
- Example
- More Information

Introduction

We present here an example of how to deploy a virtual machine with GPU support using the Go client. This example is part of our integration tests.


Example

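// TestVMWithGPUDeployment picks a rented node that has a GPU, deploys a VM with the
// node's GPUs attached, and verifies the GPU is visible from inside the VM.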
func TestVMWithGPUDeployment(t *testing.T) {
 tfPluginClient, err := setup()
 assert.NoError(t, err)

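 // Bound the whole test with a 5-minute timeout.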
 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 defer cancel()

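 // Generate an SSH key pair: the public key is injected into the VM and the
 // private key is used later to connect to it.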
 publicKey, privateKey, err := GenerateSSHKeyPair()
 assert.NoError(t, err)

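 // Filter for a node that is up, rented by this twin, has a GPU, and has at
 // least 20 GB of free storage and 8 GB of free memory.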
 twinID := uint64(tfPluginClient.TwinID)
 nodeFilter := types.NodeFilter{
  Status:   &statusUp,
  FreeSRU:  convertGBToBytes(20),
  FreeMRU:  convertGBToBytes(8),
  RentedBy: &twinID,
  HasGPU:   &trueVal,
 }

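 // Pick the first matching node, or skip the test if none is available.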
 nodes, err := deployer.FilterNodes(ctx, tfPluginClient, nodeFilter)
 if err != nil {
  t.Skip("no available nodes found")
 }
 nodeID := uint32(nodes[0].NodeID)

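 // Connect to the node directly and list its GPUs.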
 nodeClient, err := tfPluginClient.NcPool.GetNodeClient(tfPluginClient.SubstrateConn, nodeID)
 assert.NoError(t, err)

 gpus, err := nodeClient.GPUs(ctx)
 assert.NoError(t, err)

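 // Define a private network on the selected node.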
 network := workloads.ZNet{
  Name:        "gpuNetwork",
  Description: "network for testing gpu",
  Nodes:       []uint32{nodeID},
  IPRange: gridtypes.NewIPNet(net.IPNet{
   IP:   net.IPv4(10, 20, 0, 0),
   Mask: net.CIDRMask(16, 32),
  }),
  AddWGAccess: false,
 }

 disk := workloads.Disk{
  Name:   "gpuDisk",
  SizeGB: 20,
 }

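 // Define the VM: the public SSH key is injected via SSH_KEY, the disk is
 // mounted at /data, and all of the node's GPUs are attached via the GPUs field.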
 vm := workloads.VM{
  Name:       "gpu",
  Flist:      "https://hub.grid.tf/tf-official-vms/ubuntu-22.04.flist",
  CPU:        4,
  Planetary:  true,
  Memory:     1024 * 8,
  GPUs:       ConvertGPUsToStr(gpus),
  Entrypoint: "/init.sh",
  EnvVars: map[string]string{
   "SSH_KEY": publicKey,
  },
  Mounts: []workloads.Mount{
   {DiskName: disk.Name, MountPoint: "/data"},
  },
  NetworkName: network.Name,
 }

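 // Deploy the network and make sure it is cancelled when the test finishes.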
 err = tfPluginClient.NetworkDeployer.Deploy(ctx, &network)
 assert.NoError(t, err)

 defer func() {
  err = tfPluginClient.NetworkDeployer.Cancel(ctx, &network)
  assert.NoError(t, err)
 }()

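 // Deploy the disk and the GPU VM in a single deployment on the selected node.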
 dl := workloads.NewDeployment("gpu", nodeID, "", nil, network.Name, []workloads.Disk{disk}, nil, []workloads.VM{vm}, nil)
 err = tfPluginClient.DeploymentDeployer.Deploy(ctx, &dl)
 assert.NoError(t, err)

 defer func() {
  err = tfPluginClient.DeploymentDeployer.Cancel(ctx, &dl)
  assert.NoError(t, err)
 }()

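 // Load the deployed VM from the grid state and verify the GPUs were attached.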
 vm, err = tfPluginClient.State.LoadVMFromGrid(nodeID, vm.Name, dl.Name)
 assert.NoError(t, err)
 assert.Equal(t, vm.GPUs, ConvertGPUsToStr(gpus))

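 // Wait for the VM to boot, then check over SSH (using the Planetary/Yggdrasil
 // IP) that the GPU vendor shows up in the lspci output.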
 time.Sleep(30 * time.Second)
 output, err := RemoteRun("root", vm.YggIP, "lspci -v", privateKey)
 assert.NoError(t, err)
 assert.Contains(t, string(output), gpus[0].Vendor)
}

More Information

For more information, you can check the Client Pull Request that adds support for the new calls to list a node's GPUs and to deploy a machine with a GPU.