ljy266987 committed on
Commit
43cd5f2
1 Parent(s): d2f567c
Files changed (2) hide show
  1. app.py +15 -0
  2. spaces/zero/torch.py +2 -2
app.py CHANGED
@@ -4,6 +4,21 @@ import os
4
  # 获取全部环境变量
5
  env_vars = os.environ
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # 遍历并打印环境变量
8
  for key, value in env_vars.items():
9
  print(f"{key}: {value}")
 
4
  # 获取全部环境变量
5
  env_vars = os.environ
6
 
7
+ os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = 0
8
+ import torch
9
+
10
+ if torch.cuda.is_available():
11
+ print("CUDA is available. Listing available GPUs:")
12
+ # 获取并打印GPU数量
13
+ num_gpus = torch.cuda.device_count()
14
+ for i in range(num_gpus):
15
+ print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
16
+ # 其他相关信息,例如内存
17
+ print(f" Memory Allocated: {torch.cuda.memory_allocated(i) / 1024 ** 2:.0f} MB")
18
+ print(f" Memory Reserved: {torch.cuda.memory_reserved(i) / 1024 ** 2:.0f} MB")
19
+ else:
20
+ print("CUDA is not available.")
21
+
22
  # 遍历并打印环境变量
23
  for key, value in env_vars.items():
24
  print(f"{key}: {value}")
spaces/zero/torch.py CHANGED
@@ -241,14 +241,14 @@ if (torch := maybe_import_torch()):
241
  bitsandbytes.unpatch()
242
 
243
  def _move(nvidia_uuid: str):
244
- os.environ['CUDA_VISIBLE_DEVICES'] = "5,8,10,11,14,15"
245
  import subprocess
246
  subprocess.run(
247
  ['nvidia-smi'], text=True
248
  )
249
 
250
  print("_move(nvidia_uuid: str)")
251
- torch.Tensor([5]).cuda() # CUDA init
252
  for op in to_ops.items():
253
  tensor, parsed_args = op
254
  _, dtype, _, memory_format = parsed_args
 
241
  bitsandbytes.unpatch()
242
 
243
  def _move(nvidia_uuid: str):
244
+ os.environ['CUDA_VISIBLE_DEVICES'] = "0,1,2,3,4,5"
245
  import subprocess
246
  subprocess.run(
247
  ['nvidia-smi'], text=True
248
  )
249
 
250
  print("_move(nvidia_uuid: str)")
251
+ torch.Tensor([0]).cuda() # CUDA init
252
  for op in to_ops.items():
253
  tensor, parsed_args = op
254
  _, dtype, _, memory_format = parsed_args