DmitriiKhizbullin committed
Commit 44339bc (1 parent: edee466)

Multiprocessing 8

Files changed (2)
  1. file_list.txt +0 -3
  2. src/fetch_files.py +28 -18
file_list.txt CHANGED
@@ -48,9 +48,6 @@ demo_data/demo/2/43789_right.jpeg
 demo_data/demo/2/7906_right.jpeg
 demo_data/demo/2/31160_left.jpeg
 demo_data/demo/2/7805_right.jpeg
-release_ckpts/2023-12-20_18-39-06_30301601_V100_x4_resnet50_896_setseeds/inference/preprocessor_config.json
-release_ckpts/2023-12-20_18-39-06_30301601_V100_x4_resnet50_896_setseeds/inference/config.json
-release_ckpts/2023-12-20_18-39-06_30301601_V100_x4_resnet50_896_setseeds/inference/model.safetensors
 release_ckpts/2023-12-24_20-02-18_30345221_V100_x4_resnet34/inference/preprocessor_config.json
 release_ckpts/2023-12-24_20-02-18_30345221_V100_x4_resnet34/inference/model.safetensors
 release_ckpts/2023-12-24_20-02-18_30345221_V100_x4_resnet34/inference/config.json
src/fetch_files.py CHANGED
@@ -1,33 +1,43 @@
 import os
 import requests
+import multiprocessing

 REPO_ROOT = os.path.realpath(os.path.join(os.path.split(__file__)[0], ".."))


-def fetch_files():
+def fetch_one(subpath: str) -> None:
     s3_url = "https://sdaia-kaust-public.s3.us-east-2.amazonaws.com/diabetic-retinopathy-detection/"
+
+    url = f"{s3_url}{subpath}"
+    target_path = os.path.join(REPO_ROOT, subpath)
+    if os.path.exists(target_path):
+        print(f"File already in place: {target_path}")
+        return
+    print(f"Downloading {url}...")
+    req = requests.get(url)
+    if not req.ok:
+        print(f"Failed to download {url}")
+        return
+    print(f"Downloaded {url}")
+    target_dir = os.path.split(target_path)[0]
+    os.makedirs(target_dir, exist_ok=True)
+    with open(target_path, "wb") as fdst:
+        fdst.write(req.content)
+    print(f"Saved to {target_path}")
+
+
+def fetch_files():
     flist_path = os.path.join(REPO_ROOT, "file_list.txt")
+    subpaths = []
     with open(flist_path, "r") as file:
         for line in file:
             subpath = line.strip()
-            url = f"{s3_url}{subpath}"
-            target_path = os.path.join(REPO_ROOT, subpath)
-            if os.path.exists(target_path):
-                print(f"File already in place: {target_path}")
-                continue
-            print(f"Downloading {url}...")
-            req = requests.get(url)
-            if not req.ok:
-                print(f"Failed to download {url}")
-                continue
-            print(f"Downloaded {url}")
-            target_dir = os.path.split(target_path)[0]
-            os.makedirs(target_dir, exist_ok=True)
-            with open(target_path, "wb") as fdst:
-                fdst.write(req.content)
-            print(f"Saved to {target_path}")
+            subpaths.append(subpath)
+
+    with multiprocessing.Pool(8) as pool:
+        pool.map(fetch_one, subpaths)
+
     print("File fetching done")
-    return


 if __name__ == "__main__":
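
Note on the change: the new fetch_files collects every subpath from file_list.txt first and hands the list to an 8-worker multiprocessing.Pool, so up to eight files download concurrently. Below is a minimal alternative sketch that is not part of this commit: because the work is network-bound and req.content buffers each whole file in memory (the model.safetensors checkpoints can be large), it swaps the process pool for a ThreadPoolExecutor and streams each response to disk. fetch_one_streaming, S3_URL, and the 1 MiB chunk size are illustrative assumptions; the bucket URL, file_list.txt layout, and 8-worker count are taken from the diff.

import os
from concurrent.futures import ThreadPoolExecutor

import requests

# Same repo-root convention as the script above.
REPO_ROOT = os.path.realpath(os.path.join(os.path.split(__file__)[0], ".."))
# Bucket URL copied from the diff; S3_URL is just a module-level constant here.
S3_URL = "https://sdaia-kaust-public.s3.us-east-2.amazonaws.com/diabetic-retinopathy-detection/"


def fetch_one_streaming(subpath: str) -> None:
    """Hypothetical variant of fetch_one: stream the response body to disk in chunks."""
    url = f"{S3_URL}{subpath}"
    target_path = os.path.join(REPO_ROOT, subpath)
    if os.path.exists(target_path):
        print(f"File already in place: {target_path}")
        return
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    with requests.get(url, stream=True) as resp:
        if not resp.ok:
            print(f"Failed to download {url}")
            return
        with open(target_path, "wb") as fdst:
            for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB per write
                fdst.write(chunk)
    print(f"Saved to {target_path}")


def fetch_files() -> None:
    flist_path = os.path.join(REPO_ROOT, "file_list.txt")
    with open(flist_path, "r") as file:
        subpaths = [line.strip() for line in file if line.strip()]
    # Eight workers, mirroring multiprocessing.Pool(8) in the commit.
    with ThreadPoolExecutor(max_workers=8) as pool:
        list(pool.map(fetch_one_streaming, subpaths))
    print("File fetching done")


if __name__ == "__main__":
    fetch_files()

Either pool parallelizes downloads that spend most of their time waiting on the network; threads are cheaper to start and skip pickling the work items, while the commit's process pool sidesteps the GIL, which matters little for I/O-bound work like this.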