Gajesh Ladhar committed
Commit a176fb5 · 1 Parent(s): c71037b

utils added

Files changed (2)
  1. src/data.py +1 -1
  2. src/utils.py +77 -0
src/data.py CHANGED
@@ -46,7 +46,7 @@ class DinoDataset(Dataset):
             queue_size (int): Max queue length for shared store
         """
         if imgsz < 320:
-            raise ValueError("❗️imgsz must be ≥ 320 for stable patch extraction — got {}".format(imgsz))
+            raise ValueError("imgsz must be ≥ 320 for stable patch extraction — got {}".format(imgsz))
         self.imgsz = imgsz
         metadata_url = "https://huggingface.co/datasets/gajeshladhar/core-five/resolve/main/metadata.parquet"
         self.df_metadata = gpd.read_parquet(fsspec.open(metadata_url).open())
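
For context, a minimal sketch of the guard's behavior (hypothetical usage: the imgsz keyword matches the code above, but any other required constructor arguments are omitted):

from src.data import DinoDataset

try:
    dataset = DinoDataset(imgsz=256)  # below the 320 minimum
except ValueError as err:
    print(err)  # "imgsz must be ≥ 320 for stable patch extraction ..."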
src/utils.py CHANGED
@@ -0,0 +1,77 @@
+import io
+import os
+import torch
+from torch import nn
+from torch.amp import autocast
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+
+import copy
+import queue
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+
+import fsspec
+import xarray as xr
+from tqdm.notebook import tqdm
+
+from ultralytics import YOLO
+from IPython.display import clear_output
+
+from multiprocessing import Manager
+from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
+
+import huggingface_hub as hf
+import albumentations as A
+
+import h5py
+import requests
+from io import BytesIO
+
+import datetime
+from pathlib import Path
+import tempfile
+import shutil
+
+# parallel processing of static datasets
+manager = Manager()
+shared_store = manager.list()
+process_pool = ProcessPoolExecutor(max_workers=6)
+
+def write_last_updated(path="store_last_updated.txt"):
+    with tempfile.NamedTemporaryFile("w", delete=False, dir=".") as tmp:  # write via temp file, then move into place so the update is atomic
+        tmp.write(f"{datetime.datetime.now().isoformat()}")
+        tmp_path = tmp.name
+    shutil.move(tmp_path, path)
+
+
+class AddPoissonNoise(A.ImageOnlyTransform):
+    def __init__(self, p=0.5):
+        super().__init__(p=p)  # pass p by keyword; a positional arg would be taken as always_apply
+
+    def apply(self, image, **params):
+        image = image.astype(np.float32) / 255.0 if image.dtype == np.uint8 else image.copy()  # uint8 scaled to [0, 1]; float inputs assumed to be in [0, 1]
+        noisy = np.random.poisson(image * 255.0)  # shot noise: Poisson-distributed counts per pixel
+        return np.clip(noisy, 0, 255).astype('uint8')
+
+class AddSaltPepperNoise(A.ImageOnlyTransform):
+    def __init__(self, amount=0.02, salt_vs_pepper=0.5, p=0.5):
+        super().__init__(p=p)
+        self.amount = amount
+        self.salt_vs_pepper = salt_vs_pepper
+
+    def apply(self, image, **params):
+        noisy = image.copy()
+        num_salt = np.ceil(self.amount * image.size * self.salt_vs_pepper)
+        num_pepper = np.ceil(self.amount * image.size * (1.0 - self.salt_vs_pepper))
+
+        # Salt noise
+        coords = [np.random.randint(0, i, int(num_salt)) for i in image.shape]  # randint's upper bound is exclusive, so use i, not i - 1
+        noisy[tuple(coords)] = 255  # salt pixels set to white (assumes uint8 input)
+
+        # Pepper noise
+        coords = [np.random.randint(0, i, int(num_pepper)) for i in image.shape]
+        noisy[tuple(coords)] = 0  # pepper pixels set to black
+
+        return noisy
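
As a usage note, here is a minimal sketch of how these transforms could be composed into an albumentations pipeline (the probabilities, image shape, and import path src.utils are illustrative assumptions, not part of the commit):

import numpy as np
import albumentations as A
from src.utils import AddPoissonNoise, AddSaltPepperNoise

# illustrative pipeline; probabilities are arbitrary
augment = A.Compose([
    AddPoissonNoise(p=0.3),
    AddSaltPepperNoise(amount=0.02, salt_vs_pepper=0.5, p=0.3),
])

image = np.random.randint(0, 256, (320, 320, 3), dtype=np.uint8)
noisy = augment(image=image)["image"]  # Compose returns a dict; the augmented array is under "image"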