gajeshladhar
/

core-jepa

@@ -46,7 +46,7 @@ class DinoDataset(Dataset):
             queue_size (int): Max queue length for shared store
         """
         if imgsz < 320:
-            raise ValueError("❗️imgsz must be ≥ 320 for stable patch extraction — got {}".format(imgsz))
         self.imgsz = imgsz
         metadata_url = "https://huggingface.co/datasets/gajeshladhar/core-five/resolve/main/metadata.parquet"
         self.df_metadata = gpd.read_parquet(fsspec.open(metadata_url).open())

             queue_size (int): Max queue length for shared store
         """
         if imgsz < 320:
+            raise ValueError("imgsz must be ≥ 320 for stable patch extraction — got {}".format(imgsz))
         self.imgsz = imgsz
         metadata_url = "https://huggingface.co/datasets/gajeshladhar/core-five/resolve/main/metadata.parquet"
         self.df_metadata = gpd.read_parquet(fsspec.open(metadata_url).open())

src/utils.py CHANGED Viewed

	@@ -0,0 +1,77 @@

+import io
+import os
+import torch
+from torch import nn
+from torch.amp import autocast
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+import copy
+import queue
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+import fsspec
+import xarray as xr
+from tqdm.notebook import tqdm
+from ultralytics import YOLO
+from IPython.display import clear_output
+from multiprocessing import Manager
+from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
+import huggingface_hub as hf
+import albumentations as A
+import h5py
+import requests
+from io import BytesIO
+import datetime
+from pathlib import Path
+import tempfile
+import shutil
+# parallel processing of static datasets: module-level shared state, created as a side effect of importing this module
+manager = Manager()  # multiprocessing manager; starts its server process at import time
+shared_store = manager.list()  # manager-backed list proxy; presumably the cross-process sample store — TODO confirm against callers
+process_pool = ProcessPoolExecutor(max_workers=6)  # process pool capped at 6 workers
+def write_last_updated(path="store_last_updated.txt"):
+    with tempfile.NamedTemporaryFile("w", delete=False, dir=".") as tmp:
+        tmp.write(f"{datetime.datetime.now().isoformat()}")
+        tmp_path = tmp.name
+    shutil.move(tmp_path, path)
+class AddPoissonNoise(A.ImageOnlyTransform):
+    def __init__(self, p=0.5):
+        super().__init__(p)
+    def apply(self, image, **params):
+        image = image.astype(np.float32) / 255.0 if image.dtype == np.uint8 else image.copy()
+        noisy = np.random.poisson(image * 255.0)
+        return np.clip(noisy, 0, 255).astype('uint8')
+class AddSaltPepperNoise(A.ImageOnlyTransform):
+    def __init__(self, amount=0.02, salt_vs_pepper=0.5, p=0.5):
+        super(AddSaltPepperNoise, self).__init__(p)
+        self.amount = amount
+        self.salt_vs_pepper = salt_vs_pepper
+    def apply(self, image, **params):
+        noisy = image.copy()
+        num_salt = np.ceil(self.amount * image.size * self.salt_vs_pepper)
+        num_pepper = np.ceil(self.amount * image.size * (1.0 - self.salt_vs_pepper))
+        # Salt noise
+        coords = [np.random.randint(0, i - 1, int(num_salt)) for i in image.shape]
+        noisy[tuple(coords)] = 1
+        # Pepper noise
+        coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in image.shape]
+        noisy[tuple(coords)] = 0
+        return noisy