xiaohei66 committed on
Commit
fdd645c
·
1 Parent(s): 5e50c66

Fix PaddleOCR-VL image size inference for ambiguous RGB shapes (#96)

Browse files

- Fix PaddleOCR-VL image size inference for ambiguous RGB shapes (c6f1fab0351b3170df29e6e98978933482c14bb4)

Files changed (1) hide show
  1. image_processing_paddleocr_vl.py +4 -3
image_processing_paddleocr_vl.py CHANGED
@@ -338,6 +338,10 @@ class PaddleOCRVLImageProcessor(BaseImageProcessor):
338
  """
339
  images = make_list_of_images(images)
340
 
 
 
 
 
341
  if do_convert_rgb:
342
  images = [convert_to_rgb(image) for image in images]
343
 
@@ -349,9 +353,6 @@ class PaddleOCRVLImageProcessor(BaseImageProcessor):
349
  "It looks like you are trying to rescale already rescaled images. If the input"
350
  " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
351
  )
352
- if input_data_format is None:
353
- # We assume that all images have the same channel dimension format.
354
- input_data_format = infer_channel_dimension_format(images[0])
355
 
356
  height, width = get_image_size(images[0], channel_dim=input_data_format)
357
  resized_height, resized_width = height, width
 
338
  """
339
  images = make_list_of_images(images)
340
 
341
+ if input_data_format is None:
342
+ # We assume that all images have the same channel dimension format.
343
+ input_data_format = ChannelDimension.LAST if isinstance(images[0], Image.Image) else infer_channel_dimension_format(images[0])
344
+
345
  if do_convert_rgb:
346
  images = [convert_to_rgb(image) for image in images]
347
 
 
353
  "It looks like you are trying to rescale already rescaled images. If the input"
354
  " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
355
  )
 
 
 
356
 
357
  height, width = get_image_size(images[0], channel_dim=input_data_format)
358
  resized_height, resized_width = height, width