Fix PaddleOCR-VL image size inference for ambiguous RGB shapes (#96)
Browse files
- Fix PaddleOCR-VL image size inference for ambiguous RGB shapes (c6f1fab0351b3170df29e6e98978933482c14bb4)
image_processing_paddleocr_vl.py
CHANGED
|
@@ -338,6 +338,10 @@ class PaddleOCRVLImageProcessor(BaseImageProcessor):
|
|
| 338 |
"""
|
| 339 |
images = make_list_of_images(images)
|
| 340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
if do_convert_rgb:
|
| 342 |
images = [convert_to_rgb(image) for image in images]
|
| 343 |
|
|
@@ -349,9 +353,6 @@ class PaddleOCRVLImageProcessor(BaseImageProcessor):
|
|
| 349 |
"It looks like you are trying to rescale already rescaled images. If the input"
|
| 350 |
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
|
| 351 |
)
|
| 352 |
-
if input_data_format is None:
|
| 353 |
-
# We assume that all images have the same channel dimension format.
|
| 354 |
-
input_data_format = infer_channel_dimension_format(images[0])
|
| 355 |
|
| 356 |
height, width = get_image_size(images[0], channel_dim=input_data_format)
|
| 357 |
resized_height, resized_width = height, width
|
|
|
|
| 338 |
"""
|
| 339 |
images = make_list_of_images(images)
|
| 340 |
|
| 341 |
+
if input_data_format is None:
|
| 342 |
+
# We assume that all images have the same channel dimension format.
|
| 343 |
+
input_data_format = ChannelDimension.LAST if isinstance(images[0], Image.Image) else infer_channel_dimension_format(images[0])
|
| 344 |
+
|
| 345 |
if do_convert_rgb:
|
| 346 |
images = [convert_to_rgb(image) for image in images]
|
| 347 |
|
|
|
|
| 353 |
"It looks like you are trying to rescale already rescaled images. If the input"
|
| 354 |
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
|
| 355 |
)
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
height, width = get_image_size(images[0], channel_dim=input_data_format)
|
| 358 |
resized_height, resized_width = height, width
|