PaddlePaddle
/

PaddleOCR-VL

@@ -338,6 +338,10 @@ class PaddleOCRVLImageProcessor(BaseImageProcessor):
         """
         images = make_list_of_images(images)
         if do_convert_rgb:
             images = [convert_to_rgb(image) for image in images]
@@ -349,9 +353,6 @@ class PaddleOCRVLImageProcessor(BaseImageProcessor):
                 "It looks like you are trying to rescale already rescaled images. If the input"
                 " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
             )
-        if input_data_format is None:
-            # We assume that all images have the same channel dimension format.
-            input_data_format = infer_channel_dimension_format(images[0])
         height, width = get_image_size(images[0], channel_dim=input_data_format)
         resized_height, resized_width = height, width

         """
         images = make_list_of_images(images)
+        if input_data_format is None:
+            # We assume that all images have the same channel dimension format.
+            input_data_format = ChannelDimension.LAST if isinstance(images[0], Image.Image) else infer_channel_dimension_format(images[0])
         if do_convert_rgb:
             images = [convert_to_rgb(image) for image in images]
                 "It looks like you are trying to rescale already rescaled images. If the input"
                 " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
             )
         height, width = get_image_size(images[0], channel_dim=input_data_format)
         resized_height, resized_width = height, width