Spaces:
Build error
Build error
| from PyPDF2 import PdfReader | |
class Paper:
    """Thin wrapper around a PDF reader that exposes page text in chunks.

    The wrapped reader must provide a ``pages`` iterable whose items have an
    ``extract_text()`` method, and a ``metadata`` attribute.
    """

    def __init__(self, pdf_obj: "PdfReader") -> None:
        """Store the reader and keep a reference to its ``metadata``."""
        self._pdf_obj = pdf_obj
        self._paper_meta = self._pdf_obj.metadata

    def iter_pages(self, iter_text_len: int = 3000):
        """Yield each page's extracted text in fixed-size chunks.

        Args:
            iter_text_len: Maximum number of characters per chunk. Must be
                a positive integer.

        Yields:
            ``(page_idx, chunk_idx, text)`` tuples, where ``page_idx`` is the
            zero-based page position, ``chunk_idx`` is the zero-based chunk
            position within that page, and ``text`` is at most
            ``iter_text_len`` characters long. A page with no extractable
            text yields exactly one tuple with an empty string, so every
            page is represented at least once.

        Raises:
            ValueError: If ``iter_text_len`` is not positive. Because this is
                a generator, the error surfaces on the first iteration.
        """
        if iter_text_len <= 0:
            raise ValueError(
                f"iter_text_len must be a positive integer, got {iter_text_len!r}"
            )

        for page_idx, page in enumerate(self._pdf_obj.pages):
            # Treat a missing extraction result the same as an empty page.
            txt = page.extract_text() or ""

            if not txt:
                # Keep the one-empty-chunk-per-empty-page contract.
                yield page_idx, 0, ""
                continue

            # Stepping by the chunk size avoids the trailing empty chunk that
            # a "len // size + 1" count produces when len is an exact multiple.
            starts = range(0, len(txt), iter_text_len)
            for chunk_idx, start in enumerate(starts):
                yield page_idx, chunk_idx, txt[start:start + iter_text_len]
if __name__ == '__main__':
    # Ad-hoc entry point: open the sample PDF located one directory up and
    # wrap it in a Paper instance.
    paper = Paper(PdfReader('../alexnet.pdf'))