"""GTK 3 window that previews PDF pages with table-selection overlays.

Each page of the opened document is rasterised with PyMuPDF and shown in a
list box; selections stored on a page are drawn on top of it as a dashed
rectangle with dashed vertical column dividers.
"""

import os
import sys
import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias

import PIL.Image
import gi

# The Gtk version must be pinned before anything is pulled from gi.repository.
gi.require_version("Gtk", "3.0")
from gi.repository import GLib, Gio, Gtk, GObject

gi.require_foreign("cairo")
import cairo
import fitz

# An (x, y) pair.
Coords: TypeAlias = Tuple[float, float]


class Selection:
    """A rectangular region of a page, optionally split into columns.

    ``bounds`` holds two corner points, ``((x1, y1), (x2, y2))``; the drawing
    code multiplies them by the rendered page width/height, i.e. treats them
    as fractions of the page size.  ``columns`` holds divider positions that
    the drawing code treats as fractions of the selection's width.
    """

    def __init__(
        self,
        bounds: Tuple[Coords, Coords],
        columns: Optional[Sequence[float]] = None,
    ):
        self.bounds = bounds
        # Always store a private list so callers' sequences are never shared.
        self.columns: list[float] = list(columns or [])


class Page:
    """A single document page together with the selections made on it."""

    def __init__(self, index: int, raw: fitz.Page):
        self.index = index  # zero-based position inside the document
        self.raw = raw  # underlying PyMuPDF page
        self.selections: list[Selection] = []


class Document:
    """An opened PDF file and its pages."""

    def __init__(self, filename: str):
        """Open *filename* with PyMuPDF and wrap each of its pages.

        Whatever ``fitz.Document`` raises for an unreadable file propagates
        to the caller.
        """
        self.filename = filename
        self.raw = fitz.Document(filename)
        self.pages: list[Page] = [
            Page(index=i, raw=p)
            for i, p in enumerate(self.raw.pages())  # type: ignore
        ]


class PdfPage(Gtk.DrawingArea):
    """Drawing area that shows one rendered page plus its selections."""

    # Resolution used when rasterising the page.
    RENDER_DPI = 50

    def __init__(self, page: Page, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.page: Page = page
        # Built on first draw and reused afterwards; see _get_surface().
        self._surface: Optional[cairo.ImageSurface] = None

        # Render once up front only to learn the pixel size of the page.
        pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=self.RENDER_DPI)  # type: ignore
        self.set_size_request(pix.width, pix.height)
        self.connect("draw", self.on_draw, {})

        # NOTE(review): hard-coded sample selection for the page with index 4;
        # this looks like development scaffolding - confirm before shipping.
        if self.page.index == 4:
            self.page.selections.append(Selection(
                ((0.1, 0.18), (0.9, 0.72)),
                columns=[0.08, 0.24, 0.34, 0.42, 0.51, 0.59, 0.67, 0.73, 0.91],
            ))

    def _get_surface(self) -> cairo.ImageSurface:
        """Return the page as a cairo image surface, rendering it only once."""
        if self._surface is None:
            pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=self.RENDER_DPI)  # type: ignore
            img = PIL.Image.frombytes(
                "RGBA" if pix.alpha else "RGB",
                [pix.width, pix.height],
                pix.samples,
            )
            # Guarantees four bytes per pixel; the alpha value itself is not
            # used because the surface is created as RGB24 below.
            img.putalpha(1)
            # Swap red and blue: cairo's 32-bit formats are laid out as
            # B, G, R, x in memory on little-endian machines.
            # NOTE(review): this ordering would be wrong on big-endian hosts.
            red, green, blue, alpha = img.split()
            img = PIL.Image.merge("RGBA", (blue, green, red, alpha))  # type: ignore
            # cairo needs a writable buffer.
            buffer: memoryview = memoryview(bytearray(img.tobytes()))
            self._surface = cairo.ImageSurface.create_for_data(
                buffer, cairo.Format.RGB24, pix.width, pix.height
            )
        return self._surface

    def _draw_selection(
        self,
        cr: cairo.Context,
        sel: Selection,
        page_width: int,
        page_height: int,
    ) -> None:
        """Stroke one selection rectangle and its column dividers onto *cr*."""
        sel_x1 = sel.bounds[0][0] * page_width
        sel_y1 = sel.bounds[0][1] * page_height
        sel_x2 = sel.bounds[1][0] * page_width
        sel_y2 = sel.bounds[1][1] * page_height

        # Base settings
        cr.set_line_cap(cairo.LINE_CAP_BUTT)
        cr.set_line_width(2)

        # Columns (draw first - below selection)
        cr.set_dash([5])
        cr.set_source_rgba(1, 0, 0)
        for col in sel.columns:
            col_x = sel_x1 + (sel_x2 - sel_x1) * col
            cr.move_to(col_x, sel_y1)
            cr.line_to(col_x, sel_y2)
            cr.stroke()

        # Selection
        cr.rectangle(sel_x1, sel_y1, sel_x2 - sel_x1, sel_y2 - sel_y1)
        # White part of the pattern (dash offset 5 fills the gaps left by the
        # black dashes drawn next).
        cr.set_source_rgba(1, 1, 1)
        cr.set_dash([5], 5)
        cr.stroke_preserve()  # important preserve - reuse rectangle
        # Black part of the pattern
        cr.set_source_rgba(0, 0, 0)
        cr.set_dash([5])
        cr.stroke()

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Handle the ``draw`` signal: background, page image, selections.

        ``data`` is the user-data object passed to ``connect`` and is unused.
        """
        width = widget.get_allocated_width()
        height = widget.get_allocated_height()
        sctx = widget.get_style_context()
        Gtk.render_background(sctx, cr, 0, 0, width, height)

        surface = self._get_surface()
        cr.set_source_surface(surface, 0, 0)
        cr.paint()

        page_width = surface.get_width()
        page_height = surface.get_height()
        for sel in self.page.selections:
            self._draw_selection(cr, sel, page_width, page_height)


@Gtk.Template.from_file("MainWindow.glade")
class MainWindow(Gtk.ApplicationWindow):
    """Main application window; its widget tree comes from MainWindow.glade."""

    __gtype_name__ = "main_window"

    open_button: Gtk.Button = Gtk.Template.Child()  # type: ignore
    header_bar: Gtk.HeaderBar = Gtk.Template.Child()  # type: ignore
    main_paned: Gtk.Paned = Gtk.Template.Child()  # type: ignore
    pdf_list_box: Gtk.ListBox = Gtk.Template.Child()  # type: ignore
    pdfFileFilter: Gtk.FileFilter = Gtk.Template.Child()  # type: ignore

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.app: Application = self.get_application()  # type: ignore
        assert self.app is not None
        self.app.connect("notify::document", self.on_document_updated)

    # @Gtk.Template.Callback()
    # def example_button_released_cb(self, widget: Gtk.Button, **kwargs):
    #     assert self.example_button == widget
    #     print(widget.get_label())
    #     widget.set_label("woah")

    def _load_document(self, filename: str) -> None:
        """Open *filename* as the application's document.

        Any exception raised while doing so is reported in a modal error
        dialog (with the traceback as secondary text) instead of propagating.
        """
        try:
            self.app.set_property("document", Document(filename))
        except Exception as e:  # UI boundary: report the failure to the user
            message_dialog = Gtk.MessageDialog(
                title="An error has occurred.",
                transient_for=self,
                modal=True,
                message_type=Gtk.MessageType.ERROR,
                text=repr(e),
                secondary_text=traceback.format_exc(),
                buttons=Gtk.ButtonsType.OK,
            )
            try:
                message_dialog.run()  # type: ignore
            finally:
                message_dialog.destroy()

    @Gtk.Template.Callback()
    def on_open_button_clicked(self, widget, *args, **kwargs):
        """Let the user pick a PDF file and load it."""
        dialog = Gtk.FileChooserDialog(
            title="Choose PDF File to open",
            transient_for=self,  # equivalent to parent=
            action=Gtk.FileChooserAction.OPEN,
            filter=self.pdfFileFilter,
            modal=True,
        )
        dialog.add_button("Cancel", Gtk.ResponseType.CANCEL)
        dialog.add_button("Open", Gtk.ResponseType.ACCEPT)

        try:
            response = dialog.run()  # type: ignore
            if response == Gtk.ResponseType.ACCEPT:
                filename: str = dialog.get_filename()  # type: ignore
                self._load_document(filename)
        finally:
            # Destroy the chooser even if something above raised.
            dialog.destroy()

    # Note: this won't run unless a new document object is put into place
    # editing an existing one won't trigger it
    def on_document_updated(self, recvobj, gparamstring):
        """Rebuild the header and page list after ``document`` was replaced."""
        document: Optional[Document] = self.app.get_property("document")

        if document is None:
            self.header_bar.set_title(None)
            self.header_bar.set_subtitle(None)
        else:
            self.header_bar.set_title(os.path.basename(document.filename))
            self.header_bar.set_subtitle(document.filename)

        # Drop the rows that belonged to the previous document.
        for child in self.pdf_list_box.get_children():
            if isinstance(child, Gtk.ListBoxRow):
                child.destroy()

        if document is None:
            return

        for page in document.pages:
            self.pdf_list_box.add(PdfPage(page))

        self.pdf_list_box.show_all()

    @Gtk.Template.Callback()
    def on_open_button_small_clicked(self, widget, **kwargs):
        """Load a fixed file without showing the file chooser.

        NOTE(review): the path below is an absolute path on one machine; this
        looks like a development shortcut - confirm before shipping.
        """
        TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
        self._load_document(TEST_FILENAME)


class Application(Gtk.Application):
    """Application object; owns the main window and the current document."""

    # Holds the current Document; MainWindow listens to "notify::document".
    document = GObject.Property(type=GObject.TYPE_PYOBJECT, flags=GObject.ParamFlags.READWRITE)

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            application_id="zone.lunareclipse.pdf_table_extractor",
            flags=Gio.ApplicationFlags.FLAGS_NONE,
            # flags=Gio.ApplicationFlags.HANDLES_COMMAND_LINE,  # TODO
            **kwargs
        )

        self.window: Optional[MainWindow] = None

    def do_activate(self):
        """Create the main window on first activation and bring it forward."""
        if self.window is None:
            self.window = MainWindow(application=self)
        self.window.show_all()  # type: ignore
        # On a repeated activation this raises the already existing window.
        self.window.present()


if __name__ == "__main__":
    app = Application()
    # Hand the GApplication exit status back to the shell.
    sys.exit(app.run(sys.argv))