2024-04-27 21:23:40 +00:00
|
|
|
import sys
|
|
|
|
import traceback
|
|
|
|
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
|
|
|
|
|
2024-04-28 13:16:30 +00:00
|
|
|
import PIL.Image
|
2024-04-27 21:23:40 +00:00
|
|
|
import gi
|
2024-04-27 21:29:33 +00:00
|
|
|
|
2024-04-27 21:23:40 +00:00
|
|
|
gi.require_version("Gtk", "3.0")
|
|
|
|
from gi.repository import GLib, Gio, Gtk, GObject
|
2024-04-27 21:29:33 +00:00
|
|
|
|
2024-04-27 21:23:40 +00:00
|
|
|
gi.require_foreign("cairo")
|
|
|
|
import cairo
|
|
|
|
|
|
|
|
import fitz
|
|
|
|
|
|
|
|
# An (x, y) pair of floats. Selection.bounds holds two of these, and
# PdfPage.on_draw multiplies them by the rendered page width and height.
Coords: TypeAlias = Tuple[float, float]
|
|
|
|
|
2024-04-27 21:29:33 +00:00
|
|
|
|
2024-04-27 21:23:40 +00:00
|
|
|
class Selection:
    """A rectangular region on a page, optionally divided by column separators.

    Attributes:
        bounds: Two corner points of the rectangle. PdfPage.on_draw scales
            them by the rendered page width and height.
        columns: Separator positions. PdfPage.on_draw places each one at
            ``x1 + (x2 - x1) * value`` between the two corners.
    """

    def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
        # Always store a fresh list so the instance never aliases the
        # caller's sequence; a missing or empty argument yields [].
        separators: list[float] = list(columns) if columns else []
        self.columns: list[float] = separators
        self.bounds = bounds
|
2024-04-27 21:23:40 +00:00
|
|
|
|
2024-05-14 18:44:03 +00:00
|
|
|
class Page:
    """A single document page together with the selections made on it.

    Attributes:
        index: The number the creator assigned to this page (Document passes
            the page's position in the enumeration of its pages).
        raw: The underlying ``fitz.Page`` object.
        selections: Selection objects attached to this page; starts empty.
    """

    def __init__(self, index: int, raw: fitz.Page):
        # Nothing is selected until some caller appends to this list.
        self.selections: list[Selection] = []
        self.raw = raw
        self.index = index
|
2024-04-27 21:29:33 +00:00
|
|
|
|
2024-04-27 21:23:40 +00:00
|
|
|
class Document:
    """A file opened with ``fitz.Document`` plus a Page wrapper for each page.

    Attributes:
        filename: The path that was passed to the constructor.
        raw: The underlying ``fitz.Document``.
        pages: Page objects in document order, indexed from 0.
    """

    def __init__(self, filename: str):
        self.filename = filename
        self.raw = fitz.Document(filename)
        # Wrap every raw page, numbering them in iteration order.
        self.pages = [
            Page(index=position, raw=raw_page)
            for position, raw_page in enumerate(self.raw.pages())  # type: ignore
        ]
|
2024-04-27 21:23:40 +00:00
|
|
|
|
2024-04-27 21:29:33 +00:00
|
|
|
|
2024-04-27 21:23:40 +00:00
|
|
|
class PdfPage(Gtk.DrawingArea):
    """Drawing area that paints one rasterised page and its selection overlays.

    The page is rasterised at RENDER_DPI. The resulting cairo surface is
    built on the first draw and then reused, so later redraws no longer
    re-render the page through fitz and PIL each time.
    """

    # Resolution (dots per inch) used for every rasterisation of the page.
    RENDER_DPI = 96

    def __init__(self, page, *args, **kwargs):
        """Create the widget for *page* and size it to the rendered page.

        Args:
            page: The Page to display.
            *args: Forwarded to Gtk.DrawingArea.
            **kwargs: Forwarded to Gtk.DrawingArea.
        """
        super().__init__(*args, **kwargs)

        self.page: Page = page
        # Built lazily by _get_surface() so that pages which are never drawn
        # do not keep a pixel buffer alive.
        self._surface: Optional[cairo.ImageSurface] = None

        # Rendered here only to learn the pixel size of the page.
        pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=self.RENDER_DPI)  # type: ignore
        self.set_size_request(pix.width, pix.height)
        self.connect("draw", self.on_draw, {})

        # NOTE(review): hard-coded selection for the page with index 4; this
        # looks like temporary test data - confirm before relying on it.
        if self.page.index == 4:
            self.page.selections.append(Selection(
                ((0.1, 0.18), (0.9, 0.72)),
                columns=[0.08, 0.24, 0.34, 0.42, 0.51, 0.59, 0.67, 0.73, 0.91]
            ))

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Handle the "draw" signal: background, page image, then selections.

        Args:
            widget: The widget being drawn.
            cr: The cairo context to draw with.
            data: The user data given to ``connect`` (an empty dict); unused.
        """
        width = widget.get_allocated_width()
        height = widget.get_allocated_height()
        Gtk.render_background(widget.get_style_context(), cr, 0, 0, width, height)

        surface = self._get_surface()
        cr.set_source_surface(surface, 0, 0)
        cr.paint()

        # The surface has the same pixel size as the pixmap it was built
        # from, so it provides the scale for the selection coordinates.
        page_width = surface.get_width()
        page_height = surface.get_height()
        for sel in self.page.selections:
            self._draw_selection(cr, sel, page_width, page_height)

    def _get_surface(self) -> cairo.ImageSurface:
        """Return the cairo surface holding the rendered page, building it once."""
        if self._surface is None:
            pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=self.RENDER_DPI)  # type: ignore
            img = PIL.Image.frombytes("RGBA" if pix.alpha else "RGB", (pix.width, pix.height), pix.samples)
            # Guarantee a four-channel image; the alpha byte only pads each
            # pixel to 32 bits because the surface format below is RGB24.
            img.putalpha(1)
            # Reorder the channels to B, G, R, A before handing the bytes to
            # cairo. NOTE(review): this matches RGB24's in-memory layout on
            # little-endian machines - confirm if other targets matter.
            red, green, blue, alpha = img.split()
            img = PIL.Image.merge("RGBA", (blue, green, red, alpha))
            # create_for_data needs a writable buffer, hence the bytearray.
            buffer = memoryview(bytearray(img.tobytes()))
            self._surface = cairo.ImageSurface.create_for_data(
                buffer, cairo.Format.RGB24, pix.width, pix.height
            )
        return self._surface

    @staticmethod
    def _draw_selection(cr, sel, page_width, page_height):
        """Stroke the column separators and the dashed outline of *sel*."""
        sel_x1 = sel.bounds[0][0] * page_width
        sel_y1 = sel.bounds[0][1] * page_height
        sel_x2 = sel.bounds[1][0] * page_width
        sel_y2 = sel.bounds[1][1] * page_height

        # Base settings
        cr.set_line_cap(cairo.LINE_CAP_BUTT)
        cr.set_line_width(2)

        # Columns (draw first - below selection)
        cr.set_dash([5])
        cr.set_source_rgba(1, 0, 0)
        for col in sel.columns:
            col_x = sel_x1 + (sel_x2 - sel_x1) * col
            cr.move_to(col_x, sel_y1)
            cr.line_to(col_x, sel_y2)
            cr.stroke()

        # Selection
        cr.rectangle(sel_x1, sel_y1, sel_x2 - sel_x1, sel_y2 - sel_y1)

        # White part of the pattern (dash offset 5 fills the black gaps)
        cr.set_source_rgba(1, 1, 1)
        cr.set_dash([5], 5)
        cr.stroke_preserve()  # important preserve - reuse rectangle

        # Black part of the pattern
        cr.set_source_rgba(0, 0, 0)
        cr.set_dash([5])
        cr.stroke()
|
|
|
|
|
2024-04-27 21:23:40 +00:00
|
|
|
|
|
|
|
@Gtk.Template.from_file("MainWindow.glade")
class MainWindow(Gtk.ApplicationWindow):
    """Main application window, built from the MainWindow.glade template.

    The window listens for changes of the application's ``document``
    property and rebuilds the page list whenever a new document is set.
    """

    __gtype_name__ = "main_window"

    # Widgets and objects bound from the template.
    open_button: Gtk.Button = Gtk.Template.Child()  # type: ignore
    header_bar: Gtk.HeaderBar = Gtk.Template.Child()  # type: ignore
    main_paned: Gtk.Paned = Gtk.Template.Child()  # type: ignore

    pdf_list_box: Gtk.ListBox = Gtk.Template.Child()  # type: ignore

    pdfFileFilter: Gtk.FileFilter = Gtk.Template.Child()  # type: ignore

    def __init__(self, *args, **kwargs):
        """Create the window; an ``application=`` argument is required."""
        super().__init__(*args, **kwargs)

        self.app: Application = self.get_application()  # type: ignore
        assert self.app is not None

        self.app.connect("notify::document", self.on_document_updated)

    # @Gtk.Template.Callback()
    # def example_button_released_cb(self, widget: Gtk.Button, **kwargs):
    #     assert self.example_button == widget
    #     print(widget.get_label())
    #     widget.set_label("woah")

    def _show_error(self, error: Exception, details: str) -> None:
        """Show a modal error dialog for *error* with *details* as secondary text."""
        message_dialog = Gtk.MessageDialog(
            title="An error has occured.",
            transient_for=self,
            modal=True,
            message_type=Gtk.MessageType.ERROR,
            text=repr(error),
            secondary_text=details,
            buttons=Gtk.ButtonsType.OK,
        )
        try:
            message_dialog.run()  # type: ignore
        finally:
            message_dialog.destroy()

    def _load_document(self, filename: str) -> None:
        """Open *filename* and publish it as the application's document.

        Any exception raised while doing so is reported in an error dialog
        instead of propagating to the caller.
        """
        try:
            self.app.set_property("document", Document(filename))
        except Exception as e:
            # Capture the traceback here, while the exception is still active.
            self._show_error(e, traceback.format_exc())

    @Gtk.Template.Callback()
    def on_open_button_clicked(self, widget, *args, **kwargs):
        """Ask the user for a file and load it when the dialog is accepted."""
        dialog = Gtk.FileChooserDialog(
            title="Choose PDF File to open",
            transient_for=self,  # equivalent to parent=
            action=Gtk.FileChooserAction.OPEN,
            filter=self.pdfFileFilter,
            modal=True,
        )
        dialog.add_button("Cancel", Gtk.ResponseType.CANCEL)
        dialog.add_button("Open", Gtk.ResponseType.ACCEPT)

        try:
            response = dialog.run()  # type: ignore
            if response == Gtk.ResponseType.ACCEPT:
                filename: str = dialog.get_filename()  # type: ignore
                self._load_document(filename)
        finally:
            # Tear the chooser down even if something above raised.
            dialog.destroy()

    # Note: this won't run unless a new document object is put into place
    # editing an existing one won't trigger it
    def on_document_updated(self, recvobj, gparamstring):
        """Refresh the header bar and the page list for the current document.

        Args:
            recvobj: The object that emitted ``notify::document``; unused.
            gparamstring: The property spec supplied with the signal; unused.
        """
        document: Optional[Document] = self.app.get_property("document")
        if document is None:
            # The property holds no document, so there is nothing to show.
            return

        # Path-aware basename rather than splitting on "/" by hand.
        self.header_bar.set_title(GLib.path_get_basename(document.filename))
        self.header_bar.set_subtitle(document.filename)

        # Drop the rows of the previously shown document.
        for child in self.pdf_list_box.get_children():
            if isinstance(child, Gtk.ListBoxRow):
                child.destroy()

        for page in document.pages:
            self.pdf_list_box.add(PdfPage(page))

        self.pdf_list_box.show_all()

    @Gtk.Template.Callback()
    def on_open_button_small_clicked(self, widget, **kwargs):
        """Load a fixed test file without showing the file chooser."""
        # NOTE(review): absolute path into one developer's home directory;
        # this handler looks like a debugging shortcut - confirm before shipping.
        TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
        self._load_document(TEST_FILENAME)
|
2024-04-27 21:23:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Application(Gtk.Application):
    """The Gtk.Application that owns the main window and the open document."""

    # Currently loaded document. MainWindow connects to "notify::document"
    # to react whenever a new object is assigned here.
    document = GObject.Property(type=GObject.TYPE_PYOBJECT, flags=GObject.ParamFlags.READWRITE)

    def __init__(self, *args, **kwargs):
        """Initialise the application with its fixed id and flags.

        Args:
            *args: Forwarded to Gtk.Application.
            **kwargs: Forwarded to Gtk.Application.
        """
        super().__init__(
            *args,
            application_id="zone.lunareclipse.pdf_table_extractor",
            flags=Gio.ApplicationFlags.FLAGS_NONE,
            # TODO: flags=Gio.ApplicationFlags.HANDLES_COMMAND_LINE
            **kwargs
        )
        # Created on first activation, see do_activate().
        self.window = None

    def do_activate(self):
        """Show the main window, creating it on the first activation."""
        if not self.window:
            self.window = MainWindow(application=self)
        self.window.show_all()  # type: ignore
|
2024-04-27 21:23:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the application and hand its exit status back
    # to the calling process instead of discarding it.
    app = Application()
    sys.exit(app.run(sys.argv))
|