Compare commits

...

4 Commits

Author SHA1 Message Date
LunarEclipse e124669185 Added basic selection rendering (incl. testing/demo code) 2024-05-14 20:44:03 +02:00
LunarEclipse ba660af9eb small cleanup 2024-04-28 16:52:13 +02:00
LunarEclipse 8c8a01f1f1 nattyfix 2 2024-04-28 15:16:30 +02:00
LunarEclipse 7c871a6387 deleted experiments dir 2024-04-28 15:16:11 +02:00
8 changed files with 87 additions and 291 deletions

View File

@ -3,6 +3,8 @@
<interface> <interface>
<requires lib="gtk+" version="3.24"/> <requires lib="gtk+" version="3.24"/>
<template class="main_window" parent="GtkApplicationWindow"> <template class="main_window" parent="GtkApplicationWindow">
<property name="width-request">800</property>
<property name="height-request">500</property>
<property name="can-focus">False</property> <property name="can-focus">False</property>
<property name="title" translatable="yes">PDF Table Extractor</property> <property name="title" translatable="yes">PDF Table Extractor</property>
<property name="icon-name">document-page-setup</property> <property name="icon-name">document-page-setup</property>

View File

@ -1,67 +0,0 @@
import sys
import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import GLib, Gio, Gtk, GObject
gi.require_foreign("cairo")
import cairo
class CustomDrawingArea(Gtk.DrawingArea):
    """Drawing area for the draw test that paints its whole allocation red."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Gtk.DrawingArea`` and register the draw hook."""
        super().__init__(*args, **kwargs)
        self.set_size_request(200, 200)
        self.set_app_paintable(True)
        # Registered with connect_after; the empty dict is handed to the
        # handler as its user-data argument.
        self.connect_after("draw", self.on_draw, {})

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Handle the "draw" signal.

        Logs the allocated size, renders the themed background and then
        covers the full allocation with an opaque red rectangle.

        Args:
            widget: The widget being drawn.
            cr: Cairo context to draw into.
            data: User data given at connect time (unused here).
        """
        width, height = widget.get_allocated_width(), widget.get_allocated_height()
        print(f"w: {width}, h: {height}")

        style = widget.get_style_context()
        Gtk.render_background(style, cr, 0, 0, width, height)

        # Opaque red fill over the entire allocation.
        cr.set_source_rgba(1.0, 0.0, 0.0, 1.0)
        cr.rectangle(0, 0, width, height)
        cr.fill()
class MainWindow(Gtk.ApplicationWindow):
    """Top-level window of the draw test: a framed drawing area above a button."""

    def __init__(self, *args, **kwargs):
        """Create the window and populate it.

        All arguments are forwarded to ``Gtk.ApplicationWindow``; the window
        is expected to be constructed with an ``application``.
        """
        super().__init__(*args, **kwargs)

        self.app: Application = self.get_application()  # type: ignore
        assert self.app is not None

        # Vertical container holding every child of the window.
        layout = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
        self.add(layout)

        self.area = CustomDrawingArea()
        area_frame = Gtk.Frame(label="DrawingArea")
        area_frame.add(self.area)
        layout.pack_start(area_frame, expand=True, fill=True, padding=20)

        layout.add(Gtk.Button(label=":)"))
class Application(Gtk.Application):
    """GTK application object for the draw test."""

    def __init__(self, *args, **kwargs):
        """Initialise the application with a fixed id and default flags.

        Extra positional and keyword arguments are passed through to
        ``Gtk.Application``.
        """
        super().__init__(
            *args,
            application_id="zone.lunareclipse.draw_test",
            flags=Gio.ApplicationFlags.FLAGS_NONE,
            **kwargs
        )
        # Created on first activation.
        self.window = None

    def do_activate(self):
        """Show the main window, creating it if it does not exist yet."""
        if not self.window:
            self.window = MainWindow(application=self)
        self.window.show_all()
if __name__ == "__main__":
    # Script entry point: build the application and run it with the raw
    # command-line arguments.
    application = Application()
    application.run(sys.argv)

View File

@ -1,17 +0,0 @@
import argparse
import tabula
if __name__ == "__main__":
    # NOTE: command-line parsing (a positional "filename" argument via
    # argparse) was sketched here but is disabled; the input PDF is fixed.
    pdf_path = "../sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"

    # Read the tables from page 5 of the document.
    extracted = tabula.io.read_pdf(
        pdf_path,
        pages=[5],
        lattice=False,
        multiple_tables=True,
    )

    print(extracted)
    print("test")

View File

@ -1,54 +0,0 @@
import sys
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
from PySide6.QtWidgets import QApplication, QDialog, QHBoxLayout, QLabel, QMainWindow, QPushButton, QVBoxLayout, QWidget
from PySide6.QtCore import Slot
from PySide6.QtPdf import QPdfDocument
from PySide6.QtPdfWidgets import QPdfView
import fitz
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
Coords: TypeAlias = Tuple[float, float]
class Selection:
def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
self.bounds = bounds
self.columns = columns
class Document:
    """An opened PDF together with the selections made on its pages."""

    def __init__(self, filename: str):
        """Open ``filename`` with PyMuPDF and start with no selections.

        Args:
            filename: Path of the PDF file to open.
        """
        self.filename = filename
        # Underlying PyMuPDF document handle.
        self.document = fitz.Document(filename)
        # Selections grouped under an integer key
        # (presumably the page index -- TODO confirm).
        self.selections: Dict[int, List[Selection]] = {}
class SelectablePdfView(QPdfView):
    """Placeholder ``QPdfView`` subclass; adds no behaviour yet."""
class State:
    """Placeholder container for application state; currently empty."""
class Frontend:
    """Qt front end: owns the application object, main window and panels."""

    def __init__(self, argv: Sequence[str]):
        """Create the Qt application and assemble the main window.

        Args:
            argv: Command-line arguments handed to ``QApplication``.
        """
        # The QApplication is created before any widget.
        self.app = QApplication(argv)
        self.window = QMainWindow()
        self.state = State()

        self.thumbnails = QWidget()
        self.pdfDocument = QPdfDocument()
        self.optionsPanel = QWidget()

        # Central widget laying the panels out side by side.
        container = QWidget()
        row = QHBoxLayout(container)
        for panel in (self.thumbnails, self.optionsPanel):
            row.addWidget(panel)
        self.window.setCentralWidget(container)

    def exec(self):
        """Show the main window and run the Qt event loop."""
        self.window.show()
        self.app.exec()
if __name__ == "__main__":
    # Script entry point: build the front end from the command line and
    # enter its event loop.
    frontend = Frontend(sys.argv)
    frontend.exec()

View File

@ -1,41 +0,0 @@
import sys
from typing import Sequence
from PySide6.QtWidgets import QApplication, QDialog, QLabel, QPushButton, QVBoxLayout
from PySide6.QtCore import Slot
class Application:
    """Minimal Qt demo: a dialog with a counter label and +/- buttons."""

    def __init__(self, argv: Sequence[str]):
        """Create the Qt application, build the dialog and wire the buttons.

        Args:
            argv: Command-line arguments handed to ``QApplication``.
        """
        self.app = QApplication(argv)
        self.counter = 0

        self.window = QDialog()
        self.layout = QVBoxLayout(self.window)
        self.label = QLabel("0")
        self.button_increment = QPushButton("Increment counter!")
        self.button_decrement = QPushButton("Decrement counter!")

        self.window.setWindowTitle("PDF Table Extractor")

        # Stack the label and both buttons vertically, in this order.
        for child in (self.label, self.button_increment, self.button_decrement):
            self.layout.addWidget(child)

        # Hook each button up to its handler.
        wiring = (
            (self.button_increment, self.increment),
            (self.button_decrement, self.decrement),
        )
        for button, handler in wiring:
            button.clicked.connect(handler)

        print(self.window.layout())

    @Slot()
    def increment(self):
        """Add one to the counter and refresh the label."""
        self.counter += 1
        self.label.setText(f"{self.counter}")

    @Slot()
    def decrement(self):
        """Subtract one from the counter and refresh the label."""
        self.counter -= 1
        self.label.setText(f"{self.counter}")

    def exec(self):
        """Show the dialog and run the Qt event loop."""
        self.window.show()
        self.app.exec()
if __name__ == "__main__":
    # Script entry point: build the demo from the command line and enter
    # its event loop.
    demo = Application(sys.argv)
    demo.exec()

View File

@ -1,84 +0,0 @@
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
class FileChooserWindow(Gtk.Window):
    """Window with two buttons that open a file chooser and a folder chooser."""

    def __init__(self):
        """Build the window and connect both buttons to their dialogs."""
        super().__init__(title="FileChooser Example")

        button_row = Gtk.Box(spacing=6)
        self.add(button_row)

        file_button = Gtk.Button(label="Choose File")
        file_button.connect("clicked", self.on_file_clicked)
        button_row.add(file_button)

        folder_button = Gtk.Button(label="Choose Folder")
        folder_button.connect("clicked", self.on_folder_clicked)
        button_row.add(folder_button)

    def on_file_clicked(self, widget):
        """Run an "open file" dialog and print what the user chose.

        Args:
            widget: The button that emitted "clicked" (unused).
        """
        chooser = Gtk.FileChooserDialog(
            title="Please choose a file",
            parent=self,
            action=Gtk.FileChooserAction.OPEN,
        )
        chooser.add_buttons(
            Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
            Gtk.STOCK_OPEN, Gtk.ResponseType.OK,
        )
        self.add_filters(chooser)

        outcome = chooser.run()
        if outcome == Gtk.ResponseType.OK:
            print("Open clicked")
            print("File selected: " + chooser.get_filename())
        elif outcome == Gtk.ResponseType.CANCEL:
            print("Cancel clicked")

        chooser.destroy()

    def add_filters(self, dialog):
        """Attach the text, Python and catch-all file filters to ``dialog``.

        Args:
            dialog: The file chooser dialog receiving the filters.
        """
        # (display name, FileFilter method used to register the rule, rule)
        filter_specs = (
            ("Text files", "add_mime_type", "text/plain"),
            ("Python files", "add_mime_type", "text/x-python"),
            ("Any files", "add_pattern", "*"),
        )
        for label, register, rule in filter_specs:
            file_filter = Gtk.FileFilter()
            file_filter.set_name(label)
            getattr(file_filter, register)(rule)
            dialog.add_filter(file_filter)

    def on_folder_clicked(self, widget):
        """Run a "select folder" dialog and print what the user chose.

        Args:
            widget: The button that emitted "clicked" (unused).
        """
        chooser = Gtk.FileChooserDialog(
            title="Please choose a folder",
            parent=self,
            action=Gtk.FileChooserAction.SELECT_FOLDER,
        )
        chooser.add_buttons(
            Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
            "Select", Gtk.ResponseType.OK,
        )
        chooser.set_default_size(800, 400)

        outcome = chooser.run()
        if outcome == Gtk.ResponseType.OK:
            print("Select clicked")
            print("Folder selected: " + chooser.get_filename())
        elif outcome == Gtk.ResponseType.CANCEL:
            print("Cancel clicked")

        chooser.destroy()
# Create the example window, quit the GTK main loop when it is destroyed,
# then show it and start the main loop.
window = FileChooserWindow()
window.connect("destroy", Gtk.main_quit)
window.show_all()
Gtk.main()

View File

@ -2,6 +2,7 @@ import sys
import traceback import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
import PIL.Image
import gi import gi
gi.require_version("Gtk", "3.0") gi.require_version("Gtk", "3.0")
@ -12,54 +13,94 @@ import cairo
import fitz import fitz
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
Coords: TypeAlias = Tuple[float, float] Coords: TypeAlias = Tuple[float, float]
class Selection: class Selection:
def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None): def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
self.bounds = bounds self.bounds = bounds
self.columns = columns self.columns: list[float] = list(columns or [])
class Page:
def __init__(self, index: int, raw: fitz.Page):
self.index = index
self.raw = raw
self.selections: list[Selection] = []
class Document: class Document:
def __init__(self, filename: str): def __init__(self, filename: str):
self.filename = filename self.filename = filename
self.document = fitz.Document(filename) self.raw = fitz.Document(filename)
self.selections: Dict[int, List[Selection]] = {} self.pages = []
for i, p in enumerate(self.raw.pages()): # type: ignore
class State(): page = Page(index=i, raw=p)
pass self.pages.append(page)
class PdfPage(Gtk.DrawingArea): class PdfPage(Gtk.DrawingArea):
def __init__(self, page, *args, **kwargs): def __init__(self, page, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.page: fitz.Page = page self.page: Page = page
pix = self.page.get_pixmap(dpi=96) # type: ignore pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=96) # type: ignore
self.set_size_request(pix.width, pix.height) self.set_size_request(pix.width, pix.height)
self.set_app_paintable(True) # type: ignore
self.connect("draw", self.on_draw, {}) self.connect("draw", self.on_draw, {})
if self.page.index == 4:
self.page.selections.append(Selection(
((0.1, 0.18), (0.9, 0.72)),
columns=[0.08, 0.24, 0.34, 0.42, 0.51, 0.59, 0.67, 0.73, 0.91]
))
def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer): def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
# app: Application = widget.get_window().get_application() # type: ignore
width = widget.get_allocated_width() width = widget.get_allocated_width()
height = widget.get_allocated_height() height = widget.get_allocated_height()
cr.set_source_rgba(255, 255, 255)
cr.paint()
sctx = widget.get_style_context() sctx = widget.get_style_context()
Gtk.render_background(sctx, cr, 0, 0, width, height) Gtk.render_background(sctx, cr, 0, 0, width, height)
pix = self.page.get_pixmap(dpi=96, alpha=True) # type: ignore
mv: memoryview = pix.samples_mv pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=96) # type: ignore
img = PIL.Image.frombytes("RGBA" if pix.alpha else "RGB", [pix.width, pix.height], pix.samples)
img.putalpha(1)
img = PIL.Image.merge("RGBA", (lambda r, g, b, a: (b, g, r, a))(*img.split())) # type: ignore
mv: memoryview = memoryview(bytearray(img.tobytes()))
ims = cairo.ImageSurface.create_for_data(mv, cairo.Format.RGB24, pix.width, pix.height) ims = cairo.ImageSurface.create_for_data(mv, cairo.Format.RGB24, pix.width, pix.height)
cr.set_source_surface(ims, 0, 0) cr.set_source_surface(ims, 0, 0)
cr.paint() cr.paint()
for sel in self.page.selections:
sel_x1 = sel.bounds[0][0]*pix.width
sel_y1 = sel.bounds[0][1]*pix.height
sel_x2 = sel.bounds[1][0]*pix.width
sel_y2 = sel.bounds[1][1]*pix.height
# Base settings
cr.set_line_cap(cairo.LINE_CAP_BUTT)
cr.set_line_width(2)
# Columns (draw first - below selection)
cr.set_dash([5])
cr.set_source_rgba(1, 0, 0)
for col in sel.columns:
col_x = sel_x1 + (sel_x2 - sel_x1) * col
cr.move_to(col_x, sel_y1)
cr.line_to(col_x, sel_y2)
cr.stroke()
# Selection
cr.rectangle(sel_x1, sel_y1, sel_x2 - sel_x1, sel_y2 - sel_y1)
# White part of the pattern
cr.set_source_rgba(1, 1, 1)
cr.set_dash([5], 5)
cr.stroke_preserve() # important preserve - reuse rectangle
# Black part of the pattern
cr.set_source_rgba(0, 0, 0)
cr.set_dash([5])
cr.stroke()
@Gtk.Template.from_file("MainWindow.glade") @Gtk.Template.from_file("MainWindow.glade")
class MainWindow(Gtk.ApplicationWindow): class MainWindow(Gtk.ApplicationWindow):
@ -81,8 +122,6 @@ class MainWindow(Gtk.ApplicationWindow):
self.app.connect("notify::document", self.on_document_updated) self.app.connect("notify::document", self.on_document_updated)
# self.pdf_list_box.add(PdfPage())
# @Gtk.Template.Callback() # @Gtk.Template.Callback()
# def example_button_released_cb(self, widget: Gtk.Button, **kwargs): # def example_button_released_cb(self, widget: Gtk.Button, **kwargs):
# assert self.example_button == widget # assert self.example_button == widget
@ -124,20 +163,35 @@ class MainWindow(Gtk.ApplicationWindow):
# editing an existing one won't trigger it # editing an existing one won't trigger it
def on_document_updated(self, recvobj, gparamstring): def on_document_updated(self, recvobj, gparamstring):
document: Document = self.app.get_property("document") document: Document = self.app.get_property("document")
self.header_bar.set_title(document.filename.split("/")[-1]) # type: ignore self.header_bar.set_title(document.filename.split("/")[-1])
self.header_bar.set_subtitle(document.filename) # type: ignore self.header_bar.set_subtitle(document.filename)
for i in document.document.pages(): # type: ignore for child in self.pdf_list_box.get_children():
row = Gtk.ListBoxRow() if type(child) is Gtk.ListBoxRow:
page = PdfPage(i) child.destroy()
row.add(page)
self.pdf_list_box.add(row) # type: ignore for i in document.pages:
self.pdf_list_box.add(PdfPage(i))
self.pdf_list_box.show_all() self.pdf_list_box.show_all()
@Gtk.Template.Callback() @Gtk.Template.Callback()
def on_open_button_small_clicked(self, widget, **kwargs): def on_open_button_small_clicked(self, widget, **kwargs):
pass TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
try:
self.app.set_property("document", Document(TEST_FILENAME))
except Exception as e:
message_dialog = Gtk.MessageDialog(
title="An error has occured.",
transient_for=self,
modal=True,
message_type=Gtk.MessageType.ERROR,
text=repr(e),
secondary_text=traceback.format_exc(),
buttons=Gtk.ButtonsType.OK,
)
message_dialog.run() # type: ignore
message_dialog.destroy()
class Application(Gtk.Application): class Application(Gtk.Application):

View File

@ -11,11 +11,14 @@ python = ">=3.11,<3.13"
pygobject = "^3.48.2" pygobject = "^3.48.2"
tabula-py = "^2.9.0" tabula-py = "^2.9.0"
PyMuPDF = "^1.24.0" PyMuPDF = "^1.24.0"
pillow = "^10.3.0"
#pdfplumber = "^0.11.0" #pdfplumber = "^0.11.0"
#PySide6 = "^6.6.3" # qt #PySide6 = "^6.6.3" # qt
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
pygobject-stubs = "^2.11.0" # this is actually broken lol # this is actually broken lol, fix by running following in venv:
# pip uninstall pygobject-stubs && pip install pygobject-stubs --no-cache-dir --config-settings=config=Gtk3,Gdk3,Soup2
pygobject-stubs = "^2.11.0"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core"]