Compare commits

..

4 Commits

Author SHA1 Message Date
LunarEclipse e124669185 Added basic selection rendering (incl. testing/demo code) 2024-05-14 20:44:03 +02:00
LunarEclipse ba660af9eb small cleanup 2024-04-28 16:52:13 +02:00
LunarEclipse 8c8a01f1f1 nattyfix 2 2024-04-28 15:16:30 +02:00
LunarEclipse 7c871a6387 deleted experiments dir 2024-04-28 15:16:11 +02:00
8 changed files with 87 additions and 291 deletions

View File

@ -3,6 +3,8 @@
<interface>
<requires lib="gtk+" version="3.24"/>
<template class="main_window" parent="GtkApplicationWindow">
<property name="width-request">800</property>
<property name="height-request">500</property>
<property name="can-focus">False</property>
<property name="title" translatable="yes">PDF Table Extractor</property>
<property name="icon-name">document-page-setup</property>

View File

@ -1,67 +0,0 @@
import sys
import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import GLib, Gio, Gtk, GObject
gi.require_foreign("cairo")
import cairo
class CustomDrawingArea(Gtk.DrawingArea):
    """Demo drawing area that paints its whole allocation opaque red."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Gtk.DrawingArea`` and register the draw handler."""
        super().__init__(*args, **kwargs)
        self.set_size_request(200, 200)
        self.set_app_paintable(True)
        # The trailing empty dict is passed to ``on_draw`` as its ``data`` argument.
        self.connect_after("draw", self.on_draw, {})

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Handle "draw": log the allocated size, render the style background, fill red.

        Args:
            widget: The drawing area being drawn.
            cr: Cairo context to draw with.
            data: Extra user data supplied at connect time (unused here).
        """
        alloc_w = widget.get_allocated_width()
        alloc_h = widget.get_allocated_height()
        print(f"w: {alloc_w}, h: {alloc_h}")

        Gtk.render_background(widget.get_style_context(), cr, 0, 0, alloc_w, alloc_h)

        # Cover the full allocation with an opaque red rectangle.
        cr.rectangle(0, 0, alloc_w, alloc_h)
        cr.set_source_rgba(1.0, 0.0, 0.0, 1.0)
        cr.fill()
class MainWindow(Gtk.ApplicationWindow):
    """Application window stacking a framed CustomDrawingArea above a button."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Gtk.ApplicationWindow`` and build the widget tree."""
        super().__init__(*args, **kwargs)

        self.app: Application = self.get_application()  # type: ignore
        assert self.app is not None

        # Vertical container holding every child of the window.
        layout = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
        self.add(layout)

        # The drawing area sits inside a labelled frame that takes the spare space.
        self.area = CustomDrawingArea()
        area_frame = Gtk.Frame(label="DrawingArea")
        area_frame.add(self.area)
        layout.pack_start(area_frame, True, True, 20)

        smiley_button = Gtk.Button(label=":)")
        layout.add(smiley_button)
class Application(Gtk.Application):
    """Gtk application that owns one lazily created MainWindow."""

    def __init__(self, *args, **kwargs):
        """Initialise the application with a fixed id and default flags."""
        super().__init__(
            *args,
            application_id="zone.lunareclipse.draw_test",
            flags=Gio.ApplicationFlags.FLAGS_NONE,
            **kwargs
        )
        # Created on first activation, then reused.
        self.window = None

    def do_activate(self):
        """Create the main window if there is none yet, then show it."""
        if not self.window:
            self.window = MainWindow(application=self)
        self.window.show_all()
if __name__ == "__main__":
    # Script entry point: start the application with the command-line arguments.
    cli_args = sys.argv
    app = Application()
    app.run(cli_args)

View File

@ -1,17 +0,0 @@
import argparse
import tabula
if __name__ == "__main__":
    # Input used when no filename is given, so a bare invocation behaves as before.
    default_pdf = "../sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"

    # Optional positional argument: the PDF to read.
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", nargs="?", default=default_pdf)
    args = parser.parse_args()

    # Read page 5 of the PDF with tabula and print whatever it returns.
    table = tabula.io.read_pdf(
        args.filename,
        pages=[5],
        lattice=False,
        multiple_tables=True,
    )
    print(table)
    print("test")

View File

@ -1,54 +0,0 @@
import sys
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
from PySide6.QtWidgets import QApplication, QDialog, QHBoxLayout, QLabel, QMainWindow, QPushButton, QVBoxLayout, QWidget
from PySide6.QtCore import Slot
from PySide6.QtPdf import QPdfDocument
from PySide6.QtPdfWidgets import QPdfView
import fitz
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
Coords: TypeAlias = Tuple[float, float]
class Selection:
    """A selection made of a pair of coordinates and optional column positions."""

    def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
        """Store ``bounds`` and ``columns`` unchanged (``columns`` may be None)."""
        self.bounds, self.columns = bounds, columns
class Document:
    """A PDF opened with fitz, together with its selections."""

    def __init__(self, filename: str):
        """Open ``filename`` with ``fitz.Document`` and start with no selections."""
        self.filename = filename
        self.document = fitz.Document(filename)
        # Lists of selections keyed by int - presumably the page index; verify against callers.
        self.selections: Dict[int, List[Selection]] = dict()
class SelectablePdfView(QPdfView):
    """Placeholder subclass of QPdfView; adds no behaviour yet."""
class State:
    """Empty placeholder for application state."""
class Frontend():
    """Qt front end: a main window whose central widget holds two side-by-side panels."""

    def __init__(self, argv: Sequence[str]):
        """Create the QApplication from ``argv`` and assemble the main window."""
        self.app = QApplication(argv)
        self.window = QMainWindow()
        self.state = State()

        self.thumbnails = QWidget()
        self.pdfDocument = QPdfDocument()
        self.optionsPanel = QWidget()

        # Horizontal layout: thumbnails first, options panel second.
        container = QWidget()
        row = QHBoxLayout(container)
        for panel in (self.thumbnails, self.optionsPanel):
            row.addWidget(panel)
        self.window.setCentralWidget(container)

    def exec(self):
        """Show the main window and run the Qt event loop."""
        self.window.show()
        self.app.exec()
if __name__ == "__main__":
    # Script entry point: build the front end from the command line and run it.
    cli_args = sys.argv
    app = Frontend(cli_args)
    app.exec()

View File

@ -1,41 +0,0 @@
import sys
from typing import Sequence
from PySide6.QtWidgets import QApplication, QDialog, QLabel, QPushButton, QVBoxLayout
from PySide6.QtCore import Slot
class Application():
    """Small Qt demo: a dialog with a counter label and +/- buttons."""

    def __init__(self, argv: Sequence[str]):
        """Create the QApplication from ``argv`` and build the counter dialog."""
        self.app = QApplication(argv)
        self.counter = 0

        self.window = QDialog()
        self.layout = QVBoxLayout(self.window)
        self.label = QLabel("0")
        self.button_increment = QPushButton("Increment counter!")
        self.button_decrement = QPushButton("Decrement counter!")

        self.window.setWindowTitle("PDF Table Extractor")

        # Stack label and buttons top to bottom.
        for item in (self.label, self.button_increment, self.button_decrement):
            self.layout.addWidget(item)

        # Wire each button to its slot.
        for button, slot in (
            (self.button_increment, self.increment),
            (self.button_decrement, self.decrement),
        ):
            button.clicked.connect(slot)

        print(self.window.layout())

    @Slot()
    def increment(self):
        """Add one to the counter and refresh the label."""
        self.counter += 1
        self.label.setText(str(self.counter))

    @Slot()
    def decrement(self):
        """Subtract one from the counter and refresh the label."""
        self.counter -= 1
        self.label.setText(str(self.counter))

    def exec(self):
        """Show the dialog and run the Qt event loop."""
        self.window.show()
        self.app.exec()
if __name__ == "__main__":
    # Script entry point: build the counter demo from the command line and run it.
    cli_args = sys.argv
    app = Application(cli_args)
    app.exec()

View File

@ -1,84 +0,0 @@
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
class FileChooserWindow(Gtk.Window):
    """Example window with two buttons that open a file or folder chooser dialog."""

    def __init__(self):
        """Build the window with a "Choose File" and a "Choose Folder" button."""
        super().__init__(title="FileChooser Example")

        box = Gtk.Box(spacing=6)
        self.add(box)

        button1 = Gtk.Button(label="Choose File")
        button1.connect("clicked", self.on_file_clicked)
        box.add(button1)

        button2 = Gtk.Button(label="Choose Folder")
        button2.connect("clicked", self.on_folder_clicked)
        box.add(button2)

    def on_file_clicked(self, widget):
        """Run a file-open dialog and print the outcome.

        Args:
            widget: The button that emitted "clicked" (unused).
        """
        dialog = Gtk.FileChooserDialog(
            title="Please choose a file", parent=self, action=Gtk.FileChooserAction.OPEN
        )
        # try/finally: the dialog must be destroyed even if something below raises.
        try:
            dialog.add_buttons(
                Gtk.STOCK_CANCEL,
                Gtk.ResponseType.CANCEL,
                Gtk.STOCK_OPEN,
                Gtk.ResponseType.OK,
            )
            self.add_filters(dialog)

            response = dialog.run()
            if response == Gtk.ResponseType.OK:
                print("Open clicked")
                # f-string rather than "+": get_filename() can return None.
                print(f"File selected: {dialog.get_filename()}")
            elif response == Gtk.ResponseType.CANCEL:
                print("Cancel clicked")
        finally:
            dialog.destroy()

    def add_filters(self, dialog):
        """Add "Text files", "Python files" and "Any files" filters to ``dialog``."""
        filter_text = Gtk.FileFilter()
        filter_text.set_name("Text files")
        filter_text.add_mime_type("text/plain")
        dialog.add_filter(filter_text)

        filter_py = Gtk.FileFilter()
        filter_py.set_name("Python files")
        filter_py.add_mime_type("text/x-python")
        dialog.add_filter(filter_py)

        filter_any = Gtk.FileFilter()
        filter_any.set_name("Any files")
        filter_any.add_pattern("*")
        dialog.add_filter(filter_any)

    def on_folder_clicked(self, widget):
        """Run a folder-selection dialog and print the outcome.

        Args:
            widget: The button that emitted "clicked" (unused).
        """
        dialog = Gtk.FileChooserDialog(
            title="Please choose a folder",
            parent=self,
            action=Gtk.FileChooserAction.SELECT_FOLDER,
        )
        # try/finally: the dialog must be destroyed even if something below raises.
        try:
            dialog.add_buttons(
                Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, "Select", Gtk.ResponseType.OK
            )
            dialog.set_default_size(800, 400)

            response = dialog.run()
            if response == Gtk.ResponseType.OK:
                print("Select clicked")
                # f-string rather than "+": get_filename() can return None.
                print(f"Folder selected: {dialog.get_filename()}")
            elif response == Gtk.ResponseType.CANCEL:
                print("Cancel clicked")
        finally:
            dialog.destroy()
# Show the example window and run the GTK main loop until the window is destroyed.
win = FileChooserWindow()
win.show_all()
win.connect("destroy", Gtk.main_quit)
Gtk.main()

View File

@ -2,6 +2,7 @@ import sys
import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
import PIL.Image
import gi
gi.require_version("Gtk", "3.0")
@ -12,54 +13,94 @@ import cairo
import fitz
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
Coords: TypeAlias = Tuple[float, float]
# A selection on a page: a pair of coordinates plus column positions.
# PdfPage.on_draw scales `bounds` by the pixmap width/height and places each
# column at a fraction of the selection's width.
# NOTE(review): this is a diff rendering without +/- markers, so the old and
# the new form of a changed line are listed one after the other.
class Selection:
def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
self.bounds = bounds
# NOTE(review): presumably the removed line, superseded by the assignment below - confirm against the commit.
self.columns = columns
# Copy into a list so the attribute is never None; a missing or empty `columns` becomes [].
self.columns: list[float] = list(columns or [])
class Page:
    """One document page: its index, the wrapped fitz page and its selections."""

    def __init__(self, index: int, raw: fitz.Page):
        """Store ``index`` and ``raw`` and start with an empty selection list."""
        self.index, self.raw = index, raw
        self.selections: list[Selection] = list()
# Wraps a PDF opened with fitz and builds one Page wrapper per page.
# NOTE(review): this is a diff rendering without +/- markers, so removed and
# added lines are interleaved below.
class Document:
def __init__(self, filename: str):
self.filename = filename
# NOTE(review): `self.document` / `self.selections` look like the pre-change
# attributes, replaced by `self.raw` / `self.pages` below - confirm against the commit.
self.document = fitz.Document(filename)
self.selections: Dict[int, List[Selection]] = {}
self.raw = fitz.Document(filename)
self.pages = []
# NOTE(review): the empty State class shows up in the middle of the method here;
# presumably it is a removed line of the old file - confirm.
class State():
pass
# Wrap every fitz page together with its zero-based position in the document.
for i, p in enumerate(self.raw.pages()): # type: ignore
page = Page(index=i, raw=p)
self.pages.append(page)
# Gtk.DrawingArea that renders one PDF page and overlays that page's selections.
# NOTE(review): this is a diff rendering without +/- markers, so the old and the
# new version of each changed line appear one after the other.
class PdfPage(Gtk.DrawingArea):
def __init__(self, page, *args, **kwargs):
super().__init__(*args, **kwargs)
# NOTE(review): presumably the pre-change pair, where `page` was a raw fitz.Page - confirm.
self.page: fitz.Page = page
pix = self.page.get_pixmap(dpi=96) # type: ignore
# Presumably the post-change pair: `page` is a Page wrapper and the fitz page is `page.raw`.
self.page: Page = page
pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=96) # type: ignore
# Request exactly the pixel size of the page rendered at 96 dpi.
self.set_size_request(pix.width, pix.height)
self.set_app_paintable(True) # type: ignore
# The trailing empty dict is handed to on_draw as its `data` argument.
self.connect("draw", self.on_draw, {})
# Hard-coded selection added to the page with index 4; presumably the
# testing/demo code mentioned in the commit message - confirm before keeping.
if self.page.index == 4:
self.page.selections.append(Selection(
((0.1, 0.18), (0.9, 0.72)),
columns=[0.08, 0.24, 0.34, 0.42, 0.51, 0.59, 0.67, 0.73, 0.91]
))
# "draw" handler: paints a background, the rendered page, then every selection.
def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
# app: Application = widget.get_window().get_application() # type: ignore
width = widget.get_allocated_width()
height = widget.get_allocated_height()
# NOTE(review): the selection code below passes colour components as 0..1,
# so 255 here is presumably meant as 1 (white) - confirm.
cr.set_source_rgba(255, 255, 255)
cr.paint()
sctx = widget.get_style_context()
Gtk.render_background(sctx, cr, 0, 0, width, height)
# NOTE(review): presumably the pre-change pair, reading the pixmap samples directly - confirm.
pix = self.page.get_pixmap(dpi=96, alpha=True) # type: ignore
mv: memoryview = pix.samples_mv
# Render the page again at 96 dpi and load the samples into a PIL image.
pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=96) # type: ignore
img = PIL.Image.frombytes("RGBA" if pix.alpha else "RGB", [pix.width, pix.height], pix.samples)
# After this call the image is split into four bands (r, g, b, a) below.
img.putalpha(1)
# Swap the red and blue bands; presumably to match the byte order cairo
# expects for RGB24 surfaces - TODO confirm.
img = PIL.Image.merge("RGBA", (lambda r, g, b, a: (b, g, r, a))(*img.split())) # type: ignore
# Copy the image bytes into a bytearray and wrap it for cairo.
mv: memoryview = memoryview(bytearray(img.tobytes()))
ims = cairo.ImageSurface.create_for_data(mv, cairo.Format.RGB24, pix.width, pix.height)
cr.set_source_surface(ims, 0, 0)
cr.paint()
for sel in self.page.selections:
# Selection bounds are fractions of the page; scale them to pixmap pixels.
sel_x1 = sel.bounds[0][0]*pix.width
sel_y1 = sel.bounds[0][1]*pix.height
sel_x2 = sel.bounds[1][0]*pix.width
sel_y2 = sel.bounds[1][1]*pix.height
# Base settings
cr.set_line_cap(cairo.LINE_CAP_BUTT)
cr.set_line_width(2)
# Columns (draw first - below selection)
cr.set_dash([5])
cr.set_source_rgba(1, 0, 0)
for col in sel.columns:
# Each column is a fraction of the selection's width, measured from its left edge.
col_x = sel_x1 + (sel_x2 - sel_x1) * col
cr.move_to(col_x, sel_y1)
cr.line_to(col_x, sel_y2)
# One stroke draws all column lines added to the path above.
cr.stroke()
# Selection
cr.rectangle(sel_x1, sel_y1, sel_x2 - sel_x1, sel_y2 - sel_y1)
# White part of the pattern
cr.set_source_rgba(1, 1, 1)
# Same dash length as the black pass below, but with a dash offset of 5.
cr.set_dash([5], 5)
cr.stroke_preserve() # important preserve - reuse rectangle
# Black part of the pattern
cr.set_source_rgba(0, 0, 0)
cr.set_dash([5])
cr.stroke()
@Gtk.Template.from_file("MainWindow.glade")
class MainWindow(Gtk.ApplicationWindow):
@ -81,8 +122,6 @@ class MainWindow(Gtk.ApplicationWindow):
self.app.connect("notify::document", self.on_document_updated)
# self.pdf_list_box.add(PdfPage())
# @Gtk.Template.Callback()
# def example_button_released_cb(self, widget: Gtk.Button, **kwargs):
# assert self.example_button == widget
@ -124,20 +163,35 @@ class MainWindow(Gtk.ApplicationWindow):
# editing an existing one won't trigger it
# Handler the window connects to the application's "notify::document" signal:
# updates the header bar and rebuilds the page list from the "document" property.
# NOTE(review): this is a diff rendering without +/- markers, so removed and
# added lines are interleaved below.
def on_document_updated(self, recvobj, gparamstring):
document: Document = self.app.get_property("document")
# Title is the last "/"-separated component of the filename; subtitle is the full filename.
# NOTE(review): the two lines carrying "# type: ignore" are presumably the removed versions - confirm.
self.header_bar.set_title(document.filename.split("/")[-1]) # type: ignore
self.header_bar.set_subtitle(document.filename) # type: ignore
self.header_bar.set_title(document.filename.split("/")[-1])
self.header_bar.set_subtitle(document.filename)
# NOTE(review): presumably the removed loop, which wrapped each raw fitz page
# in an explicit Gtk.ListBoxRow - confirm.
for i in document.document.pages(): # type: ignore
row = Gtk.ListBoxRow()
page = PdfPage(i)
row.add(page)
self.pdf_list_box.add(row) # type: ignore
# Destroy the list box children that are exactly Gtk.ListBoxRow before repopulating.
for child in self.pdf_list_box.get_children():
if type(child) is Gtk.ListBoxRow:
child.destroy()
# Add one PdfPage widget per Page wrapper of the document, then show everything.
for i in document.pages:
self.pdf_list_box.add(PdfPage(i))
self.pdf_list_box.show_all()
# Template callback that loads a hard-coded test PDF into the application's
# "document" property and reports any failure in a modal error dialog.
# NOTE(review): this is a diff rendering without +/- markers, so removed and
# added lines are interleaved below.
@Gtk.Template.Callback()
def on_open_button_small_clicked(self, widget, **kwargs):
# NOTE(review): presumably the removed placeholder body - confirm against the commit.
pass
# Hard-coded absolute path on the author's machine.
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
try:
# The window connects on_document_updated to "notify::document" on the application.
self.app.set_property("document", Document(TEST_FILENAME))
except Exception as e:
# Show the failure to the user: repr of the exception as the main text,
# the formatted traceback as the secondary text.
# NOTE(review): "occured" in the title below is a misspelling of "occurred".
message_dialog = Gtk.MessageDialog(
title="An error has occured.",
transient_for=self,
modal=True,
message_type=Gtk.MessageType.ERROR,
text=repr(e),
secondary_text=traceback.format_exc(),
buttons=Gtk.ButtonsType.OK,
)
# Block until the dialog is dismissed, then dispose of it.
message_dialog.run() # type: ignore
message_dialog.destroy()
class Application(Gtk.Application):

View File

@ -11,11 +11,14 @@ python = ">=3.11,<3.13"
pygobject = "^3.48.2"
tabula-py = "^2.9.0"
PyMuPDF = "^1.24.0"
pillow = "^10.3.0"
#pdfplumber = "^0.11.0"
#PySide6 = "^6.6.3" # qt
[tool.poetry.group.dev.dependencies]
pygobject-stubs = "^2.11.0" # this is actually broken lol
# this is actually broken lol; fix it by running the following in the venv:
# pip uninstall pygobject-stubs && pip install pygobject-stubs --no-cache-dir --config-settings=config=Gtk3,Gdk3,Soup2
pygobject-stubs = "^2.11.0"
[build-system]
requires = ["poetry-core"]