Compare commits

..

No commits in common. "e1246691857e7df95d0b405838b0414c6a8c00c9" and "968f18e107adc898e2a51dca4aff0242aa9a965b" have entirely different histories.

8 changed files with 291 additions and 87 deletions

View File

@ -3,8 +3,6 @@
<interface> <interface>
<requires lib="gtk+" version="3.24"/> <requires lib="gtk+" version="3.24"/>
<template class="main_window" parent="GtkApplicationWindow"> <template class="main_window" parent="GtkApplicationWindow">
<property name="width-request">800</property>
<property name="height-request">500</property>
<property name="can-focus">False</property> <property name="can-focus">False</property>
<property name="title" translatable="yes">PDF Table Extractor</property> <property name="title" translatable="yes">PDF Table Extractor</property>
<property name="icon-name">document-page-setup</property> <property name="icon-name">document-page-setup</property>

67
experiments/draw_test.py Normal file
View File

@ -0,0 +1,67 @@
import sys
import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import GLib, Gio, Gtk, GObject
gi.require_foreign("cairo")
import cairo
class CustomDrawingArea(Gtk.DrawingArea):
    """Drawing area that fills its whole allocation with opaque red.

    On construction it requests a 200x200 size, marks itself as
    app-paintable and registers ``on_draw`` on the "draw" signal via
    ``connect_after``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.set_size_request(200, 200)
        self.set_app_paintable(True)
        # The empty dict is passed through to ``on_draw`` as its ``data`` argument.
        self.connect_after("draw", self.on_draw, {})

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Handle "draw": log the allocated size, then paint the widget.

        Args:
            widget: The drawing area being painted.
            cr: Cairo context to draw on.
            data: Extra value supplied at connect time (unused here).
        """
        width, height = widget.get_allocated_width(), widget.get_allocated_height()
        print(f"w: {width}, h: {height}")

        # Themed background first; the red rectangle is then drawn over it.
        Gtk.render_background(widget.get_style_context(), cr, 0, 0, width, height)

        cr.set_source_rgba(1.0, 0.0, 0.0, 1.0)
        cr.rectangle(0, 0, width, height)
        cr.fill()
class MainWindow(Gtk.ApplicationWindow):
    """Application window holding a framed drawing area above a button."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # The window must have been created with an owning application.
        self.app: Application = self.get_application()  # type: ignore
        assert self.app is not None

        # Vertical container for everything shown in the window.
        layout = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
        self.add(layout)

        # Drawing area wrapped in a labelled frame; it takes the spare space.
        self.area = CustomDrawingArea()
        framed_area = Gtk.Frame(label="DrawingArea")
        framed_area.add(self.area)
        layout.pack_start(framed_area, expand=True, fill=True, padding=20)

        layout.add(Gtk.Button(label=":)"))
class Application(Gtk.Application):
    """Gtk application that creates its ``MainWindow`` on first activation."""

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            application_id="zone.lunareclipse.draw_test",
            flags=Gio.ApplicationFlags.FLAGS_NONE,
            **kwargs
        )
        # Created lazily in do_activate().
        self.window = None

    def do_activate(self):
        """Show the main window, building it if it does not exist yet."""
        if not self.window:
            self.window = MainWindow(application=self)
        self.window.show_all()
if __name__ == "__main__":
    # Run the application and hand its exit status back to the shell
    # instead of discarding it.
    app = Application()
    sys.exit(app.run(sys.argv))

17
experiments/main.py Normal file
View File

@ -0,0 +1,17 @@
import argparse
import tabula
if __name__ == "__main__":
    # The PDF to read can be given on the command line; with no argument the
    # script falls back to the sample file it was originally written against.
    parser = argparse.ArgumentParser(
        description="Print the tables tabula finds on page 5 of a PDF."
    )
    parser.add_argument(
        "filename",
        nargs="?",
        default="../sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf",
        help="path of the PDF to read (default: %(default)s)",
    )
    args = parser.parse_args()

    table = tabula.io.read_pdf(
        args.filename,
        pages=[5],
        lattice=False,
        multiple_tables=True,
    )
    print(table)
    print("test")

View File

@ -0,0 +1,54 @@
import sys
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
from PySide6.QtWidgets import QApplication, QDialog, QHBoxLayout, QLabel, QMainWindow, QPushButton, QVBoxLayout, QWidget
from PySide6.QtCore import Slot
from PySide6.QtPdf import QPdfDocument
from PySide6.QtPdfWidgets import QPdfView
import fitz
# Hard-coded absolute path to a sample PDF.
# NOTE(review): nothing in the visible part of this file reads this constant -
# confirm whether it is still needed.
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
Coords: TypeAlias = Tuple[float, float]
class Selection:
def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
self.bounds = bounds
self.columns = columns
class Document:
    """A PDF file opened with ``fitz`` together with its selections.

    Attributes:
        filename: The path the document was opened from.
        document: The ``fitz.Document`` created from ``filename``.
        selections: Lists of ``Selection`` objects grouped under an int key
            (presumably the page index - confirm against callers).
    """

    def __init__(self, filename: str):
        self.filename = filename
        self.document = fitz.Document(filename)
        # Starts empty; nothing in this class populates it.
        self.selections: Dict[int, List[Selection]] = {}
class SelectablePdfView(QPdfView):
    """Placeholder ``QPdfView`` subclass; it adds no behaviour of its own yet."""
class State:
    """Empty placeholder class; it defines no attributes or methods yet."""
class Frontend():
    """Builds the Qt application, its main window and the two side panels.

    Args:
        argv: Command-line arguments forwarded to ``QApplication``.
    """

    def __init__(self, argv: Sequence[str]):
        self.app = QApplication(argv)
        self.window = QMainWindow()
        self.state = State()

        self.thumbnails = QWidget()
        # Created here but not attached to any widget in this class.
        self.pdfDocument = QPdfDocument()
        self.optionsPanel = QWidget()

        # Central widget: thumbnails on the left, options panel on the right.
        central_widget = QWidget()
        panels = QHBoxLayout(central_widget)
        panels.addWidget(self.thumbnails)
        panels.addWidget(self.optionsPanel)
        self.window.setCentralWidget(central_widget)

    def exec(self):
        """Show the main window and run the Qt event loop.

        Returns:
            The value returned by ``QApplication.exec()``, so the caller can
            use it as the process exit status.
        """
        self.window.show()
        return self.app.exec()


if __name__ == "__main__":
    app = Frontend(sys.argv)
    # Propagate the event loop's result to the shell.
    sys.exit(app.exec())

41
experiments/qt6_test.py Normal file
View File

@ -0,0 +1,41 @@
import sys
from typing import Sequence
from PySide6.QtWidgets import QApplication, QDialog, QLabel, QPushButton, QVBoxLayout
from PySide6.QtCore import Slot
class Application():
    """Small counter dialog: a label plus increment and decrement buttons.

    Args:
        argv: Command-line arguments forwarded to ``QApplication``.
    """

    def __init__(self, argv: Sequence[str]):
        self.app = QApplication(argv)
        self.counter = 0

        self.window = QDialog()
        self.layout = QVBoxLayout(self.window)
        self.label = QLabel("0")
        self.button_increment = QPushButton("Increment counter!")
        self.button_decrement = QPushButton("Decrement counter!")

        self.window.setWindowTitle("PDF Table Extractor")
        for widget in (self.label, self.button_increment, self.button_decrement):
            self.layout.addWidget(widget)

        self.button_increment.clicked.connect(self.increment)
        self.button_decrement.clicked.connect(self.decrement)

        print(self.window.layout())

    def _show_counter(self):
        """Write the current counter value into the label."""
        self.label.setText(f"{self.counter}")

    @Slot()
    def increment(self):
        """Add one to the counter and refresh the label."""
        self.counter += 1
        self._show_counter()

    @Slot()
    def decrement(self):
        """Subtract one from the counter and refresh the label."""
        self.counter -= 1
        self._show_counter()

    def exec(self):
        """Show the dialog and run the Qt event loop.

        Returns:
            The value returned by ``QApplication.exec()``, so the caller can
            use it as the process exit status.
        """
        self.window.show()
        return self.app.exec()


if __name__ == "__main__":
    app = Application(sys.argv)
    # Propagate the event loop's result to the shell.
    sys.exit(app.exec())

84
experiments/test.py Normal file
View File

@ -0,0 +1,84 @@
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
class FileChooserWindow(Gtk.Window):
    """Window with two buttons that open a file chooser and a folder chooser.

    The outcome of each dialog is printed to standard output.
    """

    def __init__(self):
        super().__init__(title="FileChooser Example")

        box = Gtk.Box(spacing=6)
        self.add(box)

        button1 = Gtk.Button(label="Choose File")
        button1.connect("clicked", self.on_file_clicked)
        box.add(button1)

        button2 = Gtk.Button(label="Choose Folder")
        button2.connect("clicked", self.on_folder_clicked)
        box.add(button2)

    def _report_response(self, dialog, ok_message, selected_label):
        """Run *dialog* and print what the user did.

        Args:
            dialog: A fully configured ``Gtk.FileChooserDialog``.
            ok_message: Line printed when the dialog is confirmed.
            selected_label: Prefix printed before the chosen filename.
        """
        response = dialog.run()
        if response == Gtk.ResponseType.OK:
            print(ok_message)
            # f-string rather than "+": the filename may be None, which
            # would make string concatenation raise TypeError.
            print(f"{selected_label}: {dialog.get_filename()}")
        elif response == Gtk.ResponseType.CANCEL:
            print("Cancel clicked")

    def on_file_clicked(self, widget):
        """Handle "clicked" on the file button: ask for a file and report it."""
        dialog = Gtk.FileChooserDialog(
            title="Please choose a file", parent=self, action=Gtk.FileChooserAction.OPEN
        )
        # try/finally so the dialog is destroyed even if setup or run() raises.
        try:
            dialog.add_buttons(
                Gtk.STOCK_CANCEL,
                Gtk.ResponseType.CANCEL,
                Gtk.STOCK_OPEN,
                Gtk.ResponseType.OK,
            )
            self.add_filters(dialog)
            self._report_response(dialog, "Open clicked", "File selected")
        finally:
            dialog.destroy()

    def add_filters(self, dialog):
        """Attach the text, Python and catch-all file filters to *dialog*."""
        filter_text = Gtk.FileFilter()
        filter_text.set_name("Text files")
        filter_text.add_mime_type("text/plain")
        dialog.add_filter(filter_text)

        filter_py = Gtk.FileFilter()
        filter_py.set_name("Python files")
        filter_py.add_mime_type("text/x-python")
        dialog.add_filter(filter_py)

        filter_any = Gtk.FileFilter()
        filter_any.set_name("Any files")
        filter_any.add_pattern("*")
        dialog.add_filter(filter_any)

    def on_folder_clicked(self, widget):
        """Handle "clicked" on the folder button: ask for a folder and report it."""
        dialog = Gtk.FileChooserDialog(
            title="Please choose a folder",
            parent=self,
            action=Gtk.FileChooserAction.SELECT_FOLDER,
        )
        # try/finally so the dialog is destroyed even if setup or run() raises.
        try:
            dialog.add_buttons(
                Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, "Select", Gtk.ResponseType.OK
            )
            dialog.set_default_size(800, 400)
            self._report_response(dialog, "Select clicked", "Folder selected")
        finally:
            dialog.destroy()
# Script body: create the window, stop the GTK main loop when the window is
# destroyed, show everything and enter the main loop.
win = FileChooserWindow()
win.connect("destroy", Gtk.main_quit)
win.show_all()
Gtk.main()

View File

@ -2,7 +2,6 @@ import sys
import traceback import traceback
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
import PIL.Image
import gi import gi
gi.require_version("Gtk", "3.0") gi.require_version("Gtk", "3.0")
@ -13,94 +12,54 @@ import cairo
import fitz import fitz
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
Coords: TypeAlias = Tuple[float, float] Coords: TypeAlias = Tuple[float, float]
class Selection: class Selection:
def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None): def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
self.bounds = bounds self.bounds = bounds
self.columns: list[float] = list(columns or []) self.columns = columns
class Page:
def __init__(self, index: int, raw: fitz.Page):
self.index = index
self.raw = raw
self.selections: list[Selection] = []
class Document: class Document:
def __init__(self, filename: str): def __init__(self, filename: str):
self.filename = filename self.filename = filename
self.raw = fitz.Document(filename) self.document = fitz.Document(filename)
self.pages = [] self.selections: Dict[int, List[Selection]] = {}
for i, p in enumerate(self.raw.pages()): # type: ignore
page = Page(index=i, raw=p) class State():
self.pages.append(page) pass
class PdfPage(Gtk.DrawingArea): class PdfPage(Gtk.DrawingArea):
def __init__(self, page, *args, **kwargs): def __init__(self, page, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.page: Page = page self.page: fitz.Page = page
pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=96) # type: ignore pix = self.page.get_pixmap(dpi=96) # type: ignore
self.set_size_request(pix.width, pix.height) self.set_size_request(pix.width, pix.height)
self.set_app_paintable(True) # type: ignore
self.connect("draw", self.on_draw, {}) self.connect("draw", self.on_draw, {})
if self.page.index == 4:
self.page.selections.append(Selection(
((0.1, 0.18), (0.9, 0.72)),
columns=[0.08, 0.24, 0.34, 0.42, 0.51, 0.59, 0.67, 0.73, 0.91]
))
def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer): def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
# app: Application = widget.get_window().get_application() # type: ignore
width = widget.get_allocated_width() width = widget.get_allocated_width()
height = widget.get_allocated_height() height = widget.get_allocated_height()
cr.set_source_rgba(255, 255, 255)
cr.paint()
sctx = widget.get_style_context() sctx = widget.get_style_context()
Gtk.render_background(sctx, cr, 0, 0, width, height) Gtk.render_background(sctx, cr, 0, 0, width, height)
pix = self.page.get_pixmap(dpi=96, alpha=True) # type: ignore
pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=96) # type: ignore mv: memoryview = pix.samples_mv
img = PIL.Image.frombytes("RGBA" if pix.alpha else "RGB", [pix.width, pix.height], pix.samples)
img.putalpha(1)
img = PIL.Image.merge("RGBA", (lambda r, g, b, a: (b, g, r, a))(*img.split())) # type: ignore
mv: memoryview = memoryview(bytearray(img.tobytes()))
ims = cairo.ImageSurface.create_for_data(mv, cairo.Format.RGB24, pix.width, pix.height) ims = cairo.ImageSurface.create_for_data(mv, cairo.Format.RGB24, pix.width, pix.height)
cr.set_source_surface(ims, 0, 0) cr.set_source_surface(ims, 0, 0)
cr.paint() cr.paint()
for sel in self.page.selections:
sel_x1 = sel.bounds[0][0]*pix.width
sel_y1 = sel.bounds[0][1]*pix.height
sel_x2 = sel.bounds[1][0]*pix.width
sel_y2 = sel.bounds[1][1]*pix.height
# Base settings
cr.set_line_cap(cairo.LINE_CAP_BUTT)
cr.set_line_width(2)
# Columns (draw first - below selection)
cr.set_dash([5])
cr.set_source_rgba(1, 0, 0)
for col in sel.columns:
col_x = sel_x1 + (sel_x2 - sel_x1) * col
cr.move_to(col_x, sel_y1)
cr.line_to(col_x, sel_y2)
cr.stroke()
# Selection
cr.rectangle(sel_x1, sel_y1, sel_x2 - sel_x1, sel_y2 - sel_y1)
# White part of the pattern
cr.set_source_rgba(1, 1, 1)
cr.set_dash([5], 5)
cr.stroke_preserve() # important preserve - reuse rectangle
# Black part of the pattern
cr.set_source_rgba(0, 0, 0)
cr.set_dash([5])
cr.stroke()
@Gtk.Template.from_file("MainWindow.glade") @Gtk.Template.from_file("MainWindow.glade")
class MainWindow(Gtk.ApplicationWindow): class MainWindow(Gtk.ApplicationWindow):
@ -122,6 +81,8 @@ class MainWindow(Gtk.ApplicationWindow):
self.app.connect("notify::document", self.on_document_updated) self.app.connect("notify::document", self.on_document_updated)
# self.pdf_list_box.add(PdfPage())
# @Gtk.Template.Callback() # @Gtk.Template.Callback()
# def example_button_released_cb(self, widget: Gtk.Button, **kwargs): # def example_button_released_cb(self, widget: Gtk.Button, **kwargs):
# assert self.example_button == widget # assert self.example_button == widget
@ -163,35 +124,20 @@ class MainWindow(Gtk.ApplicationWindow):
# editing an existing one won't trigger it # editing an existing one won't trigger it
def on_document_updated(self, recvobj, gparamstring): def on_document_updated(self, recvobj, gparamstring):
document: Document = self.app.get_property("document") document: Document = self.app.get_property("document")
self.header_bar.set_title(document.filename.split("/")[-1]) self.header_bar.set_title(document.filename.split("/")[-1]) # type: ignore
self.header_bar.set_subtitle(document.filename) self.header_bar.set_subtitle(document.filename) # type: ignore
for child in self.pdf_list_box.get_children(): for i in document.document.pages(): # type: ignore
if type(child) is Gtk.ListBoxRow: row = Gtk.ListBoxRow()
child.destroy() page = PdfPage(i)
row.add(page)
for i in document.pages: self.pdf_list_box.add(row) # type: ignore
self.pdf_list_box.add(PdfPage(i))
self.pdf_list_box.show_all() self.pdf_list_box.show_all()
@Gtk.Template.Callback() @Gtk.Template.Callback()
def on_open_button_small_clicked(self, widget, **kwargs): def on_open_button_small_clicked(self, widget, **kwargs):
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf" pass
try:
self.app.set_property("document", Document(TEST_FILENAME))
except Exception as e:
message_dialog = Gtk.MessageDialog(
title="An error has occured.",
transient_for=self,
modal=True,
message_type=Gtk.MessageType.ERROR,
text=repr(e),
secondary_text=traceback.format_exc(),
buttons=Gtk.ButtonsType.OK,
)
message_dialog.run() # type: ignore
message_dialog.destroy()
class Application(Gtk.Application): class Application(Gtk.Application):

View File

@ -11,14 +11,11 @@ python = ">=3.11,<3.13"
pygobject = "^3.48.2" pygobject = "^3.48.2"
tabula-py = "^2.9.0" tabula-py = "^2.9.0"
PyMuPDF = "^1.24.0" PyMuPDF = "^1.24.0"
pillow = "^10.3.0"
#pdfplumber = "^0.11.0" #pdfplumber = "^0.11.0"
#PySide6 = "^6.6.3" # qt #PySide6 = "^6.6.3" # qt
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
# this is actually broken lol, fix by running following in venv: pygobject-stubs = "^2.11.0" # this is actually broken lol
# pip uninstall pygobject-stubs && pip install pygobject-stubs --no-cache-dir --config-settings=config=Gtk3,Gdk3,Soup2
pygobject-stubs = "^2.11.0"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core"]