Compare commits
4 Commits
968f18e107
...
e124669185
Author | SHA1 | Date |
---|---|---|
LunarEclipse | e124669185 | |
LunarEclipse | ba660af9eb | |
LunarEclipse | 8c8a01f1f1 | |
LunarEclipse | 7c871a6387 |
|
@ -3,6 +3,8 @@
|
|||
<interface>
|
||||
<requires lib="gtk+" version="3.24"/>
|
||||
<template class="main_window" parent="GtkApplicationWindow">
|
||||
<property name="width-request">800</property>
|
||||
<property name="height-request">500</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="title" translatable="yes">PDF Table Extractor</property>
|
||||
<property name="icon-name">document-page-setup</property>
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
import sys
|
||||
import traceback
|
||||
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
|
||||
|
||||
import gi
|
||||
gi.require_version("Gtk", "3.0")
|
||||
from gi.repository import GLib, Gio, Gtk, GObject
|
||||
gi.require_foreign("cairo")
|
||||
import cairo
|
||||
|
||||
|
||||
class CustomDrawingArea(Gtk.DrawingArea):
    """Test widget that paints its whole allocation opaque red."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.set_app_paintable(True)
        self.set_size_request(200, 200)
        # The empty dict is passed through to on_draw as its `data` argument.
        self.connect_after("draw", self.on_draw, {})

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Render the themed background, then fill the allocation with red.

        Also prints the current allocation size on every draw.
        """
        w = widget.get_allocated_width()
        h = widget.get_allocated_height()
        print(f"w: {w}, h: {h}")

        Gtk.render_background(widget.get_style_context(), cr, 0, 0, w, h)

        cr.rectangle(0, 0, w, h)
        cr.set_source_rgba(1.0, 0.0, 0.0, 1.0)
        cr.fill()
|
||||
|
||||
|
||||
class MainWindow(Gtk.ApplicationWindow):
    """Window holding a framed CustomDrawingArea with a button below it."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.app: Application = self.get_application()  # type: ignore
        assert self.app is not None

        # Drawing area wrapped in a labelled frame.
        self.area = CustomDrawingArea()
        framed_area = Gtk.Frame(label="DrawingArea")
        framed_area.add(self.area)

        # Vertical layout: the frame takes the spare space, the button sits under it.
        column = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
        column.pack_start(framed_area, expand=True, fill=True, padding=20)
        column.add(Gtk.Button(label=":)"))
        self.add(column)
|
||||
|
||||
|
||||
class Application(Gtk.Application):
    """Gtk application that owns a single MainWindow, created on first activation."""

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            application_id="zone.lunareclipse.draw_test",
            flags=Gio.ApplicationFlags.FLAGS_NONE,
            **kwargs
        )
        # Created in do_activate.
        self.window = None

    def do_activate(self):
        """Create the main window if it does not exist yet, then show it."""
        if not self.window:
            self.window = MainWindow(application=self)
        self.window.show_all()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    app = Application()
    # Propagate the application's exit status to the shell instead of
    # always exiting with 0.
    sys.exit(app.run(sys.argv))
|
|
@ -1,17 +0,0 @@
|
|||
import argparse
|
||||
import tabula
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The PDF path is an optional positional argument; when omitted, the
    # previously hard-coded sample document is used, so running the script
    # without arguments behaves as before.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filename",
        nargs="?",
        default="../sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf",
    )
    args = parser.parse_args()

    table = tabula.io.read_pdf(
        args.filename,
        pages=[5],
        lattice=False,
        multiple_tables=True,
    )
    print(table)
    print("test")
|
|
@ -1,54 +0,0 @@
|
|||
import sys
|
||||
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
|
||||
|
||||
from PySide6.QtWidgets import QApplication, QDialog, QHBoxLayout, QLabel, QMainWindow, QPushButton, QVBoxLayout, QWidget
|
||||
from PySide6.QtCore import Slot
|
||||
from PySide6.QtPdf import QPdfDocument
|
||||
from PySide6.QtPdfWidgets import QPdfView
|
||||
import fitz
|
||||
|
||||
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
|
||||
|
||||
Coords: TypeAlias = Tuple[float, float]
|
||||
|
||||
class Selection:
    """Holds a pair of coordinates (`bounds`) and optional `columns` values."""

    def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
        # Both arguments are stored exactly as given (no copy, no validation).
        self.bounds, self.columns = bounds, columns
|
||||
|
||||
class Document:
    """A PDF opened through fitz, with a per-page-number map of selections."""

    def __init__(self, filename: str):
        # Maps an int key to the list of Selection objects stored under it;
        # starts empty.
        self.selections: Dict[int, List[Selection]] = {}
        self.filename = filename
        self.document = fitz.Document(filename)
|
||||
|
||||
class SelectablePdfView(QPdfView):
    """Placeholder subclass of QPdfView; adds no behaviour yet."""
|
||||
|
||||
class State:
    """Empty placeholder class; an instance is stored on Frontend.state."""
|
||||
|
||||
class Frontend():
    """Qt front end: a main window with a thumbnails pane and an options pane."""

    def __init__(self, argv: Sequence[str]):
        """Create the QApplication, the main window and its (empty) panes.

        Args:
            argv: Command-line arguments forwarded to QApplication.
        """
        self.app = QApplication(argv)
        self.window = QMainWindow()
        self.state = State()

        self.thumbnails = QWidget()
        self.pdfDocument = QPdfDocument()
        self.optionsPanel = QWidget()

        # Side-by-side layout: thumbnails on the left, options on the right.
        central_widget = QWidget()
        cw_layout = QHBoxLayout(central_widget)
        cw_layout.addWidget(self.thumbnails)
        cw_layout.addWidget(self.optionsPanel)
        self.window.setCentralWidget(central_widget)

    def exec(self):
        """Show the main window and run the Qt event loop.

        Returns:
            The value returned by QApplication.exec(), so callers can pass
            it on as the process exit code.
        """
        self.window.show()
        return self.app.exec()
|
||||
|
||||
if __name__ == "__main__":
    # Build the front end from the command line and enter its event loop.
    app = Frontend(argv=sys.argv)
    app.exec()
|
|
@ -1,41 +0,0 @@
|
|||
import sys
|
||||
from typing import Sequence
|
||||
|
||||
from PySide6.QtWidgets import QApplication, QDialog, QLabel, QPushButton, QVBoxLayout
|
||||
from PySide6.QtCore import Slot
|
||||
|
||||
class Application():
    """Minimal Qt dialog with a counter label and increment/decrement buttons."""

    def __init__(self, argv: Sequence[str]):
        """Build the dialog and wire both buttons to the counter.

        Args:
            argv: Command-line arguments forwarded to QApplication.
        """
        self.app = QApplication(argv)
        self.counter = 0
        self.window = QDialog()
        self.layout = QVBoxLayout(self.window)
        self.label = QLabel("0")
        self.button_increment = QPushButton("Increment counter!")
        self.button_decrement = QPushButton("Decrement counter!")

        self.window.setWindowTitle("PDF Table Extractor")
        self.layout.addWidget(self.label)
        self.layout.addWidget(self.button_increment)
        self.layout.addWidget(self.button_decrement)
        self.button_increment.clicked.connect(self.increment)
        self.button_decrement.clicked.connect(self.decrement)
        print(self.window.layout())

    def _refresh_label(self):
        """Show the current counter value in the label."""
        self.label.setText(f"{self.counter}")

    @Slot()
    def increment(self):
        """Add one to the counter and update the label."""
        self.counter += 1
        self._refresh_label()

    @Slot()
    def decrement(self):
        """Subtract one from the counter and update the label."""
        self.counter -= 1
        self._refresh_label()

    def exec(self):
        """Show the dialog and run the Qt event loop.

        Returns:
            The value returned by QApplication.exec(), so callers can pass
            it on as the process exit code.
        """
        self.window.show()
        return self.app.exec()
|
||||
|
||||
if __name__ == "__main__":
    # Build the counter dialog from the command line and enter its event loop.
    app = Application(argv=sys.argv)
    app.exec()
|
|
@ -1,84 +0,0 @@
|
|||
import gi
|
||||
|
||||
gi.require_version("Gtk", "3.0")
|
||||
from gi.repository import Gtk
|
||||
|
||||
|
||||
class FileChooserWindow(Gtk.Window):
    """Example window with one button to choose a file and one to choose a folder."""

    def __init__(self):
        super().__init__(title="FileChooser Example")

        box = Gtk.Box(spacing=6)
        self.add(box)

        button1 = Gtk.Button(label="Choose File")
        button1.connect("clicked", self.on_file_clicked)
        box.add(button1)

        button2 = Gtk.Button(label="Choose Folder")
        button2.connect("clicked", self.on_folder_clicked)
        box.add(button2)

    def _run_and_report(self, dialog, ok_message, selected_prefix):
        """Run *dialog* modally, print the outcome, and always destroy it.

        Args:
            dialog: The Gtk.FileChooserDialog to run.
            ok_message: Line printed when the OK response is received.
            selected_prefix: Text printed in front of the chosen filename.
        """
        try:
            response = dialog.run()
            if response == Gtk.ResponseType.OK:
                print(ok_message)
                # f-string instead of "+" so a missing filename (None) is
                # printed rather than raising TypeError.
                print(f"{selected_prefix}{dialog.get_filename()}")
            elif response == Gtk.ResponseType.CANCEL:
                print("Cancel clicked")
        finally:
            # Destroy the dialog even if reporting failed.
            dialog.destroy()

    def on_file_clicked(self, widget):
        """Handle "Choose File": open a file chooser with the standard filters."""
        dialog = Gtk.FileChooserDialog(
            title="Please choose a file", parent=self, action=Gtk.FileChooserAction.OPEN
        )
        dialog.add_buttons(
            Gtk.STOCK_CANCEL,
            Gtk.ResponseType.CANCEL,
            Gtk.STOCK_OPEN,
            Gtk.ResponseType.OK,
        )

        self.add_filters(dialog)

        self._run_and_report(dialog, "Open clicked", "File selected: ")

    def add_filters(self, dialog):
        """Add the "Text files", "Python files" and "Any files" filters to *dialog*."""
        filter_text = Gtk.FileFilter()
        filter_text.set_name("Text files")
        filter_text.add_mime_type("text/plain")
        dialog.add_filter(filter_text)

        filter_py = Gtk.FileFilter()
        filter_py.set_name("Python files")
        filter_py.add_mime_type("text/x-python")
        dialog.add_filter(filter_py)

        filter_any = Gtk.FileFilter()
        filter_any.set_name("Any files")
        filter_any.add_pattern("*")
        dialog.add_filter(filter_any)

    def on_folder_clicked(self, widget):
        """Handle "Choose Folder": open a folder chooser sized 800x400."""
        dialog = Gtk.FileChooserDialog(
            title="Please choose a folder",
            parent=self,
            action=Gtk.FileChooserAction.SELECT_FOLDER,
        )
        dialog.add_buttons(
            Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, "Select", Gtk.ResponseType.OK
        )
        dialog.set_default_size(800, 400)

        self._run_and_report(dialog, "Select clicked", "Folder selected: ")
|
||||
|
||||
|
||||
# Script entry: create the example window, show it, and run the GTK main loop.
win = FileChooserWindow()
# Closing the window ends the main loop.
win.connect("destroy", Gtk.main_quit)
win.show_all()
Gtk.main()
|
|
@ -2,6 +2,7 @@ import sys
|
|||
import traceback
|
||||
from typing import Dict, List, Optional, Sequence, Tuple, TypeAlias
|
||||
|
||||
import PIL.Image
|
||||
import gi
|
||||
|
||||
gi.require_version("Gtk", "3.0")
|
||||
|
@ -12,54 +13,94 @@ import cairo
|
|||
|
||||
import fitz
|
||||
|
||||
TEST_FILENAME = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
|
||||
|
||||
Coords: TypeAlias = Tuple[float, float]
|
||||
|
||||
|
||||
class Selection:
    """Holds a pair of coordinates (`bounds`) and a list of `columns` values."""

    def __init__(self, bounds: Tuple[Coords, Coords], columns: Optional[Sequence[float]] = None):
        """Store the bounds and a private copy of the columns.

        Args:
            bounds: Two coordinate pairs, stored as given.
            columns: Optional sequence of floats; copied into a new list.
                ``None`` (or an empty sequence) yields an empty list.
        """
        self.bounds = bounds
        # Copy so later edits to this selection never touch the caller's sequence.
        self.columns: list[float] = list(columns or [])
|
||||
|
||||
class Page:
    """One document page: its index, the underlying fitz page, and its selections."""

    def __init__(self, index: int, raw: fitz.Page):
        # Selections start empty and are appended to by other code.
        self.selections: list[Selection] = []
        self.index, self.raw = index, raw
|
||||
|
||||
class Document:
    """A PDF opened through fitz, exposed as a list of Page wrappers."""

    def __init__(self, filename: str):
        """Open *filename* once and wrap every page.

        Args:
            filename: Path of the PDF to open; passed to fitz.Document.
        """
        self.filename = filename
        # Single handle on the file; each Page wrapper keeps its own fitz page.
        self.raw = fitz.Document(filename)
        self.pages: list[Page] = [
            Page(index=i, raw=p)
            for i, p in enumerate(self.raw.pages())  # type: ignore
        ]
|
||||
|
||||
|
||||
class PdfPage(Gtk.DrawingArea):
    """Drawing area showing one rendered PDF page with its selections drawn on top."""

    # Resolution used for every page render.
    _RENDER_DPI = 96

    def __init__(self, page, *args, **kwargs):
        """Size the widget to the rendered page and hook up drawing.

        Args:
            page: The Page wrapper to display.
        """
        super().__init__(*args, **kwargs)

        self.page: Page = page
        # Rendered here only to learn the pixel size of the page.
        pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=self._RENDER_DPI)  # type: ignore
        self.set_size_request(pix.width, pix.height)
        self.set_app_paintable(True)  # type: ignore
        self.connect("draw", self.on_draw, {})

        # NOTE(review): hard-coded sample selection for the page with index 4;
        # looks like test scaffolding - confirm before shipping.
        if self.page.index == 4:
            self.page.selections.append(Selection(
                ((0.1, 0.18), (0.9, 0.72)),
                columns=[0.08, 0.24, 0.34, 0.42, 0.51, 0.59, 0.67, 0.73, 0.91]
            ))

    @staticmethod
    def _to_cairo_buffer(pix):
        """Return the pixmap's pixels as a writable buffer with channels ordered B, G, R, A."""
        img = PIL.Image.frombytes("RGBA" if pix.alpha else "RGB", (pix.width, pix.height), pix.samples)
        img.putalpha(1)
        # Swap the red and blue channels.
        red, green, blue, alpha = img.split()
        img = PIL.Image.merge("RGBA", (blue, green, red, alpha))
        return bytearray(img.tobytes())

    @staticmethod
    def _draw_selection(cr, sel, page_width, page_height):
        """Draw one selection: dashed red column lines under a black/white dashed frame.

        Bounds are multiplied by the page size in pixels; each column value is a
        fraction of the selection's width.
        """
        sel_x1 = sel.bounds[0][0] * page_width
        sel_y1 = sel.bounds[0][1] * page_height
        sel_x2 = sel.bounds[1][0] * page_width
        sel_y2 = sel.bounds[1][1] * page_height

        # Base settings
        cr.set_line_cap(cairo.LINE_CAP_BUTT)
        cr.set_line_width(2)

        # Columns (draw first - below selection)
        cr.set_dash([5])
        cr.set_source_rgba(1, 0, 0)
        for col in sel.columns:
            col_x = sel_x1 + (sel_x2 - sel_x1) * col
            cr.move_to(col_x, sel_y1)
            cr.line_to(col_x, sel_y2)
            cr.stroke()

        # Selection
        cr.rectangle(sel_x1, sel_y1, sel_x2 - sel_x1, sel_y2 - sel_y1)

        # White part of the pattern
        cr.set_source_rgba(1, 1, 1)
        cr.set_dash([5], 5)
        cr.stroke_preserve()  # important preserve - reuse rectangle

        # Black part of the pattern
        cr.set_source_rgba(0, 0, 0)
        cr.set_dash([5])
        cr.stroke()

    def on_draw(self, widget: Gtk.DrawingArea, cr: cairo.Context, data: GObject.GPointer):
        """Paint a white base, the themed background, the page image, then the selections."""
        width = widget.get_allocated_width()
        height = widget.get_allocated_height()

        # cairo colour components are in the range 0..1.
        cr.set_source_rgba(1, 1, 1)
        cr.paint()

        sctx = widget.get_style_context()
        Gtk.render_background(sctx, cr, 0, 0, width, height)

        pix: fitz.Pixmap = self.page.raw.get_pixmap(dpi=self._RENDER_DPI)  # type: ignore
        # Keep the buffer in a local so it outlives the surface built on it.
        pixels = self._to_cairo_buffer(pix)
        mv: memoryview = memoryview(pixels)
        ims = cairo.ImageSurface.create_for_data(mv, cairo.Format.RGB24, pix.width, pix.height)
        cr.set_source_surface(ims, 0, 0)
        cr.paint()

        for sel in self.page.selections:
            self._draw_selection(cr, sel, pix.width, pix.height)
|
||||
|
||||
|
||||
@Gtk.Template.from_file("MainWindow.glade")
|
||||
class MainWindow(Gtk.ApplicationWindow):
|
||||
|
@ -81,8 +122,6 @@ class MainWindow(Gtk.ApplicationWindow):
|
|||
|
||||
self.app.connect("notify::document", self.on_document_updated)
|
||||
|
||||
# self.pdf_list_box.add(PdfPage())
|
||||
|
||||
# @Gtk.Template.Callback()
|
||||
# def example_button_released_cb(self, widget: Gtk.Button, **kwargs):
|
||||
# assert self.example_button == widget
|
||||
|
@ -124,20 +163,35 @@ class MainWindow(Gtk.ApplicationWindow):
|
|||
# editing an existing one won't trigger it
|
||||
def on_document_updated(self, recvobj, gparamstring):
    """Refresh the header bar and the page list after the app's document changed.

    Connected to the application's "notify::document" signal; both signal
    arguments are unused.
    """
    document: Document = self.app.get_property("document")
    self.header_bar.set_title(document.filename.split("/")[-1])
    self.header_bar.set_subtitle(document.filename)

    # Drop the rows of the previously shown document. isinstance also
    # matches subclasses of Gtk.ListBoxRow.
    for child in self.pdf_list_box.get_children():
        if isinstance(child, Gtk.ListBoxRow):
            child.destroy()

    for page in document.pages:
        self.pdf_list_box.add(PdfPage(page))

    self.pdf_list_box.show_all()
|
||||
|
||||
@Gtk.Template.Callback()
def on_open_button_small_clicked(self, widget, **kwargs):
    """Load the sample PDF into the application's "document" property.

    Any exception raised while opening it is shown in a modal error dialog
    together with its traceback.
    """
    # TODO: replace this fixed sample path with a file chooser.
    test_filename = "/home/luna/Documents/Resources/Praca Licencjacka/sources/2018_Torres-Benitez_Metabolomic analysis Parmotrema.pdf"
    try:
        self.app.set_property("document", Document(test_filename))
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        message_dialog = Gtk.MessageDialog(
            title="An error has occurred.",
            transient_for=self,
            modal=True,
            message_type=Gtk.MessageType.ERROR,
            text=repr(e),
            secondary_text=traceback.format_exc(),
            buttons=Gtk.ButtonsType.OK,
        )
        try:
            message_dialog.run()  # type: ignore
        finally:
            # Destroy the dialog even if running it failed.
            message_dialog.destroy()
|
||||
|
||||
|
||||
class Application(Gtk.Application):
|
||||
|
|
|
@ -11,11 +11,14 @@ python = ">=3.11,<3.13"
|
|||
pygobject = "^3.48.2"
|
||||
tabula-py = "^2.9.0"
|
||||
PyMuPDF = "^1.24.0"
|
||||
pillow = "^10.3.0"
|
||||
#pdfplumber = "^0.11.0"
|
||||
#PySide6 = "^6.6.3" # qt
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pygobject-stubs = "^2.11.0" # this is actually broken lol
|
||||
# pygobject-stubs is broken as installed; fix it by running the following in the venv:
|
||||
# pip uninstall pygobject-stubs && pip install pygobject-stubs --no-cache-dir --config-settings=config=Gtk3,Gdk3,Soup2
|
||||
pygobject-stubs = "^2.11.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
|
Loading…
Reference in New Issue