Commit c3820b67 authored by Moritz Lipp's avatar Moritz Lipp

Locate and extract images from pages

parent 00792eae
......@@ -7,10 +7,9 @@
#include <mupdf/pdf.h>
#include "plugin.h"
#include "utils.h"
static void pdf_zathura_image_free(zathura_image_t* image);
static void get_images(zathura_page_t* page, pdf_obj* dict, girara_list_t* list);
static void get_resources(zathura_page_t* page, pdf_obj* resource, girara_list_t* list);
girara_list_t*
pdf_page_images_get(zathura_page_t* page, mupdf_page_t* mupdf_page, zathura_error_t* error)
......@@ -30,16 +29,7 @@ pdf_page_images_get(zathura_page_t* page, mupdf_page_t* mupdf_page, zathura_erro
mupdf_document_t* mupdf_document = zathura_document_get_data(document);
pdf_obj* page_object = pdf_load_object(mupdf_document->ctx, (pdf_document*) mupdf_document->document, zathura_page_get_index(page), 0);
if (page_object == NULL) {
goto error_free;
}
pdf_obj* resource = pdf_dict_gets(mupdf_document->ctx, page_object, "Resources");
if (resource == NULL) {
goto error_free;
}
/* Setup image list */
list = girara_list_new();
if (list == NULL) {
if (error != NULL) {
......@@ -50,7 +40,25 @@ pdf_page_images_get(zathura_page_t* page, mupdf_page_t* mupdf_page, zathura_erro
girara_list_set_free_function(list, (girara_free_function_t) pdf_zathura_image_free);
get_resources(page, resource, list);
/* Extract images */
mupdf_page_extract_text(mupdf_document, mupdf_page);
fz_page_block* block;
for (block = mupdf_page->text->blocks; block < mupdf_page->text->blocks + mupdf_page->text->len; block++) {
if (block->type == FZ_PAGE_BLOCK_IMAGE) {
fz_image_block *image_block = block->u.image;
zathura_image_t* zathura_image = g_malloc(sizeof(zathura_image_t));
zathura_image->position.x1 = image_block->bbox.x0;
zathura_image->position.y1 = image_block->bbox.y0;
zathura_image->position.x2 = image_block->bbox.x1;
zathura_image->position.y2 = image_block->bbox.y1;
zathura_image->data = image_block->image;
girara_list_append(list, zathura_image);
}
}
return list;
......@@ -69,109 +77,82 @@ error_ret:
return NULL;
}
static void
pdf_zathura_image_free(zathura_image_t* image)
cairo_surface_t*
pdf_page_image_get_cairo(zathura_page_t* page, mupdf_page_t* mupdf_page,
zathura_image_t* image, zathura_error_t* error)
{
if (image == NULL) {
return;
if (page == NULL || mupdf_page == NULL || image == NULL || image->data == NULL) {
if (error != NULL) {
*error = ZATHURA_ERROR_INVALID_ARGUMENTS;
}
goto error_ret;
}
g_free(image);
}
fz_image* mupdf_image = (fz_image*) image->data;
static void
get_images(zathura_page_t* page, pdf_obj* dict, girara_list_t* list)
{
if (dict == NULL || list == NULL) {
return;
}
fz_pixmap* pixmap = NULL;
cairo_surface_t* surface = NULL;
if (page == NULL) {
return;
pixmap = fz_new_pixmap_from_image(mupdf_page->ctx, mupdf_image, 0, 0);
if (pixmap == NULL) {
goto error_free;
}
zathura_document_t* document = zathura_page_get_document(page);
if (document == NULL) {
return;
surface = cairo_image_surface_create(CAIRO_FORMAT_RGB24, mupdf_image->w, mupdf_image->h);
if (surface == NULL) {
goto error_free;
}
mupdf_document_t* mupdf_document = zathura_document_get_data(document);
unsigned char* surface_data = cairo_image_surface_get_data(surface);
int rowstride = cairo_image_surface_get_stride(surface);
for (int i = 0; i < pdf_dict_len(mupdf_document->ctx, dict); i++) {
pdf_obj* image_dict = pdf_dict_get_val(mupdf_document->ctx, dict, i);
if (pdf_is_dict(mupdf_document->ctx, image_dict) == 0) {
continue;
}
unsigned char* s = fz_pixmap_samples(mupdf_page->ctx, pixmap);
unsigned int n = fz_pixmap_components(mupdf_page->ctx, pixmap);
pdf_obj* type = pdf_dict_gets(mupdf_document->ctx, image_dict, "Subtype");
if (strcmp(pdf_to_name(mupdf_document->ctx, type), "Image") != 0) {
continue;
}
for (unsigned int y = 0; y < fz_pixmap_height(mupdf_page->ctx, pixmap); y++) {
for (unsigned int x = 0; x < fz_pixmap_width(mupdf_page->ctx, pixmap); x++) {
guchar* p = surface_data + y * rowstride + x * 4;
bool duplicate = false;
GIRARA_LIST_FOREACH(list, zathura_image_t*, iter, image)
if (image->data == image_dict) {
duplicate = true;
break;
// RGB
if (n == 4) {
p[0] = s[2];
p[1] = s[1];
p[2] = s[0];
// Gray-scale or mask
} else {
p[0] = s[0];
p[1] = s[0];
p[2] = s[0];
}
GIRARA_LIST_FOREACH_END(list, zathura_image_t*, iter, image);
if (duplicate == true) {
continue;
s += n;
}
}
pdf_obj* width = pdf_dict_gets(mupdf_document->ctx, image_dict, "Width");
pdf_obj* height = pdf_dict_gets(mupdf_document->ctx, image_dict, "Height");
zathura_image_t* zathura_image = g_malloc(sizeof(zathura_image_t));
fprintf(stderr, "image\n");
fz_drop_pixmap(mupdf_page->ctx, pixmap);
// FIXME: Get correct image coordinates
zathura_image->data = image_dict;
zathura_image->position.x1 = 0;
zathura_image->position.x2 = pdf_to_int(mupdf_document->ctx, width);
zathura_image->position.y1 = 0;
zathura_image->position.y2 = pdf_to_int(mupdf_document->ctx, height);
return surface;
girara_list_append(list, zathura_image);
}
}
error_free:
static void
get_resources(zathura_page_t* page, pdf_obj* resource, girara_list_t* list)
{
if (resource == NULL || list == NULL) {
return;
if (pixmap != NULL) {
fz_drop_pixmap(mupdf_page->ctx, pixmap);
}
if (page == NULL) {
return;
if (surface != NULL) {
cairo_surface_destroy(surface);
}
zathura_document_t* document = zathura_page_get_document(page);
if (document == NULL) {
return;
}
error_ret:
mupdf_document_t* mupdf_document = zathura_document_get_data(document);
return NULL;
}
pdf_obj* x_object = pdf_dict_gets(mupdf_document->ctx, resource, "XObject");
if (x_object == NULL) {
static void
pdf_zathura_image_free(zathura_image_t* image)
{
if (image == NULL) {
return;
}
get_images(page, x_object, list);
for (int i = 0; i < pdf_dict_len(mupdf_document->ctx, x_object); i++) {
pdf_obj* obj = pdf_dict_get_val(mupdf_document->ctx, x_object, i);
pdf_obj* subsrc = pdf_dict_gets(mupdf_document->ctx, obj, "Resources");
if (subsrc != NULL && pdf_objcmp(mupdf_document->ctx, resource, subsrc)) {
get_resources(page, subsrc, list);
}
}
g_free(image);
}
......@@ -17,12 +17,13 @@ register_functions(zathura_plugin_functions_t* functions)
functions->page_links_get = (zathura_plugin_page_links_get_t) pdf_page_links_get;
#if 0
functions->document_get_information = (zathura_plugin_document_get_information_t) pdf_document_get_information;
functions->page_images_get = (zathura_plugin_page_images_get_t) pdf_page_images_get;
#endif
functions->page_images_get = (zathura_plugin_page_images_get_t) pdf_page_images_get;
functions->page_get_text = (zathura_plugin_page_get_text_t) pdf_page_get_text;
functions->page_render = (zathura_plugin_page_render_t) pdf_page_render;
#if HAVE_CAIRO
functions->page_render_cairo = (zathura_plugin_page_render_cairo_t) pdf_page_render_cairo;
functions->page_image_get_cairo = (zathura_plugin_page_image_get_cairo_t) pdf_page_image_get_cairo;
#endif
}
......
......@@ -113,6 +113,20 @@ girara_list_t* pdf_page_links_get(zathura_page_t* page, mupdf_page_t* mupdf_page
*/
girara_list_t* pdf_page_images_get(zathura_page_t* page, mupdf_page_t* mupdf_page, zathura_error_t* error);
#if HAVE_CAIRO
/**
* Gets the content of the image in a cairo surface
*
* @param page Page
* @param image Image identifier
* @param error Set to an error value (see \ref zathura_error_t) if an
* error occured
* @return The cairo image surface or NULL if an error occured
*/
cairo_surface_t* pdf_page_image_get_cairo(zathura_page_t* page, mupdf_page_t*
mupdf_page, zathura_image_t* image, zathura_error_t* error);
#endif
/**
* Get text for selection
* @param page Page
......
......@@ -7,9 +7,6 @@
#include "plugin.h"
#include "utils.h"
void mupdf_page_extract_text(mupdf_document_t* mupdf_document,
mupdf_page_t* mupdf_page);
char*
pdf_page_get_text(zathura_page_t* page, mupdf_page_t* mupdf_page, zathura_rectangle_t rectangle, zathura_error_t* error)
{
......
......@@ -15,6 +15,10 @@ mupdf_page_extract_text(mupdf_document_t* mupdf_document, mupdf_page_t* mupdf_pa
fz_try (mupdf_page->ctx) {
text_device = fz_new_text_device(mupdf_page->ctx, mupdf_page->sheet, mupdf_page->text);
/* Disable FZ_IGNORE_IMAGE to collect image blocks */
fz_disable_device_hints(mupdf_page->ctx, text_device, FZ_IGNORE_IMAGE);
fz_matrix ctm;
fz_scale(&ctm, 1.0, 1.0);
fz_run_page(mupdf_page->ctx, mupdf_page->page, text_device, &ctm, NULL);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment