# Source code for lecture2notes.end_to_end.figure_detection
import logging
import math
import os
import cv2
import numpy as np
from imutils import auto_canny
from PIL import Image, ImageStat
from pythonRLSA import rlsa
from skimage.measure.entropy import shannon_entropy
from tqdm import tqdm
from .helpers import frame_number_filename_mapping
from .text_detection import get_text_bounding_boxes, load_east
logger = logging.getLogger(__name__)
OUTPUT_PATH_MODIFIER = "_figure_"
def area_of_overlapping_rectangles(a, b):
    """
    Find the overlapping area of two rectangles ``a`` and ``b``.
    Each rectangle is in corner form ``(xmax, ymax, xmin, ymin)``.
    Inspired by https://stackoverflow.com/a/27162334.
    """
    overlap_width = min(a[0], b[0]) - max(a[2], b[2])  # xmax, xmax, xmin, xmin
    overlap_height = min(a[1], b[1]) - max(a[3], b[3])  # ymax, ymax, ymin, ymin
    if overlap_width < 0 or overlap_height < 0:
        return 0
    return overlap_width * overlap_height
def detect_color_image(image, thumb_size=40, MSE_cutoff=22, adjust_color_bias=True):
    """Detect if an image contains color, is black and white, or is grayscale.
    Based on `this StackOverflow answer <https://stackoverflow.com/a/23035464>`__.
    Args:
        image (np.array): Input image
        thumb_size (int, optional): Resize image to this size to speed up calculation.
            Defaults to 40.
        MSE_cutoff (int, optional): A larger value requires more color for an image to be
            labeled as "color". Defaults to 22.
        adjust_color_bias (bool, optional): Mean color bias adjustment, which improves the
            prediction. Defaults to True.
    Returns:
        str: Either "grayscale", "color", "b&w" (black and white), or "unknown".
    """
    pil_img = Image.fromarray(image)
    bands = pil_img.getbands()
    if bands == ("R", "G", "B") or bands == ("R", "G", "B", "A"):
        thumb = pil_img.resize((thumb_size, thumb_size))
        SSE, bias = 0, [0, 0, 0]
        if adjust_color_bias:
            # Per-channel deviation of the mean from the overall gray level.
            bias = ImageStat.Stat(thumb).mean[:3]
            bias = [b - sum(bias) / 3 for b in bias]
        for pixel in thumb.getdata():
            # Use only the RGB components. Previously `sum(pixel)` included the
            # alpha channel for RGBA images, skewing the per-pixel mean.
            rgb = pixel[:3]
            mu = sum(rgb) / 3
            # Accumulate squared deviation of each channel from the pixel mean.
            SSE += sum(
                (rgb[i] - mu - bias[i]) * (rgb[i] - mu - bias[i]) for i in [0, 1, 2]
            )
        MSE = float(SSE) / (thumb_size * thumb_size)
        if MSE <= MSE_cutoff:
            return "grayscale"
        return "color"
    if len(bands) == 1:
        return "b&w"
    return "unknown"
def convert_coords_to_corners(box):
    """Convert an ``(x, y, width, height)`` bounding box to corner form
    ``(xmax, ymax, xmin, ymin)``."""
    x, y, w, h = box
    left, right = x, x + w
    top, bottom = y, y + h
    return (max(left, right), max(top, bottom), min(left, right), min(top, bottom))
def detect_figures(
    image_path,
    output_path=None,
    east="frozen_east_text_detection.pb",
    text_area_overlap_threshold=0.32,
    figure_max_area_percentage=0.60,
    text_max_area_percentage=0.30,
    large_box_detection=True,
    do_color_check=True,
    do_text_check=True,
    entropy_check=2.5,
    do_remove_subfigures=True,
    do_rlsa=False,
):
    """Detect figures located in a slide.
    Args:
        image_path (str): Path to the image to process.
        output_path (str, optional): Base path used to save the figures (its extension is
            kept and ``_figure_[index]`` is inserted before it). Defaults to
            ``[filename]_figure_[index].[ext]`` derived from ``image_path``.
        east (str or cv2.dnn_Net, optional): Path to the EAST model file or the pre-trained
            EAST model loaded with :meth:`~lecture2notes.end_to_end.text_detection.load_east`. ``do_text_check`` must
            be true for this option to take effect. Defaults to "frozen_east_text_detection.pb".
        text_area_overlap_threshold (float, optional): The percentage of the figure that
            can contain text. If the area of the text in the figure is greater than this
            value, the figure is discarded. ``do_text_check`` must be true for this option
            to take effect. Defaults to 0.32.
        figure_max_area_percentage (float, optional): The maximum percentage of the area of the
            original image that a figure can take up. If the figure uses more area than
            ``original_image_area*figure_max_area_percentage`` then the figure will be discarded.
            Defaults to 0.60.
        text_max_area_percentage (float, optional): The maximum percentage of the area of the
            original image that a block of text (as identified by the EAST model) can take up.
            If the text block uses more area than ``original_image_area*text_max_area_percentage``
            then that text block will be ignored. ``do_text_check`` must be true for this option
            to take effect. Defaults to 0.30.
        large_box_detection (bool, optional): Detect edges and classify large rectangles as
            figures. This will ignore `do_color_check` and `do_text_check`. This is useful
            for finding tables for example. Defaults to True.
        do_color_check (bool, optional): Check that potential figures contain color. This
            helps to remove large quantities of black and white text from the potential
            figure list. Defaults to True.
        do_text_check (bool, optional): Check that only `text_area_overlap_threshold` of
            potential figures contains text. This is useful to remove blocks of text that
            are mistakenly classified as figures. Checking for text increases processing
            time so be careful if processing a large number of files. Defaults to True.
        entropy_check (float, optional): Check that the entropy of all potential figures is above
            this value. Figures with a ``shannon_entropy`` lower than this value will be removed.
            Set to ``False`` to disable this check. The ``shannon_entropy`` implementation is from
            ``skimage.measure.entropy``. IMPORTANT: This check applies to both the regular tests
            *and* ``large_box_detection``, which most checks do not apply to. Defaults to 2.5.
        do_remove_subfigures (bool, optional): Check that there are no overlapping figures.
            If an overlapping figure is detected, the smaller figure will be deleted. This
            is useful to have enabled when using `large_box_detection` since
            `large_box_detection` will commonly mistakenly detect subfigures. Defaults to True.
        do_rlsa (bool, optional): Use RLSA (Run Length Smoothing Algorithm) instead of dilation.
            Does not apply to `large_box_detection`. Defaults to False.
    Returns:
        tuple: (figures, output_paths) A list of figures extracted from the input slide image
        and a list of paths to those figures on disk.
    """
    image = cv2.imread(image_path)
    image_height = image.shape[0]
    image_width = image.shape[1]
    image_area = image_height * image_width

    # Build the output filename template. Previously `start_output_path` and
    # `ext` were only assigned when `output_path` was falsy, causing a
    # NameError at save time whenever `output_path` was provided.
    template_path = output_path if output_path else image_path
    filename, ext = os.path.splitext(str(template_path))
    start_output_path = filename + OUTPUT_PATH_MODIFIER

    if do_text_check:
        text_bounding_boxes = get_text_bounding_boxes(image, east)
        # Remove text boxes that are too large to be real text blocks.
        text_bounding_boxes = [
            box
            for box in text_bounding_boxes
            if area_of_corner_box(box) < text_max_area_percentage * image_area
        ]

    original = image.copy()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    blurred = cv2.GaussianBlur(gray_thresh, (3, 3), 0)
    # Need to use canny in addition to threshold in case the threshold is inverted.
    # Difference between edges and contours: https://stackoverflow.com/a/17104541
    canny = auto_canny(blurred)

    # "large" pertains to components that are used to find figures not surrounded by a border
    # "small" is used to find rectangles on the slide, which are likely figures
    canny_dilated_large = cv2.dilate(canny, np.ones((22, 22), dtype=np.uint8))
    canny_dilated_small = cv2.dilate(canny, np.ones((3, 3), dtype=np.uint8))

    if do_rlsa:
        x, y = canny.shape
        value = max(math.ceil(x / 70), math.ceil(y / 70)) + 20  # heuristic
        rlsa_result = ~rlsa.rlsa(~canny, True, True, value)  # rlsa application
        canny_dilated_large = rlsa_result

    contours_large = cv2.findContours(
        canny_dilated_large, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 4 returns
    # (contours, hierarchy). Pick the contours element for either version.
    contours_large = (
        contours_large[0] if len(contours_large) == 2 else contours_large[1]
    )
    bounding_boxes_large = np.array(
        [cv2.boundingRect(contour) for contour in contours_large]
    )

    if large_box_detection:
        contours_small = cv2.findContours(
            canny_dilated_small, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        contours_small = (
            contours_small[0] if len(contours_small) == 2 else contours_small[1]
        )

    max_area = int(figure_max_area_percentage * image_area)
    min_area = (image_height // 3) * (image_width // 6)
    min_area_small = min_area
    padding = image_height // 70

    figures = []
    all_figure_boxes = []
    output_paths = []

    if large_box_detection:
        for contour in contours_small:
            perimeter = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.1 * perimeter, True)
            # Figure has 4 corners and it is convex
            if (
                len(approx) == 4
                and cv2.isContourConvex(approx)
                and min_area_small < cv2.contourArea(approx) < max_area
            ):
                figure_contour = approx[:, 0]
                bounding_box = cv2.boundingRect(figure_contour)
                x, y, w, h = bounding_box
                # Clamp the padded crop to the image so negative indices do
                # not wrap around when the box touches the top/left edge.
                figure = original[
                    max(0, y - padding) : min(image_height, y + h + padding),
                    max(0, x - padding) : min(image_width, x + w + padding),
                ]
                figures.append(figure)
                all_figure_boxes.append(convert_coords_to_corners(bounding_box))

    for box in bounding_boxes_large:
        x, y, w, h = box
        area = w * h
        aspect_ratio = w / h
        if min_area < area < max_area and 0.2 < aspect_ratio < 6:
            roi_rectangle = convert_coords_to_corners(box)
            # Crop with padding, clamped to the image bounds. The previous
            # check (`y <= image_height or x <= image_width`) was always true,
            # so padding was never applied here.
            potential_figure = original[
                max(0, y - padding) : min(image_height, y + h + padding),
                max(0, x - padding) : min(image_width, x + w + padding),
            ]
            # Go to next figure if the `potential_figure` is empty
            if potential_figure.size == 0:
                continue

            # Start all checks as passed (aka True). These lines ensure that if
            # the checks are intentionally disabled then the potential figure is
            # always added because `checks_passed` will be true.
            text_overlap_under_threshold = True
            roi_is_color = True

            if do_text_check:
                total_area_overlapped = sum(
                    area_of_overlapping_rectangles(roi_rectangle, text_rectangle)
                    for text_rectangle in text_bounding_boxes
                )
                logger.debug("Total area overlapped by text: %i", total_area_overlapped)
                text_overlap_under_threshold = (
                    total_area_overlapped < text_area_overlap_threshold * area
                )

            if do_color_check:
                roi_is_color = detect_color_image(potential_figure) == "color"

            checks_passed = roi_is_color and text_overlap_under_threshold
            if checks_passed:
                figures.append(potential_figure)
                all_figure_boxes.append(roi_rectangle)

    if do_remove_subfigures:
        remove_idxs = []
        for idx, figure in enumerate(all_figure_boxes):
            # `start=idx + 1` makes `compare_idx` an index into the full list.
            # Previously it indexed the sliced list, removing the wrong figure.
            for compare_idx, figure_to_compare in enumerate(
                all_figure_boxes[idx + 1 :], start=idx + 1
            ):
                overlapping_area = area_of_overlapping_rectangles(
                    figure, figure_to_compare
                )
                if overlapping_area > 0:
                    # Keep the larger figure, discard the smaller one.
                    figure_area = area_of_corner_box(figure)
                    figure_to_compare_area = area_of_corner_box(figure_to_compare)
                    if figure_area > figure_to_compare_area:
                        remove_idxs.append(compare_idx)
                    else:
                        remove_idxs.append(idx)
        figures = [
            figure for idx, figure in enumerate(figures) if idx not in remove_idxs
        ]

    # If `entropy_check` is a boolean, then set it to the default threshold.
    if type(entropy_check) is bool and entropy_check:
        entropy_check = 2.5

    for idx, figure in enumerate(figures):
        if entropy_check:
            gray = cv2.cvtColor(figure, cv2.COLOR_BGR2GRAY)
            high_entropy = shannon_entropy(gray) > entropy_check
            if not high_entropy:
                continue
        full_output_path = start_output_path + str(idx) + ext
        output_paths.append(full_output_path)
        cv2.imwrite(full_output_path, figure)

    logger.debug("Number of Figures Detected: %i", len(figures))
    return figures, output_paths
def all_in_folder(
    path,
    remove_original=False,
    east="frozen_east_text_detection.pb",
    do_text_check=True,
    **kwargs
):
    """
    Perform figure detection on every file in folder and return new paths.
    ``**kwargs`` is passed to :meth:`~lecture2notes.end_to_end.figure_detection.detect_figures`.
    """
    figure_paths = []
    images = sorted(os.listdir(path))

    if do_text_check:
        east = load_east(east)

    for item in tqdm(images, total=len(images), desc="> Figure Detection: Progress"):
        current_path = os.path.join(path, item)
        # Skip non-files and anything containing `OUTPUT_PATH_MODIFIER`, which
        # indicates the file was produced by a previous figure-detection run.
        already_processed = OUTPUT_PATH_MODIFIER in str(current_path)
        if not os.path.isfile(current_path) or already_processed:
            continue

        _, output_paths = detect_figures(
            current_path, east=east, do_text_check=do_text_check, **kwargs
        )
        figure_paths.extend(output_paths)
        if remove_original:
            os.remove(current_path)

    logger.debug("> Figure Detection: Returning figure paths")
    return figure_paths
def add_figures_to_ssa(ssa, figures_path):
    """Attach detected figure paths to a slide structure analysis (SSA).
    Args:
        ssa (list): The slide structure analysis, a list of dicts (one per slide).
        figures_path (str): Path to the folder containing the extracted figures.
    Returns:
        list: The input ``ssa`` with a ``figure_paths`` key added to each slide
        that has figures. Returned unchanged if ``ssa`` is empty or its slides
        do not contain frame numbers.
    """
    # Guard against an empty SSA (previously raised IndexError) and SSAs
    # without frame numbers, which cannot be matched to figure files.
    if not ssa or "frame_number" not in ssa[0]:
        return ssa

    mapping = frame_number_filename_mapping(figures_path)
    for idx, slide in enumerate(ssa):
        current_slide_idx = slide["frame_number"]
        try:
            ssa[idx]["figure_paths"] = mapping[current_slide_idx]
        except KeyError:  # Ignore frames that have no figures
            pass
    return ssa
# import matplotlib.pyplot as plt
# all_in_folder("delete/")
# detect_figures("delete/img_01054_noborder.jpg")
# detect_figures("g-yPqNmrgYw-img_146.jpg", east="lecture2notes/end_to_end/frozen_east_text_detection.pb")