Module: StackedPdfGenerator::CropMarkDetector

Defined in:
lib/stacked_pdf_generator/crop_mark_detector.rb

Overview

Detects printer crop marks on the first page of a PDF and returns the rectangular region delimited by them.

Pattern expected: 4 short horizontal segments (or rectangles) per page, 2 at the top (left + right edges) and 2 at the bottom. Marks must touch either the left edge (x ≈ 0) or the right edge (x ≈ page_width). Only the vertical extent is constrained by the marks; horizontally we keep the full page width.

Defined Under Namespace

Classes: DetectedBox, LineCollector

Constant Summary collapse

DEFAULT_MARK_MAX_LENGTH_MM =

≈ 10.6 mm (30 pts)

30.0 / 72.0 * 25.4
DEFAULT_EDGE_TOLERANCE_MM =

≈ 0.35 mm (1 pt)

1.0 / 72.0 * 25.4
HORIZONTAL_TOLERANCE_PTS =
0.5
Y_CLUSTER_TOLERANCE_PTS =
2.0

Class Method Summary collapse

Class Method Details

.call(pdf_path, mark_max_length_pts: DEFAULT_MARK_MAX_LENGTH_MM * 72.0 / 25.4, edge_tolerance_pts: DEFAULT_EDGE_TOLERANCE_MM * 72.0 / 25.4) ⇒ Object

Inspect the first page of pdf_path and return a DetectedBox in PDF points, or raise ProcessingError with a descriptive message.

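Raises: ProcessingError — when the PDF has no pages, when no candidate crop marks are found on page 1, or when the marks do not form exactly two rows that each contain a left and a right mark.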



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/stacked_pdf_generator/crop_mark_detector.rb', line 28

# Inspects the first page of the PDF at +pdf_path+ and returns the region
# delimited by its crop marks.
#
# The horizontal extent is always the full page width (0.0 .. page width);
# only +bottom+ and +top+ come from the detected marks, each being the median
# Y of one row of marks.
#
# @param pdf_path path of the PDF, passed to HexaPDF::Document.open
# @param mark_max_length_pts longest segment (in points) still accepted as a mark
# @param edge_tolerance_pts maximum distance (in points) between a mark and
#   the page edge it is supposed to touch
# @return [DetectedBox]
# @raise [ProcessingError] if the document has no pages, if no candidate
#   segment survives filtering, or if validate_clusters! rejects the rows
def call(pdf_path, mark_max_length_pts: DEFAULT_MARK_MAX_LENGTH_MM * 72.0 / 25.4,
         edge_tolerance_pts: DEFAULT_EDGE_TOLERANCE_MM * 72.0 / 25.4)
  first_page = HexaPDF::Document.open(pdf_path).pages[0]
  raise ProcessingError, 'PDF has no pages' unless first_page

  width = first_page.box.width.to_f
  height = first_page.box.height.to_f

  # Replay the page's content stream into the collector to harvest segments.
  collector = LineCollector.new
  first_page.process_contents(collector)
  segments = collector.lines

  marks = filter_candidates(segments, width,
                            mark_max_length_pts: mark_max_length_pts,
                            edge_tolerance_pts: edge_tolerance_pts)

  if marks.empty?
    searched = "mark_max_length=#{mark_max_length_pts.round(2)}pts, " \
               "edge_tolerance=#{edge_tolerance_pts.round(2)}pts"
    raise ProcessingError,
          'No crop marks detected on page 1 (looked for short horizontal ' \
          "segments touching the left or right edge; #{searched}). " \
          "Inspected #{segments.size} path segments."
  end

  rows = cluster_by_y(marks)
  validate_clusters!(rows, width, edge_tolerance_pts)

  # One representative Y per row; the lower one is the bottom of the box.
  bottom_y, top_y = rows.map { |row| median(row.map { |seg| seg[1] }) }.minmax
  DetectedBox.new(left: 0.0, bottom: bottom_y, right: width, top: top_y,
                  page_width: width, page_height: height)
end

.cluster_by_y(segments) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/stacked_pdf_generator/crop_mark_detector.rb', line 71

# Groups segments into rows of similar height.
#
# Segments are ordered by their second element (the Y of the first endpoint)
# and a new row is started whenever the gap to the previous segment exceeds
# Y_CLUSTER_TOLERANCE_PTS. The comparison is between neighbours, so a row may
# span more than the tolerance overall if its members form a chain.
#
# @param segments [Array<Array>] segments shaped like [x0, y0, x1, y1]
# @return [Array<Array<Array>>] rows in ascending Y order; empty for no input
def cluster_by_y(segments)
  segments
    .sort_by { |seg| seg[1] }
    .chunk_while { |prev, succ| (succ[1] - prev[1]).abs <= Y_CLUSTER_TOLERANCE_PTS }
    .to_a
end

.filter_candidates(lines, page_w, mark_max_length_pts:, edge_tolerance_pts:) ⇒ Object



60
61
62
63
64
65
66
67
68
69
# File 'lib/stacked_pdf_generator/crop_mark_detector.rb', line 60

# Keeps only the segments that look like crop marks.
#
# A segment [x0, y0, x1, y1] is kept when all of the following hold:
# * its endpoints differ in Y by less than HORIZONTAL_TOLERANCE_PTS,
# * its X extent is no longer than +mark_max_length_pts+,
# * its leftmost X is within +edge_tolerance_pts+ of 0, or its rightmost X is
#   within +edge_tolerance_pts+ of +page_w+.
#
# @param lines [Array<Array>] segments shaped like [x0, y0, x1, y1]
# @param page_w [Numeric] page width, in the same units as the segments
# @param mark_max_length_pts [Numeric] maximum accepted segment length
# @param edge_tolerance_pts [Numeric] accepted distance from a page edge
# @return [Array<Array>] the accepted segments, in their original order
def filter_candidates(lines, page_w, mark_max_length_pts:, edge_tolerance_pts:)
  right_threshold = page_w - edge_tolerance_pts

  lines.select do |x0, y0, x1, y1|
    horizontal = (y0 - y1).abs < HORIZONTAL_TOLERANCE_PTS
    next false unless horizontal

    too_long = (x1 - x0).abs > mark_max_length_pts
    next false if too_long

    # Endpoints may come in either order, so look at the extremes.
    leftmost, rightmost = [x0, x1].minmax
    leftmost <= edge_tolerance_pts || rightmost >= right_threshold
  end
end

.median(values) ⇒ Object



106
107
108
109
110
# File 'lib/stacked_pdf_generator/crop_mark_detector.rb', line 106

# Returns the median of +values+.
#
# For an odd number of elements this is the middle element of the sorted
# list (returned as-is); for an even number it is the mean of the two middle
# elements, always as a Float.
#
# @param values [Array<Numeric>] numbers to summarise; not modified
# @return [Numeric] the median
# @raise [ArgumentError] if +values+ is empty (a median is undefined there;
#   without this guard the even branch would fail with a NoMethodError on nil)
def median(values)
  sorted = values.sort
  raise ArgumentError, 'cannot compute the median of an empty collection' if sorted.empty?

  mid = sorted.length / 2
  sorted.length.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0
end

.validate_clusters!(clusters, page_w, edge_tolerance_pts) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/stacked_pdf_generator/crop_mark_detector.rb', line 84

# Checks that the clustered marks form the expected crop-mark layout:
# exactly two rows, each with at least one mark at the left edge and one at
# the right edge.
#
# @param clusters [Array<Array<Array>>] rows of segments shaped like
#   [x0, y0, x1, y1]
# @param page_w [Numeric] page width, in the same units as the segments
# @param edge_tolerance_pts [Numeric] accepted distance from a page edge
# @return [Array] +clusters+, unchanged, when the layout is valid
# @raise [ProcessingError] if there are not exactly two rows, or if a row
#   lacks a left and/or right mark
def validate_clusters!(clusters, page_w, edge_tolerance_pts)
  # Median Y of every row, rounded for use in the error messages below.
  row_ys = clusters.map { |row| median(row.map { |seg| seg[1] }).round(2) }

  unless clusters.size == 2
    row_sizes = clusters.map(&:size)
    raise ProcessingError,
          "Expected 2 Y-rows of crop marks (top + bottom), found #{clusters.size}. " \
          "Y values: #{row_ys.inspect}, marks per row: #{row_sizes.inspect}."
  end

  right_threshold = page_w - edge_tolerance_pts

  clusters.zip(row_ys) do |row, row_y|
    left_found  = row.any? { |x0, _, x1, _| [x0, x1].min <= edge_tolerance_pts }
    right_found = row.any? { |x0, _, x1, _| [x0, x1].max >= right_threshold }
    next if left_found && right_found

    missing = []
    missing << 'left' unless left_found
    missing << 'right' unless right_found

    raise ProcessingError,
          "Crop mark row at y=#{row_y} is missing #{missing.join(' & ')} mark. " \
          "Found #{row.size} marks."
  end

  clusters
end