"""Render selected pages of a PDF as black-and-white line art.

Each requested page is rasterised with PyMuPDF, cleaned up with Pillow
filters, binarised with OpenCV and the results are written out as a new
multi-page PDF (one image per page).
"""

import cv2
import fitz  # PyMuPDF
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter

# === SETTINGS ===
input_pdf = "Cootie Comics Print 14 MAY 2024.pdf"
pages_to_extract = [4, 54]  # zero-based: pages 5 & 55
output_pdf = "Coloring_with_the_Cooties_Preview.pdf"

# Resolution used both to rasterise the pages and to size the output PDF.
render_dpi = 300

# Pixels brighter than this (0-255) become white; everything else black.
paper_threshold = 200

# Side length of the square morphology kernel.  A 1x1 kernel leaves the
# image unchanged; raise it to actually alter line weight.  The ink is
# black on a white background, so cv2.erode thickens the lines and
# cv2.dilate thins them.
ink_kernel_size = 1


def page_to_bw(page, dpi=render_dpi):
    """Convert one PyMuPDF page into a binarised PIL image.

    Args:
        page: A ``fitz.Page`` to rasterise.
        dpi: Rendering resolution in dots per inch.

    Returns:
        A mode ``"L"`` ``PIL.Image.Image`` whose pixels are either 0 or 255.
    """
    # alpha=False guarantees 3 bytes per pixel, matching the "RGB" mode below.
    pix = page.get_pixmap(dpi=dpi, alpha=False)
    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    # Step 1: Convert to grayscale
    gray = img.convert("L")

    # Step 2: Smooth out halftone dots and color noise
    gray = gray.filter(ImageFilter.MedianFilter(size=3))
    gray = gray.filter(ImageFilter.SMOOTH_MORE)

    # Step 3: Increase contrast to pop lines
    gray = ImageEnhance.Contrast(gray).enhance(2.8)
    gray = ImageEnhance.Brightness(gray).enhance(1.1)

    # Step 4: Convert to numpy for thresholding
    arr = np.array(gray)

    # Step 5: Remove faint paper background
    _, bw = cv2.threshold(arr, paper_threshold, 255, cv2.THRESH_BINARY)

    # Step 6: Morphological clean-up of the line work (see ink_kernel_size)
    kernel = np.ones((ink_kernel_size, ink_kernel_size), np.uint8)
    bw = cv2.erode(bw, kernel, iterations=1)
    bw = cv2.dilate(bw, kernel, iterations=2)

    return Image.fromarray(bw)


def main():
    """Convert the configured pages and write them to ``output_pdf``."""
    converted = []

    # The context manager closes the source document even if a page fails.
    with fitz.open(input_pdf) as doc:
        page_count = len(doc)
        for i in pages_to_extract:
            # load_page accepts negative indices (counted from the end);
            # anything outside that range would raise, so skip it instead.
            if not -page_count <= i < page_count:
                print(
                    f"Skipping page {i + 1}: "
                    f"document has {page_count} page(s)."
                )
                continue
            print(f"Processing page {i + 1}...")
            converted.append(page_to_bw(doc.load_page(i)))

    if converted:
        # Pass the render resolution so the PDF pages keep their physical
        # size; Pillow otherwise assumes 72 dpi.
        converted[0].save(
            output_pdf,
            save_all=True,
            append_images=converted[1:],
            resolution=float(render_dpi),
        )
        print(f"\n✅ Done! Created: {output_pdf}")
    else:
        print("⚠️ No pages were converted. Check your page numbers.")


if __name__ == "__main__":
    main()