From 7c4281ef055974c0d77489a0e18fd2ca2bb3af45 Mon Sep 17 00:00:00 2001 From: Wesley van Tilburg Date: Fri, 7 Nov 2025 19:19:48 +0100 Subject: [PATCH] screen: update thresholds on text --- screen.go | 120 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 53 deletions(-) diff --git a/screen.go b/screen.go index 7657b1c..cf040bf 100644 --- a/screen.go +++ b/screen.go @@ -67,63 +67,77 @@ func (d *Device) GetScreenResolution() (int, int, error) { return d.Screenx, d.Screeny, nil } -// Get the text on screen in a certain area using gotesseract func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, whitelist string, multiline bool) (string, error) { - // Take screenshot in memory and get bytes - data, err := d.RunCommand("exec-out screencap -p") - if err != nil { - return "", fmt.Errorf("failed to take screenshot: %w", err) - } + data, err := d.RunCommand("exec-out screencap -p") + if err != nil { + return "", fmt.Errorf("failed to take screenshot: %w", err) + } - // Decode image from bytes - img, err := png.Decode(bytes.NewReader(data)) - if err != nil { - return "", fmt.Errorf("failed to decode PNG: %w", err) - } + img, err := png.Decode(bytes.NewReader(data)) + if err != nil { + return "", fmt.Errorf("failed to decode PNG: %w", err) + } - // Crop region - rect := image.Rect(0, 0, xEnd-xStart, yEnd-yStart) - cropped := image.NewRGBA(rect) - for y := yStart; y < yEnd; y++ { - for x := xStart; x < xEnd; x++ { - cropped.Set(x-xStart, y-yStart, img.At(x, y)) - } - } + rect := image.Rect(0, 0, xEnd-xStart, yEnd-yStart) + cropped := image.NewRGBA(rect) + for y := yStart; y < yEnd; y++ { + for x := xStart; x < xEnd; x++ { + cropped.Set(x-xStart, y-yStart, img.At(x, y)) + } + } - // Convert to grayscale and apply threshold - threshold := uint8(160) - binarized := image.NewGray(rect) - for y := 0; y < rect.Dy(); y++ { - for x := 0; x < rect.Dx(); x++ { - gray := color.GrayModel.Convert(cropped.At(x, y)).(color.Gray) - if 
gray.Y > threshold { - binarized.SetGray(x, y, color.Gray{Y: 255}) - } else { - binarized.SetGray(x, y, color.Gray{Y: 0}) - } - } - } + // Convert to grayscale + gray := image.NewGray(rect) + for y := 0; y < rect.Dy(); y++ { + for x := 0; x < rect.Dx(); x++ { + gray.Set(x, y, color.GrayModel.Convert(cropped.At(x, y))) + } + } - // Encode binarized image to PNG - var buf bytes.Buffer - if err := png.Encode(&buf, binarized); err != nil { - return "", fmt.Errorf("failed to encode image: %w", err) - } + // Apply simple mean-based adaptive thresholding + binarized := image.NewGray(rect) + window := 15 // size of local region + for y := 0; y < rect.Dy(); y++ { + for x := 0; x < rect.Dx(); x++ { + var sum int + var count int + for dy := -window / 2; dy <= window/2; dy++ { + for dx := -window / 2; dx <= window/2; dx++ { + xx := x + dx + yy := y + dy + if xx >= 0 && xx < rect.Dx() && yy >= 0 && yy < rect.Dy() { + sum += int(gray.GrayAt(xx, yy).Y) + count++ + } + } + } + mean := sum / count + if int(gray.GrayAt(x, y).Y) < mean-10 { + binarized.SetGray(x, y, color.Gray{Y: 0}) + } else { + binarized.SetGray(x, y, color.Gray{Y: 255}) + } + } + } - // Call tesseract - client := gosseract.NewClient() - defer client.Close() - client.SetImageFromBytes(buf.Bytes()) - client.SetWhitelist(whitelist) - if multiline { - client.SetPageSegMode(gosseract.PSM_AUTO) - } else { - client.SetPageSegMode(gosseract.PSM_SINGLE_BLOCK) - } + var buf bytes.Buffer + if err := png.Encode(&buf, binarized); err != nil { + return "", fmt.Errorf("failed to encode image: %w", err) + } - text, err := client.Text() - if err != nil { - return "", fmt.Errorf("tesseract error: %w", err) - } - return text, nil -} + client := gosseract.NewClient() + defer client.Close() + client.SetImageFromBytes(buf.Bytes()) + client.SetWhitelist(whitelist) + if multiline { + client.SetPageSegMode(gosseract.PSM_AUTO) + } else { + client.SetPageSegMode(gosseract.PSM_SINGLE_BLOCK) + } + + text, err := client.Text() + if err != nil 
{ + return "", fmt.Errorf("tesseract error: %w", err) + } + return text, nil +} \ No newline at end of file