From e8b31eb263fa2f91e7779c5fa15ab24517599a9c Mon Sep 17 00:00:00 2001 From: Wesley van Tilburg Date: Fri, 7 Nov 2025 19:19:48 +0100 Subject: [PATCH] screen: update thresholds on text --- screen.go | 52 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/screen.go b/screen.go index 7657b1c..b12cf20 100644 --- a/screen.go +++ b/screen.go @@ -67,21 +67,17 @@ func (d *Device) GetScreenResolution() (int, int, error) { return d.Screenx, d.Screeny, nil } -// Get the text on screen in a certain area using gotesseract -func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, whitelist string, multiline bool) (string, error) { - // Take screenshot in memory and get bytes +func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, whitelist string, multiline bool, textPolarity string) (string, error) { data, err := d.RunCommand("exec-out screencap -p") if err != nil { return "", fmt.Errorf("failed to take screenshot: %w", err) } - // Decode image from bytes img, err := png.Decode(bytes.NewReader(data)) if err != nil { return "", fmt.Errorf("failed to decode PNG: %w", err) } - // Crop region rect := image.Rect(0, 0, xEnd-xStart, yEnd-yStart) cropped := image.NewRGBA(rect) for y := yStart; y < yEnd; y++ { @@ -90,27 +86,57 @@ func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, white } } - // Convert to grayscale and apply threshold - threshold := uint8(160) + // Convert to grayscale and compute average brightness + gray := image.NewGray(rect) + var totalBrightness int + for y := 0; y < rect.Dy(); y++ { + for x := 0; x < rect.Dx(); x++ { + grayVal := color.GrayModel.Convert(cropped.At(x, y)).(color.Gray) + gray.Set(x, y, grayVal) + totalBrightness += int(grayVal.Y) + } + } + avgBrightness := totalBrightness / (rect.Dx() * rect.Dy()) + + // Decide polarity + useLightText := false + switch textPolarity { + case "light": + useLightText = true + case "dark": 
useLightText = false + case "auto": + useLightText = avgBrightness < 128 + default: + return "", fmt.Errorf("invalid textPolarity: must be 'dark', 'light', or 'auto'") + } + + // Binarize based on polarity binarized := image.NewGray(rect) for y := 0; y < rect.Dy(); y++ { for x := 0; x < rect.Dx(); x++ { - gray := color.GrayModel.Convert(cropped.At(x, y)).(color.Gray) - if gray.Y > threshold { - binarized.SetGray(x, y, color.Gray{Y: 255}) + grayVal := gray.GrayAt(x, y).Y + if useLightText { + if int(grayVal) > avgBrightness+10 { + binarized.SetGray(x, y, color.Gray{Y: 0}) + } else { + binarized.SetGray(x, y, color.Gray{Y: 255}) + } } else { - binarized.SetGray(x, y, color.Gray{Y: 0}) + if int(grayVal) < avgBrightness-10 { + binarized.SetGray(x, y, color.Gray{Y: 0}) + } else { + binarized.SetGray(x, y, color.Gray{Y: 255}) + } } } } - // Encode binarized image to PNG var buf bytes.Buffer if err := png.Encode(&buf, binarized); err != nil { return "", fmt.Errorf("failed to encode image: %w", err) } - // Call tesseract client := gosseract.NewClient() defer client.Close() client.SetImageFromBytes(buf.Bytes())