diff --git a/screen.go b/screen.go index 7657b1c..22cb4f7 100644 --- a/screen.go +++ b/screen.go @@ -67,7 +67,6 @@ func (d *Device) GetScreenResolution() (int, int, error) { return d.Screenx, d.Screeny, nil } -// Get the text on screen in a certain area using gotesseract func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, whitelist string, multiline bool) (string, error) { // Take screenshot in memory and get bytes data, err := d.RunCommand("exec-out screencap -p") @@ -90,16 +89,37 @@ func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, white } } - // Convert to grayscale and apply threshold - threshold := uint8(160) + // Convert to grayscale and compute average brightness + gray := image.NewGray(rect) + var totalBrightness int + for y := 0; y < rect.Dy(); y++ { + for x := 0; x < rect.Dx(); x++ { + grayVal := color.GrayModel.Convert(cropped.At(x, y)).(color.Gray) + gray.Set(x, y, grayVal) + totalBrightness += int(grayVal.Y) + } + } + avgBrightness := totalBrightness / (rect.Dx() * rect.Dy()) + + // Binarize based on brightness polarity binarized := image.NewGray(rect) for y := 0; y < rect.Dy(); y++ { for x := 0; x < rect.Dx(); x++ { - gray := color.GrayModel.Convert(cropped.At(x, y)).(color.Gray) - if gray.Y > threshold { - binarized.SetGray(x, y, color.Gray{Y: 255}) + grayVal := gray.GrayAt(x, y).Y + if avgBrightness < 128 { + // Dark background, light text + if int(grayVal) > avgBrightness+10 { + binarized.SetGray(x, y, color.Gray{Y: 0}) + } else { + binarized.SetGray(x, y, color.Gray{Y: 255}) + } } else { - binarized.SetGray(x, y, color.Gray{Y: 0}) + // Light background, dark text + if int(grayVal) < avgBrightness-10 { + binarized.SetGray(x, y, color.Gray{Y: 0}) + } else { + binarized.SetGray(x, y, color.Gray{Y: 255}) + } } } } @@ -110,7 +130,7 @@ func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, white return "", fmt.Errorf("failed to encode image: %w", err) } - // Call tesseract + // Call Tesseract client := gosseract.NewClient() defer client.Close() client.SetImageFromBytes(buf.Bytes())