screen: update thresholds on text

This commit is contained in:
2025-11-07 19:19:48 +01:00
parent 53cf344cc0
commit 7c4281ef05

120
screen.go
View File

@@ -67,63 +67,77 @@ func (d *Device) GetScreenResolution() (int, int, error) {
return d.Screenx, d.Screeny, nil return d.Screenx, d.Screeny, nil
} }
// Get the text on screen in a certain area using gosseract (Tesseract OCR)
func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, whitelist string, multiline bool) (string, error) { func (d *Device) GetScreenText(xStart int, xEnd int, yStart int, yEnd int, whitelist string, multiline bool) (string, error) {
// Take screenshot in memory and get bytes data, err := d.RunCommand("exec-out screencap -p")
data, err := d.RunCommand("exec-out screencap -p") if err != nil {
if err != nil { return "", fmt.Errorf("failed to take screenshot: %w", err)
return "", fmt.Errorf("failed to take screenshot: %w", err) }
}
// Decode image from bytes img, err := png.Decode(bytes.NewReader(data))
img, err := png.Decode(bytes.NewReader(data)) if err != nil {
if err != nil { return "", fmt.Errorf("failed to decode PNG: %w", err)
return "", fmt.Errorf("failed to decode PNG: %w", err) }
}
// Crop region rect := image.Rect(0, 0, xEnd-xStart, yEnd-yStart)
rect := image.Rect(0, 0, xEnd-xStart, yEnd-yStart) cropped := image.NewRGBA(rect)
cropped := image.NewRGBA(rect) for y := yStart; y < yEnd; y++ {
for y := yStart; y < yEnd; y++ { for x := xStart; x < xEnd; x++ {
for x := xStart; x < xEnd; x++ { cropped.Set(x-xStart, y-yStart, img.At(x, y))
cropped.Set(x-xStart, y-yStart, img.At(x, y)) }
} }
}
// Convert to grayscale and apply threshold // Convert to grayscale
threshold := uint8(160) gray := image.NewGray(rect)
binarized := image.NewGray(rect) for y := 0; y < rect.Dy(); y++ {
for y := 0; y < rect.Dy(); y++ { for x := 0; x < rect.Dx(); x++ {
for x := 0; x < rect.Dx(); x++ { gray.Set(x, y, color.GrayModel.Convert(cropped.At(x, y)))
gray := color.GrayModel.Convert(cropped.At(x, y)).(color.Gray) }
if gray.Y > threshold { }
binarized.SetGray(x, y, color.Gray{Y: 255})
} else {
binarized.SetGray(x, y, color.Gray{Y: 0})
}
}
}
// Encode binarized image to PNG // Apply simple mean-based adaptive thresholding
var buf bytes.Buffer binarized := image.NewGray(rect)
if err := png.Encode(&buf, binarized); err != nil { window := 15 // size of local region
return "", fmt.Errorf("failed to encode image: %w", err) for y := 0; y < rect.Dy(); y++ {
} for x := 0; x < rect.Dx(); x++ {
var sum int
var count int
for dy := -window / 2; dy <= window/2; dy++ {
for dx := -window / 2; dx <= window/2; dx++ {
xx := x + dx
yy := y + dy
if xx >= 0 && xx < rect.Dx() && yy >= 0 && yy < rect.Dy() {
sum += int(gray.GrayAt(xx, yy).Y)
count++
}
}
}
mean := sum / count
if int(gray.GrayAt(x, y).Y) < mean-10 {
binarized.SetGray(x, y, color.Gray{Y: 0})
} else {
binarized.SetGray(x, y, color.Gray{Y: 255})
}
}
}
// Call tesseract var buf bytes.Buffer
client := gosseract.NewClient() if err := png.Encode(&buf, binarized); err != nil {
defer client.Close() return "", fmt.Errorf("failed to encode image: %w", err)
client.SetImageFromBytes(buf.Bytes()) }
client.SetWhitelist(whitelist)
if multiline {
client.SetPageSegMode(gosseract.PSM_AUTO)
} else {
client.SetPageSegMode(gosseract.PSM_SINGLE_BLOCK)
}
text, err := client.Text() client := gosseract.NewClient()
if err != nil { defer client.Close()
return "", fmt.Errorf("tesseract error: %w", err) client.SetImageFromBytes(buf.Bytes())
} client.SetWhitelist(whitelist)
return text, nil if multiline {
} client.SetPageSegMode(gosseract.PSM_AUTO)
} else {
client.SetPageSegMode(gosseract.PSM_SINGLE_BLOCK)
}
text, err := client.Text()
if err != nil {
return "", fmt.Errorf("tesseract error: %w", err)
}
return text, nil
}