Question

我想获取扫描图像中文本的坐标。扫描的图像包含大量文本，需要先将图像内容识别为文本，然后获取每段文本的坐标。这里的坐标指文本所在的边界框，即 X、Y 坐标以及高度和宽度。

使用Microsoft OCR ProjectOxford Vision

using Microsoft.ProjectOxford.Vision;
using Microsoft.ProjectOxford.Vision.Contract;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;

 namespace TextExtraction
 {
     /// <summary>
     /// Console sample that sends an image file to the handwriting-recognition
     /// endpoint through <c>VisionServiceClient</c> and prints the recognized text,
     /// either one entry per line or one entry per word.
     /// </summary>
     class Program
     {
         // Placeholder subscription key; replace with a real key before running.
         // NOTE(review): do not commit a real key to source control.
         private const string ApiKey = "<<Key>>";

         // Base URL of the Computer Vision endpoint passed to the client.
         private const string ApiLocation =
             "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0";

         /// <summary>
         /// Entry point: runs the extraction on a fixed image path, then waits for
         /// Enter so the console window stays open.
         /// </summary>
         /// <param name="args">Command-line arguments (unused).</param>
         static void Main(string[] args)
         {
             string imgToAnalyze = @"C:\Users\abhis\Desktop\image.jpg";
             HandwritingExtraction(imgToAnalyze, false);

             // Single pause for the whole program; PrintResults no longer pauses
             // on its own, so the user presses Enter once instead of twice.
             Console.ReadLine();
         }

         /// <summary>
         /// Writes each entry of <paramref name="res"/> to the console on its own line.
         /// </summary>
         /// <param name="res">Strings to print; <c>null</c> is treated as empty.</param>
         public static void PrintResults(string[] res)
         {
             // Tolerate null so callers that still pass "no result" as null do not crash.
             if (res == null)
             {
                 return;
             }

             foreach (string r in res)
             {
                 Console.WriteLine(r);
             }
         }

         /// <summary>
         /// Synchronous wrapper: runs <see cref="HandwritingExtractionCore"/> to
         /// completion and prints whatever it returns.
         /// </summary>
         /// <param name="fname">Path of the image file to analyze.</param>
         /// <param name="wrds">
         /// <c>true</c> to print one entry per word, <c>false</c> for one entry per line.
         /// </param>
         public static void HandwritingExtraction(string fname, bool wrds)
         {
             // GetAwaiter().GetResult() rethrows the original exception instead of
             // wrapping it in AggregateException the way Task.Wait() does.
             string[] res = HandwritingExtractionCore(fname, wrds).GetAwaiter().GetResult();
             PrintResults(res);
         }

         /// <summary>
         /// Opens the file, submits it for handwriting recognition, fetches the
         /// operation result and converts it to strings.
         /// </summary>
         /// <param name="fname">Path of the image file to analyze.</param>
         /// <param name="wrds">
         /// <c>true</c> to return one entry per word, <c>false</c> for one entry per line.
         /// </param>
         /// <returns>
         /// The extracted strings; an empty array when the file does not exist.
         /// </returns>
         public static async Task<string[]> HandwritingExtractionCore(string fname, bool wrds)
         {
             // Return an empty array rather than null so callers can enumerate
             // the result without a null check.
             if (!File.Exists(fname))
             {
                 return new string[0];
             }

             VisionServiceClient client = new VisionServiceClient(ApiKey, ApiLocation);

             using (Stream stream = File.OpenRead(fname))
             {
                 HandwritingRecognitionOperation op =
                     await client.CreateHandwritingRecognitionOperationAsync(stream);
                 HandwritingRecognitionOperationResult res =
                     await client.GetHandwritingRecognitionOperationResultAsync(op);

                 return GetExtracted(res, wrds);
             }
         }

         /// <summary>
         /// Flattens a recognition result into strings.
         /// </summary>
         /// <param name="res">Operation result returned by the client.</param>
         /// <param name="wrds">
         /// <c>true</c> to emit every word separately, <c>false</c> to emit each line
         /// as its words joined by single spaces.
         /// </param>
         /// <returns>
         /// The extracted strings; empty when the result carries no lines.
         /// </returns>
         public static string[] GetExtracted(HandwritingRecognitionOperationResult res, bool wrds)
         {
             List<string> items = new List<string>();

             // A missing payload (presumably a failed or unfinished operation —
             // confirm against the service) yields no items instead of a
             // NullReferenceException.
             if (res == null || res.RecognitionResult == null || res.RecognitionResult.Lines == null)
             {
                 return items.ToArray();
             }

             foreach (HandwritingTextLine l in res.RecognitionResult.Lines)
             {
                 if (wrds)
                 {
                     items.AddRange(GetWords(l));
                 }
                 else
                 {
                     items.Add(GetLineAsString(l));
                 }
             }

             return items.ToArray();
         }

         /// <summary>
         /// Collects the <c>Text</c> of every word in <paramref name="line"/>.
         /// </summary>
         /// <param name="line">Recognized line whose words are read.</param>
         /// <returns>The word texts in order; empty when the line has no words.</returns>
         public static List<string> GetWords(HandwritingTextLine line)
         {
             List<string> words = new List<string>();

             if (line == null || line.Words == null)
             {
                 return words;
             }

             foreach (HandwritingTextWord w in line.Words)
             {
                 words.Add(w.Text);
             }

             return words;
         }

         /// <summary>
         /// Joins the words of <paramref name="line"/> with single spaces.
         /// </summary>
         /// <param name="line">Recognized line to render.</param>
         /// <returns>The joined text, or an empty string when the line has no words.</returns>
         public static string GetLineAsString(HandwritingTextLine line)
         {
             // string.Join already returns "" for an empty list, so no count check is needed.
             return string.Join(" ", GetWords(line));
         }
     }
 }

预期输出： 获取具有各自坐标（x，y，高度，宽度）的文本

Input image

Json输出

{
  "status": "Succeeded",
  "succeeded": true,
  "failed": false,
  "finished": true,
  "recognitionResults": [
    {
      "page": 1,
      "clockwiseOrientation": 359.62,
      "width": 505,
      "height": 399,
      "unit": "pixel",
      "lines": [
        {
          "boundingBox": [224, 58, 380, 57, 381, 74, 225, 75],
          "text": "GOVERNMENT OF INDIA",
          "words": [
            { "boundingBox": [229, 59, 321, 58, 320, 75, 229, 75], "text": "GOVERNMENT" },
            { "boundingBox": [324, 58, 341, 58, 341, 75, 323, 75], "text": "OF" },
            { "boundingBox": [344, 58, 381, 58, 381, 75, 344, 75], "text": "INDIA" }
          ]
        },
        {
          "boundingBox": [211, 159, 429, 160, 428, 180, 210, 178],
          "text": "FH faPet / DOB: 27/07/1982",
          "words": [
            { "boundingBox": [225, 160, 243, 160, 243, 179, 225, 179], "text": "FH" },
            { "boundingBox": [247, 160, 286, 160, 286, 179, 247, 179], "text": "faPet /" },
            { "boundingBox": [290, 160, 333, 160, 333, 179, 290, 179], "text": "DOB:" },
            { "boundingBox": [337, 160, 428, 162, 428, 180, 337, 179], "text": "27/07/1982" }
          ]
        },
        {
          "boundingBox": [209, 192, 313, 190, 314, 208, 210, 210],
          "text": "you / MALE",
          "words": [
            { "boundingBox": [214, 192, 247, 192, 246, 209, 214, 210], "text": "you" },
            { "boundingBox": [254, 192, 260, 192, 260, 209, 254, 209], "text": "/" },
            { "boundingBox": [264, 192, 314, 192, 313, 208, 263, 209], "text": "MALE" }
          ]
        },
        {
          "boundingBox": [201, 314, 351, 313, 352, 330, 202, 331],
          "text": "66 66 6666 6666",
          "words": [
            { "boundingBox": [204, 315, 225, 314, 225, 330, 204, 331], "text": "66" },
            { "boundingBox": [229, 314, 251, 314, 251, 330, 229, 330], "text": "66" },
            { "boundingBox": [255, 314, 301, 314, 301, 330, 255, 330], "text": "6666" },
            { "boundingBox": [307, 314, 352, 314, 351, 331, 306, 330], "text": "6666" }
          ]
        }
      ]
    }
  ]
}

Answer 1

我猜您正在使用Microsoft C＃Azure应用之类的东西。这是您问题的详细链接。

https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/quickstarts/csharp-print-text

坐标信息就在 contentString 里面，类似这样：

"language": "en",
    "textAngle": -1.5000000000000335,
    "orientation": "Up",
    "regions": [
        {
            "boundingBox": "154,49,351,575",
            "lines": [
                {
                    "boundingBox": "165,49,340,117",
                    "words": [
                        {
                            "boundingBox": "165,49,63,109",
                            "text": "A"
                        },
                        {
                            "boundingBox": "261,50,244,116",
                            "text": "GOAL"
                        }
                    ]
                },
                {

我用 Azure C＃ 做过一些项目，但是您的代码看起来和我用过的不太一样。

我建议您查看代码中 textres 或 res 里的全部数据，我认为其中包含与上面字符串所示相同的坐标信息。

Answer 2

首先，请注意 Microsoft Cognitive Services 中有两种不同的文本识别 API。Yuan 博士给出的输出来自 OCR API，它支持的语言范围更广；而 Tony 的输出表明他调用的是更新、更完善的 Read API。

第二，请注意，上面代码示例中引用的客户端 SDK Microsoft.ProjectOxford.Vision 已被弃用，建议改用其替代品 Microsoft.Azure.CognitiveServices.Vision.ComputerVision，其示例可在微软官方文档中找到。

最后，回答您的具体问题：boundingBox 字段表示识别出的文本在文档中的位置。以您示例输出的 JSON 为例，文本行 GOVERNMENT OF INDIA 由坐标（224，58）、（380，57）、（381，74）和（225，75）界定，它们分别是边界框的四个角。之所以不采用 x,y,width,height 格式，是为了能够表示旋转后的文本。请注意，边界框的单位也包含在 JSON 中（在您的例子中为像素）。如果您需要，该行中每个单词的位置同样可以在响应 JSON 中找到。

使用Microsoft OCR获取扫描图像的文本坐标

2 个答案: