使用Microsoft OCR获取扫描图像的文本坐标

时间:2019-06-02 07:16:25

标签: c# computer-vision microsoft-ocr

我试图找到扫描图像中文本的坐标。扫描的图像包含大量文本,需要先将图像数据转换为文本,然后获取这些文本的坐标。这里的坐标指文本所在的边界框,例如 X、Y 轴位置以及高度和宽度。

使用Microsoft OCR ProjectOxford Vision

using Microsoft.ProjectOxford.Vision;
using Microsoft.ProjectOxford.Vision.Contract;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;

 namespace TextExtraction
 {
     /// <summary>
     /// Console sample that sends a local image to the Computer Vision handwriting
     /// recognition endpoint and prints the recognized text, either line by line
     /// or word by word.
     /// </summary>
     class Program
     {
         // NOTE(review): placeholder subscription key; supply a real key before running
         // and avoid committing it to source control.
         const string ApiKey = "<<Key>>";
         const string ApiLocation =
             "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0";

         static void Main(string[] args)
         {
             string imgToAnalyze = @"C:\Users\abhis\Desktop\image.jpg";
             HandwritingExtraction(imgToAnalyze, false);

             Console.ReadLine();
         }

         /// <summary>
         /// Writes every entry of <paramref name="res"/> to the console, one per line,
         /// then waits for the user to press Enter.
         /// </summary>
         /// <param name="res">Strings to print; a null array is treated as empty.</param>
         public static void PrintResults(string[] res)
         {
             // A null array used to crash the foreach with a NullReferenceException.
             if (res != null)
             {
                 foreach (string r in res)
                 {
                     Console.WriteLine(r);
                 }
             }

             Console.ReadLine();
         }

         /// <summary>
         /// Synchronous wrapper: runs <see cref="HandwritingExtractionCore"/> and prints
         /// its result. Blocks the calling thread until the work has finished.
         /// </summary>
         /// <param name="fname">Path of the image file to analyze.</param>
         /// <param name="wrds">True to print individual words, false to print whole lines.</param>
         public static void HandwritingExtraction(string fname, bool wrds)
         {
             // Task.Wait() reports failures wrapped in an AggregateException.
             Task.Run(async () =>
             {
                 string[] res = await HandwritingExtractionCore(fname, wrds);
                 PrintResults(res);
             }).Wait();
         }

         /// <summary>
         /// Uploads the image at <paramref name="fname"/> for handwriting recognition and
         /// returns the recognized text.
         /// </summary>
         /// <param name="fname">Path of the image file to analyze.</param>
         /// <param name="wrds">True to return individual words, false to return whole lines.</param>
         /// <returns>
         /// The recognized words or lines. An empty array (never null) when the file does
         /// not exist or nothing was recognized.
         /// </returns>
         public static async Task<string[]> HandwritingExtractionCore(string fname, bool wrds)
         {
             // Guard first: a missing file yields an empty result instead of null,
             // so callers can enumerate the return value unconditionally.
             if (!File.Exists(fname))
             {
                 return new string[0];
             }

             VisionServiceClient client = new VisionServiceClient(ApiKey, ApiLocation);

             using (Stream stream = File.OpenRead(fname))
             {
                 // Recognition is a two-step call: start the operation, then fetch its result.
                 HandwritingRecognitionOperation op =
                     await client.CreateHandwritingRecognitionOperationAsync(stream);
                 HandwritingRecognitionOperationResult res =
                     await client.GetHandwritingRecognitionOperationResultAsync(op);

                 return GetExtracted(res, wrds);
             }
         }

         /// <summary>
         /// Flattens a recognition result into strings.
         /// </summary>
         /// <param name="res">Result returned by the recognition operation.</param>
         /// <param name="wrds">True to emit one entry per word, false to emit one entry per line.</param>
         /// <returns>The extracted strings; empty when the result carries no lines.</returns>
         public static string[] GetExtracted(HandwritingRecognitionOperationResult res, bool wrds)
         {
             List<string> items = new List<string>();

             // NOTE(review): RecognitionResult is presumably absent when the operation did
             // not succeed - confirm against the SDK. Either way, avoid dereferencing null.
             if (res == null || res.RecognitionResult == null || res.RecognitionResult.Lines == null)
             {
                 return items.ToArray();
             }

             foreach (HandwritingTextLine l in res.RecognitionResult.Lines)
             {
                 if (wrds)
                 {
                     items.AddRange(GetWords(l));
                 }
                 else
                 {
                     items.Add(GetLineAsString(l));
                 }
             }

             return items.ToArray();
         }

         /// <summary>
         /// Collects the text of every word on a recognized line.
         /// </summary>
         /// <param name="line">Recognized line; null or a line without words yields an empty list.</param>
         /// <returns>The word texts in the order the line lists them.</returns>
         public static List<string> GetWords(HandwritingTextLine line)
         {
             List<string> words = new List<string>();

             if (line == null || line.Words == null)
             {
                 return words;
             }

             foreach (HandwritingTextWord w in line.Words)
             {
                 words.Add(w.Text);
             }

             return words;
         }

         /// <summary>
         /// Joins the words of a recognized line with single spaces.
         /// </summary>
         /// <param name="line">Recognized line.</param>
         /// <returns>The joined text, or an empty string when the line has no words.</returns>
         public static string GetLineAsString(HandwritingTextLine line)
         {
             // string.Join already returns "" for an empty sequence, so no special case is needed.
             return string.Join(" ", GetWords(line));
         }
     }
 }

预期输出: 获取具有各自坐标(x,y,高度,宽度)的文本

Input image

Json输出

{
  "status": "Succeeded",
  "succeeded": true,
  "failed": false,
  "finished": true,
  "recognitionResults": [
    {
      "page": 1,
      "clockwiseOrientation": 359.62,
      "width": 505,
      "height": 399,
      "unit": "pixel",
      "lines": [
        {
          "boundingBox": [224, 58, 380, 57, 381, 74, 225, 75],
          "text": "GOVERNMENT OF INDIA",
          "words": [
            { "boundingBox": [229, 59, 321, 58, 320, 75, 229, 75], "text": "GOVERNMENT" },
            { "boundingBox": [324, 58, 341, 58, 341, 75, 323, 75], "text": "OF" },
            { "boundingBox": [344, 58, 381, 58, 381, 75, 344, 75], "text": "INDIA" }
          ]
        },
        {
          "boundingBox": [211, 159, 429, 160, 428, 180, 210, 178],
          "text": "FH faPet / DOB:27/07/1982",
          "words": [
            { "boundingBox": [225, 160, 243, 160, 243, 179, 225, 179], "text": "FH" },
            { "boundingBox": [247, 160, 286, 160, 286, 179, 247, 179], "text": "faPet /" },
            { "boundingBox": [290, 160, 333, 160, 333, 179, 290, 179], "text": "DOB:" },
            { "boundingBox": [337, 160, 428, 162, 428, 180, 337, 179], "text": "27/07/1982" }
          ]
        },
        {
          "boundingBox": [209, 192, 313, 190, 314, 208, 210, 210],
          "text": "you / MALE",
          "words": [
            { "boundingBox": [214, 192, 247, 192, 246, 209, 214, 210], "text": "you" },
            { "boundingBox": [254, 192, 260, 192, 260, 209, 254, 209], "text": "/" },
            { "boundingBox": [264, 192, 314, 192, 313, 208, 263, 209], "text": "MALE" }
          ]
        },
        {
          "boundingBox": [201, 314, 351, 313, 352, 330, 202, 331],
          "text": "66 66 6666 6666",
          "words": [
            { "boundingBox": [204, 315, 225, 314, 225, 330, 204, 331], "text": "66" },
            { "boundingBox": [229, 314, 251, 314, 251, 330, 229, 330], "text": "66" },
            { "boundingBox": [255, 314, 301, 314, 301, 330, 255, 330], "text": "6666" },
            { "boundingBox": [307, 314, 352, 314, 351, 331, 306, 330], "text": "6666" }
          ]
        }
      ]
    }
  ]
}

2 个答案:

答案 0 :(得分:1)

我猜您正在使用Microsoft C#Azure应用之类的东西。这是您问题的详细链接。

https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/quickstarts/csharp-print-text

坐标信息就在返回的 contentString 里面,格式类似于:

"language": "en",
    "textAngle": -1.5000000000000335,
    "orientation": "Up",
    "regions": [
        {
            "boundingBox": "154,49,351,575",
            "lines": [
                {
                    "boundingBox": "165,49,340,117",
                    "words": [
                        {
                            "boundingBox": "165,49,63,109",
                            "text": "A"
                        },
                        {
                            "boundingBox": "261,50,244,116",
                            "text": "GOAL"
                        }
                    ]
                },
                {

我用 Azure 和 C# 做过一些项目,但是您的代码看起来我并不十分熟悉。

我建议您查看代码中 textres 和 res 内的所有数据,我认为其中包含与上面字符串所示相同的坐标信息。

答案 1 :(得分:1)

首先,请注意 Microsoft Cognitive Services 中有两种不同的文本识别 API。Yuan博士的输出来自 OCR API,它支持的语言范围更广;而 Tony 的输出表明他调用的是更新、改进过的 Read API。

第二,请注意,上面代码示例中引用的客户端 SDK Microsoft.ProjectOxford.Vision 已被弃用,您应该改用它的替代品 Microsoft.Azure.CognitiveServices.Vision.ComputerVision,其示例可在官方文档中找到(原文此处的链接已丢失)。

最后,回答您的具体问题:boundingBox 字段表示文档中识别出的文本的位置。因此,对于您示例输出的 JSON,文本行 GOVERNMENT OF INDIA 由坐标 (224,58)、(380,57)、(381,74) 和 (225,75) 界定,它们分别代表边界框的四个角。之所以没有采用 x,y,width,height 格式,是为了能够表示旋转过的文本。请注意,边界框的单位也包含在 JSON 中(在您的情况下为像素)。如果您需要的话,该行中每个单词的位置也在响应 JSON 中。