396 lines
14 KiB
BibTeX
396 lines
14 KiB
BibTeX
% Given-name initials are written separately so the style can parse and abbreviate them.
@article{asif2014overview,
  author  = {Asif, A. M. A. M. and Hannan, Shaikh Abdul and Perwej, Yusuf and Vithalrao, Mane Arjun},
  title   = {An overview and applications of optical character recognition},
  journal = {International Journal of Advance Research in Science and Engineering},
  year    = 2014,
  volume  = 3,
  number  = 7,
  urldate = {2024-02-18}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{azurevision_home,
  author   = {{Microsoft Corporation}},
  title    = {Azure AI Vision - Homepage},
  date     = {2023-05-23},
  url      = {https://azure.microsoft.com/en-us/products/ai-services/ai-vision},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{azurevision_pricing,
  author   = {{Microsoft Corporation}},
  title    = {Azure AI Vision - Pricing},
  date     = {2023-05-23},
  url      = {https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/computer-vision/},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced so it is kept as one literal name; title typo "bimodalty" corrected.
@image{bimodal-histogram,
  author  = {{Wikimedia Commons}},
  title   = {Example of a histogram exhibiting bimodality},
  year    = 2014,
  url     = {https://commons.wikimedia.org/wiki/File:Bimodal-histogram.png},
  urldate = {2024-02-18}
}
|
|
@inbook{cc_platforms_comparison,
  author = {Gupta, Urvashi and Sharma, Rohit},
  title  = {Comparison of Different Cloud Computing Platforms for Data Analytics},
  year   = {2023},
  month  = {09},
  isbn   = {978-981-99-3715-8},
  doi    = {10.1007/978-981-99-3716-5_7}
}
|
|
% This is a chapter of the author's own book, so the book title belongs in `booktitle`
% of an @inbook entry rather than in `journal` of an @article.
@inbook{chowdhary2020natural,
  author    = {Chowdhary, K. R.},
  title     = {Natural language processing},
  booktitle = {Fundamentals of Artificial Intelligence},
  publisher = {Springer},
  year      = 2020
}
|
|
% The doi and url fields had been overwritten with the citation key; the real DOI is restored.
% NOTE(review): DOI taken from the ACM Digital Library record for this article — confirm.
@article{church1995,
  author    = {Church, Kenneth W. and Rau, Lisa F.},
  title     = {Commercial Applications of Natural Language Processing},
  journal   = {Communications of the ACM},
  year      = 1995,
  volume    = 38,
  number    = 11,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  issn      = {0001-0782},
  doi       = {10.1145/219717.219778},
  url       = {https://doi.org/10.1145/219717.219778},
  urldate   = {2024-02-18},
  numpages  = 9
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{copa-data_zenon,
  author   = {{Ing. Punzenberger COPA-DATA GmbH}},
  title    = {COPA-DATA zenon - Homepage},
  date     = {2023-05-23},
  url      = {https://www.copadata.com/en/product/zenon-software-platform-for-industrial-automation-energy-automation/},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
@book{DingXiaoqing2012AiCR,
  author    = {Ding, Xiaoqing},
  title     = {Advances in Character Recognition},
  publisher = {IntechOpen},
  year      = {2012},
  isbn      = {953-51-5669-1},
  doi       = {10.5772/2575},
  url       = {https://www.intechopen.com/books/2182},
  urldate   = {2024-02-18},
  keywords  = {Optical character recognition},
  language  = {eng}
}
|
|
% The former `journal` value was a mangled CiteSeer URL (citation-export artefact); it is now a real `url`.
% NOTE(review): entry type and institution assume the Norsk Regnesentral technical report — confirm,
% and re-check the dropped `volume = 26`, which has no meaning for a report.
@report{eikvil1993optical,
  author      = {Eikvil, Line},
  title       = {Optical character recognition},
  type        = {techreport},
  institution = {Norsk Regnesentral},
  year        = 1993,
  url         = {http://citeseer.ist.psu.edu/142042.html}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{gcv_home,
  author   = {{Google LLC}},
  title    = {Google Cloud Vision - Homepage},
  date     = {2023-05-23},
  url      = {https://cloud.google.com/vision},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{gcv_pricing,
  author   = {{Google LLC}},
  title    = {Google Cloud Vision - Pricing},
  date     = {2023-05-23},
  url      = {https://cloud.google.com/vision/pricing},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{imagemagick,
  author   = {{ImageMagick Studio LLC}},
  title    = {ImageMagick Homepage},
  date     = {2023-05-23},
  url      = {https://www.imagemagick.org/},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{ironocr_home,
  author   = {{Iron Software LLC}},
  title    = {IronOCR for .NET - Homepage},
  date     = {2023-05-23},
  url      = {https://ironsoftware.com/csharp/ocr/},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@online{anyline_home,
  author   = {{Anyline GmbH}},
  title    = {Anyline - Homepage},
  date     = {2023-05-23},
  url      = {https://anyline.com},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
@online{nuget_magicknet,
  title    = {Magick.NET - NuGet},
  date     = {2023-05-23},
  language = {eng},
  url      = {https://www.nuget.org/packages/Magick.NET.Core},
  urldate  = {2024-02-18}
}
|
|
@online{nuget_tesseract,
  title    = {Tesseract - NuGet},
  date     = {2023-05-23},
  language = {eng},
  url      = {https://www.nuget.org/packages/Tesseract},
  urldate  = {2024-02-18}
}
|
|
% arXiv preprints carry their identifier in eprint/eprinttype instead of a placeholder journal name.
@online{islam2017survey,
  author     = {Islam, Noman and Islam, Zeeshan and Noor, Nazia},
  title      = {A survey on optical character recognition systems},
  year       = 2017,
  eprint     = {1710.05703},
  eprinttype = {arXiv},
  url        = {https://doi.org/10.48550/arXiv.1710.05703},
  urldate    = {2024-02-18}
}
|
|
@article{kalyanathaya2019advances,
  author  = {Kalyanathaya, Krishna Prakash and Akila, D and Rajesh, P},
  title   = {Advances in natural language processing: a survey of current research trends, development tools and industry applications},
  journal = {International Journal of Recent Technology and Engineering},
  volume  = {7},
  number  = {5C},
  year    = {2019}
}
|
|
@article{kapur1985new,
  author    = {Kapur, Jagat Narain and Sahoo, Prasanna K and Wong, Andrew KC},
  title     = {A new method for gray-level picture thresholding using the entropy of the histogram},
  journal   = {Computer vision, graphics, and image processing},
  volume    = {29},
  number    = {3},
  year      = {1985},
  publisher = {Elsevier}
}
|
|
% The acronym OCR is capitalised and brace-protected so no style can lowercase it.
@inproceedings{karpinski2018metrics,
  author    = {Karpinski, Romain and Lohani, Devashish and Belaid, Abdel},
  title     = {Metrics for complete evaluation of {OCR} performance},
  booktitle = {IPCV'18 - The 22nd Int'l Conf on Image Processing, Computer Vision, \& Pattern Recognition},
  year      = 2018,
  url       = {https://inria.hal.science/hal-01981731}
}
|
|
% Soviet Physics Doklady is a journal, so this is an @article; the citation-export artefacts
% ("and others" co-authors, organization = Soviet Union) are removed.
@article{levenshtein1966binary,
  author  = {Levenshtein, Vladimir I.},
  title   = {Binary codes capable of correcting deletions, insertions, and reversals},
  journal = {Soviet Physics Doklady},
  year    = 1966,
  volume  = 10,
  number  = 8,
  pages   = {707--710}
}
|
|
@article{mursari2021effectiveness,
  author  = {Mursari, Lily Rojabiyati and Wibowo, Antoni},
  title   = {The effectiveness of image preprocessing on digital handwritten scripts recognition with the implementation of OCR Tesseract},
  journal = {Computer Engineering and Applications Journal},
  volume  = {10},
  number  = {3},
  year    = {2021}
}
|
|
@article{otsu1979threshold,
  author    = {Otsu, Nobuyuki},
  title     = {A threshold selection method from gray-level histograms},
  journal   = {IEEE transactions on systems, man, and cybernetics},
  volume    = {9},
  number    = {1},
  year      = {1979},
  publisher = {IEEE},
  doi       = {10.1109/TSMC.1979.4310076},
  url       = {https://ieeexplore.ieee.org/document/4310076},
  urldate   = {2024-02-18}
}
|
|
% booktitle is required for @inproceedings; the trailing period is dropped from the title
% because the style supplies punctuation.
% NOTE(review): booktitle derived from the DOI (Interspeech.2008-537) — confirm the exact proceedings title.
@inproceedings{park2008empirical,
  author    = {Park, Youngja and Patwardhan, Siddharth and Visweswariah, Karthik and Gates, Stephen C},
  title     = {An empirical analysis of word error rate and keyword error rate},
  booktitle = {Interspeech 2008},
  year      = 2008,
  month     = 9,
  doi       = {10.21437/Interspeech.2008-537}
}
|
|
@article{sahoo1988survey,
  author    = {Sahoo, Prasanna K and Soltani, SAKC and Wong, Andrew KC},
  title     = {A survey of thresholding techniques},
  journal   = {Computer vision, graphics, and image processing},
  volume    = {41},
  number    = {2},
  year      = {1988},
  publisher = {Elsevier}
}
|
|
% langid selects the hyphenation/localisation language of the entry; the paper is in English,
% so `ngerman` is replaced with `english`.
@inproceedings{Smith2007,
  author       = {Smith, Ray},
  title        = {An Overview of the Tesseract OCR Engine},
  booktitle    = {Ninth international conference on document analysis and recognition (ICDAR 2007)},
  volume       = 2,
  date         = 2007,
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/document/4376991},
  urldate      = {2024-02-18},
  langid       = {english}
}
|
|
@article{sporici2020improving,
  author    = {Sporici, Dan and Cușnir, Elena and Boiangiu, Costin-Anton},
  title     = {Improving the accuracy of Tesseract 4.0 OCR engine using convolution-based preprocessing},
  journal   = {Symmetry},
  volume    = {12},
  number    = {5},
  year      = {2020},
  publisher = {MDPI}
}
|
|
@online{tessdoc,
  title    = {Tesseract Documentation},
  date     = {2023-05-23},
  language = {eng},
  url      = {https://tesseract-ocr.github.io/},
  urldate  = {2024-02-18}
}
|
|
@online{tessrepo,
  author   = {tesseract-ocr},
  title    = {Tesseract Repository},
  date     = {2023-05-23},
  language = {eng},
  url      = {https://github.com/tesseract-ocr/tesseract},
  urldate  = {2024-01-04}
}
|
|
% Corporate author is double-braced: without the extra braces the comma makes BibTeX read
% "Amazon Web Services" as the family name and "Inc." as the given name.
@online{textract_home,
  author   = {{Amazon Web Services, Inc.}},
  title    = {Amazon Textract - Homepage},
  date     = {2023-05-23},
  url      = {https://aws.amazon.com/textract},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Corporate author is double-braced: without the extra braces the comma makes BibTeX read
% "Amazon Web Services" as the family name and "Inc." as the given name.
@online{textract_pricing,
  author   = {{Amazon Web Services, Inc.}},
  title    = {Amazon Textract - Pricing},
  date     = {2023-05-23},
  url      = {https://aws.amazon.com/textract/pricing/},
  urldate  = {2024-02-18},
  language = {eng}
}
|
|
% Both authors now use the "Last, First" form; year/month/day are merged into one ISO date
% because biblatex has no `day` field.
% NOTE(review): booktitle inferred from the ACM DOI prefix (DocEng 2020) — confirm.
@inproceedings{the_old_bailey_and_ocr,
  author    = {Ughetta, William and Kernighan, Brian W.},
  title     = {The Old Bailey and OCR: Benchmarking AWS, Azure, and GCP with 180,000 Page Images},
  booktitle = {Proceedings of the ACM Symposium on Document Engineering 2020},
  date      = {2020-09-29},
  publisher = {Association for Computing Machinery, Inc},
  doi       = {10.1145/3395027.3419595},
  keywords  = {Amazon Web Services, Google Cloud Platform, Historical Documents, Microsoft Azure, Old Bailey, Optical Character Recognition},
  language  = {English (US)}
}
|
|
% The workshop location is stored in `venue`; in `address` it would be printed as the
% publisher's city.
@inproceedings{tong1996statistical,
  author    = {Tong, Xiang and Evans, David A.},
  title     = {A Statistical Approach to Automatic OCR Error Correction in Context},
  booktitle = {Fourth Workshop on Very Large Corpora},
  editor    = {Scott, Donia},
  year      = 1996,
  month     = 6,
  venue     = {Herstmonceux Castle, Sussex, UK},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W96-0108}
}
|
|
% Corporate author is double-braced so it is kept as one literal name, not split into given/family parts.
@image{unimodal-histogram,
  author  = {{Wikimedia Commons}},
  title   = {Histogram of tips given in a restaurant},
  year    = 2014,
  url     = {https://commons.wikimedia.org/wiki/File:Tips-histogram1.png},
  urldate = {2024-02-18}
}
|
|
@inproceedings{wang2003word,
  author       = {Wang, Ye-Yi and Acero, Alex and Chelba, Ciprian},
  title        = {Is word error rate a good indicator for spoken language understanding accuracy},
  booktitle    = {2003 IEEE workshop on automatic speech recognition and understanding (IEEE Cat. No. 03EX721)},
  pages        = {577--582},
  year         = {2003},
  organization = {IEEE}
}
|
|
% The exported publisher string mixed the publisher name with its location; they are split
% into `publisher` and `address`.
@article{wilbur1992automatic,
  author    = {Wilbur, W John and Sirotkin, Karl},
  title     = {The automatic identification of stop words},
  journal   = {Journal of information science},
  year      = 1992,
  volume    = 18,
  number    = 1,
  publisher = {SAGE Publications},
  address   = {Thousand Oaks, CA}
}
|
|
% The exported publisher string mixed the publisher name with its location; they are split
% into `publisher` and `address`. The trailing period is dropped from the title because the
% style supplies punctuation.
@article{zack1977automatic,
  author    = {Zack, Gregory W and Rogers, William E and Latt, Samuel A},
  title     = {Automatic measurement of sister chromatid exchange frequency},
  journal   = {Journal of Histochemistry \& Cytochemistry},
  year      = 1977,
  volume    = 25,
  number    = 7,
  publisher = {SAGE Publications},
  address   = {Los Angeles, CA}
}
|
|
@inproceedings{seta2009digital,
  author       = {Seta, Ryo and Okubo, Kan and Tagawa, Norio},
  title        = {Digital image interpolation method using higher-order Hermite interpolating polynomials with compact finite-difference},
  booktitle    = {Proceedings: APSIPA ASC 2009: Asia-Pacific Signal and Information Processing Association, 2009 Annual Summit and Conference},
  year         = {2009},
  pages        = {406--409},
  organization = {Asia-Pacific Signal and Information Processing Association}
}
|
|
@article{briand2018theory,
  author  = {Briand, Thibaud and Monasse, Pascal},
  title   = {Theory and practice of image B-spline interpolation},
  journal = {Image Processing On Line},
  year    = {2018},
  volume  = {8},
  pages   = {99--141}
}
|
|
@article{unser1999splines,
  author    = {Unser, Michael},
  title     = {Splines: A perfect fit for signal and image processing},
  journal   = {IEEE Signal processing magazine},
  year      = {1999},
  volume    = {16},
  number    = {6},
  pages     = {22--38},
  publisher = {IEEE}
}
|
|
@article{fadnavis2014image,
  author  = {Fadnavis, Shreyas},
  title   = {Image interpolation techniques in digital image processing: an overview},
  journal = {International Journal of Engineering Research and Applications},
  year    = {2014},
  volume  = {4},
  number  = {10},
  pages   = {70--73}
}
|
|
@inproceedings{liu2006multiscale,
  author       = {Liu, Xiaoqing and Samarabandu, Jagath},
  title        = {Multiscale edge-based text extraction from complex images},
  booktitle    = {2006 IEEE International Conference on Multimedia and Expo},
  year         = {2006},
  pages        = {1721--1724},
  organization = {IEEE}
}
|
|
@article{cayrol1982fuzzy,
  author    = {Cayrol, M and Farreny, H and Prade, H},
  title     = {Fuzzy pattern matching},
  journal   = {Kybernetes},
  year      = {1982},
  volume    = {11},
  number    = {2},
  pages     = {103--116},
  publisher = {MCB UP Ltd}
}
|
|
% arXiv preprints carry their identifier in eprint/eprinttype instead of the journal field.
% Capitals lost in the export (Tesseract, OCR, iJIT) are restored and brace-protected.
% NOTE(review): casing of "iJIT" follows the arXiv listing as recalled — confirm.
@online{rakshit2010recognition,
  author     = {Rakshit, Sandip and Basu, Subhadip and Ikeda, Hisashi},
  title      = {Recognition of handwritten textual annotations using {Tesseract} open source {OCR} engine for information just in time ({iJIT})},
  year       = {2010},
  eprint     = {1003.5893},
  eprinttype = {arXiv}
}
|
|
@article{kumar2017noise,
  author  = {Kumar, Nalin and Nachamai, M},
  title   = {Noise removal and filtering techniques used in medical images},
  journal = {Oriental Journal of Computer Science and Technology},
  year    = {2017},
  volume  = {10},
  number  = {1},
  pages   = {103--113}
}
|