bibtex cleanup

This commit is contained in:
Simon
2024-02-17 09:12:58 +01:00
parent 0551aa4ace
commit 72c8d4ee5a
2 changed files with 188 additions and 248 deletions
@@ -13,6 +13,6 @@ Heutige Texterkennungssysteme arbeiten oft mit einer Kombination aus neuralen Ne
\subsection{Filterung der Ergebnisdaten} \subsection{Filterung der Ergebnisdaten}
Das Themengebiet des Natural Language Processing beschäftigt sich mit der Interaktion zwischen menschlicher Sprache und Computern. Techniken aus der Informatik, Linguistik und dem maschinellen Lernen werden kombiniert, um mit menschlicher Sprache umzugehen und beispielsweise Textanalyse, Übersetzungen, Spracherkennung oder Dialogsysteme möglich zu machen \mcite{chowdhary2020natural}. Durch die große Aufmerksamkeit und die vielseitige Nutzung der Technologien -- von automatischer Rechtschreibkontrolle bis hin zu digitalen Sprachassistenten -- sowie dem Aufkommen von neuronalen Netzwerken wurden in diesem Forschungsgebiet in den letzten Jahren immer wieder Fortschritte erzielt \mcite{kalyanathaya2019advances, 10.1145/219717.219778} [TODO neu schreiben weil das ist einfach alles alte technik]. Das Themengebiet des Natural Language Processing beschäftigt sich mit der Interaktion zwischen menschlicher Sprache und Computern. Techniken aus der Informatik, Linguistik und dem maschinellen Lernen werden kombiniert, um mit menschlicher Sprache umzugehen und beispielsweise Textanalyse, Übersetzungen, Spracherkennung oder Dialogsysteme möglich zu machen \mcite{chowdhary2020natural}. Durch die große Aufmerksamkeit und die vielseitige Nutzung der Technologien -- von automatischer Rechtschreibkontrolle bis hin zu digitalen Sprachassistenten -- sowie dem Aufkommen von neuronalen Netzwerken wurden in diesem Forschungsgebiet in den letzten Jahren immer wieder Fortschritte erzielt \mcite{kalyanathaya2019advances, church1995} [TODO neu schreiben weil das ist einfach alles alte technik].
Dadurch gibt es zahlreiche wissenschaftliche Ressourcen, die als Grundlage für die Vorgehensweise zur Interpretation und Extraktion relevanter Schlagworte aus den erkannten Freitextdaten dienen. Dadurch gibt es zahlreiche wissenschaftliche Ressourcen, die als Grundlage für die Vorgehensweise zur Interpretation und Extraktion relevanter Schlagworte aus den erkannten Freitextdaten dienen.
+187 -247
View File
@@ -1,46 +1,67 @@
@book{2007Crs, @article{asif2014overview,
title = {Character recognition systems: a guide for students and practioners}, title = {An overview and applications of optical character recognition},
author = {Cheriet, Mohamed}, author = {Asif, AMAM and Hannan, Shaikh Abdul and Perwej, Yusuf and Vithalrao, Mane Arjun},
year = 2007, year = 2014,
isbn = 9780471415701, journal = {Int. J. Adv. Res. Sci. Eng},
url = {https://permalink.obvsg.at/fho/AC06408992}, volume = 3,
urldate = {2024-02-12}, number = 7
language = {eng},
keywords = {Optical character recognition devices}
} }
@book{2022Scas, @online{azurevision_home,
title = {Soft computing and signal processing: proceedings of 4th ICSCSP 2021}, title = {Azure AI Vision - Homepage},
author = {Reddy, V. Sivakumar}, url = {https://azure.microsoft.com/en-us/products/ai-services/ai-vision},
year = 2022,
series = {Advances in Intelligent Systems and Computing ;},
isbn = {981-16-7088-9},
url = {https://search-fho.obvsg.at/permalink/f/19351jn/FHO_alma5134174850004527},
urldate = {2024-02-12}, urldate = {2024-02-12},
language = {eng}, date = {2023-05-23},
keywords = {Signal processing ; Congresses} language = {eng}
} }
@book{BoochGrady1999Tuml, @online{azurevision_pricing,
title = {The unified modeling language user guide : UML}, title = {Azure AI Vision - Pricing},
author = {Booch, Grady}, url = {https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/computer-vision/},
year = 1999,
series = {Addison-Wesley object technology series},
isbn = {0201571684},
url = {https://permalink.obvsg.at/fho/AC08768402},
urldate = {2024-02-12}, urldate = {2024-02-12},
edition = {3. print..}, date = {2023-05-23},
language = {eng}, language = {eng}
keywords = {Computer software ; Development}
} }
@book{ChaudhuriArindam2017OCRS, @image{bimodal-histogram,
title = {Optical Character Recognition Systems for Different Languages with Soft Computing}, title = {Example of a histogram exhibiting bimodalty},
author = {Chaudhuri, Arindam}, author = {Wikimedia Commons},
year = 2017, year = 2014,
series = {Studies in Fuzziness and Soft Computing 352}, url = {https://commons.wikimedia.org/wiki/File:Bimodal-histogram.png},
isbn = 9783319502526, urldate = {2024-02-12}
url = {https://permalink.obvsg.at/fho/AC12323924}, }
% Book chapter by Gupta and Sharma (2023), identified by its DOI.
% The month uses the standard three-letter macro instead of a quoted number.
% NOTE(review): booktitle and publisher are not recorded -- TODO add them from the DOI landing page.
@inbook{cc_platforms_comparison,
  author = {Gupta, Urvashi and Sharma, Rohit},
  title  = {Comparison of Different Cloud Computing Platforms for Data Analytics},
  year   = 2023,
  month  = sep,
  isbn   = {978-981-99-3715-8},
  doi    = {10.1007/978-981-99-3716-5_7}
}
% Natural language processing overview, cited in the thesis text under this key.
% The exported author list repeated the same person as "Chowdhary, KR1442"; one author is kept.
% NOTE(review): the former journal value plus a publisher looks like a book, so the entry is
% filed as a book chapter -- TODO confirm, and add pages and DOI.
@inbook{chowdhary2020natural,
  author    = {Chowdhary, K. R.},
  title     = {Natural language processing},
  booktitle = {Fundamentals of Artificial Intelligence},
  publisher = {Springer},
  year      = 2020
}
% Church and Rau (1995), cited in the thesis text under this key.
% The doi and url fields carry the real DOI; renaming the citation key must never touch them.
@article{church1995,
  author    = {Church, Kenneth W. and Rau, Lisa F.},
  title     = {Commercial Applications of Natural Language Processing},
  journal   = {Communications of the ACM},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = 1995,
  volume    = 38,
  number    = 11,
  numpages  = 9,
  issn      = {0001-0782},
  doi       = {10.1145/219717.219778},
  url       = {https://doi.org/10.1145/219717.219778}
}
@online{copa-data_zenon,
title = {COPA-DATA zenon - Homepage},
url = {https://www.copadata.com/en/product/zenon-software-platform-for-industrial-automation-energy-automation/},
urldate = {2024-02-12}, urldate = {2024-02-12},
language = {eng}, date = {2023-05-23},
keywords = {Engineering} language = {eng}
} }
@book{DingXiaoqing2012AiCR, @book{DingXiaoqing2012AiCR,
title = {Advances in Character Recognition}, title = {Advances in Character Recognition},
@@ -54,38 +75,16 @@
language = {eng}, language = {eng},
keywords = {Optical character recognition} keywords = {Optical character recognition}
} }
@inproceedings{Smith2007, @article{eikvil1993optical,
title = {An Overview of the Tesseract OCR Engine}, title = {Optical character recognition},
author = {Smith R.}, author = {Eikvil, Line},
url = {https://ieeexplore.ieee.org/document/4376991}, year = 1993,
urldate = {2024-02-12}, journal = {citeseer. ist. psu. edu/142042. html},
date = 2007, volume = 26
langid = {ngerman}
} }
@online{tessdoc, @online{gcv_home,
title = {Tesseract Documentation}, title = {Google Cloud Vision - Homepage},
url = {https://tesseract-ocr.github.io/}, url = {https://cloud.google.com/vision},
urldate = {2024-02-12},
date = {2023-05-23},
language = {eng}
}
@online{imagemagick,
title = {ImageMagick Homepage},
url = {https://www.imagemagick.org/},
urldate = {2024-02-12},
date = {2023-05-23},
language = {eng}
}
@online{textract_pricing,
title = {Amazon Textract - Pricing},
url = {https://aws.amazon.com/textract/pricing/},
urldate = {2024-02-12},
date = {2023-05-23},
language = {eng}
}
@online{textract_home,
title = {Amazon Textract - Homepage},
url = {https://aws.amazon.com/textract},
urldate = {2024-02-12}, urldate = {2024-02-12},
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
@@ -97,23 +96,9 @@
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
} }
@online{gcv_home, @online{imagemagick,
title = {Google Cloud Vision - Homepage}, title = {ImageMagick Homepage},
url = {https://cloud.google.com/vision}, url = {https://www.imagemagick.org/},
urldate = {2024-02-12},
date = {2023-05-23},
language = {eng}
}
@online{azurevision_pricing,
title = {Azure AI Vision - Pricing},
url = {https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/computer-vision/},
urldate = {2024-02-12},
date = {2023-05-23},
language = {eng}
}
@online{azurevision_home,
title = {Azure AI Vision - Homepage},
url = {https://azure.microsoft.com/en-us/products/ai-services/ai-vision},
urldate = {2024-02-12}, urldate = {2024-02-12},
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
@@ -125,9 +110,92 @@
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
} }
@online{copa-data_zenon, @article{islam2017survey,
title = {COPA-DATA zenon - Homepage}, title = {A survey on optical character recognition systems},
url = {https://www.copadata.com/en/product/zenon-software-platform-for-industrial-automation-energy-automation/}, author = {Islam, Islam, Noor},
year = 2017,
journal = {arXiv preprint arXiv:1710.05703}
}
% Survey of NLP research trends, tools and industry applications (2019); cited in the thesis text.
@article{kalyanathaya2019advances,
  author  = {Kalyanathaya, Krishna Prakash and Akila, D and Rajesh, P},
  title   = {Advances in natural language processing: a survey of current research trends, development tools and industry applications},
  journal = {International Journal of Recent Technology and Engineering},
  year    = {2019},
  volume  = {7},
  number  = {5C}
}
% Entropy-based gray-level thresholding (Kapur, Sahoo, Wong; 1985).
% Initials are separated so that name abbreviation keeps both of Wong's middle initials,
% and the journal name is stored in its proper title case.
@article{kapur1985new,
  author    = {Kapur, Jagat Narain and Sahoo, Prasanna K. and Wong, Andrew K. C.},
  title     = {A new method for gray-level picture thresholding using the entropy of the histogram},
  journal   = {Computer Vision, Graphics, and Image Processing},
  publisher = {Elsevier},
  year      = 1985,
  volume    = 29,
  number    = 3
}
% OCR evaluation metrics paper (Karpinski, Lohani, Belaid; 2018).
% The acronym in the title is upper-cased and brace-protected so styles cannot lower-case it.
@inproceedings{karpinski2018metrics,
  author    = {Karpinski, Romain and Lohani, Devashish and Belaid, Abdel},
  title     = {Metrics for complete evaluation of {OCR} performance},
  booktitle = {IPCV'18-The 22nd Int'l Conf on Image Processing, Computer Vision, \& Pattern Recognition},
  year      = 2018
}
% Levenshtein's edit-distance paper. The previous stub had only a title and a surname.
% NOTE(review): the details below follow the commonly cited English translation in
% Soviet Physics Doklady -- TODO verify volume, number and pages against the source.
@article{levenshtein1966binary,
  author  = {Levenshtein, Vladimir I.},
  title   = {Binary codes capable of correcting deletions, insertions, and reversals},
  journal = {Soviet Physics Doklady},
  year    = 1966,
  volume  = 10,
  number  = 8,
  pages   = {707--710}
}
% Image preprocessing for handwritten-script recognition with Tesseract (Mursari, Wibowo; 2021).
% The acronym and product name in the title are brace-protected against sentence-casing styles.
@article{mursari2021effectiveness,
  author  = {Mursari, Lily Rojabiyati and Wibowo, Antoni},
  title   = {The effectiveness of image preprocessing on digital handwritten scripts recognition with the implementation of {OCR} {Tesseract}},
  journal = {Computer Engineering and Applications Journal},
  year    = 2021,
  volume  = 10,
  number  = 3
}
% Otsu's threshold selection method (1979).
% The journal name is stored in title case with the organisation acronym brace-protected.
@article{otsu1979threshold,
  author    = {Otsu, Nobuyuki},
  title     = {A threshold selection method from gray-level histograms},
  journal   = {{IEEE} Transactions on Systems, Man, and Cybernetics},
  publisher = {IEEE},
  year      = 1979,
  volume    = 9,
  number    = 1
}
% Word error rate versus keyword error rate (Park et al.; Interspeech 2008).
% The title no longer ends in a period, because styles add their own punctuation.
% The former volume value only repeated the year and looked like an export artefact, so it is dropped.
% NOTE(review): page numbers are not recorded -- TODO add.
@inproceedings{park2008empirical,
  author    = {Park, Youngja and Patwardhan, Siddharth and Visweswariah, Karthik and Gates, Stephen C.},
  title     = {An empirical analysis of word error rate and keyword error rate},
  booktitle = {Interspeech},
  year      = 2008
}
% Survey of thresholding techniques (Sahoo, Soltani, Wong; 1988).
% The exported given name "SAKC" for Soltani had absorbed initials of the following author;
% NOTE(review): author initials restored as commonly cited -- TODO verify against the paper.
@article{sahoo1988survey,
  author    = {Sahoo, Prasanna K. and Soltani, S. and Wong, Andrew K. C.},
  title     = {A survey of thresholding techniques},
  journal   = {Computer Vision, Graphics, and Image Processing},
  publisher = {Elsevier},
  year      = 1988,
  volume    = 41,
  number    = 2
}
% Overview paper on the Tesseract OCR engine (2007), linked to its IEEE Xplore record.
% The author is written as "Last, First"; the old form "Smith R." parsed with "R." as the surname.
% NOTE(review): booktitle filled in from the commonly cited ICDAR 2007 record -- TODO verify.
% NOTE(review): langid is left unchanged although the title is English -- confirm this is intended.
@inproceedings{Smith2007,
  author    = {Smith, Ray},
  title     = {An Overview of the {Tesseract} {OCR} Engine},
  booktitle = {Ninth International Conference on Document Analysis and Recognition ({ICDAR} 2007)},
  date      = 2007,
  url       = {https://ieeexplore.ieee.org/document/4376991},
  urldate   = {2024-02-12},
  langid    = {ngerman}
}
% Convolution-based preprocessing for Tesseract 4.0 (Sporici, Cusnir, Boiangiu; 2020).
% The product name and acronym in the title are brace-protected against sentence-casing styles.
% The non-ASCII author name is kept as UTF-8, which needs a Unicode-aware backend such as Biber.
@article{sporici2020improving,
  author    = {Sporici, Dan and Cușnir, Elena and Boiangiu, Costin-Anton},
  title     = {Improving the accuracy of {Tesseract} 4.0 {OCR} engine using convolution-based preprocessing},
  journal   = {Symmetry},
  publisher = {MDPI},
  year      = 2020,
  volume    = 12,
  number    = 5
}
@online{tessdoc,
title = {Tesseract Documentation},
url = {https://tesseract-ocr.github.io/},
urldate = {2024-02-12}, urldate = {2024-02-12},
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
@@ -139,25 +207,16 @@
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
} }
@article{asif2014overview, @online{textract_home,
title = {An overview and applications of optical character recognition}, title = {Amazon Textract - Homepage},
author = {Asif, AMAM and Hannan, Shaikh Abdul and Perwej, Yusuf and Vithalrao, Mane Arjun}, url = {https://aws.amazon.com/textract},
year = 2014, urldate = {2024-02-12},
journal = {Int. J. Adv. Res. Sci. Eng}, date = {2023-05-23},
volume = 3, language = {eng}
number = 7,
} }
@inbook{cc_platforms_comparison, @online{textract_pricing,
title = {“Comparison of Different Cloud Computing Platforms for Data Analytics”}, title = {Amazon Textract - Pricing},
author = {Gupta, Urvashi and Sharma, Rohit}, url = {https://aws.amazon.com/textract/pricing/},
year = 2023,
month = {09},
doi = {10.1007/978-981-99-3716-5_7},
isbn = {978-981-99-3715-8}
}
@online{tessdoc,
title = {Tesseract Documentation},
url = {https://tesseract-ocr.github.io/},
urldate = {2024-02-12}, urldate = {2024-02-12},
date = {2023-05-23}, date = {2023-05-23},
language = {eng} language = {eng}
@@ -173,72 +232,18 @@
keywords = {Amazon Web Services, Google Cloud Platform, Historical Documents, Microsoft Azure, Old Bailey, Optical Character Recognition}, keywords = {Amazon Web Services, Google Cloud Platform, Historical Documents, Microsoft Azure, Old Bailey, Optical Character Recognition},
language = {English (US)} language = {English (US)}
} }
@article{eikvil1993optical,
title = {Optical character recognition},
author = {Eikvil, Line},
year = 1993,
journal = {citeseer. ist. psu. edu/142042. html},
volume = 26
}
@article{islam2017survey,
title = {A survey on optical character recognition systems},
author = {Islam, Islam, Noor},
year = 2017,
journal = {arXiv preprint arXiv:1710.05703}
}
@article{chowdhary2020natural,
title = {Natural language processing},
author = {Chowdhary, KR1442 and Chowdhary, KR},
year = 2020,
journal = {Fundamentals of artificial intelligence},
publisher = {Springer},
}
@article{10.1145/219717.219778,
title = {Commercial Applications of Natural Language Processing},
author = {Church, Kenneth W. and Rau, Lisa F.},
year = 1995,
journal = {Commun. ACM},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = 38,
number = 11,
doi = {10.1145/219717.219778},
issn = {0001-0782},
url = {https://doi.org/10.1145/219717.219778},
numpages = 9
}
@article{kalyanathaya2019advances,
title = {Advances in natural language processing: a survey of current research trends, development tools and industry applications},
author = {Kalyanathaya, Krishna Prakash and Akila, D and Rajesh, P},
year = 2019,
journal = {International Journal of Recent Technology and Engineering},
volume = 7,
number = {5C},
}
@inproceedings{tong1996statistical, @inproceedings{tong1996statistical,
title = {A statistical approach to automatic OCR error correction in context}, title = {A statistical approach to automatic OCR error correction in context},
author = {Tong, Xiang and Evans, David A}, author = {Tong, Xiang and Evans, David A},
year = 1996, year = 1996,
booktitle = {Fourth workshop on very large corpora} % todo booktitle = {Fourth workshop on very large corpora}
} }
@inproceedings{karpinski2018metrics, @image{unimodal-histogram,
title = {Metrics for complete evaluation of ocr performance}, title = {Histogram of tips given in a restaurant},
author = {Karpinski, Romain and Lohani, Devashish and Belaid, Abdel}, author = {Wikimedia Commons},
year = 2018, year = 2014,
booktitle = {IPCV'18-The 22nd Int'l Conf on Image Processing, Computer Vision, \& Pattern Recognition} url = {https://commons.wikimedia.org/wiki/File:Tips-histogram1.png},
} urldate = {2024-02-12}
@article{approximate_string_matching,
title = {A Guided Tour to Approximate String Matching},
author = {Navarro, Gonzalo},
year = 2000,
month = {04},
journal = {ACM Computing Surveys},
volume = 33,
doi = {10.1145/375360.375365}
}
@inproceedings{levenshtein1966binary,
title = {Binary codes capable of correcting deletions, insertions, and reversals},
author = {Levenshtein} % todo
} }
@inproceedings{wang2003word, @inproceedings{wang2003word,
title = {Is word error rate a good indicator for spoken language understanding accuracy}, title = {Is word error rate a good indicator for spoken language understanding accuracy},
@@ -247,86 +252,21 @@
booktitle = {2003 IEEE workshop on automatic speech recognition and understanding (IEEE Cat. No. 03EX721)}, booktitle = {2003 IEEE workshop on automatic speech recognition and understanding (IEEE Cat. No. 03EX721)},
organization = {IEEE} organization = {IEEE}
} }
@inproceedings{park2008empirical, @article{wilbur1992automatic,
title = {An empirical analysis of word error rate and keyword error rate.}, title = {The automatic identification of stop words},
author = {Park, Youngja and Patwardhan, Siddharth and Visweswariah, Karthik and Gates, Stephen C}, author = {Wilbur, W John and Sirotkin, Karl},
year = 2008, year = 1992,
booktitle = {Interspeech}, journal = {Journal of information science},
volume = 2008, publisher = {Sage Publications Sage CA: Thousand Oaks, CA},
} volume = 18,
@article{sporici2020improving, number = 1
title = {Improving the accuracy of Tesseract 4.0 OCR engine using convolution-based preprocessing},
author = {Sporici, Dan and Cușnir, Elena and Boiangiu, Costin-Anton},
year = 2020,
journal = {Symmetry},
publisher = {MDPI},
volume = 12,
number = 5,
}
@article{mursari2021effectiveness,
title = {The effectiveness of image preprocessing on digital handwritten scripts recognition with the implementation of OCR Tesseract},
author = {Mursari, Lily Rojabiyati and Wibowo, Antoni},
year = 2021,
journal = {Computer Engineering and Applications Journal},
volume = 10,
number = 3,
}
@image{unimodal-histogram,
author = "Wikimedia Commons",
title = "Histogram of tips given in a restaurant",
year = "2014",
urldate = {2024-02-12},
url = "https://commons.wikimedia.org/wiki/File:Tips-histogram1.png",
}
@image{bimodal-histogram,
author = "Wikimedia Commons",
title = "Example of a histogram exhibiting bimodalty",
year = "2014",
urldate = {2024-02-12},
url = "https://commons.wikimedia.org/wiki/File:Bimodal-histogram.png",
} }
@article{zack1977automatic, @article{zack1977automatic,
title={Automatic measurement of sister chromatid exchange frequency.}, title = {Automatic measurement of sister chromatid exchange frequency.},
author={Zack, Gregory W and Rogers, William E and Latt, Samuel A}, author = {Zack, Gregory W and Rogers, William E and Latt, Samuel A},
journal={Journal of Histochemistry \& Cytochemistry}, year = 1977,
volume={25}, journal = {Journal of Histochemistry \& Cytochemistry},
number={7}, publisher = {SAGE Publications Sage CA: Los Angeles, CA},
year={1977}, volume = 25,
publisher={SAGE Publications Sage CA: Los Angeles, CA} number = 7
}
@article{kapur1985new,
title={A new method for gray-level picture thresholding using the entropy of the histogram},
author={Kapur, Jagat Narain and Sahoo, Prasanna K and Wong, Andrew KC},
journal={Computer vision, graphics, and image processing},
volume={29},
number={3},
year={1985},
publisher={Elsevier}
}
@article{otsu1979threshold,
title={A threshold selection method from gray-level histograms},
author={Otsu, Nobuyuki},
journal={IEEE transactions on systems, man, and cybernetics},
volume={9},
number={1},
year={1979},
publisher={IEEE}
}
@article{sahoo1988survey,
title={A survey of thresholding techniques},
author={Sahoo, Prasanna K and Soltani, SAKC and Wong, Andrew KC},
journal={Computer vision, graphics, and image processing},
volume={41},
number={2},
year={1988},
publisher={Elsevier}
}
@article{wilbur1992automatic,
title={The automatic identification of stop words},
author={Wilbur, W John and Sirotkin, Karl},
journal={Journal of information science},
volume={18},
number={1},
year={1992},
publisher={Sage Publications Sage CA: Thousand Oaks, CA}
} }