tesseract源码Page Layout解读(倾斜矫正)
2017-11-03 14:19
169 查看
http://blog.csdn.net/kaelsass/article/details/46874627
http://www.jianshu.com/p/7c63fd62ea28
代码调用
代码附录
Tesseract::SegmentPage[ccmain/pagesegmain.cpp] ->
/*** Segment the page according to the current value of tessedit_pageseg_mode.
* pix_binary_ is used as the source image and should not be NULL.
* On return the blocks list owns all the constructed page layout.
*/
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr) {
ASSERT_HOST(pix_binary_ != NULL);
int width = pixGetWidth(pix_binary_);
int height = pixGetHeight(pix_binary_);
// Get page segmentation mode.
PageSegMode pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
input_file != NULL && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks);
}
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block->set_right_to_left(right_to_left());
block_it.add_to_end(block);
} else {
// UNLV file present. Use PSM_SINGLE_BLOCK.
pageseg_mode = PSM_SINGLE_BLOCK;
}
// The diacritic_blobs holds noise blobs that may be diacritics. They
// are separated out on areas of the image that seem noisy and short-circuit
// the layout process, going straight from the initial partition creation
// right through to after word segmentation, where they are added to the
// rej_cblobs list of the most appropriate word. From there classification
// will determine whether they are used.
BLOBNBOX_LIST diacritic_blobs;
int auto_page_seg_ret_val = 0;
TO_BLOCK_LIST to_blocks;
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
PSM_SPARSE(pageseg_mode)) {
auto_page_seg_ret_val = AutoPageSeg(
pageseg_mode, blocks, &to_blocks,
enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
return auto_page_seg_ret_val;
// To create blobs from the image region bounds uncomment this line:
// to_blocks.clear(); // Uncomment to go back to the old mode.
} else {
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
if (pageseg_mode == PSM_CIRCLE_WORD) {
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
if (pixcleaned != NULL) {
pixDestroy(&pix_binary_);
pix_binary_ = pixcleaned;
}
}
}
if (auto_page_seg_ret_val < 0) {
return -1;
}
if (blocks->empty()) {
if (textord_debug_tabfind)
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
bool splitting =
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
bool cjk_mode = textord_use_cjk_fp_model;
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
pix_thresholds_, pix_grey_, splitting || cjk_mode,
&diacritic_blobs, blocks, &to_blocks);
return auto_page_seg_ret_val;
}
Tesseract::SetupPageSegAndDetectOrientation[ccmain/pagesegmain.cpp] ->
* Sets up auto page segmentation, determines the orientation, and corrects it.* Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
* facilitate testing.
* photo_mask_pix is a pointer to a NULL pointer that will be filled on return
* with the leptonica photo mask, which must be pixDestroyed by the caller.
* to_blocks is an empty list that will be filled with (usually a single)
* block that is used during layout analysis. This ugly API is required
* because of the possibility of a unlv zone file.
* TODO(rays) clean this up.
* See AutoPageSeg for other arguments.
* The returned ColumnFinder must be deleted after use.
*/
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
Pix** music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
TabVector_LIST h_lines;
ICOORD bleft(0, 0);
ASSERT_HOST(pix_binary_ != NULL);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "PageSegInput");
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoLines");
}
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoImages");
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);
TO_BLOCK_IT to_block_it(to_blocks);
// There must be exactly one input block.
// TODO(rays) handle new textline finding with a UNLV zone file.
ASSERT_HOST(to_blocks->singleton());
TO_BLOCK* to_block = to_block_it.data();
TBOX blkbox = to_block->block->bounding_box();
ColumnFinder* finder = NULL;
int estimated_resolution = source_resolution_;
if (source_resolution_ == kMinCredibleResolution) {
// Try to estimate resolution from typical body text size.
int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor);
if (res > estimated_resolution && res < kMaxCredibleResolution) {
estimated_resolution = res;
tprintf("Estimating resolution as %d\n", estimated_resolution);
}
}
if (to_block->line_size >= 2) {
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
estimated_resolution, textord_use_cjk_fp_model,
textord_tabfind_aligned_gap_fraction, &v_lines,
&h_lines, vertical_x, vertical_y);
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
if (equ_detect_) {
equ_detect_->LabelSpecialText(to_block);
}
BLOBNBOX_CLIST osd_blobs;
// osd_orientation is the number of 90 degree rotations to make the
// characters upright. (See osdetect.h for precise definition.)
// We want the text lines horizontal, (vertical text indicates vertical
// textlines) which may conflict (eg vertically written CJK).
int osd_orientation = 0;
bool vertical_text = textord_tabfind_force_vertical_text ||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
4000
if (!vertical_text && textord_tabfind_vertical_text &&
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
vertical_text =
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
to_block, &osd_blobs);
}
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
GenericVector<int> osd_scripts;
if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the
// scripts to those allowed by *this.
AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
for (int s = 0; s < sub_langs_.size(); ++s) {
AddAllScriptsConverted(sub_langs_[s]->unicharset,
osd_tess->unicharset, &osd_scripts);
}
}
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
if (pageseg_mode == PSM_OSD_ONLY) {
delete finder;
return NULL;
}
osd_orientation = osr->best_result.orientation_id;
double osd_score = osr->orientations[osd_orientation];
double osd_margin = min_orientation_margin * 2;
for (int i = 0; i < 4; ++i) {
if (i != osd_orientation &&
osd_score - osr->orientations[i] < osd_margin) {
osd_margin = osd_score - osr->orientations[i];
}
}
int best_script_id = osr->best_result.script_id;
const char* best_script_str =
osd_tess->unicharset.get_script_from_script_id(best_script_id);
bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
best_script_id == osd_tess->unicharset.hiragana_sid() ||
best_script_id == osd_tess->unicharset.katakana_sid() ||
strcmp("Japanese", best_script_str) == 0 ||
strcmp("Korean", best_script_str) == 0 ||
strcmp("Hangul", best_script_str) == 0;
if (cjk) {
finder->set_cjk_script(true);
}
if (osd_margin < min_orientation_margin) {
// The margin is weak.
if (!cjk && !vertical_text && osd_orientation == 2) {
// upside down latin text is improbable with such a weak margin.
tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
"Don't rotate.\n", osd_margin);
osd_orientation = 0;
} else {
tprintf(
"OSD: Weak margin (%.2f) for %d blob text block, "
"but using orientation anyway: %d\n",
osd_margin, osd_blobs.length(), osd_orientation);
}
}
}
osd_blobs.shallow_clear();
finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
}
return finder;
}
} // namespace tesseract.
LineFinder::FindAndRemoveLines[textord/linefind.cpp]
// Finds vertical and horizontal line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vertical_x and vertical_y contain a sum of the output vectors, // thereby giving the mean vertical direction. // If pix_music_mask != NULL, and music is detected, a mask of the staves // and anything that is connected (bars, notes etc.) will be returned in // pix_music_mask, the mask subtracted from pix, and the lines will not // appear in v_lines or h_lines. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. // The detected lines are removed from the pix. void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix, int* vertical_x, int* vertical_y, Pix** pix_music_mask, TabVector_LIST* v_lines, TabVector_LIST* h_lines) { PERF_COUNT_START("FindAndRemoveLines") if (pix == NULL || vertical_x == NULL || vertical_y == NULL) { tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); return; } Pix* pix_vline = NULL; Pix* pix_non_vline = NULL; Pix* pix_hline = NULL; Pix* pix_non_hline = NULL; Pix* pix_intersections = NULL; Pixa* pixa_display = debug ? pixaCreate(0) : NULL; GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline, &pix_intersections, pix_music_mask, pixa_display); // Find lines, convert to TabVector_LIST and remove those that are used. FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y, &pix_vline, pix_non_vline, pix, v_lines); if (pix_hline != NULL) { // Recompute intersections and re-filter false positive h-lines. 
if (pix_vline != NULL) pixAnd(pix_intersections, pix_vline, pix_hline); else pixDestroy(&pix_intersections); if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) { pixDestroy(&pix_hline); } } FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, &pix_hline, pix_non_hline, pix, h_lines); if (pixa_display != NULL && pix_vline != NULL) pixaAddPix(pixa_display, pix_vline, L_CLONE); if (pixa_display != NULL && pix_hline != NULL) pixaAddPix(pixa_display, pix_hline, L_CLONE); if (pix_vline != NULL && pix_hline != NULL) { // Remove joins (intersections) where lines cross, and the residue. // Recalculate the intersections, since some lines have been deleted. pixAnd(pix_intersections, pix_vline, pix_hline); // Fatten up the intersections and seed-fill to get the intersection // residue. Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5); pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); // Now remove the intersection residue. pixSubtract(pix, pix, pix_join_residue); pixDestroy(&pix_join_residue); } // Remove any detected music. if (pix_music_mask != NULL && *pix_music_mask != NULL) { if (pixa_display != NULL) pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); pixSubtract(pix, pix, *pix_music_mask); } if (pixa_display != NULL) pixaAddPix(pixa_display, pix, L_CLONE); pixDestroy(&pix_vline); pixDestroy(&pix_non_vline); pixDestroy(&pix_hline); pixDestroy(&pix_non_hline); pixDestroy(&pix_intersections); if (pixa_display != NULL) { pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf"); pixaDestroy(&pixa_display); } PERF_COUNT_END
ImageFind::FindImages [textord/imagefind.cpp]
// Finds image regions within the BINARY source pix (page image) and returns
// the image regions as a mask image.
// The returned pix may be NULL, meaning no images found.
// NOTE(review): every return path visible here hands back the result of a
// pixCreate call sized like the input, so "no images" appears to be reported
// as a freshly created mask rather than NULL - confirm against callers.
// If not NULL, it must be pixDestroyed by the caller.
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
  // Not worth looking at small images.
  if (pixGetWidth(pix) < kMinImageFindSize ||
      pixGetHeight(pix) < kMinImageFindSize)
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);

  // Reduce by factor 2.
  Pix* pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    pixa_debug->AddPix(pixr, "CascadeReduced");

  // Get the halftone mask directly from Leptonica.
  //
  // Leptonica will print an error message and return NULL if we call
  // pixGenerateHalftoneMask(pixr, NULL, ...) with too small image, so we
  // want to bypass that.
  if (pixGetWidth(pixr) < kMinImageFindSize ||
      pixGetHeight(pixr) < kMinImageFindSize) {
    pixDestroy(&pixr);
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
  }
  // Get the halftone mask.
  l_int32 ht_found = 0;
  Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr)
                     ? pixaCreate(0)
                     : nullptr;
  Pix* pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
  if (pixadb) {
    Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
    if (textord_tabfind_show_images && pixa_debug != nullptr)
      pixa_debug->AddPix(pixdb, "HalftoneMask");
    pixDestroy(&pixdb);
    pixaDestroy(&pixadb);
  }
  pixDestroy(&pixr);
  // Discard the mask when Leptonica reports that no halftone was found.
  if (!ht_found && pixht2 != nullptr)
    pixDestroy(&pixht2);
  if (pixht2 == nullptr)
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);

  // Expand back up again.
  Pix* pixht = pixExpandReplicate(pixht2, 2);
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    pixa_debug->AddPix(pixht, "HalftoneReplicated");
  pixDestroy(&pixht2);

  // Fill to capture pixels near the mask edges that were missed
  Pix* pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
  pixOr(pixht, pixht, pixt);
  pixDestroy(&pixt);

  // Eliminate lines and bars that may be joined to images.
  Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    pixa_debug->AddPix(pixfinemask, "FineMask");
  Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
  Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
  pixDestroy(&pixreduced);
  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
  Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
  pixDestroy(&pixreduced2);
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
  // Combine the coarse and fine image masks.
  pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
  pixDestroy(&pixfinemask);
  // Dilate a bit to make sure we get everything.
  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
  Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
  pixDestroy(&pixcoarsemask);
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    pixa_debug->AddPix(pixmask, "MaskDilated");
  // And the image mask with the line and bar remover.
  pixAnd(pixht, pixht, pixmask);
  pixDestroy(&pixmask);
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    pixa_debug->AddPix(pixht, "FinalMask");
  // Make the result image the same size as the input.
  Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
  pixOr(result, result, pixht);
  pixDestroy(&pixht);
  return result;
}
Textord::find_components [textord/tordmain.cpp]
/********************************************************************** * find_components * * Find the C_OUTLINEs of the connected components in each block, put them * in C_BLOBs, and filter them by size, putting the different size * grades on different lists in the matching TO_BLOCK in to_blocks. **********************************************************************/ void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); if (width > MAX_INT16 || height > MAX_INT16) { tprintf("Input image too large! (%d, %d)\n", width, height); return; // Can't handle it. } set_global_loc_code(LOC_EDGE_PROG); BLOCK_IT block_it(blocks); // iterator for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); if (block->poly_block() == NULL || block->poly_block()->IsText()) { extract_edges(pix, block); } } assign_blobs_to_blocks2(pix, blocks, to_blocks); ICOORD page_tr(width, height); filter_blobs(page_tr, to_blocks, !textord_test_landscape); }
相关文章推荐
- OpenCV仪表数据识别(四):图像倾斜矫正
- CorelDRAW中怎样对图像进行快速的倾斜矫正
- 名片识别(一)倾斜矫正
- [置顶] openCV-图片倾斜矫正java版
- 车牌识别--倾斜矫正
- 车牌识别--倾斜矫正
- 手写数字识别系统之倾斜矫正
- 车牌识别--倾斜矫正
- 车牌识别--倾斜矫正
- 车牌识别--倾斜矫正
- OpenCV仪表数据识别(四):图像倾斜矫正
- 图像倾斜矫正方程基本的图像变换
- 图片文档倾斜矫正算法 附完整c代码
- 图片文档倾斜矫正算法 附完整c代码
- 车牌倾斜矫正
- 倾斜物体矫正
- 车牌识别--倾斜矫正
- 手写数字识别系统之倾斜矫正
- dumpsys命令的使用及telephony.registry解读
- 用铁轨理论解读SOA