VintaSoft PDF .NET Plug-in Discussions
Questions, comments and suggestions concerning VintaSoft PDF .NET Plug-in.
Board index < VintaSoft Imaging < VintaSoft PDF .NET Plug-in Discussions
----------------------------------------------------------------------------------------------------------------------------------------------
' Fragment of a viewer class: the enclosing class and method headers are not part of this snippet.
' NOTE(review): the assignments to Text below suggest a Windows Forms form - confirm against the full source.
' Fields holding the viewer control, the opened PDF stream/document and some UI state.
Private imageViewer1 As Vintasoft.Imaging.ImageViewer
Private _fileStream As System.IO.Stream = Nothing
Private _document As Vintasoft.Pdf.PdfDocument
Private _useEmbeddedThumbnails As Boolean = True
' Format string for the window title; {0} receives the "- <file name>" suffix built further down.
Private _titlePrefix As String = "VintaSoft PDF Reader Demo v3.1 {0}"
Private _isFirstOpenedPage As Boolean = False
' Open the PDF file read-only and pass the stream to the document controller.
' NOTE(review): the stream is stored in a field and is not closed anywhere in this fragment - confirm it is disposed elsewhere.
_fileStream = New System.IO.FileStream(strArchivoPDF, System.IO.FileMode.Open, System.IO.FileAccess.Read)
_document = Vintasoft.Pdf.PdfDocumentController.OpenDocument(_fileStream)
' Rendering options: draw both annotation kinds and apply the embedded-thumbnails flag from the field above.
_document.RenderingSettings.DrawPdfAnnotations = True
_document.RenderingSettings.DrawVintasoftAnnotations = True
_document.RenderingSettings.UseEmbeddedThumbnails = _useEmbeddedThumbnails
' Rewind the stream to its beginning before the worker thread is started.
_fileStream.Position = 0
' Run OpenFileAsynchronously (defined outside this fragment) on a separate thread.
Dim openFileThread As New System.Threading.Thread(AddressOf OpenFileAsynchronously)
openFileThread.Start()
_isFirstOpenedPage = True
' Window title: the title format with "- <file name>" inserted, plus a marker for encrypted documents.
Text = String.Format(_titlePrefix, String.Format("- {0}", System.IO.Path.GetFileName(strArchivoPDF)))
If _document.IsEncrypted Then
Text += " (SECURED)"
End If
' #############################
' Generate the JPG files...
' #############################
firstPage = 1
lastPage = _document.Pages.Count
' pageNo is the 1-based page number used for the JPG file name.
' The Pages collection is indexed from 0 (0 .. Count - 1), so the page itself
' is read with pageNo - 1; indexing with pageNo would skip the first page and
' run past the end of the collection on the last iteration.
For pageNo = 1 To _document.Pages.Count
strNombreArchivoJPG = "PAG_" & Format(pageNo, "000") & ".jpg"
MsgBox(_document.Pages(pageNo - 1).TextRegion.TextContent)
' We need to find the position of each word in the text.
.../...
Next
----------------------------------------------------------------------------------------------------------------------------------------------
We have been reviewing the help, but we cannot find a way to do this easily with TextRegion, Subregion, LineSubregion, GetWordSubRegion ???
Imports System.Drawing
Imports Vintasoft.Pdf
Imports Vintasoft.Pdf.Tree
Imports Vintasoft.Pdf.Content.TextExtraction
''' <summary>
''' Console sample that prints the text and the bounding box of every word
''' on every page of a PDF document.
''' </summary>
Module Module1

    ''' <summary>
    ''' Opens "d:\PdfTest.pdf", prints the word information of each page and
    ''' then waits for Enter.
    ''' </summary>
    Sub Main()
        ' open existing PDF document
        Dim pdfDocument As New PdfDocument("d:\PdfTest.pdf")
        Try
            ' for each PDF page (the Pages collection is indexed from 0)
            For i As Integer = 0 To pdfDocument.Pages.Count - 1
                Console.WriteLine(String.Format("PDF page {0}", i))
                GetInfoAboutWordsOfPdfPage(pdfDocument.Pages(i))
            Next
        Finally
            ' free resources even if text extraction throws
            pdfDocument.Dispose()
        End Try

        Console.ReadLine()
    End Sub

    ''' <summary>
    ''' Writes to the console, for every text line of the page, each word
    ''' together with the position and size of its bounding box.
    ''' </summary>
    ''' <param name="page">PDF page whose text region is examined.</param>
    ''' <remarks>
    ''' A word is a maximal run of symbols that are not a space (" ").
    ''' Spaces are separators only: they are not part of the word text and do
    ''' not produce empty words. The last word of a line is reported even when
    ''' the line does not end with a space.
    ''' </remarks>
    Public Sub GetInfoAboutWordsOfPdfPage(ByVal page As PdfPage)
        Dim region As TextRegion = page.TextRegion
        ' NOTE(review): guard in case a page without text yields no region - confirm against the API.
        If region Is Nothing Then
            Return
        End If

        ' for each text line on PDF page
        For i As Integer = 0 To region.Lines.Length - 1
            Dim line As TextRegionLine = region.Lines(i)
            Console.WriteLine(String.Format(" Line{0}", i))

            Dim wordIndexInLine As Integer = 0
            Dim wordString As String = ""
            ' index of the first symbol of the word being collected; -1 = not inside a word
            Dim wordStartSymbolIndex As Integer = -1

            ' for each symbol in text line
            For j As Integer = 0 To line.Symbols.Length - 1
                Dim currentSymbol As TextRegionSymbol = line.Symbols(j)

                If currentSymbol.TextSymbol.Symbol = " " Then
                    ' a space closes the current word; its last symbol is the one before the space
                    If wordStartSymbolIndex >= 0 Then
                        WriteWordInfo(line, wordIndexInLine, wordString, wordStartSymbolIndex, j - 1)
                        wordIndexInLine = wordIndexInLine + 1
                        wordString = ""
                        wordStartSymbolIndex = -1
                    End If
                Else
                    If wordStartSymbolIndex < 0 Then
                        wordStartSymbolIndex = j
                    End If
                    wordString = String.Format("{0}{1}", wordString, currentSymbol.TextSymbol.Symbol)
                End If
            Next

            ' the last word of the line has no trailing space to close it
            If wordStartSymbolIndex >= 0 Then
                WriteWordInfo(line, wordIndexInLine, wordString, wordStartSymbolIndex, line.Symbols.Length - 1)
            End If
        Next
    End Sub

    ''' <summary>
    ''' Prints one word with the rectangle spanned from the left-top corner of
    ''' its first symbol to the right-bottom corner of its last symbol.
    ''' </summary>
    Private Sub WriteWordInfo(ByVal line As TextRegionLine, _
                              ByVal wordIndex As Integer, _
                              ByVal wordText As String, _
                              ByVal firstSymbolIndex As Integer, _
                              ByVal lastSymbolIndex As Integer)
        Dim wordX As Single = line.Symbols(firstSymbolIndex).Region.LeftTop.X
        Dim wordY As Single = line.Symbols(firstSymbolIndex).Region.LeftTop.Y
        Dim wordWidth As Single = line.Symbols(lastSymbolIndex).Region.RightBottom.X - wordX
        ' NOTE(review): the sign of the height depends on the direction of the Y axis of the region coordinates - confirm.
        Dim wordHeight As Single = line.Symbols(lastSymbolIndex).Region.RightBottom.Y - wordY

        Console.WriteLine(String.Format(" Word{0}={1}, X={2}, Y={3}, Width={4}, Height={5}", _
                                        wordIndex, _
                                        wordText, _
                                        wordX, wordY, wordWidth, wordHeight))
    End Sub

End Module
Best regards, Alexander