Enumerate textual PDF content elements in C# and VB.NET

With GemBox.Pdf, you can enumerate textual PDF content elements in your C# or VB.NET application.

The following example demonstrates how to enumerate textual PDF content elements in a PDF document.

Screenshot
PDF Library for C# / VB.NET Screenshot

See the full code below; use Run Example to execute it.

Upload your file (Drag files here)

Download an input file

using System;
using System.Text;
using GemBox.Pdf;
using GemBox.Pdf.Content;

/// <summary>
/// Sample that loads a PDF document and prints the attributes of every
/// textual content element found on each of its pages.
/// </summary>
class Sample
{
    /// <summary>
    /// Loads "TextContent.pdf", collects the text, font, color and location of
    /// each text content element page by page, and writes the report to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    [STAThread]
    static void Main(string[] args)
    {
        // If using Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY");

        StringBuilder sb = new StringBuilder();

        // The using statement disposes the document even when enumeration throws,
        // so the underlying file is not left open on the error path.
        using (PdfDocument document = PdfDocument.Load("TextContent.pdf"))
        {
            // Enumerate all pages.
            for (int i = 0; i < document.Pages.Count; i++)
            {
                sb.AppendFormat("Page number: {0}", i + 1).AppendLine();
                sb.AppendLine();

                // Retrieve a page object.
                var page = document.Pages[i];

                // Enumerate all content elements (text, paths and external objects) contained in the current page.
                foreach (var contentElement in page.Content.Elements.All())
                {
                    // Only text elements are reported; everything else is skipped.
                    if (contentElement.ElementType != PdfContentElementType.Text)
                    {
                        continue;
                    }

                    // Retrieve a text content element.
                    var textContent = (PdfTextContent)contentElement;

                    var text = textContent.ToString();
                    var font = textContent.Format.Text.Font;
                    var color = textContent.Format.Fill.Color;
                    var location = textContent.Location;

                    // Append the attributes of the text content element.
                    sb.AppendFormat("Unicode text: {0}", text).AppendLine();
                    sb.AppendFormat("Font name: {0}", font.Face.Family.Name).AppendLine();
                    sb.AppendFormat("Font size: {0}", font.Size).AppendLine();
                    sb.AppendFormat("Font style: {0}", font.Face.Style).AppendLine();
                    sb.AppendFormat("Font weight: {0}", font.Face.Weight).AppendLine();
                    sb.AppendFormat("Font stretch: {0}", font.Face.Stretch).AppendLine();

                    // The color line is emitted only when the fill color can be expressed as RGB.
                    if (color.TryGetRgb(out double red, out double green, out double blue))
                    {
                        sb.AppendFormat("Color: Red={0}, Green={1}, Blue={2}", red, green, blue).AppendLine();
                    }

                    sb.AppendFormat("Location: X={0:0.00}, Y={1:0.00}", location.X, location.Y).AppendLine();
                    sb.AppendLine();
                }
            }
        }

        Console.WriteLine(sb.ToString());
    }
}
Imports System.Text
Imports GemBox.Pdf
Imports GemBox.Pdf.Content

''' <summary>
''' Sample that loads a PDF document and prints the attributes of every
''' textual content element found on each of its pages.
''' </summary>
Module Samples

    ''' <summary>
    ''' Loads "TextContent.pdf", collects the text, font, color and location of
    ''' each text content element page by page, and writes the report to the console.
    ''' </summary>
    Sub Main()

        ' If using Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY")

        Dim sb As New StringBuilder()

        ' The Using block disposes the document even when enumeration throws,
        ' so the underlying file is not left open on the error path.
        Using document As PdfDocument = PdfDocument.Load("TextContent.pdf")

            ' Enumerate all pages.
            For i As Integer = 0 To document.Pages.Count - 1
                sb.AppendFormat("Page number: {0}", i + 1).AppendLine()
                sb.AppendLine()

                ' Retrieve a page object.
                Dim page = document.Pages(i)

                ' Enumerate all content elements (text, paths and external objects) contained in the current page.
                For Each contentElement In page.Content.Elements.All()
                    ' Only text elements are reported; everything else is skipped.
                    If contentElement.ElementType = PdfContentElementType.Text Then
                        ' Retrieve a text content element.
                        Dim textContentElement = DirectCast(contentElement, PdfTextContent)

                        Dim text = textContentElement.ToString()
                        Dim font = textContentElement.Format.Text.Font
                        Dim color = textContentElement.Format.Fill.Color
                        Dim location = textContentElement.Location

                        ' Append the attributes of the text content element.
                        sb.AppendFormat("Unicode text: {0}", text).AppendLine()
                        sb.AppendFormat("Font name: {0}", font.Face.Family.Name).AppendLine()
                        sb.AppendFormat("Font size: {0}", font.Size).AppendLine()
                        sb.AppendFormat("Font style: {0}", font.Face.Style).AppendLine()
                        sb.AppendFormat("Font weight: {0}", font.Face.Weight).AppendLine()
                        sb.AppendFormat("Font stretch: {0}", font.Face.Stretch).AppendLine()

                        ' The color line is emitted only when the fill color can be expressed as RGB.
                        Dim red As Double = Nothing
                        Dim green As Double = Nothing
                        Dim blue As Double = Nothing
                        If color.TryGetRgb(red, green, blue) Then
                            sb.AppendFormat("Color: Red={0}, Green={1}, Blue={2}", red, green, blue).AppendLine()
                        End If

                        sb.AppendFormat("Location: X={0:0.00}, Y={1:0.00}", location.X, location.Y).AppendLine()
                        sb.AppendLine()
                    End If
                Next
            Next

        End Using

        Console.WriteLine(sb.ToString())

    End Sub

End Module