Split PDF file in C# and VB.NET
With GemBox.Pdf, you can split a PDF file into several PDF files in your C# or VB.NET application.
As PDF pages are self-contained (all information required for their appearance and interactivity is either stored or referenced from the page), they can be easily cloned to other PDF files.
To split a PDF file, you simply need to clone each of its pages into a new PDF file.
The following example shows how you can split a PDF file so that each page goes into its own PDF file.
using GemBox.Pdf;
using System.IO;
using System.IO.Compression;
/// <summary>
/// Example program that splits a PDF file so that every page becomes its own
/// single-page PDF document, stored as an entry of a ZIP archive.
/// </summary>
class Program
{
    /// <summary>
    /// Loads the source PDF, clones each page into a new <c>PdfDocument</c> and
    /// saves that document as "Page N.pdf" inside "Output.zip".
    /// </summary>
    static void Main()
    {
        // If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY");

        // Open a source PDF file and create a destination ZIP file.
        // File.Create truncates an already existing "Output.zip"; File.OpenWrite would
        // leave the old file's trailing bytes in place when the new archive is shorter,
        // which results in a corrupt ZIP file.
        using (var source = PdfDocument.Load("%InputFileName%"))
        using (var archiveStream = File.Create("Output.zip"))
        using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Create))
        {
            // Iterate through the PDF pages.
            for (int pageIndex = 0; pageIndex < source.Pages.Count; pageIndex++)
            {
                // Create a ZIP entry for each source document page (names are 1-based).
                var entry = archive.CreateEntry($"Page {pageIndex + 1}.pdf");

                // Save each page as a separate destination document to the ZIP entry.
                // The entry stream is closed before the next entry is created.
                using (var entryStream = entry.Open())
                using (var destination = new PdfDocument())
                {
                    destination.Pages.AddClone(source.Pages[pageIndex]);
                    destination.Save(entryStream);
                }
            }
        }
    }
}
Imports GemBox.Pdf
Imports System.IO
Imports System.IO.Compression
''' <summary>
''' Example program that splits a PDF file so that every page becomes its own
''' single-page PDF document, stored as an entry of a ZIP archive.
''' </summary>
Module Program

    ''' <summary>
    ''' Loads the source PDF, clones each page into a new PdfDocument and
    ''' saves that document as "Page N.pdf" inside "Output.zip".
    ''' </summary>
    Sub Main()

        ' If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY")

        ' Open a source PDF file and create a destination ZIP file.
        ' File.Create truncates an already existing "Output.zip"; File.OpenWrite would
        ' leave the old file's trailing bytes in place when the new archive is shorter,
        ' which results in a corrupt ZIP file.
        Using source = PdfDocument.Load("%InputFileName%")
            Using archiveStream = File.Create("Output.zip")
                Using archive = New ZipArchive(archiveStream, ZipArchiveMode.Create)

                    ' Iterate through the PDF pages.
                    For pageIndex As Integer = 0 To source.Pages.Count - 1

                        ' Create a ZIP entry for each source document page (names are 1-based).
                        Dim entry = archive.CreateEntry($"Page {pageIndex + 1}.pdf")

                        ' Save each page as a separate destination document to the ZIP entry.
                        ' The entry stream is closed before the next entry is created.
                        Using entryStream = entry.Open()
                            Using destination = New PdfDocument()
                                destination.Pages.AddClone(source.Pages(pageIndex))
                                destination.Save(entryStream)
                            End Using
                        End Using
                    Next
                End Using
            End Using
        End Using
    End Sub
End Module
In the above example, the uploaded PDF file is split so that each page is cloned to a new PdfDocument, which is then saved to a ZIP archive that gets saved to a file.
If you want to clone a PDF page to an existing PdfDocument, see this Cloning example.
A PDF file may contain bookmarks that indicate different document sections; the "Split PDF file by bookmarks (outlines)" example further below shows how you can split such a PDF file by sections using the document's bookmarks. The "Split large PDF file" example further below shows how you can split a very large PDF file into smaller PDF files by utilizing the PdfDocument's lazy loading and unloading.
Split PDF file by page ranges
Besides splitting a PDF file by page, you can also split a PDF file by page ranges. The following example shows how you can create multiple PDF files by splitting a source PDF file with the specified ranges of page numbers.
using GemBox.Pdf;
using System;
using System.IO;
using System.IO.Compression;
/// <summary>
/// Example program that splits a PDF file into several PDF documents, one per
/// configured page range, and stores them as entries of a ZIP archive.
/// </summary>
class Program
{
    /// <summary>
    /// Loads the source PDF and, for every page range, clones the pages of that range
    /// into a new <c>PdfDocument</c> saved as "Pages A-B.pdf" inside "OutputRanges.zip".
    /// </summary>
    static void Main()
    {
        // If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY");

        // List of page ranges (zero-based, inclusive indexes) used for splitting the PDF document.
        var pageRanges = new[]
        {
            new { FirstPageIndex = 0, LastPageIndex = 2 },
            new { FirstPageIndex = 3, LastPageIndex = 3 },
            new { FirstPageIndex = 4, LastPageIndex = 6 }
        };

        // Open a source PDF file and create a destination ZIP file.
        // File.Create truncates an already existing "OutputRanges.zip"; File.OpenWrite would
        // leave the old file's trailing bytes in place when the new archive is shorter,
        // which results in a corrupt ZIP file.
        using (var source = PdfDocument.Load("%#Chapters.pdf%"))
        using (var archiveStream = File.Create("OutputRanges.zip"))
        using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Create))
        {
            // Iterate through page ranges.
            foreach (var pageRange in pageRanges)
            {
                int pageIndex = pageRange.FirstPageIndex;

                // Exclusive end index of the range, clamped to the number of pages in the source.
                int pageCount = Math.Min(pageRange.LastPageIndex + 1, source.Pages.Count);

                // Skip ranges that start past the last source page; otherwise an empty
                // document with a reversed name such as "Pages 8-7.pdf" would be written.
                if (pageIndex >= pageCount)
                {
                    continue;
                }

                // Entry name uses 1-based page numbers; pageCount equals the 1-based last page.
                var entry = archive.CreateEntry($"Pages {pageIndex + 1}-{pageCount}.pdf");
                using (var entryStream = entry.Open())
                using (var destination = new PdfDocument())
                {
                    // Add range of source pages to destination document.
                    while (pageIndex < pageCount)
                    {
                        destination.Pages.AddClone(source.Pages[pageIndex++]);
                    }

                    // Save destination document to the ZIP entry.
                    destination.Save(entryStream);
                }
            }
        }
    }
}
Imports GemBox.Pdf
Imports System
Imports System.IO
Imports System.IO.Compression
''' <summary>
''' Example program that splits a PDF file into several PDF documents, one per
''' configured page range, and stores them as entries of a ZIP archive.
''' </summary>
Module Program

    ''' <summary>
    ''' Loads the source PDF and, for every page range, clones the pages of that range
    ''' into a new PdfDocument saved as "Pages A-B.pdf" inside "OutputRanges.zip".
    ''' </summary>
    Sub Main()

        ' If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY")

        ' List of page ranges (zero-based, inclusive indexes) used for splitting the PDF document.
        Dim pageRanges = {
            New With {.FirstPageIndex = 0, .LastPageIndex = 2},
            New With {.FirstPageIndex = 3, .LastPageIndex = 3},
            New With {.FirstPageIndex = 4, .LastPageIndex = 6}
        }

        ' Open a source PDF file and create a destination ZIP file.
        ' File.Create truncates an already existing "OutputRanges.zip"; File.OpenWrite would
        ' leave the old file's trailing bytes in place when the new archive is shorter,
        ' which results in a corrupt ZIP file.
        Using source = PdfDocument.Load("%#Chapters.pdf%")
            Using archiveStream = File.Create("OutputRanges.zip")
                Using archive = New ZipArchive(archiveStream, ZipArchiveMode.Create)

                    ' Iterate through page ranges.
                    For Each pageRange In pageRanges
                        Dim pageIndex As Integer = pageRange.FirstPageIndex

                        ' Exclusive end index of the range, clamped to the number of pages in the source.
                        Dim pageCount As Integer = Math.Min(pageRange.LastPageIndex + 1, source.Pages.Count)

                        ' Skip ranges that start past the last source page; otherwise an empty
                        ' document with a reversed name such as "Pages 8-7.pdf" would be written.
                        If pageIndex >= pageCount Then
                            Continue For
                        End If

                        ' Entry name uses 1-based page numbers; pageCount equals the 1-based last page.
                        Dim entry = archive.CreateEntry($"Pages {pageIndex + 1}-{pageCount}.pdf")
                        Using entryStream = entry.Open()
                            Using destination = New PdfDocument()

                                ' Add range of source pages to destination document.
                                While pageIndex < pageCount
                                    destination.Pages.AddClone(source.Pages(pageIndex))
                                    pageIndex += 1
                                End While

                                ' Save destination document to the ZIP entry.
                                destination.Save(entryStream)
                            End Using
                        End Using
                    Next
                End Using
            End Using
        End Using
    End Sub
End Module
Split PDF file by bookmarks (outlines)
using GemBox.Pdf;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
/// <summary>
/// Example program that splits a PDF file into sections delimited by the document's
/// top-level bookmarks (outlines) and stores each section as an entry of a ZIP archive.
/// </summary>
class Program
{
    /// <summary>
    /// Loads the source PDF and, for every top-level outline, clones the pages from that
    /// outline's destination page up to (but not including) the next outline's destination
    /// page into a new <c>PdfDocument</c> saved as "&lt;outline title&gt;.pdf" inside
    /// "OutputBookmarks.zip".
    /// </summary>
    static void Main()
    {
        // If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY");

        // Open a source PDF file and create a destination ZIP file.
        // File.Create truncates an already existing "OutputBookmarks.zip"; File.OpenWrite would
        // leave the old file's trailing bytes in place when the new archive is shorter,
        // which results in a corrupt ZIP file.
        using (var source = PdfDocument.Load("%#Chapters.pdf%"))
        using (var archiveStream = File.Create("OutputBookmarks.zip"))
        using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Create))
        {
            // Map every page to its zero-based index so an outline's destination page
            // can be translated into a position within source.Pages.
            Dictionary<PdfPage, int> pageIndexes = source.Pages
                .Select((page, index) => new { page, index })
                .ToDictionary(item => item.page, item => item.index);

            // Iterate through document outlines.
            var outlines = source.Outlines;
            for (int index = 0; index < outlines.Count; ++index)
            {
                var currentOutline = outlines[index];
                var nextOutline = index + 1 < outlines.Count ? outlines[index + 1] : null;

                // NOTE(review): assumes every outline has a Destination that points to a page
                // of this document - confirm for PDFs whose bookmarks use actions instead.
                int pageIndex = pageIndexes[currentOutline.Destination.Page];

                // The section ends where the next bookmark starts, or at the end of the document.
                int pageCount = nextOutline == null ? source.Pages.Count : pageIndexes[nextOutline.Destination.Page];

                // The ZIP entry is named after the bookmark's title.
                var entry = archive.CreateEntry($"{currentOutline.Title}.pdf");
                using (var entryStream = entry.Open())
                using (var destination = new PdfDocument())
                {
                    // Add source pages from current bookmark till next bookmark to destination document.
                    while (pageIndex < pageCount)
                    {
                        destination.Pages.AddClone(source.Pages[pageIndex++]);
                    }

                    // Save destination document to the ZIP entry.
                    destination.Save(entryStream);
                }
            }
        }
    }
}
Imports GemBox.Pdf
Imports System.Collections.Generic
Imports System.IO
Imports System.IO.Compression
Imports System.Linq
''' <summary>
''' Example program that splits a PDF file into sections delimited by the document's
''' top-level bookmarks (outlines) and stores each section as an entry of a ZIP archive.
''' </summary>
Module Program

    ''' <summary>
    ''' Loads the source PDF and, for every top-level outline, clones the pages from that
    ''' outline's destination page up to (but not including) the next outline's destination
    ''' page into a new PdfDocument saved as "&lt;outline title&gt;.pdf" inside
    ''' "OutputBookmarks.zip".
    ''' </summary>
    Sub Main()

        ' If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY")

        ' Open a source PDF file and create a destination ZIP file.
        ' The archive is named "OutputBookmarks.zip" to match the C# version of this example
        ' and to avoid overwriting the "Output.zip" produced by the split-by-page example.
        ' File.Create truncates an already existing file; File.OpenWrite would leave the old
        ' file's trailing bytes in place when the new archive is shorter, which results in
        ' a corrupt ZIP file.
        Using source = PdfDocument.Load("%#Chapters.pdf%")
            Using archiveStream = File.Create("OutputBookmarks.zip")
                Using archive = New ZipArchive(archiveStream, ZipArchiveMode.Create)

                    ' Map every page to its zero-based index so an outline's destination page
                    ' can be translated into a position within source.Pages.
                    Dim pageIndexes As Dictionary(Of PdfPage, Integer) = source.Pages _
                        .Select(Function(page, index) New With {page, index}) _
                        .ToDictionary(Function(item) item.page, Function(item) item.index)

                    ' Iterate through document outlines.
                    Dim outlines = source.Outlines
                    For index As Integer = 0 To outlines.Count - 1
                        Dim currentOutline = outlines(index)
                        Dim nextOutline = If(index + 1 < outlines.Count, outlines(index + 1), Nothing)

                        ' NOTE(review): assumes every outline has a Destination that points to a page
                        ' of this document - confirm for PDFs whose bookmarks use actions instead.
                        Dim pageIndex As Integer = pageIndexes(currentOutline.Destination.Page)

                        ' The section ends where the next bookmark starts, or at the end of the document.
                        Dim pageCount As Integer = If(nextOutline Is Nothing, source.Pages.Count, pageIndexes(nextOutline.Destination.Page))

                        ' The ZIP entry is named after the bookmark's title.
                        Dim entry = archive.CreateEntry($"{currentOutline.Title}.pdf")
                        Using entryStream = entry.Open()
                            Using destination = New PdfDocument()

                                ' Add source pages from current bookmark till next bookmark to destination document.
                                While pageIndex < pageCount
                                    destination.Pages.AddClone(source.Pages(pageIndex))
                                    pageIndex += 1
                                End While

                                ' Save destination document to the ZIP entry.
                                destination.Save(entryStream)
                            End Using
                        End Using
                    Next
                End Using
            End Using
        End Using
    End Sub
End Module
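Split large PDF file
The PdfPage objects are loaded when requested for the first time, and with the PdfDocument.Unload method you can clear previously loaded pages.
The following example shows how you can split a very large PDF file into smaller PDF files by utilizing the PdfDocument's lazy loading and unloading.
using GemBox.Pdf;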
using System;
using System.IO;
/// <summary>
/// Example program that splits a very large PDF file into smaller PDF files of a fixed
/// number of pages, unloading the source document between chunks.
/// </summary>
class Program
{
    /// <summary>
    /// Loads "lorem-ipsum-1000-pages.pdf" and writes consecutive chunks of up to 220 pages
    /// as "Pages A-B.pdf" files into the "Split Large Pdf" folder.
    /// </summary>
    static void Main()
    {
        // If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY");

        // Make sure the output folder exists before anything is saved into it;
        // Directory.CreateDirectory does nothing when the folder is already there.
        string outputFolder = "Split Large Pdf";
        Directory.CreateDirectory(outputFolder);

        using (var source = PdfDocument.Load("lorem-ipsum-1000-pages.pdf"))
        {
            int chunkSize = 220;
            int pageIndex = 0;
            int pageCount = source.Pages.Count;

            while (pageIndex < pageCount)
            {
                // Split large PDF file into multiple PDF files of specified chunk size.
                using (var destination = new PdfDocument())
                {
                    // Exclusive end index of this chunk; the last chunk may be shorter than chunkSize.
                    int chunkCount = Math.Min(chunkSize + pageIndex, pageCount);

                    // File name uses 1-based page numbers; chunkCount equals the 1-based last page.
                    string chunkName = $"Pages {pageIndex + 1}-{chunkCount}.pdf";

                    while (pageIndex < chunkCount)
                    {
                        destination.Pages.AddClone(source.Pages[pageIndex++]);
                    }

                    destination.Save(Path.Combine(outputFolder, chunkName));
                }

                // Clear previously parsed pages and thus free memory necessary for reading additional pages.
                source.Unload();
            }
        }
    }
}
Imports GemBox.Pdf
Imports System
Imports System.IO
''' <summary>
''' Example program that splits a very large PDF file into smaller PDF files of a fixed
''' number of pages, unloading the source document between chunks.
''' </summary>
Module Program

    ''' <summary>
    ''' Loads "lorem-ipsum-1000-pages.pdf" and writes consecutive chunks of up to 220 pages
    ''' as "Pages A-B.pdf" files into the "Split Large Pdf" folder.
    ''' </summary>
    Sub Main()

        ' If using the Professional version, put your serial key below.
        ComponentInfo.SetLicense("FREE-LIMITED-KEY")

        ' Make sure the output folder exists before anything is saved into it;
        ' Directory.CreateDirectory does nothing when the folder is already there.
        Dim outputFolder As String = "Split Large Pdf"
        Directory.CreateDirectory(outputFolder)

        Using source = PdfDocument.Load("lorem-ipsum-1000-pages.pdf")
            Dim chunkSize As Integer = 220
            Dim pageIndex As Integer = 0
            Dim pageCount As Integer = source.Pages.Count

            While pageIndex < pageCount

                ' Split large PDF file into multiple PDF files of specified chunk size.
                Using destination = New PdfDocument()

                    ' Exclusive end index of this chunk; the last chunk may be shorter than chunkSize.
                    Dim chunkCount As Integer = Math.Min(chunkSize + pageIndex, pageCount)

                    ' File name uses 1-based page numbers; chunkCount equals the 1-based last page.
                    Dim chunkName As String = $"Pages {pageIndex + 1}-{chunkCount}.pdf"

                    While pageIndex < chunkCount
                        destination.Pages.AddClone(source.Pages(pageIndex))
                        pageIndex += 1
                    End While

                    destination.Save(Path.Combine(outputFolder, chunkName))
                End Using

                ' Clear previously parsed pages and thus free memory necessary for reading additional pages.
                source.Unload()
            End While
        End Using
    End Sub
End Module