2023-04-29 00:18:10 +02:00
package stirling.software.SPDF.controller.api ;
import java.io.ByteArrayOutputStream ;
import java.io.IOException ;
import java.io.InputStream ;
import java.nio.file.Files ;
import java.nio.file.Path ;
import java.util.ArrayList ;
import java.util.List ;
import java.util.stream.Collectors ;
import java.util.zip.ZipEntry ;
import java.util.zip.ZipOutputStream ;
import org.apache.pdfbox.pdmodel.PDDocument ;
import org.apache.pdfbox.pdmodel.PDPage ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.springframework.core.io.ByteArrayResource ;
import org.springframework.core.io.Resource ;
import org.springframework.http.HttpHeaders ;
import org.springframework.http.MediaType ;
import org.springframework.http.ResponseEntity ;
import org.springframework.web.bind.annotation.PostMapping ;
import org.springframework.web.bind.annotation.RequestParam ;
import org.springframework.web.bind.annotation.RequestPart ;
import org.springframework.web.bind.annotation.RestController ;
import org.springframework.web.multipart.MultipartFile ;
2023-05-08 16:20:04 +02:00
import io.swagger.v3.oas.annotations.Operation ;
import io.swagger.v3.oas.annotations.Parameter ;
2023-06-03 23:56:15 +02:00
import stirling.software.SPDF.utils.GeneralUtils ;
2023-06-07 15:01:37 +02:00
import stirling.software.SPDF.utils.WebResponseUtils ;
2023-05-08 16:20:04 +02:00
2023-04-29 00:18:10 +02:00
@RestController
public class SplitPDFController {
private static final Logger logger = LoggerFactory . getLogger ( SplitPDFController . class ) ;
@PostMapping ( consumes = " multipart/form-data " , value = " /split-pages " )
2023-05-08 16:20:04 +02:00
@Operation ( summary = " Split a PDF file into separate documents " ,
2023-06-24 00:29:53 +02:00
description = " This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO " )
2023-06-07 15:01:37 +02:00
public ResponseEntity < byte [ ] > splitPdf (
2023-05-08 16:20:04 +02:00
@RequestPart ( required = true , value = " fileInput " )
@Parameter ( description = " The input PDF file to be split " )
MultipartFile file ,
@RequestParam ( " pages " )
@Parameter ( description = " The pages to be included in separate documents. Specify individual page numbers (e.g., '1,3,5'), ranges (e.g., '1-3,5-7'), or 'all' for every page. " )
String pages ) throws IOException {
2023-04-29 00:18:10 +02:00
// parse user input
// open the pdf document
InputStream inputStream = file . getInputStream ( ) ;
PDDocument document = PDDocument . load ( inputStream ) ;
List < Integer > pageNumbers = new ArrayList < > ( ) ;
pages = pages . replaceAll ( " \\ s+ " , " " ) ; // remove whitespaces
if ( pages . toLowerCase ( ) . equals ( " all " ) ) {
for ( int i = 0 ; i < document . getNumberOfPages ( ) ; i + + ) {
pageNumbers . add ( i ) ;
}
} else {
2023-06-03 23:56:15 +02:00
String [ ] splitPoints = pages . split ( " , " ) ;
for ( String splitPoint : splitPoints ) {
List < Integer > orderedPages = GeneralUtils . parsePageList ( new String [ ] { splitPoint } , document . getNumberOfPages ( ) ) ;
pageNumbers . addAll ( orderedPages ) ;
2023-04-29 00:18:10 +02:00
}
2023-06-03 23:56:15 +02:00
// Add the last page as a split point
pageNumbers . add ( document . getNumberOfPages ( ) - 1 ) ;
2023-04-29 00:18:10 +02:00
}
logger . info ( " Splitting PDF into pages: {} " , pageNumbers . stream ( ) . map ( String : : valueOf ) . collect ( Collectors . joining ( " , " ) ) ) ;
// split the document
List < ByteArrayOutputStream > splitDocumentsBoas = new ArrayList < > ( ) ;
2023-06-03 23:56:15 +02:00
int previousPageNumber = 0 ;
for ( int splitPoint : pageNumbers ) {
2023-04-29 00:18:10 +02:00
try ( PDDocument splitDocument = new PDDocument ( ) ) {
2023-06-03 23:56:15 +02:00
for ( int i = previousPageNumber ; i < = splitPoint ; i + + ) {
2023-04-29 00:18:10 +02:00
PDPage page = document . getPage ( i ) ;
splitDocument . addPage ( page ) ;
logger . debug ( " Adding page {} to split document " , i ) ;
}
2023-06-03 23:56:15 +02:00
previousPageNumber = splitPoint + 1 ;
2023-04-29 00:18:10 +02:00
ByteArrayOutputStream baos = new ByteArrayOutputStream ( ) ;
splitDocument . save ( baos ) ;
splitDocumentsBoas . add ( baos ) ;
} catch ( Exception e ) {
logger . error ( " Failed splitting documents and saving them " , e ) ;
throw e ;
}
}
2023-06-03 23:56:15 +02:00
2023-04-29 00:18:10 +02:00
// closing the original document
document . close ( ) ;
Path zipFile = Files . createTempFile ( " split_documents " , " .zip " ) ;
2023-06-07 15:01:37 +02:00
String filename = file . getOriginalFilename ( ) . replaceFirst ( " [.][^.]+$ " , " " ) ;
2023-04-29 00:18:10 +02:00
try ( ZipOutputStream zipOut = new ZipOutputStream ( Files . newOutputStream ( zipFile ) ) ) {
// loop through the split documents and write them to the zip file
for ( int i = 0 ; i < splitDocumentsBoas . size ( ) ; i + + ) {
2023-06-07 15:01:37 +02:00
String fileName = filename + " _ " + ( i + 1 ) + " .pdf " ;
2023-04-29 00:18:10 +02:00
ByteArrayOutputStream baos = splitDocumentsBoas . get ( i ) ;
byte [ ] pdf = baos . toByteArray ( ) ;
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry ( fileName ) ;
zipOut . putNextEntry ( pdfEntry ) ;
zipOut . write ( pdf ) ;
zipOut . closeEntry ( ) ;
logger . info ( " Wrote split document {} to zip file " , fileName ) ;
}
} catch ( Exception e ) {
logger . error ( " Failed writing to zip " , e ) ;
throw e ;
}
logger . info ( " Successfully created zip file with split documents: {} " , zipFile . toString ( ) ) ;
byte [ ] data = Files . readAllBytes ( zipFile ) ;
Files . delete ( zipFile ) ;
// return the Resource in the response
2023-06-07 15:01:37 +02:00
return WebResponseUtils . bytesToWebResponse ( data , filename + " .zip " , MediaType . APPLICATION_OCTET_STREAM ) ;
2023-04-29 00:18:10 +02:00
}
}