PDF Base - Ps

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1of 12

% Copyright (C) 1994, 1996, 1997, 1998, 1999, 2000 Aladdin Enterprises.
% All rights reserved.
%
% This software is provided AS-IS with no warranty, either express or
% implied.
%
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
%
% For more information about licensing, please refer to
% http://www.ghostscript.com/licensing/. For information on
% commercial licensing, go to http://www.artifex.com/licensing/ or
% contact Artifex Software, Inc., 101 Lucas Valley Road #110,
% San Rafael, CA 94903, U.S.A., +1(415)492-9861.
% $Id: pdf_base.ps,v 1.30 2002/11/01 20:00:12 alexcher Exp $
% pdf_base.ps
% Basic parser for PDF reader.
% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% streams, and name/number trees; it doesn't include any facilities for
% making marks on the page.
% Switch to language level 2 when the interpreter provides the switch.
/.setlanguagelevel where {
  pop
  2 .setlanguagelevel
} if

% Leave the current allocation mode on the operand stack and allocate in
% global VM from here on (a matching .setglobal appears at the end of the
% file).
.currentglobal
true .setglobal

% Create pdfdict if it does not exist yet, then make it the current
% dictionary for all following definitions.
/pdfdict where {
  pop
} {
  /pdfdict 100 dict def
} ifelse
pdfdict begin
% Name interpretation dictionary used while reading PDF values.
% Each key is a token that can occur inside a value; the entry is what
% gets executed when that token is read.
/valueopdict mark
  (<<) cvn { mark } bind        % don't push an actual mark!
  (>>) cvn {
    % Build the dictionary; report (rather than fail on) a stray >>.
    { .dicttomark } stopped {
      ( **** File has an unbalanced >> \(close dictionary\).\n)
      pdfformaterror
    } if
  } bind
  ([) cvn { mark } bind         % ditto
  (]) cvn dup load
  % /true true          % see .pdfexectoken below
  % /false false        % ibid.
  % /null null          % ibid.
  /F dup cvx            % see Objects section below
  /R dup cvx            % see Objects section below
  /stream dup cvx       % see Streams section below
.dicttomark readonly def
% ------ Utilities ------ %
% Define a scratch string. The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
% (prevline uses this string as its line buffer.)
/pdfstring 255 string def
% Read the previous line of a file. If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
% Returns the file position at which the returned line starts, and the
% line itself as a substring of the pdfstring scratch buffer.
/prevline % - prevline <startpos> <substring>
{ PDFfile fileposition dup () pdfstring
% Back up 257 positions (clamped at 0) and scan forward line by line.
2 index 257 sub 0 .max PDFfile exch setfileposition
{ % Stack: initpos linepos line string
PDFfile fileposition
PDFfile 2 index readline pop
dup length 0 gt
% Non-blank line: it replaces the current candidate (linepos, line).
{ 3 2 roll 5 -2 roll pop pop 2 index }
% Blank line: discard it and keep the previous candidate.
{ pop }
ifelse
% Stack: initpos linepos line string startpos
% Stop as soon as reading has reached or passed the initial position.
PDFfile fileposition 5 index ge { exit } if
pop
}
% After the loop: drop startpos, the scratch string and initpos.
loop pop pop 3 -1 roll pop
} bind def
% Handle the PDF 1.2 #nn escape convention when reading from a file.
% This should eventually be done in C.
% Names are returned unchanged for PDF versions below 1.2 and when they
% contain no # character.
/.pdffixname {          % <execname> .pdffixname <execname'>
  PDFversion 1.2 ge {
    dup .namestring (#) search {
      % Stack: name post (#) pre
      name#escape cvn
      exch pop          % drop the original name, keep the decoded one
    } {
      pop               % no escape: drop the string form
    } ifelse
  } if
} bind def
% Decode the #nn escape at the start of <post> and any later escapes.
% The operands are the three strings produced by a successful
% (#) search; the result is <pre> followed by the decoded remainder.
% Signals syntaxerror if two hex digits cannot be read after the #.
/name#escape % <post> <(#)> <pre> name#escape <string>
{ exch pop
% Read the first two characters of <post> as one hex-encoded character.
1 index 2 () /SubFileDecode filter dup (x) readhexstring
% Stack: post pre stream char t/f
not { /.pdftoken cvx /syntaxerror signalerror } if
exch closefile concatstrings
% Stack: post pre+char -- now skip the two hex digits in <post>.
exch 2 1 index length 2 sub getinterval
% Decode any further escapes in the remainder, then join the pieces.
(#) search { name#escape } if concatstrings
} bind def
% Execute a file, interpreting its executable names in a given
% dictionary. The name procedures may do whatever they want
% to the operand stack.
%
% .pdftokenerror handles an executable name that has no definition in
% the operator dictionary. When BXlevel <= 0 it reports the name and
% re-scans its text after enabling PDFScanRules; if that yields a number,
% the number is left on the stack in place of the three operands.
% In every other case the operands above the recorded <count> are popped.
/.pdftokenerror { % <count> <opdict> <errtoken> .pdftokenerror -
BXlevel 0 le {
( **** Unknown operator: ') pdfformaterror
dup =string cvs pdfformaterror
% Attempt a retry scan of the element after changing to PDFScanRules
% NOTE(review): PDFScanRules is not switched back within this procedure.
<< /PDFScanRules true >> setuserparams
=string cvs
% NOTE(review): 'token pop' assumes the scan succeeds (returns true).
token pop exch pop dup type
dup /integertype eq exch /realtype eq or {
% Stack: count opdict number -- keep only the number.
exch pop exch pop
(', processed as number, value: ) pdfformaterror
dup =string cvs pdfformaterror (\n) pdfformaterror
false % suppress any stack cleanup
} {
% error was non-recoverable with modified scanning rules
(\n) pdfformaterror
true
} ifelse
} {
% BXlevel > 0: no message, just clean up.
true
} ifelse
{ % clean up the operand stack if this was non-recoverable
% Drop the token and opdict, then pop (current depth - <count>) items.
pop pop count exch sub { pop } repeat % pop all the operands
} if
} bind def
% Execute one executable name read from the file: run its entry in
% opdict if it has one, resolve true/false/null from systemdict, and hand
% anything else to .pdftokenerror.
/.pdfexectoken {        % <count> <opdict> <exectoken> .pdfexectoken ?
  DEBUG { dup == flush } if
  2 copy .knownget {
    % Stack: count opdict token value -- keep only the value and run it.
    exch pop exch pop exch pop
    exec
  } {
    % Normally, true, false, and null would appear in opdict
    % and be treated as "operators". However, there is a
    % special fast case in the PostScript interpreter for names
    % that are defined in, and only in, systemdict and/or
    % userdict: putting these three names in the PDF dictionaries
    % destroys this property for them, slowing down their
    % interpretation in all PostScript code. Therefore, we
    % check for them explicitly here instead.
    dup dup dup
    /true eq exch /false eq or exch /null eq or {
      % Stack: count opdict token -- replace all three by the value.
      exch pop exch pop
      //systemdict exch get
    } {
      .pdftokenerror
    } ifelse
  } ifelse
} bind def
% Read tokens from <file> in a loop until something exits the loop.
% Executable names are dispatched through .pdfexectoken with <opdict>;
% literal names go through .pdffixname; all other tokens are left on the
% operand stack. While running, PDFsource is set to <file>; the previous
% PDFsource is stored back afterwards, and a 'stop' raised inside the
% loop is re-raised after that.
/.pdfrun { % <file> <opdict> .pdfrun -
% Construct a procedure with the stack depth, file and opdict
% bound into it.
% After this line the stack is: file mark mark count opdict file(literal)
1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
{ % Stack: ..operands.. count opdict file
token {
dup type /nametype eq {
dup xcheck {
.pdfexectoken
} {
.pdffixname
exch pop exch pop DEBUG { dup ==only ( ) print flush } if
} ifelse
} {
% Not a name: drop count and opdict, leave the token as an operand.
exch pop exch pop DEBUG { dup ==only ( ) print flush } if
} ifelse
} {
% End of file: dispatch a synthetic %%EOF name through opdict.
(%%EOF) cvn cvx .pdfexectoken
} ifelse
}
% Splice count/opdict/file in front of the template's elements.
aload pop .packtomark cvx
% Wrap the per-token procedure as { proc loop }.
/loop cvx 2 packedarray cvx
% Build { {proc loop} stopped /PDFsource <old PDFsource> store {stop} if }.
{ stopped /PDFsource } aload pop
PDFsource
{ store { stop } if } aload pop .packtomark cvx
% Install <file> as PDFsource and run the constructed procedure.
/PDFsource 3 -1 roll store exec
} bind def
% Execute a file, like .pdfrun, for a marking context.
% This temporarily rebinds LocalResources and DefaultMatrix.
/.pdfruncontext {       % <resdict> <file> <opdict> .pdfruncontext -
  % Collect .pdfrun and the two values to reinstate afterwards.
  /.pdfrun load
  LocalResources
  DefaultMatrix
  % Install the new resource dictionary and the current CTM.
  /LocalResources 7 -1 roll store
  /DefaultMatrix matrix currentmatrix store
  % NOTE(review): presumably .execn runs .pdfrun and then leaves the two
  % saved values on the operand stack -- confirm against .execn.
  3 .execn
  % Reinstate the previous values.
  /DefaultMatrix exch store
  /LocalResources exch store
} bind def
% Get the depth of the PDF operand stack. The caller sets pdfemptycount
% before calling .pdfrun or .pdfruncontext. It is initially set by
% pdf_main, and is also set by any routine which changes the operand
% stack depth (currently .pdfpaintproc, although there are other callers
% of .pdfrun{context} which have not been checked for opstack depth).
/.pdfcount {            % - .pdfcount <count>
  count
  pdfemptycount sub
} bind def
% ------ File reading ------ %
% Read the cross-reference entry for an (unresolved) object.
% The caller must save and restore the PDFfile position if desired.
% The first field of the entry is returned as <objpos>. As a side effect
% Generations[object#] is set to 1 + the generation number for an 'n'
% entry and to 0 for an 'f' (free) entry.
% Signals syntaxerror if the third field is neither n nor f.
/readxrefentry % <object#> readxrefentry <objpos>
{ dup Objects exch lget
% Objects[object#] holds the file offset of the xref entry itself.
PDFfile exch setfileposition
PDFfile token pop % object position
PDFfile token pop % generation #
PDFfile token pop % n or f
dup /n eq
{ pop 1 add dup 255 gt
% 1+gen# is greater than 255: switch Generations to an array if it is
% still a string.
{ Generations ltype /stringtype eq
{ % Convert Generations from a string to an array.
larray Generations llength lgrowto dup
0 1 2 index llength 1 sub
% Copy each existing entry into the new array.
{ Generations 1 index lget lput dup
}
for pop /Generations exch store
}
if
}
if
}
{ /f eq
{ pop 0 }
{ /readxrefentry cvx /syntaxerror signalerror }
ifelse
}
ifelse
% Stack: obj# objpos 1+gen#
Generations 4 -1 roll 3 -1 roll lput
} bind def
% ================================ Objects ================================ %
% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
% An index is split into a top-level index (index >> lshift) and a
% position inside the sub-sequence (index & lsubmask).
/lshift 9 def                           % log2 of the sub-sequence length
/lnshift lshift neg def                 % shift count for extracting the top index
/lsublen 1 lshift bitshift def          % length of a full sub-sequence
/lsubmask lsublen 1 sub def             % mask for the position in a sub-sequence
% Create an empty large array: a one-element array holding an empty array.
/larray {               % - larray <larray>
  [] 1 array astore
} bind def
% Create an empty large string: a one-element array holding an empty string.
/lstring {              % - lstring <lstring>
  () 1 array astore
} bind def
% Return the type of the sub-sequences (the type of element 0).
/ltype {                % <lseq> ltype <type>
  0 get
  type
} bind def
% Fetch one element of a large sequence.
/lget {                 % <lseq> <index> lget <value>
  dup //lsubmask and            % lseq index subindex
  3 1 roll                      % subindex lseq index
  //lnshift bitshift get        % subindex subseq
  exch get
} bind def
% Store one element into a large sequence.
/lput {                 % <lseq> <index> <value> lput -
  3 1 roll                      % value lseq index
  dup //lsubmask and            % value lseq index subindex
  4 1 roll                      % subindex value lseq index
  //lnshift bitshift get        % subindex value subseq
  3 1 roll                      % subseq subindex value
  put
} bind def
% Total length of a large sequence: every sub-sequence except the last
% counts as a full one, plus the actual length of the last.
/llength {              % <lseq> llength <length>
  dup length 1 sub              % lseq last
  dup //lshift bitshift         % lseq last last<<lshift
  3 1 roll                      % last<<lshift lseq last
  get length
  add
} bind def
% lgrowto assumes newlength > llength(lseq)
% Allocate a new string or array (same kind as the operand) of the given
% length and copy the old contents into its beginning.
/growto {               % <string/array> <length> growto <string'/array'>
  1 index type /stringtype eq {
    string
  } {
    array
  } ifelse
  % Stack: old new -- copy old into new, return new.
  dup 3 -1 roll exch copy pop
} bind def
% Grow a large sequence to <newlength> elements. When more sub-sequences
% are needed, a new top-level array is built; otherwise the last
% sub-sequence is replaced in the existing top-level array.
/lgrowto { % <lseq> <newlength> lgrowto <lseq'>
% newtoplen = number of sub-sequences needed for newlength elements.
dup //lsubmask add //lnshift bitshift dup 3 index length gt {
% Add more sub-arrays. Start by completing the last existing one.
% Stack: lseq newlen newtoplen
3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
% Stack: newlen newtoplen lseq
% Rebuild the top level: existing sub-sequences, then full-length new
% ones, then one empty sub-sequence to be sized below.
[ exch aload pop
counttomark 2 add -1 roll % newtoplen
counttomark sub { dup 0 0 getinterval lsublen growto } repeat
dup 0 0 getinterval ] exch
} {
pop
} ifelse
% Expand the last sub-array.
% Required length of the last sub-sequence: ((newlen-1) & lsubmask) + 1.
1 sub //lsubmask and 1 add
exch dup dup length 1 sub 2 copy
% Stack: newsublen lseq lseq len-1 lseq len-1
get 5 -1 roll growto put
} bind def
% Apply <proc> to every element of a large sequence by running
% { <proc> forall } over the sub-sequences.
/lforall {              % <lseq> <proc> lforall -
  /forall cvx 2 packedarray cvx
  forall
} bind def
% We keep track of PDF objects using the following PostScript variables:
%
% Generations (lstring): Generations[N] holds 1+ the current
% generation number for object number N. (As far as we can tell,
% this is needed only for error checking.) For free objects,
% Generations[N] is 0.
%
% Objects (larray): If object N is loaded, Objects[N] is the actual
% object; otherwise, Objects[N] is an executable integer giving
% the file offset of the object's entry in the cross-reference
% table.
%
% GlobalObjects (dictionary): If object N has been resolved in
% global VM, GlobalObjects[N] is the same as Objects[N]
% (except that GlobalObjects itself is stored in global VM,
% so the entry will not be deleted at the end of the page).
%
% IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
% global VM. This is an accelerator to avoid having to do a
% dictionary lookup in GlobalObjects when resolving every object.
% Initialize the PDF object tables.
% Objects, Generations and IsGlobal start out as empty large sequences;
% GlobalObjects is a dictionary allocated in global VM.
/initPDFobjects {       % - initPDFobjects -
  /Objects larray def
  /Generations lstring def
  % Allocate GlobalObjects in global VM, then restore the previous mode.
  .currentglobal
  true .setglobal
  /GlobalObjects 20 dict def
  .setglobal
  /IsGlobal lstring def
} bind def
% Grow the tables to a specified size.
% Each of Objects, Generations and IsGlobal is grown only if it is
% currently shorter than <minsize>.
/growPDFobjects {       % <minsize> growPDFobjects -
  dup Objects llength gt {
    Objects 1 index lgrowto /Objects exch def
  } if
  dup Generations llength gt {
    Generations 1 index lgrowto /Generations exch def
  } if
  dup IsGlobal llength gt {
    IsGlobal 1 index lgrowto /IsGlobal exch def
  } if
  pop
} bind def
% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
% no way to represent procedures. Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
%
% resolved? reports whether object N is already loaded. An executable
% integer in Objects[N] means "not loaded locally"; in that case the
% object may still be available from GlobalObjects (flagged by
% IsGlobal[N]), and is then copied into Objects[N] before being returned.
/resolved? {            % <object#> resolved? <value> true
                        % <object#> resolved? false
  Objects 1 index lget dup xcheck {
    dup type /integertype eq {
      % Check whether the object is in GlobalObjects.
      pop IsGlobal 1 index lget 0 eq {
        pop false
      } {
        % Update Objects from GlobalObjects
        DEBUG { (%Global=>local: ) print dup == } if
        % Stack: obj# value value Objects  --4 2 roll-->
        %        value Objects obj# value
        % so that lput gets <lseq> <index> <value> and one copy of the
        % value is left as the result.
        GlobalObjects 1 index get dup Objects 4 2 roll lput true
      } ifelse
    } {
      % Executable but not an integer (e.g. a stream): already loaded.
      exch pop true
    } ifelse
  } {
    exch pop true
  } ifelse
} bind def
% Forcing a possibly indirect object is simply executing it.
/oforce /exec load def

% Fetch an element and force it if it is an indirect reference.
/oget {                 % <array> <index> oget <object>
                        % <dict> <key> oget <object>
  % Before release 6.20, this procedure stored the resolved
  % object back into the referring slot. In order to support
  % PDF linearization, we no longer do this.
  get
  oforce
} bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget {            % <dict> <key> knownoget <value> true
                        % <dict> <key> knownoget false
  % See oget above regarding this procedure.
  .knownget {
    oforce
    dup null eq {
      pop false
    } {
      true
    } ifelse
  } {
    false
  } ifelse
} bind def
% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F {                    % <file#> <object#> <generation#> F <object>
  % Some PDF 1.1 files use F as a synonym for f!
  % With fewer than three PDF operands this cannot be a reference.
  .pdfcount 3 lt {
    f
  } {
    pop pop pop
    null
  } ifelse
} bind def
% Compare <generation#> with the generation recorded in Generations
% (which stores 1 + generation). On a mismatch a warning is issued
% unless QUIET is true.
/checkgeneration {      % <object#> <generation#> checkgeneration <object#> <OK>
  Generations 2 index lget 1 sub 1 index eq {
    pop
    true
  } {
    QUIET not {
      % A recorded value of 0 marks a free object.
      Generations 2 index lget 0 eq {
        ( **** Warning: reference to free object: )
      } {
        ( **** Warning: wrong generation: )
      } ifelse
      % Append "<object#> <generation#> R\n" to the message.
      2 index =string cvs concatstrings
      ( ) concatstrings
      exch =string cvs concatstrings
      ( R\n) concatstrings
      pdfformaterror
    } {
      pop
    } ifelse
    false
  } ifelse
} bind def
% Build the unresolved-reference procedure {obj# gen# resolveR}.
/R {                    % <object#> <generation#> R <object>
  /resolveR cvx
  3 packedarray cvx
} bind def
% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
% objopdict is valueopdict plus an entry for endobj.
/objopdict mark
  valueopdict { } forall
  /endobj dup cvx
.dicttomark readonly def
% Read the body of an object definition from PDFfile using objopdict.
/obj {                  % <object#> <generation#> obj <object>
  PDFfile objopdict
  .pdfrun
} bind def
% Finish an object definition: check its generation number and, if it
% is correct, record the object in Objects (and in GlobalObjects/IsGlobal
% for dictionaries allocated in global VM). With a wrong generation
% number the object is discarded and null is returned.
/endobj {               % <object#> <generation#> <object> endobj <object>
  3 1 roll
  % Stack: object obj# gen#
  % Read the xref entry if we haven't yet done so.
  % This is only needed for generation # checking.
  1 index resolved? {
    pop
  } {
    % readxrefentry moves PDFfile; put it back exactly where it was.
    % The saved value is an absolute file position, so PDFoffset must
    % not be added to it (resolveR restores its saved position the
    % same way).
    PDFfile fileposition
    2 index readxrefentry pop
    PDFfile exch setfileposition
  } ifelse
  checkgeneration {
    % Stack: object obj#
    % The only global objects we bother to save are
    % (resource) dictionaries.
    1 index dup gcheck exch type /dicttype eq and {
      DEBUG { (%Local=>global: ) print dup == } if
      GlobalObjects 1 index 3 index put
      % IsGlobal is a large string, so it must be written with lput.
      IsGlobal 1 index 1 lput
    } if
    Objects exch 2 index lput
  } {
    pop pop null
  } ifelse
} bind def
% When resolving an object reference, we stop at the endobj.
% resolveopdict is valueopdict plus entries that finish the object and
% leave the reading loop.
/resolveopdict mark
  valueopdict { } forall
  /endobj {
    endobj exit
  } bind
  % OmniForm generates PDF file with endobj missing in some
  % objects. AR ignores this. So we have to do it too.
  /obj {
    pop pop             % discard the object# and generation# just read
    endobj exit
  } bind
.dicttomark readonly def
% Resolve an indirect reference. An already-loaded object is returned
% directly. Otherwise the xref entry is read, the generation is checked,
% the "<obj#> <gen#> obj" header at the object's position is verified and
% the object is read via pdf_run_resolve. The position of PDFfile is
% saved first and restored at the end. A wrong generation yields null.
% Signals rangecheck if the object header does not match.
/resolveR { % <object#> <generation#> resolveR <object>
DEBUG { (%Resolving: ) print 2 copy 2 array astore == } if
1 index resolved? {
% Stack: obj# gen# value -- keep only the value.
exch pop exch pop
} {
% Stack after this line: savepos obj# gen#
PDFfile fileposition 3 1 roll
1 index readxrefentry
3 1 roll checkgeneration {
% Stack: savepos objpos obj#
exch PDFoffset add PDFfile exch setfileposition
% The first token at the object's position must equal obj#.
PDFfile token pop 2 copy ne
{ ( **** Unrecoverable error in xref!\n) pdfformaterror
/resolveR cvx /rangecheck signalerror
}
if pop PDFfile token pop
% The third token must be the 'obj' keyword.
PDFfile token pop /obj ne
{ ( **** Unrecoverable error in xref!\n) pdfformaterror
/resolveR cvx /rangecheck signalerror
}
if
pdf_run_resolve % PDFfile resolveopdict .pdfrun
}
{ % Don't cache if the generation # is wrong.
pop pop null
} ifelse
% Stack: savepos object -- restore the saved file position.
exch PDFfile exch setfileposition
} ifelse
} bind def
% ================================ Streams ================================ %
% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
% /File - the file or string where the stream contents are stored,
% if the stream is not an external one.
% /FilePosition - iff File is a file, the position in the file
% where the contents start.
% /StreamKey - the key used to decrypt this stream, if any.
% We do the real work of constructing the data stream only when the
% contents are needed.
% Construct a stream. The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably. We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
% Unfortunately, this doesn't account for other whitespace characters that
% may have preceded the EOL, such as spaces or tabs. Thus we back up one
% character and scan until we find the \n terminator.
% Turn a stream dictionary into the executable dictionary that
% represents the stream. When reading directly from PDFfile (or when the
% stream is external, i.e. has /F) the data is skipped using /Length and
% its start is recorded in /File and /FilePosition; when reading from
% another source the data is captured into a string stored in /File.
% Afterwards the 'endstream' keyword is expected; a missing keyword
% produces a warning.
/stream {               % <dict> stream <modified_dict>
  dup /Length oget 0 eq {
    dup /Filter undef   % don't confuse any filters that require data
  } if
  dup /F known dup PDFsource PDFfile eq or {
    % Stack: dict Fknown
    not {
      dup /File PDFfile put
      % make sure that we are just past the EOL \n character
      PDFfile dup fileposition 1 sub setfileposition    % back up one
      { PDFfile read pop dup 13 eq {
          % If there had been a \n, token would have advanced over it
          % thus, if the terminator was \r, we have a format error!
          ( **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
          pop exit      % fileposition is OK (just past the \r).
        } if
        10 eq { exit } if
      } loop            % scan past \n
      dup /FilePosition PDFfile fileposition put
      DEBUG { (%FilePosition: ) print dup /FilePosition get == } if
    } if
    % Skip over the stream data.
    PDFfile fileposition 1 index /Length oget add
    PDFfile exch setfileposition
  } {
    pop
    % We're already reading from a stream, which we can't reposition.
    % Capture the sub-stream contents in a string.
    dup /Length oget string PDFsource exch readstring
    not {
      ( **** Warning: Unexpected EOF in stream!\n) pdfformaterror
      /stream cvx /rangecheck signalerror
    } if
    1 index exch /File exch put
  } ifelse
  % Read the token that follows the data; null if there is none or if
  % scanning it fails.
  PDFsource {token} stopped {
    pop null
  } {
    not { null } if
  } ifelse
  dup /endobj eq {
    % Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
    ( **** Warning: stream missing 'endstream'.\n) pdfformaterror
    pop /endstream      % fake a valid endstream
  } if
  /endstream ne {
    ( **** Warning: stream Length incorrect.\n) pdfformaterror
    exit                % exit from .pdfrun now.
  } if
  cvx
} bind def
% A stray endstream simply leaves the enclosing loop.
/endstream { exit } bind def
% Contrary to the published PDF (1.3) specification, Acrobat Reader
% accepts abbreviated filter names everywhere, not just for in-line images,
% and some applications (notably htmldoc) rely on this.
% Maps each abbreviated filter name to its full name.
/unabbrevfilterdict mark
  /AHx  /ASCIIHexDecode
  /A85  /ASCII85Decode
  /CCF  /CCITTFaxDecode
  /DCT  /DCTDecode
  /Fl   /FlateDecode
  /LZW  /LZWDecode
  /RL   /RunLengthDecode
.dicttomark readonly def
% Extract and apply filters.
% filterparms fetches the filter list (under <Fkey>) and the matching
% parameters (under <DPkey>) from <dict>. A single filter name is wrapped
% into a one-element array, as is its parameter value. <parms> is null
% when no parameters are present.
/filterparms {          % <dict> <DPkey> <Fkey> filterparms
                        %   <dict> <parms> <filternames>
  2 index exch knownoget {
    % Stack: dict DPkey filters
    exch 2 index exch knownoget {
      % Both filters and parameters.
      exch
      dup type /nametype eq {
        1 array astore exch
        1 array astore exch
      } if
    } {
      % Filters, but no parameters.
      null exch
      dup type /nametype eq {
        1 array astore
      } if
    } ifelse
  } {
    % No filters: ignore parameters, if any.
    pop
    null { }
  } ifelse
} bind def
% Expand an abbreviated filter name; other names pass through unchanged.
/filtername {           % <filtername> filtername <filtername'>
  //unabbrevfilterdict 1 index .knownget {
    exch pop
  } if
} bind def
% Stack the named filters on top of <source>, in order. When <parms> is
% not null, its elements are consumed in step with the filter names; a
% null element means "no parameters for this filter".
/applyfilters {         % <parms> <source> <filternames> applyfilters <stream>
  2 index null eq {
    {
      filtername filter
    }
  } {
    {
      % Stack: parms stream filtername
      2 index 0 oget
      dup null eq { pop } { exch } ifelse
      filtername filter
      % Drop the parameter entry that was just used.
      exch dup length 1 sub 1 exch getinterval exch
    }
  } ifelse
  forall
  exch pop
} bind def
% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
% - Whether we are going to interpret the stream, or if we are just
% going to read data from it, we impose a SubFileDecode filter
% that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream { % <streamdict> <readdata?> resolvestream <stream>
1 index /F knownoget {
% This stream is stored on an external file.
(r) file 3 -1 roll
/FDecodeParms /FFilter filterparms
% Stack: readdata? file dict parms filternames
4 -1 roll exch
pdf_decrypt_stream
applyfilters
} {
% Internal stream: position /File at /FilePosition when one is recorded.
exch dup /FilePosition .knownget {
1 index /File get exch setfileposition
} if
% Stack: readdata? dict
/DecodeParms /Filter filterparms
% Stack: readdata? dict parms filternames
2 index /File get exch
% Stack: readdata? dict parms file/string filternames
pdf_decrypt_stream % add decryption if needed
dup length 0 eq {
% All the PDF filters have EOD markers, but in this case
% there is no specified filter.
pop exch pop
% Stack: readdata? dict file/string
2 index 1 index type /filetype eq or {
% Use length for any files or reading data from any source.
1 index /Length knownoget not { 0 } if
} {
0 % Otherwise length of 0 for whole string
} ifelse
% No SubFileDecode filter is added when the dictionary has /IDFlag.
2 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse
} {
applyfilters
} ifelse
} ifelse
% Stack: readdata? dict file
exch pop exch pop
} bind def
% ============================ Name/number trees ============================ %
% Look up <key> in a name tree (leaf entries are stored under /Names).
/nameoget {             % <nametree> <key> nameoget <obj|null>
  exch
  /Names exch
  .treeget
} bind def
% Look up <key> in a number tree (leaf entries are stored under /Nums).
/numoget {              % <numtree> <key> numoget <obj|null>
  exch
  /Nums exch
  .treeget
} bind def
% Look up <key> in a tree node: search the /Kids array if the node has
% one, otherwise search the pairs array stored under <leafkey>.
/.treeget {             % <key> <leafkey> <tree> .treeget <obj|null>
  dup /Kids knownoget {
    % Stack: key leafkey tree kids
    exch pop
    .branchget
  } {
    % Stack: key leafkey tree
    exch get
    .leafget
  } ifelse
} bind def
% Binary search over an array of child nodes using each child's /Limits
% (min, max). Returns null when no child can contain <key>.
/.branchget {           % <key> <leafkey> <kids> .branchget <obj|null>
  dup length 0 eq {
    pop pop pop
    null
  } {
    dup length -1 bitshift 2 copy oget
    % Stack: key leafkey kids mid kids[mid]
    dup /Limits oget aload pop
    % Stack: key leafkey kids mid kids[mid] min max
    6 index lt {
      % max < key: continue in the kids after mid.
      pop pop
      1 add 1 index length 1 index sub getinterval
      .branchget
    } {
      5 index gt {
        % min > key: continue in the kids before mid.
        pop
        0 exch getinterval
        .branchget
      } {
        % key lies within the limits of kids[mid]: descend into it.
        exch pop exch pop
        .treeget
      } ifelse
    } ifelse
  } ifelse
} bind def
% Binary search over a leaf's flat array of key/value pairs
% (key0 value0 key1 value1 ...). Returns the forced value for <key>, or
% null if the key is not present. An array with fewer than two elements
% (e.g. an empty leaf) yields null instead of a rangecheck.
/.leafget {             % <key> <pairs> .leafget <obj|null>
  dup length 2 lt {
    % Nothing to search.
    pop pop null
  } {
    dup length 2 eq {
      % Exactly one pair left: it either matches or the key is absent.
      dup 0 get 2 index eq { 1 oget } { pop null } ifelse
      exch pop
    } {
      % mid is the even index of the middle pair's key.
      dup length -1 bitshift -2 and 2 copy oget
      % Stack: key pairs mid pairs[mid]
      3 index gt {
        0 exch                          % search pairs[0 .. mid)
      } {
        1 index length 1 index sub      % search pairs[mid .. end)
      } ifelse
      getinterval .leafget
    } ifelse
  } ifelse
} bind def
% Close the dictionary scope opened by 'pdfdict begin' near the top of
% the file.
end % pdfdict
% Restore the allocation mode saved by .currentglobal near the top of
% the file.
.setglobal

You might also like