program pdffix(input,output); const end_of_line = 10; max_num_key_word = 32; max_key_word_len = 32; max_objects = 1024; record_len_max = 2048; n_args_max = 10; (* max. number of command line args *) max_arg_len = 128; (* max. length of each command line arg *) type node_ptr = ^node_rec; node_rec = array[0..255] of record key : integer; next : ^node_rec; end; var never_used : [global]varying[10] of char; (* fixes a bug in p2c *) arg : array[0..n_args_max] of varying[max_arg_len] of char; i,j,k,n : integer; offset : integer; obj : integer; byte_address : integer; atEOL : integer; atSlash : integer; num_annot : integer; start_annot : array[1..record_len_max] of integer; finish_annot : array[1..record_len_max] of integer; num_openaction : integer; start_openaction : array[1..record_len_max] of integer; finish_openaction : array[1..record_len_max] of integer; this_string : varying[record_len_max] of char; inXref : boolean; inTypePage : boolean; inStream : boolean; inOther : boolean; inStartXref : boolean; inAnnots : boolean; inTrailer : boolean; inOtherAnnot : boolean; inLastAnnot : boolean; inCatalog : boolean; inOpenAction : boolean; doCopy : boolean; doCopyTrailer : boolean; doEndobj : boolean; doEndStream : boolean; doEOF : boolean; doAnnotsRead : boolean; doFlush : boolean; hasAnnots : boolean; hasCatalog : boolean; hasOpenAction : boolean; doneLastAnnot : boolean; this_chr : char; this_num : integer; prev_num : integer; skip_char : boolean; the_record : varying[record_len_max] of char; tmp_record : varying[record_len_max] of char; the_annots : varying[record_len_max] of char; the_trailer : varying[record_len_max] of char; tmp_trailer : varying[record_len_max] of char; the_catalog : varying[record_len_max] of char; the_startxref : varying[record_len_max] of char; the_obj_num : varying[record_len_max] of char; the_obj_ver : varying[record_len_max] of char; old_startxref : varying[20] of char; new_startxref : varying[20] of char; num_obj : integer; objects : array[1..max_objects] of varying[record_len_max] of char; obj_number : array[1..max_objects] of varying[10] of char; obj_address : array[1..max_objects] of varying[10] of char; obj_version : array[1..max_objects] of varying[5] of char; node : node_ptr; root_node : node_ptr; num_key_word : integer; key_word : array[1..max_num_key_word,1..max_key_word_len] of integer; key_word_len : array[1..max_num_key_word] of integer; key_word_index : array[1..max_num_key_word] of integer; inp : file of char; out : file of char; inp_file_name : varying[80] of char; out_file_name : varying[80] of char; (* -------------------------------------------------------------------------- *) function ord_chr(this_char : char) : integer; var this_num:integer; begin this_num:=ord(this_char); if this_num < 0 then this_num:=256+this_num; (* fixes a bug in p2c *) ord_chr:=this_num; end; (* -------------------------------------------------------------------------- *) function is_numeral(this_num : integer) : boolean; begin if (ord_chr('0') <= this_num) and (this_num <= ord_chr('9')) then is_numeral:=true else is_numeral:=false; end; (* -------------------------------------------------------------------------- *) procedure write_out( this_string : varying[max] of char; var byte_address : integer); var i : integer; begin for i:=1 to this_string.length do begin write(out,this_string[i]); end; write(out,chr(end_of_Line)); byte_address:=byte_address + this_string.length + 1; end; (* -------------------------------------------------------------------------- *) procedure trim_tail(var this_line : varying[max] of char); var i : integer; index : integer; searching : boolean; begin (* strip trailing blanks from this_line *) if this_line.length > 0 then begin index:=0; searching:=true; i:=this_line.length; while searching do begin if this_line[i] <> ' ' then begin index:=i; searching:=false; end else begin i:=i-1; searching:=(i > 0); end; end; this_line.length:=index; end; end; (* -------------------------------------------------------------------------- *) procedure get_word(var this_word : varying[max1] of char; this_string : varying[max2] of char; var offset : integer); var i : integer; reading : boolean; done : boolean; skip_blanks : boolean; begin i:=offset; this_word:=''; done:=i > this_string.length; if not done then begin skip_blanks:=(this_string[i] = ' '); while (skip_blanks) and (not done) do begin if this_string[i] = ' ' then i:=i+1 else skip_blanks:=false; if i > this_string.length then done:=true; end; reading:=true; while (reading) and (not done) do begin if (this_string[i] = '0') or (this_string[i] = '1') or (this_string[i] = '2') or (this_string[i] = '3') or (this_string[i] = '4') or (this_string[i] = '5') or (this_string[i] = '6') or (this_string[i] = '7') or (this_string[i] = '8') or (this_string[i] = '9') then begin this_word:=this_word + this_string[i]; i:=i+1; end else reading:=false; if i > this_string.length then done:=true; end; end; offset:=i; end; (* -------------------------------------------------------------------------- *) procedure add_key_word(var key_word : array[l1..u1:integer] of integer; var key_word_len : integer; key_word_index : integer); var i,j : integer; this_char : integer; next_node : node_ptr; prev_node : node_ptr; begin node:=root_node; for i:=1 to key_word_len do begin this_char:=key_word[i]; if node^[this_char].next = nil then begin new(next_node); for j:=0 to 255 do begin next_node^[j].key:=0; next_node^[j].next:=nil; end; node^[this_char].key:=0; node^[this_char].next:=next_node; node:=next_node; end else begin node^[this_char].key:=0; node:=node^[this_char].next; end; end; (* step back to ensure tree branch terminated at last character *) node:=root_node; for i:=1 to key_word_len do begin this_char:=key_word[i]; prev_node:=node; node:=node^[this_char].next; end; prev_node^[this_char].key:=key_word_index; prev_node^[this_char].next:=nil; end; (* -------------------------------------------------------------------------- *) procedure new_key_word(var key_word : array[l1..u1:integer; l2..u2:integer] of integer; var key_word_len : array[l3..u3:integer] of integer; var key_word_index : array[l4..u4:integer] of integer; index : integer; key_word_string : varying[max] of char); var i : integer; the_ord : integer; begin num_key_word:=num_key_word+1; key_word_len[num_key_word]:=key_word_string.length; key_word_index[num_key_word]:=index; for i:=1 to key_word_len[num_key_word] do begin the_ord:=ord(key_word_string[i]); if the_ord < 0 then the_ord:=256+the_ord; key_word[num_key_word,i]:=the_ord; end; end; (* -------------------------------------------------------------------------- *) procedure copy_record(var the_record : varying[max] of char); begin num_obj:=num_obj + 1; objects[num_obj]:=the_record; end; (* -------------------------------------------------------------------------- *) begin if argc <> 3 then begin writeln(' Usage : pdffix input-file.pdf output-file.pdf'); halt; end else begin for i:=0 to argc-1 do begin argv(i,arg[i]); arg[i].length:=max_arg_len; trim_tail(arg[i]); end; end; inp_file_name:=arg[1]; out_file_name:=arg[2]; reset(inp,inp_file_name); rewrite(out,out_file_name); (* --- Create keywords --- *) num_key_word:=0; new_key_word(key_word,key_word_len,key_word_index, 1,'obj'); new_key_word(key_word,key_word_len,key_word_index, 2,'endobj'); new_key_word(key_word,key_word_len,key_word_index, 3,'/Annots'); new_key_word(key_word,key_word_len,key_word_index, 4,'/Type /Page'); new_key_word(key_word,key_word_len,key_word_index, 5,'['); new_key_word(key_word,key_word_len,key_word_index, 6,']'); new_key_word(key_word,key_word_len,key_word_index, 7,'stream'); new_key_word(key_word,key_word_len,key_word_index, 8,'endstream'); new_key_word(key_word,key_word_len,key_word_index, 9,'xref'); new_key_word(key_word,key_word_len,key_word_index,10,'startxref'); new_key_word(key_word,key_word_len,key_word_index,11,'%%EOF'); new_key_word(key_word,key_word_len,key_word_index,12,'trailer'); new_key_word(key_word,key_word_len,key_word_index,13,'/Root'); new_key_word(key_word,key_word_len,key_word_index,14,'<<'); new_key_word(key_word,key_word_len,key_word_index,15,'>>'); new_key_word(key_word,key_word_len,key_word_index,16,'/Type /Catalog'); new_key_word(key_word,key_word_len,key_word_index,17,'/OpenAction'); (* --- Create keyword tables --- *) new(root_node); for i:=0 to 255 do begin root_node^[i].key:=0; root_node^[i].next:=nil; end; for i:=1 to num_key_word do begin add_key_word(key_word[i],key_word_len[i],key_word_index[i]); end; (* --- 1st pass : copy source to output and save page objects --- *) i:=0; node:=root_node; inXref:=false; inStream:=false; inOther:=true; inTypePage:=false; inStartXref:=false; inTrailer:=false; doEOF:=false; doEndobj:=false; doCopy:=false; doCopyTrailer:=false; doEndStream:=false; hasAnnots:=false; the_record:=''; the_trailer:=''; the_startxref:=''; prev_num:=0; num_obj:=0; while not eof(inp) do begin i:=i+1; read(inp,this_chr); write(out,this_chr); this_num:=ord_chr(this_chr); (* --- Start copying input ? --- *) if (not doCopy) and (inOther) and (prev_num = end_of_line) and (is_numeral(this_num)) then begin doCopy:=true; the_record:=''; end; if doCopy then begin the_record:=the_record + this_chr; end; if doCopyTrailer then begin the_trailer:=the_trailer + this_chr; end; (* --- Have we just read a keyword ? --- *) if node^[this_num].key <> 0 then begin (* yes *) case node^[this_num].key of 2 : begin (* endobj *) doEndobj:=true; end; 3 : begin (* Annots *) if inTypePage then hasAnnots:=true else hasAnnots:=false; end; 4 : begin (* /Type /Page *) inTypePage:=true; end; 7 : begin (* stream *) inStream:=true; end; 8 : begin (* endstream *) doEndStream:=true; end; 9 : begin (* xref *) inXref:=true; end; 10 : begin (* startxref *) inStartXref:=true; end; 11 : begin (* %%EOF *) doEOF:=true; end; 12 : begin (* trailer *) inTrailer:=true; end; 14 : begin (* << *) if inTrailer then begin doCopyTrailer:=true; the_trailer:='trailer' + chr(end_of_Line) + '<<'; end; end; 15 : begin (* >> *) if inTrailer then begin doCopytrailer:=false; inTrailer:=false; end; end; 16 : begin (* /Type /Catalog *) hasCatalog:=true; end; otherwise ; end; node:=root_node; end else begin (* no *) if node^[this_num].next <> nil then begin node:=node^[this_num].next; end else begin node:=root_node; end; end; (* --- Actions for keywords --- *) if inStream then begin inXref:=false; inOther:=false; inTypePage:=false; inStream:=false; doEndobj:=false; doEndStream:=false; doCopy:=false; the_record:=''; end; if doEndStream then begin inXref:=false; inOther:=true; inTypePage:=false; inStream:=false; doEndObj:=false; doEndStream:=false; doCopy:=false; the_record:=''; end; if inXref then begin inXref:=true; inOther:=false; inTypePage:=false; inStream:=false; doEndobj:=false; doEndStream:=false; doCopy:=false; the_record:=''; end; if doEndobj then begin if (inTypePage) and (hasAnnots) then begin copy_record(the_record); inTypePage:=false; hasAnnots:=false; end; if (hasCatalog) then begin the_catalog:=the_record; copy_record(the_record); hasCatalog:=false; end; inOther:=true; doEndobj:=false; doCopy:=false; the_record:=''; end; if inStartXref then begin if is_numeral(this_num) then the_startxref:=the_startxref + this_chr; end; if doEOF then begin doEOF:=false; inStartXref:=false; end; (* --- Prepare for the next character --- *) prev_num:=this_num; (* --- Assume long records don't interest us --- *) if the_record.length >= record_len_max then begin if inTypePage then writeln(' Error : record length too long : ',record_len_max:5); the_record:=''; doCopy:=false; end; end; byte_address:=i; (* --- 2nd pass, consolidate page annotations for each page --- *) for obj:=1 to num_obj do begin the_record:=''; the_annots:=''; node:=root_node; inOther:=true; inTypePage:=false; inAnnots:=false; inCatalog:=false; inOpenAction:=false; doFlush:=false; doAnnotsRead:=false; hasAnnots:=false; hasOpenAction:=false; skip_char:=false; atSlash:=1; num_annot:=0; num_openaction:=0; tmp_record:=objects[obj]; for i:=1 to tmp_record.length do begin this_chr:=tmp_record[i]; this_num:=ord_chr(this_chr); if doAnnotsRead then the_annots:=the_annots + this_chr else the_record:=the_record + this_chr; if this_chr = '/' then atSlash:=the_record.length; if this_chr = chr(end_of_line) then atEOL:=the_record.length; if node^[this_num].key <> 0 then begin case node^[this_num].key of 2 : begin (* endobj *) doFlush:=true; inAnnots:=false; inCatalog:=false; end; 3 : begin (* /Annots *) inAnnots:=true; inCatalog:=false; hasAnnots:=true; end; 5 : begin (* [ *) if inAnnots then begin doAnnotsRead:=true; num_annot:=num_annot + 1; start_annot[num_annot]:=atSlash; end; end; 6 : begin (* ] *) if inAnnots then begin doAnnotsRead:=false; the_annots.length:=the_annots.length-1; the_record:=the_record + ']'; finish_annot[num_annot]:=the_record.length; end; end; 16 : begin (* /Type /Catalog *) inCatalog:=true; inAnnots:=false; end; 17 : begin (* /OpenAction *) hasOpenAction:=true; if inCatalog then begin inOpenAction:=true; num_openaction:=num_openaction+1; start_openaction[num_openaction]:=atSlash; end; end; otherwise ; end; end else begin if node^[this_num].next <> nil then begin node:=node^[this_num].next; end else begin node:=root_node; end; end; if inOpenAction then begin (* assume OpenActions always on just one line and of the form /OpenAction object-reference *) if this_chr = chr(end_of_line) then begin finish_openaction[num_openaction]:=atEOL; inOpenAction:=false; end; end; if doFlush then begin (* process object *) objects[obj]:=the_record; (* --- Rebuild page object to have just one /Annots dict --- *) if hasAnnots then begin tmp_record:=''; doneLastAnnot:=false; for k:=1 to the_record.length do begin inLastAnnot:=false; inOtherAnnot:=false; for n:=1 to num_annot do begin if (start_annot[n] <= k) and (k <= finish_annot[n]) then begin if (n < num_annot) then inOtherAnnot:=true else inLastAnnot:=true; end; end; if inLastAnnot then begin if not doneLastAnnot then begin tmp_record:=tmp_record + '/Annots [ ' + the_annots + ']'; doneLastannot:=true; end; end else if (not inOtherAnnot) then begin tmp_record:=tmp_record + the_record[k]; end; end; objects[obj]:=tmp_record; end; (* --- Retain only first /OpenAction, delete others --- *) if hasOpenAction then begin tmp_record:=''; the_record:=objects[obj]; for k:=1 to start_openaction[num_openaction] - 1 do tmp_record:=tmp_record + the_record[k]; for k:=finish_openaction[num_openaction] + 1 to the_record.length do tmp_record:=tmp_record + the_record[k]; objects[obj]:=tmp_record; end; doFlush:=false; inAnnots:=false; inCatalog:=false; inOpenAction:=false; end; end; end; (* --- 3rd pass : write out updated page objects and new xref/trailer --- *) for i:=1 to num_obj do begin offset:=1; the_record:=objects[i]; get_word(the_obj_num,the_record,offset); get_word(the_obj_ver,the_record,offset); obj_number[i]:=the_obj_num; writev(obj_address[i],byte_address:10); for j:=1 to 10 do if obj_address[i,j] = ' ' then obj_address[i,j]:='0'; obj_version[i]:='00000'; for j:=the_obj_ver.length downto 1 do obj_version[i,6-j]:=the_obj_ver[j]; write_out(the_record,byte_address); end; old_startxref:=the_startxref; writev(new_startxref,byte_address:0); write_out('xref',byte_address); write_out('0 1',byte_address); write_out('0000000000 65535 f ',byte_address); for i:=1 to num_obj do begin this_string:=obj_number[i]+ ' 1'; write_out(this_string,byte_address); this_string:=obj_address[i] + ' ' + obj_version[i] + ' n '; write_out(this_string,byte_address); end; tmp_trailer:=the_trailer; tmp_trailer.length:=tmp_trailer.length - 2; tmp_trailer:=tmp_trailer + '/Prev ' + old_startxref + chr(end_of_line) + '>>'; the_trailer:=tmp_trailer; write_out(the_trailer,byte_address); write_out('startxref',byte_address); write_out(new_startxref,byte_address); write_out('%%EOF',byte_address); close(inp); close(out); end.