V | = | version |
R | = | revision |
Length | = | params[:KeyLength] |
P | = | params[:Permissions] |
EncryptMetadata | = | params[:EncryptMetadata] |
CF | = | Dictionary.new |
AuthEvent | = | :DocOpen |
CFM | = | :AESV2 |
Length | = | 16 |
StmF | = | handler.StrF = :StdCF |
ID | = | [ id, id ] |
Rect | = | Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0] |
V | = | digsig ; |
SigFlags | = | InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY |
Location | = | HexaString.new(location) if location |
ContactInfo | = | HexaString.new(contact) if contact |
Reason | = | HexaString.new(reason) if reason |
Data | = | self.Catalog |
TransformParams | = | UsageRights::TransformParams.new |
V | = | UsageRights::TransformParams::VERSION |
Reference | = | [ sigref ] |
UR3 | = | digsig |
Prev | = | prev_xref_offset |
Size | = | objset.size + 1 |
Prev | = | prev_xref_offset |
XRefStm | = | xrefstm_offset if options[:use_xrefstm] == true |
Size | = | size + 1 |
Root | = | root |
Pages | = | PageTreeNode.new.set_indirect(true) |
Root | = | catalog.reference |
Size | = | size + 1 |
Root | = | self << cat |
OpenAction | = | action |
WC | = | action |
WP | = | action |
Names | = | Names.new |
Count | = | treeroot.Kids.length |
Parent | = | treeroot |
get_object | -> | [] |
filename | [RW] | |
header | [RW] | |
revisions | [RW] |
init_structure: | If this flag is set, some structures are generated automatically while this PDF is manipulated. Set it when creating a new PDF file; it must not be used when parsing an existing file. |
# File sources/parser/pdf.rb, line 164
#
# Creates a document made of a fresh header and a single initial revision
# carrying a new Trailer.
#
# init_structure:: When true (the default), #init is called once the first
#                  revision is in place.
def initialize(init_structure = true)
  @header    = PDF::Header.new
  @revisions = []

  # The first revision is created through the regular revision path, then
  # given its own trailer.
  add_new_revision
  @revisions.first.trailer = Trailer.new

  init if init_structure
end
Adds a new object to the PDF file. If this object has no object number, a new one is computed automatically and assigned to it. Returns a Reference to this object.
object: | The object to add. |
# File sources/parser/pdf.rb, line 395
#
# Adds +object+ to the most recent revision.
#
# object:: The object to add.
#
# Returns whatever #add_to_revision returns for that object.
def <<(object)
  current_revision = @revisions.last
  add_to_revision(object, current_revision)
end
Returns the current Catalog Dictionary.
# File sources/parser/catalog.rb, line 33
#
# Looks up the :Root document attribute, i.e. the current Catalog.
def Catalog
  get_doc_attr :Root
end
Sets the current Catalog Dictionary.
# File sources/parser/catalog.rb, line 40
#
# Replaces the Catalog referenced by the last revision's trailer.
#
# cat:: The new Catalog. Anything else raises TypeError.
#
# If the trailer already has a Root, that object is deleted before the new
# catalog is added to the document and referenced from the trailer.
def Catalog=(cat)
  raise TypeError, "Expected type Catalog, received #{cat.class}" unless cat.is_a?(Catalog)

  previous_root = @revisions.last.trailer.Root
  delete_object(@revisions.last.trailer.Root) if previous_root

  @revisions.last.trailer.Root = self << cat
end
Add a field to the Acrobat form.
field: | The Field to add. |
# File sources/parser/acroform.rb, line 41
#
# Appends one Field, or an array of Fields, to the Catalog's AcroForm,
# creating the AcroForm and its Fields array when they are missing.
#
# field:: A Field or an ::Array of Fields. Raises TypeError otherwise.
#
# Returns self.
def add_field(field)
  if field.is_a?(::Array)
    every_item_is_field = field.all? { |item| item.is_a?(Field) }
    raise TypeError, "Expected array of Fields" unless every_item_is_field
  else
    raise TypeError, "Expected Field, received #{field.class}" unless field.is_a?(Field)
  end

  # A lone Field is wrapped so that both cases can be concatenated alike.
  new_fields = field.is_a?(Field) ? [field] : field

  self.Catalog.AcroForm ||= InteractiveForm.new
  self.Catalog.AcroForm.Fields ||= []
  self.Catalog.AcroForm.Fields.concat(new_fields)

  self
end
Ends the current Revision, and starts a new one.
# File sources/parser/pdf.rb, line 652
#
# Appends a new Revision with a fresh Trailer. The trailer's Root is copied
# from the previous revision's trailer (nil when there was no revision yet).
#
# Returns self.
def add_new_revision
  inherited_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  fresh_revision = Revision.new(self)
  @revisions << fresh_revision

  fresh_revision.trailer = Trailer.new
  fresh_revision.trailer.Root = inherited_root

  self
end
Adds a new object to a specific revision. If this object has no object number, a new one is computed automatically and assigned to it. Returns a Reference to this object.
object: | The object to add. |
revision: | The revision to add the object to. |
# File sources/parser/pdf.rb, line 408
#
# Marks +object+ as indirect, attaches it to this document and stores it in
# the body of +revision+ under its reference.
#
# object::   The object to add.
# revision:: The revision whose body receives the object.
#
# An object whose number is 0 first gets a number/generation pair from
# #alloc_new_object_number.
#
# Returns object.reference.
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_pdf(self)

  if object.no == 0
    object.no, object.generation = alloc_new_object_number
  end

  revision.body[object.reference] = object
  object.reference
end
# File sources/parser/page.rb, line 26
#
# Appends one or more pages to the Kids of the Catalog's page tree root,
# updates the root's Count and sets each page's Parent to that root.
#
# page:: The first page to append (a new empty Page by default).
# more:: Any further pages to append after it.
#
# Fails with a RuntimeError naming the class of the first argument that is
# not a Page; nothing is appended in that case.
#
# Returns self.
def append_page(page = Page.new, *more)
  pages = [ page ].concat(more)

  # Report the class of the actual offender, not of the first argument.
  pages.each do |candidate|
    fail "Expecting Page type, instead of #{candidate.class}" unless candidate.is_a?(Page)
  end

  treeroot = self.Catalog.Pages

  treeroot.Kids ||= [] #:nodoc:
  treeroot.Kids.concat(pages)
  treeroot.Count = treeroot.Kids.length

  pages.each do |appended|
    appended.Parent = treeroot
  end

  self
end
# File sources/parser/pdf.rb, line 342
#
# Recursively collects +root+ and everything reachable from it into +objset+,
# skipping any object that is already present (compared by identity).
#
# root::       The object to start from.
# objset::     The Array accumulating the visited objects; modified in place.
# inc_objstm:: When true, the content of ObjectStreams is traversed as well.
def append_subobj(root, objset, inc_objstm)
  already_seen = objset.find { |known| root.equal?(known) }
  return unless already_seen.nil?

  objset << root

  if root.is_a?(Dictionary)
    # Both the names and the values of a dictionary are collected.
    root.each_pair do |key, val|
      append_subobj(key, objset, inc_objstm)
      append_subobj(val, objset, inc_objstm)
    end
  elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and inc_objstm == true)
    root.each do |child|
      append_subobj(child, objset, inc_objstm)
    end
  end
end
Attaches an embedded file to the PDF.
path: | The path to the file to attach. |
options: | A set of options to configure the attachment. |
# File sources/parser/file.rb, line 35
#
# Reads the file at +path+ and wraps it in an EmbeddedFileStream referenced
# by a new FileSpec.
#
# path::    The path to the file to attach.
# options:: Overrides for the defaults below:
#           :Register::     register the FileSpec under
#                           Names::Root::EMBEDDEDFILES (default true).
#           :EmbeddedName:: name stored in the FileSpec (default: basename
#                           of +path+).
#           :Filter::       stream filter applied to the data (default
#                           :FlateDecode).
#
# Returns the FileSpec.
def attach_file(path, options = {})

  #
  # Default options.
  #
  params =
  {
    :Register => true,                      # Shall the file be registered in the name directory ?
    :EmbeddedName => File.basename(path),   # The inner filename of the attachment.
    :Filter => :FlateDecode                 # The stream filter used to store data.
  }

  params.update(options)

  # Block form so that the descriptor is closed as soon as the data is read
  # (the former File.open(...).binmode.read never closed it).
  fdata = File.open(path, "rb") { |fd| fd.read }

  fstream = EmbeddedFileStream.new
  fstream.data = fdata
  fstream.setFilter(params[:Filter])

  name = params[:EmbeddedName]
  fspec = FileSpec.new.setType(:Filespec).setF(name).setEF(FileSpec.new(:F => fstream))

  register(Names::Root::EMBEDDEDFILES, name, fspec) if params[:Register] == true

  fspec
end
This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
# File sources/parser/pdf.rb, line 436
#
# Prepares the document for output: guarantees at least one page, runs
# #physicalize, then stores the required PDF version in the header.
#
# Returns self.
def compile
  # A valid document must have at least one page.
  append_page if pages.empty?

  # Allocates object numbers, creates references and invokes the object
  # finalization methods.
  physicalize

  # The version is read as text "M.m": character 0 is the major digit and
  # character 2 the minor digit.
  version_text = version_required.to_s
  @header.majorversion = version_text[0, 1].to_i
  @header.minorversion = version_text[2, 1].to_i

  self
end
Decrypts the current document (only RC4 40..128 bits). TODO: AESv2, AESv3, lazy decryption
passwd: | The password to decrypt the document. |
# File sources/parser/encryption.rb, line 54
#
# Decrypts, in place, the strings and streams of a document protected by the
# Standard security handler.
#
# passwd:: The user or owner password (empty string by default).
#
# Raises EncryptionError when the document is not encrypted or its ID is
# missing or not an Array, EncryptionNotSupportedError when the filter is not
# :Standard or /V is not 1, 2 or 4, and EncryptionInvalidPasswordError when
# +passwd+ is neither the owner nor the user password.
#
# Returns self.
def decrypt(passwd = "")

  unless self.is_encrypted?
    raise EncryptionError, "PDF is not encrypted"
  end

  encrypt_dict = get_doc_attr(:Encrypt)
  handler = Encryption::Standard::Dictionary.new(encrypt_dict.copy)

  unless handler.Filter == :Standard
    raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
  end

  algorithm =
    case handler.V
      when 1,2 then Encryption::ARC4
      when 4 then Encryption::AES
    else
      raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
    end

  id = get_doc_attr(:ID)
  if id.nil? or not id.is_a?(Array)
    raise EncryptionError, "Document ID was not found or is invalid"
  else
    id = id.first
  end

  if not handler.is_owner_password?(passwd, id) and not handler.is_user_password?(passwd, id)
    raise EncryptionInvalidPasswordError
  end

  encryption_key = handler.compute_encryption_key(passwd, id)

  #self.extend(Encryption::EncryptedDocument)
  #self.encryption_dict = encrypt_dict
  #self.encryption_key = encryption_key
  #self.stm_algo = self.str_algo = algorithm

  #
  # Should be fixed to exclude only the active XRefStream
  #
  encrypted_objects = self.objects(false).find_all{ |obj|
    (obj.is_a?(String) and not obj.indirect_parent.is_a?(XRefStream) and not obj.equal?(encrypt_dict[:U]) and not obj.equal?(encrypt_dict[:O])) or
    (obj.is_a?(Stream) and not obj.is_a?(XRefStream))
  }

  encrypted_objects.each { |obj|
    no = obj.indirect_parent.no
    gen = obj.indirect_parent.generation

    # The per-object key is the document key followed by the 3 low-order
    # bytes of the object number and the 2 low-order bytes of the generation,
    # low-order byte first. "V" forces little-endian packing; the former "I"
    # used the host byte order and produced a wrong key on big-endian hosts.
    k = encryption_key + [no].pack("V")[0..2] + [gen].pack("V")[0..1]
    key_len = (k.length > 16) ? 16 : k.length

    k << "sAlT" if algorithm == Encryption::AES

    key = Digest::MD5.digest(k)[0, key_len]

    case obj
      when String then obj.replace(algorithm.decrypt(key, obj.value))
      when Stream then obj.rawdata = algorithm.decrypt(key, obj.rawdata)
    end
  }

  self
end
Enable the document Usage Rights.
rights: | list of rights defined in UsageRights::Rights |
# File sources/parser/signature.rb, line 130
#
# Adds a UR3 usage-rights signature to the document and signs it with the
# key and certificate read from 'adobe.key' and 'adobe.crt' in the current
# directory.
#
# rights:: Arrays whose first element is a TransformParams key and whose
#          remaining elements are the rights to append under that key.
#
# Returns nil (after a warning) when the key or the certificate cannot be
# loaded. On success the document is compiled, signed and then frozen.
def enable_usage_rights(*rights)

  # Upper bound for the signature placeholder: size of a detached PKCS7
  # signature over sample data, plus 128 bytes of slack.
  def signfield_size(certificate, key, ca = []) #:nodoc:
    datatest = "abcdefghijklmnopqrstuvwxyz"
    OpenSSL::PKCS7.sign(certificate, key, datatest, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der.size + 128
  end

  begin
    # Block form closes both files once read; the former
    # File.open(...).binmode.read leaked the descriptors.
    key = OpenSSL::PKey::RSA.new(File.open('adobe.key', 'rb') { |fd| fd.read })
    certificate = OpenSSL::X509::Certificate.new(File.open('adobe.crt', 'rb') { |fd| fd.read })
  rescue
    warn "The Adobe private key is necessary to enable usage rights.\nYou do not seem to be Adobe :)... Aborting."
    return nil
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  self.Catalog.AcroForm ||= InteractiveForm.new
  #self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPENDONLY

  digsig.Type = :Sig #:nodoc:
  digsig.Contents = HexaString.new("\x00" * signfield_size(certificate, key, [])) #:nodoc:
  digsig.Filter = Name.new("Adobe.PPKLite") #:nodoc:
  digsig.Name = "ARE Acrobat Product v8.0 P23 0002337" #:nodoc:
  digsig.SubFilter = Name.new("adbe.pkcs7.detached") #:nodoc:
  digsig.ByteRange = [0, 0, 0, 0] #:nodoc:

  sigref = Signature::Reference.new #:nodoc:
  sigref.Type = :SigRef #:nodoc:
  sigref.TransformMethod = :UR3 #:nodoc:
  sigref.Data = self.Catalog

  sigref.TransformParams = UsageRights::TransformParams.new
  sigref.TransformParams.P = true #:nodoc:
  sigref.TransformParams.Type = :TransformParams #:nodoc:
  sigref.TransformParams.V = UsageRights::TransformParams::VERSION

  rights.each { |right|

    sigref.TransformParams[right.first] ||= []
    sigref.TransformParams[right.first].concat(right[1..-1])

  }

  digsig.Reference = [ sigref ]

  self.Catalog.Perms ||= Perms.new
  self.Catalog.Perms.UR3 = digsig

  #
  # Flattening the PDF to get file view.
  #
  self.compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size

  # Writing ByteRange[3] can itself change the file size, so repeat until
  # the stored value agrees with the size it yields.
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]

  # From that point the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs

  filedata = self.to_bin
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]

  signature = OpenSSL::PKCS7.sign(certificate, key, signable_data, [], OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
  digsig.Contents[0, signature.size] = signature

  #
  # No more modifications are allowed after signing.
  #
  self.freeze

end
Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.
userpasswd: | The user password. |
ownerpasswd: | The owner password. |
options: | A set of options to configure encryption. |
# File sources/parser/encryption.rb, line 128
#
# Sets the document up for encryption with the Standard security handler.
#
# userpasswd::  The user password.
# ownerpasswd:: The owner password.
# options::     Overrides for the defaults below:
#               :Algorithm::       :RC4 (default) or :AES.
#               :KeyLength::       key size in bits (default 128). RC4 takes
#                                  a multiple of 8 within 40..128; AES takes
#                                  128 only.
#               :EncryptMetadata:: default true; only used for revision 4.
#               :Permissions::     default Permissions::ALL.
#
# Raises EncryptionError when the document is already encrypted or the key
# length is invalid, and EncryptionNotSupportedError for any other algorithm.
#
# Returns self, extended with Encryption::EncryptedDocument.
def encrypt(userpasswd, ownerpasswd, options = {})

  raise EncryptionError, "PDF is already encrypted" if self.is_encrypted?

  #
  # Default encryption options.
  #
  params = {
    :Algorithm => :RC4,
    :KeyLength => 128,
    :EncryptMetadata => true,
    :Permissions => Encryption::Standard::Permissions::ALL
  }
  params.update(options)

  key_bits = params[:KeyLength]

  case params[:Algorithm]
  when :RC4
    algorithm = Encryption::ARC4
    unless (40..128) === key_bits and key_bits % 8 == 0
      raise EncryptionError, "Invalid key length"
    end
    # 40-bit keys use V1/R2; anything longer uses V2/R3.
    version, revision = (key_bits > 40) ? [2, 3] : [1, 2]
  when :AES
    algorithm = Encryption::AES
    raise EncryptionError, "Invalid key length" unless key_bits == 128
    version = revision = 4
  else
    raise EncryptionNotSupportedError, "Algorithm not supported : #{params[:Algorithm]}"
  end

  id = (get_doc_attr(:ID) || gen_id).first

  handler = Encryption::Standard::Dictionary.new
  handler.Filter = :Standard #:nodoc:
  handler.V = version
  handler.R = revision
  handler.Length = key_bits
  handler.P = params[:Permissions]

  if revision == 4
    handler.EncryptMetadata = params[:EncryptMetadata]
    handler.CF = Dictionary.new

    cryptfilter = Encryption::CryptFilterDictionary.new
    cryptfilter.AuthEvent = :DocOpen
    cryptfilter.CFM = :AESV2
    cryptfilter.Length = 16

    handler.CF[:StdCF] = cryptfilter
    handler.StmF = handler.StrF = :StdCF
  end

  handler.set_owner_password(userpasswd, ownerpasswd)
  handler.set_user_password(userpasswd, id)

  encryption_key = handler.compute_encryption_key(userpasswd, id)

  fileInfo = get_trailer_info
  fileInfo[:Encrypt] = self << handler

  self.extend(Encryption::EncryptedDocument)
  self.encryption_dict = handler
  self.encryption_key = encryption_key
  self.stm_algo = self.str_algo = algorithm

  self
end
Exports the document to a Graphviz DOT file.
filename: | The path where to save the file. |
# File sources/parser/export.rb, line 34
#
# Writes the object graph of the document to +filename+ in Graphviz DOT
# syntax: one node per object (References excluded) and one edge per
# container-to-member relation.
#
# filename:: The path where to save the file.
def export_to_graph(filename)

  # Display attributes (label, colors, shape) of the node drawn for +object+.
  def appearance(object) #:nodoc:

    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "doublecircle"
      when Name, Number
        label = object.value
        fontcolor = "orange"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "green"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end

    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end

  # Emits the edges leaving +object+; References are resolved through
  # pdf.indirect_objects, and unresolved ones are skipped.
  def add_edges(pdf, fd, object) #:nodoc:

    if object.is_a?(Array) or object.is_a?(ObjectStream)

      object.each { |subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << "\t#{object.object_id} -> #{subobj.object_id}\n"
        end
      }

    elsif object.is_a?(Dictionary)

      object.each_pair { |name, subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << "\t#{object.object_id} -> #{subobj.object_id} [label=\"#{name.value}\",fontsize=7];\n"
        end
      }

    end

    if object.is_a?(Stream)

      object.dictionary.each_pair { |key, value|

        # Resolve the value itself; this branch used to index with an
        # undefined `subobj` and raised NameError on any Reference.
        if value.is_a?(Reference) then value = pdf.indirect_objects[value] end

        unless value.nil?
          fd << "\t#{object.object_id} -> #{value.object_id} [label=\"#{key.value}\",fontsize=7];\n"
        end
      }

    end

  end

  # The graph name was never configurable: the former nil/empty check ran on
  # a just-declared local and always selected "PDF".
  graphname = "PDF"

  File.open(filename, "w") do |fd|

    fd << "digraph #{graphname} {\n\n"

    objects = self.objects(true).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each { |object|

      attr = appearance(object)

      fd << "\t#{object.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]}];\n"

      if object.is_a?(Stream)

        # Nodes are declared for the dictionary values (the targets of the
        # stream's edges); plain `each` yielded [key, value] pairs instead.
        object.dictionary.each_value { |value|

          unless value.is_a?(Reference)
            attr = appearance(value)
            fd << "\t#{value.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]}];\n"
          end

        }

      end

      add_edges(self, fd, object)

    }

    fd << "\n}"

  end

end
Exports the document to a GraphML file.
filename: | The path where to save the file. |
# File sources/parser/export.rb, line 153
#
# Writes the object graph of the document to +filename+ as GraphML with
# yFiles node/edge graphics: one node per object (References excluded) and
# one edge per container-to-member relation.
#
# filename:: The path where to save the file.
def export_to_graphml(filename)

  # GraphML <node> element labelled with attr[:label].
  def declare_node(id, attr) #:nodoc:
    " <node id=\"#{id}\">\n" <<
    " <data key=\"d0\">\n" <<
    " <y:ShapeNode>\n" <<
    " <y:NodeLabel>#{attr[:label]}</y:NodeLabel>\n" <<
    #~ " <y:Shape type=\"#{attr[:shape]}\"/>\n" <<
    " </y:ShapeNode>\n" <<
    " </data>\n" <<
    " </node>\n"
  end

  # GraphML directed <edge> element from +src+ to +dest+, optionally labelled.
  def declare_edge(id, src, dest, label = nil) #:nodoc:
    " <edge id=\"#{id}\" source=\"#{src}\" target=\"#{dest}\">\n" <<
    " <data key=\"d1\">\n" <<
    " <y:PolyLineEdge>\n" <<
    " <y:LineStyle type=\"line\" width=\"1.0\" color=\"#000000\"/>\n" <<
    " <y:Arrows source=\"none\" target=\"standard\"/>\n" <<
    " <y:EdgeLabel>#{label.to_s}</y:EdgeLabel>\n" <<
    " </y:PolyLineEdge>\n" <<
    " </data>\n" <<
    " </edge>\n"
  end

  # Display attributes (label, colors, shape) of the node drawn for +object+.
  def appearance(object) #:nodoc:

    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "doublecircle"
      when Name, Number
        label = object.value
        fontcolor = "orange"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "green"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end

    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end

  # Emits the edges leaving +object+, numbering them from +id+.
  # Returns the next unused edge number.
  def add_edges(pdf, fd, object, id) #:nodoc:

    if object.is_a?(Array) or object.is_a?(ObjectStream)

      object.each { |subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}")
          id = id + 1
        end
      }

    elsif object.is_a?(Dictionary)

      object.each_pair { |name, subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}", name.value)
          id = id + 1
        end
      }

    end

    if object.is_a?(Stream)

      object.dictionary.each_pair { |key, value|

        # Resolve the value itself; this branch used to index with an
        # undefined `subobj` and raised NameError on any Reference.
        if value.is_a?(Reference) then value = pdf.indirect_objects[value] end

        unless value.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{value.object_id}", key.value)
          id = id + 1
        end
      }

    end

    id
  end

  # NOTE(review): this class variable is not read anywhere in this method;
  # kept in case other code relies on it -- confirm before removing.
  @@edge_nb = 1

  # The graph name was never configurable: the former nil/empty check ran on
  # a just-declared local and always selected "PDF".
  graphname = "PDF"

  edge_nb = 1

  File.open(filename, "w") do |fd|

    fd << '<?xml version="1.0" encoding="UTF-8"?>' << "\n"
    fd << '<graphml xmlns="http://graphml.graphdrawing.org/xmlns/graphml"' << "\n"
    fd << ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' << "\n"
    fd << ' xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns/graphml ' << "\n"
    fd << ' http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd"' << "\n"
    fd << ' xmlns:y="http://www.yworks.com/xml/graphml">' << "\n"
    fd << '<key id="d0" for="node" yfiles.type="nodegraphics"/>' << "\n"
    fd << '<key id="d1" for="edge" yfiles.type="edgegraphics"/>' << "\n"
    fd << "<graph id=\"#{graphname}\" edgedefault=\"directed\">\n"

    objects = self.objects(true).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each { |object|

      fd << declare_node("n#{object.object_id}", appearance(object))

      if object.is_a?(Stream)

        # Nodes are declared for the dictionary values; plain `each` yielded
        # [key, value] pairs instead.
        object.dictionary.each_value { |value|

          unless value.is_a?(Reference)
            # Same "n" prefix as the edge endpoints, so that the stream's
            # edges point at a declared node.
            fd << declare_node("n#{value.object_id}", appearance(value))
          end
        }
      end

      edge_nb = add_edges(self, fd, object, edge_nb)
    }

    fd << '</graph>' << "\n"
    fd << '</graphml>'

  end

end
Returns the size the file would take on disk if it were written now.
# File sources/parser/pdf.rb, line 191
#
# Size of the serialized document, obtained from #to_bin called with
# :rebuildxrefs disabled.
def filesize
  serialized = self.to_bin(:rebuildxrefs => false)
  serialized.size
end
Returns an array of the objects for which the given block returns true.
# File sources/parser/pdf.rb, line 321
#
# Selects the objects for which the block returns a true value.
#
# params:: :only_indirect => true restricts the search to the values of
#          #indirect_objects; otherwise (default) #objects is searched.
def find(params = {}, &b)
  options = { :only_indirect => false }
  options.update(params)

  candidates =
    if options[:only_indirect] == true
      self.indirect_objects.values
    else
      self.objects
    end

  candidates.find_all(&b)
end
Returns the document information dictionary if present.
# File sources/parser/metadata.rb, line 49
#
# Looks up the :Info document attribute.
def get_document_info
  get_doc_attr(:Info)
end
Returns a Hash of the information found in the metadata stream
# File sources/parser/metadata.rb, line 56
#
# Parses the Catalog's Metadata stream as XML and collects every child
# element of rdf:Description as name => text.
#
# Returns the Hash, or nil when the Catalog's Metadata is not a Stream.
def get_metadata
  metadata_stm = self.Catalog.Metadata
  return unless metadata_stm.is_a?(Stream)

  xml = REXML::Document.new(metadata_stm.data)

  info = {}
  xml.elements.each("*/*/rdf:Description/*") { |node| info[node.name] = node.text }
  info
end
Returns an array of the objects whose content matches any of the given patterns.
# File sources/parser/pdf.rb, line 267
#
# Collects the String, Name and Stream objects whose content matches at
# least one pattern. Strings and Names are matched on value.to_s, Streams on
# their data.
#
# patterns:: Regexps, or plain strings which are matched literally.
#            Anything else raises TypeError.
def grep(*patterns)
  patterns.map! { |pat| pat.is_a?(::String) ? Regexp.new(Regexp.escape(pat)) : pat }

  raise TypeError, "Expected a String or Regexp" unless patterns.all? { |pat| pat.is_a?(Regexp) }

  hits = []
  objects.each do |candidate|
    matched =
      case candidate
      when String, Name
        patterns.any? { |pat| candidate.value.to_s.match(pat) }
      when Stream
        patterns.any? { |pat| candidate.data.match(pat) }
      else
        false
      end

    hits << candidate if matched
  end

  hits
end
Returns true if the document has a document information dictionary.
# File sources/parser/metadata.rb, line 35
#
# Tells whether the :Info document attribute is present, as reported by
# #has_attr?.
def has_document_info?
  has_attr?(:Info)
end
Returns true if the document contains an acrobat form.
# File sources/parser/acroform.rb, line 33
#
# True when the document has a Catalog whose AcroForm entry is set.
def has_form?
  return false if self.Catalog.nil?

  not self.Catalog.AcroForm.nil?
end
Returns true if the document has a catalog metadata stream.
# File sources/parser/metadata.rb, line 42
#
# True when the document has a Catalog holding a :Metadata key.
#
# Returns false, instead of raising NoMethodError, when the document has no
# Catalog -- the same guard #has_form? applies.
def has_metadata?
  catalog = self.Catalog
  return false if catalog.nil?

  catalog.has_key?(:Metadata)
end
# File sources/parser/signature.rb, line 217
#
# True when the Catalog's Perms dictionary holds a :UR3 or :UR entry.
#
# This replaces a placeholder that returned the string "todo", which every
# caller saw as true. It implements the check the placeholder carried as a
# comment, guarded against a missing Catalog or Perms.
# NOTE(review): entries are read with Perms[...] rather than accessor
# methods -- confirm Perms supports symbol lookup like the other
# dictionaries of this class.
def has_usage_rights?
  catalog = self.Catalog
  return false if catalog.nil?

  perms = catalog.Perms
  return false if perms.nil?

  not perms[:UR3].nil? or not perms[:UR].nil?
end
Returns whether the current document is linearized.
# File sources/parser/linearization.rb, line 33
#
# True when the first object in the body of the first revision is a
# Dictionary holding a :Linearized key.
def is_linearized?
  first_object = @revisions.first.body.values.first

  first_object.is_a?(Dictionary) and first_object.has_key?(:Linearized)
end
Returns whether the document contains a digital signature.
# File sources/parser/signature.rb, line 119
#
# True when the Catalog's AcroForm has the SIGNATURESEXIST bit set in its
# :SigFlags entry.
#
# This replaces a placeholder that returned the string "todo", which every
# caller saw as true. The check follows the one the placeholder carried as a
# comment, with the constant spelled as #sign spells it (SIGNATURESEXIST)
# and guards for a missing Catalog, AcroForm or SigFlags.
def is_signed?
  catalog = self.Catalog
  return false if catalog.nil?

  form = catalog.AcroForm
  return false if form.nil?

  flags = form[:SigFlags]
  return false if flags.nil?

  (flags & InteractiveForm::SigFlags::SIGNATURESEXIST) != 0
end
Returns an array of the objects whose name (their key in a Dictionary) matches any of the given patterns.
# File sources/parser/pdf.rb, line 293
#
# Collects the dictionary entries whose name matches at least one pattern.
# A matching entry that is a Reference is replaced by the result of #solve.
#
# patterns:: Regexps, or plain strings which are matched literally.
#            With no pattern at all, every object is returned.
def ls(*patterns)
  return objects if patterns.empty?

  matchers = patterns.map do |pat|
    pat.is_a?(::String) ? Regexp.new(Regexp.escape(pat)) : pat
  end

  found = []

  objects.each do |container|
    next unless container.is_a?(Dictionary)

    container.each_pair do |key, entry|
      next unless matchers.any? { |matcher| key.value.to_s.match(matcher) }

      found << (entry.is_a?(Reference) ? entry.solve : entry)
    end
  end

  found
end
# File sources/parser/obfuscation.rb, line 216
#
# Saves the document through #saveas with the :obfuscate option forced on.
#
# filename:: The path where to save this PDF.
# options::  Further options passed on to #saveas. The caller's hash is left
#            untouched; :obfuscate is set on a copy.
#
# Returns whatever #saveas returns.
def obfuscate_and_saveas(filename, options = {})
  saveas(filename, options.merge(:obfuscate => true))
end
Returns an array of objects embedded in the PDF body.
include_objstm: | Whether it shall return objects embedded in object streams. |
Note: this should return an iterator, for Ruby 1.9 compatibility.
# File sources/parser/pdf.rb, line 340
#
# Lists every object reachable from the bodies of all revisions, in
# traversal order and without repeating an object (compared by identity).
#
# include_objstm:: Whether the content of ObjectStreams is traversed too.
def objects(include_objstm = true)

  # Recursively collects +root+ and everything reachable from it into
  # +objset+, skipping objects that are already present.
  def append_subobj(root, objset, inc_objstm)
    already_seen = objset.find { |known| root.equal?(known) }
    return unless already_seen.nil?

    objset << root

    if root.is_a?(Dictionary)
      root.each_pair do |key, val|
        append_subobj(key, objset, inc_objstm)
        append_subobj(val, objset, inc_objstm)
      end
    elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and inc_objstm == true)
      root.each do |child|
        append_subobj(child, objset, inc_objstm)
      end
    end
  end

  collected = []

  @revisions.each do |revision|
    revision.body.each_value do |top_level|
      append_subobj(top_level, collected, include_objstm)
    end
  end

  collected
end
Sets an action to run on document closing.
action: | A JavaScript Action Object. |
# File sources/parser/catalog.rb, line 76
#
# Stores +action+ as the WC entry of the Catalog's additional actions,
# creating the additional-actions dictionary when it is missing.
#
# action:: An Action::JavaScript. Anything else raises TypeError.
#
# Raises InvalidPDF when the document has no Catalog.
#
# Returns self.
def onDocumentClose(action)
  raise TypeError, "An Action::JavaScript object must be passed." unless action.is_a?(Action::JavaScript)
  raise InvalidPDF, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WC = action

  self
end
Sets an action to run on document opening.
action: | An Action Object. |
# File sources/parser/catalog.rb, line 57
#
# Stores +action+ as the Catalog's OpenAction.
#
# action:: An Action::Action. Anything else raises TypeError.
#
# Raises InvalidPDF when the document has no Catalog.
#
# Returns self.
def onDocumentOpen(action)
  raise TypeError, "An Action object must be passed." unless action.is_a?(Action::Action)
  raise InvalidPDF, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.OpenAction = action

  self
end
Sets an action to run on document printing.
action: | A JavaScript Action Object. |
# File sources/parser/catalog.rb, line 96
#
# Stores +action+ as the WP entry of the Catalog's additional actions,
# creating the additional-actions dictionary when it is missing.
#
# action:: An Action::JavaScript. Anything else raises TypeError.
#
# Raises InvalidPDF when the document has no Catalog.
#
# Returns self, like #onDocumentOpen and #onDocumentClose, so that the three
# calls can be chained (this method used to return the action instead).
def onDocumentPrint(action)

  unless action.is_a?(Action::JavaScript)
    raise TypeError, "An Action::JavaScript object must be passed."
  end

  unless self.Catalog
    raise InvalidPDF, "A catalog object must exist to add this action."
  end

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WP = action

  self
end
Converts a logical PDF view into a physical view ready for writing.
# File sources/parser/pdf.rb, line 751
#
# Walks every indirect object of the document and, inside containers,
# replaces each indirect member by its reference, adding to the revision the
# members the document does not know yet.
#
# Returns self.
def physicalize

  #
  # Indirect objects are added to the revision and assigned numbers.
  #
  def build(obj, revision, embedded = false) #:nodoc:

    #
    # Finalize any subobjects before building the stream.
    #
    if obj.is_a?(ObjectStream)
      obj.each do |member|
        build(member, revision, true)
      end
    end

    obj.pre_build

    if obj.is_a?(Dictionary) or obj.is_a?(Array)

      obj.map! do |child|
        # Direct members stay in place.
        next child unless child.is_indirect?

        # Members already known to the document are only replaced by their
        # reference.
        next child.reference if get_object(child.reference)

        # Unknown members are registered first, then built themselves.
        ref = add_to_revision(child, revision)
        build(child, revision)
        ref
      end

      obj.each do |child|
        build(child, revision)
      end

    end

    obj.post_build

  end

  all_indirect_objects.each do |obj, revision|
    build(obj, revision)
  end

  self
end
Compute and update XRef::Section for each Revision.
# File sources/parser/pdf.rb, line 625
#
# Rebuilds the cross-reference table of each revision and updates its
# trailer (created when missing) with the cumulative Size and the startxref
# offset.
#
# Returns self.
def rebuildxrefs
  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |revision|
    # The xref section starts right after the serialized body.
    revision.body.each_value { |object| offset += object.to_s.size }

    object_count += revision.body.size
    revision.xreftable = buildxrefs(revision.body.values)

    revision.trailer ||= Trailer.new
    revision.trailer.Size = object_count + 1
    revision.trailer.startxref = offset

    # The next revision begins after this xref table and trailer.
    offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
Registers an object into a specific Names root dictionary.
root: | The root dictionary (see Names::Root) |
name: | The value name. |
value: | The value to associate with this name. |
# File sources/parser/catalog.rb, line 117
#
# Appends the pair name/value to the name tree stored under +root+ in the
# Catalog's Names dictionary, creating the Names dictionary and the tree
# node when they are missing.
#
# root::  The root dictionary (see Names::Root), used as an accessor name.
# name::  The value name.
# value:: The value to associate with this name; it is made indirect.
def register(root, name, value)
  self.Catalog.Names = Names.new if self.Catalog.Names.nil?

  value.set_indirect(true)

  namesroot = self.Catalog.Names.send(root)

  if namesroot.nil?
    # No tree for this root yet: create the node, add it to the document and
    # store it through the "<root>=" writer.
    names = NameTreeNode.new({:Names => [] })
    writer = (root.id2name + "=").to_sym
    self.Catalog.Names.send(writer, (self << names))
    names.Names << name << value
  else
    namesroot.Names << name << value
  end
end
Removes the last revisions.
level: | The number of revisions to remove. |
# File sources/parser/pdf.rb, line 667
#
# Drops the most recent revisions.
#
# level:: The number of revisions to remove (1 by default).
#
# Returns self.
def remove_last_revision(level = 1)
  @revisions.pop(level)
  self
end
Saves the document to the given filename, or to its current filename when none is given.
# File sources/parser/pdf.rb, line 198
#
# Serializes the document and writes it to disk in binary mode.
#
# filename:: The path to write to; defaults to the document's current
#            filename. Fails with a RuntimeError when neither is set.
# params::   Options, also passed on to #to_bin. :recompile (default true)
#            controls whether #compile runs first.
#
# The document is compiled and serialized before the target file is opened,
# so a failure in either step no longer leaves an empty, truncated file
# behind, and the file handle is always closed.
#
# Returns self.
def save(filename = nil, params = {})

  name = filename || @filename
  fail "No filename specified for saving." unless name

  options =
  {
    :recompile => true,
  }
  options.update(params)

  self.compile if options[:recompile] == true
  bin = self.to_bin(options)

  File.open(name, "wb") { |fd| fd << bin }

  self
end
Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.
revision: | The revision number to save. |
filename: | The path where to save this PDF. |
# File sources/parser/pdf.rb, line 243
#
# Writes the header followed by the first +revision+ revisions (body, xref
# table and trailer of each) to +filename+, in binary mode.
#
# revision:: The number of revisions to write; values beyond the number of
#            revisions write them all.
# filename:: The path where to save this PDF.
#
# Body objects are written one by one: appending the whole Array to the file
# wrote its #inspect form on Ruby >= 1.9. The block form of File.open closes
# the handle even when serialization raises.
#
# Returns self.
def save_upto(revision, filename)

  File.open(filename, "wb") do |fd|

    fd << @header

    @revisions.each_with_index do |rev, index|
      break unless index < revision

      rev.body.values.each { |object| fd << object }
      fd << rev.xreftable
      fd << rev.trailer
    end

  end

  self
end
Sets the current filename to the argument given, then save it.
filename: | The path where to save this PDF. |
# File sources/parser/pdf.rb, line 224
#
# Saves the document under +filename+ through #save.
#
# filename:: The path where to save this PDF.
# params::   Options passed on to #save.
#
# A frozen document is written as it is: :recompile and :rebuildxrefs are
# forced to false in +params+ and the current filename is left unchanged.
# Otherwise +filename+ becomes the document's current filename.
#
# Returns self.
def saveas(filename, params = {})
  if self.frozen?
    params[:recompile] = params[:rebuildxrefs] = false
  else
    @filename = filename
  end

  save(filename, params)

  self
end
Sign the document with the given key and x509 certificate.
certificate: | The X509 certificate containing the public key. |
key: | The private key associated with the certificate. |
ca: | Optional CA certificates used to sign the user certificate. |
# File sources/parser/signature.rb, line 34
#
# Signs the document with the given key and X509 certificate, then freezes
# the document.
#
# certificate:: The OpenSSL::X509::Certificate containing the public key.
# key::         The OpenSSL::PKey::RSA private key associated with the certificate.
# ca::          Optional Array of CA certificates, passed to OpenSSL::PKCS7.sign.
# annotation::  Optional Annotation::Widget::Signature to attach the signature
#               to. When nil, a new one with an all-zero Rect is created.
# location::    Optional value stored as the signature's Location.
# contact::     Optional value stored as the signature's ContactInfo.
# reason::      Optional value stored as the signature's Reason.
#
# Raises TypeError when certificate, key, ca or annotation has the wrong type.
# Returns the result of self.freeze.
def sign(certificate, key, ca = [], annotation = nil, location = nil, contact = nil, reason = nil)

  unless certificate.is_a?(OpenSSL::X509::Certificate)
    raise TypeError, "A OpenSSL::X509::Certificate object must be passed."
  end

  unless key.is_a?(OpenSSL::PKey::RSA)
    raise TypeError, "A OpenSSL::PKey::RSA object must be passed."
  end

  unless ca.is_a?(::Array)
    raise TypeError, "Expected an Array of CA certificate."
  end

  unless annotation.nil? || annotation.is_a?(Annotation::Widget::Signature)
    raise TypeError, "Expected a Annotation::Widget::Signature object."
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  if annotation.nil?
    annotation = Annotation::Widget::Signature.new
    annotation.Rect = Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0]
  end

  annotation.V = digsig
  add_field(annotation)
  self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY

  digsig.Type = :Sig #:nodoc:
  # Zero-filled placeholder; the real signature is copied into it below.
  digsig.Contents = HexaString.new("\x00" * signfield_size(certificate, key, ca)) #:nodoc:
  digsig.Filter = Name.new("Adobe.PPKMS") #:nodoc:
  digsig.SubFilter = Name.new("adbe.pkcs7.detached") #:nodoc:
  digsig.ByteRange = [0, 0, 0, 0] #:nodoc:

  digsig.Location = HexaString.new(location) if location
  digsig.ContactInfo = HexaString.new(contact) if contact
  digsig.Reason = HexaString.new(reason) if reason

  #
  # Flattening the PDF to get file view.
  #
  self.compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size

  # Repeat until the stored value agrees with filesize.
  # NOTE(review): presumably filesize depends on the serialised ByteRange,
  # so writing a new value can change it -- confirm against filesize.
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]

  # From that point the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs

  filedata = self.to_bin
  # The signed data is the two byte ranges described by ByteRange.
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]

  signature = OpenSSL::PKCS7.sign(certificate, key, signable_data, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  self.freeze

end

# Returns the number of bytes reserved for the signature contents: the DER
# size of a detached PKCS7 signature over a fixed sample string, plus 128.
# Defined beside sign rather than inside it, so the method is not redefined
# on every call to sign.
def signfield_size(certificate, key, ca = []) #:nodoc:
  datatest = "abcdefghijklmnopqrstuvwxyz"
  OpenSSL::PKCS7.sign(certificate, key, datatest, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der.size + 128
end
Returns the final binary representation of the current document.
rebuildxrefs: | Computes xrefs while writing objects (default true). |
obfuscate: | Do some basic syntactic object obfuscation. |
# File sources/parser/pdf.rb, line 464
#
# Returns the final binary representation of the current document as a String.
#
# params:: Options hash merged over the following defaults:
#          :rebuildxrefs::  Computes xrefs while writing objects (default true).
#          :obfuscate::     Serialise objects and trailers with
#                           to_obfuscated_str instead of to_s (default false).
#          :use_xrefstm::   Emit a cross-reference stream. Defaults to true
#                           when the document contains an ObjectStream.
#          :use_xreftable:: Emit a cross-reference table. Defaults to the
#                           opposite of :use_xrefstm.
#          When :use_xrefstm and :use_xreftable are equal, both are reset
#          to their defaults.
#
# Raises InvalidPDF when no trailer information is found.
def to_bin(params = {})

  has_objstm = self.indirect_objects.values.any?{|obj| obj.is_a?(ObjectStream)}

  options =
  {
    :rebuildxrefs => true,
    :obfuscate => false,
    :use_xrefstm => has_objstm,
    :use_xreftable => (not has_objstm)
    #todo linearize
  }
  options.update(params)

  # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
  if options[:use_xrefstm] == options[:use_xreftable]
    options[:use_xrefstm] = has_objstm
    options[:use_xreftable] = (not has_objstm)
  end

  # Get trailer dictionary
  trailer_info = get_trailer_info
  if trailer_info.nil?
    raise InvalidPDF, "No trailer information found"
  end
  trailer_dict = trailer_info.dictionary

  prev_xref_offset = nil
  xrefstm_offset = nil

  # Header
  bin = ""
  bin << @header.to_s

  # For each revision
  @revisions.each do |rev|

    if options[:rebuildxrefs] == true
      lastno_table, lastno_stm = 0, 0
      brange_table, brange_stm = 0, 0

      xrefs_stm = [ XRef.new(0, XRef::LASTFREE, XRef::FREE) ]
      xrefs_table = [ XRef.new(0, XRef::LASTFREE, XRef::FREE) ]

      if options[:use_xreftable] == true
        xrefsection = XRef::Section.new
      end

      if options[:use_xrefstm] == true
        xrefstm = XRefStream.new
        add_to_revision(xrefstm, rev)
      end
    end

    objset = rev.body.values

    # Objects embedded in object streams also need an xref stream entry.
    objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
      objset |= objstm.objects
    end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true

    # For each object, processed in sorted order
    objset.sort.each do |obj|

      if options[:rebuildxrefs] == true

        # Adding subsections if needed
        if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
          xrefsection << XRef::Subsection.new(brange_table, xrefs_table)

          xrefs_table.clear
          brange_table = obj.no
        end
        if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.length

          xrefs_stm.clear
          brange_stm = obj.no
        end

        # Process embedded objects
        if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
          index = obj.parent.index(obj.no)

          xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)

          lastno_stm = obj.no
        else
          # Offsets are byte positions in the output, hence bytesize:
          # String#size counts characters on Ruby >= 1.9.
          xrefs_stm << XRef.new(bin.bytesize, obj.generation, XRef::USED)
          xrefs_table << XRef.new(bin.bytesize, obj.generation, XRef::USED)

          lastno_table = lastno_stm = obj.no
        end

      end

      # Objects living inside an object stream are not written on their own.
      if obj.parent == obj or not obj.parent.is_a?(ObjectStream)

        # Finalize XRefStm
        if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
          xrefstm_offset = bin.bytesize

          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.size

          xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
          xrefstm.Prev = prev_xref_offset

          rev.trailer.dictionary = nil

          add_to_revision(xrefstm, rev)

          xrefstm.pre_build
          xrefstm.post_build
        end

        bin << (options[:obfuscate] == true ? obj.to_obfuscated_str : obj.to_s)
      end
    end

    rev.trailer ||= Trailer.new

    # XRef table
    if options[:rebuildxrefs] == true

      if options[:use_xreftable] == true
        table_offset = bin.bytesize

        xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
        rev.xreftable = xrefsection

        rev.trailer.dictionary = trailer_dict
        rev.trailer.Size = objset.size + 1
        rev.trailer.Prev = prev_xref_offset

        rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
      end

      # Each revision's startxref also becomes the next revision's Prev.
      startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
      rev.trailer.startxref = prev_xref_offset = startxref

    end # end if rebuildxrefs

    # Trailer

    bin << rev.xreftable.to_s if options[:use_xreftable] == true
    bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)

  end

  bin
end