- Insert ribbon menu
- Object (in Text)
- From file tab
- (Browse to file)
0x2 0x0 | header |
string\0 | file name |
string\0 | file path |
0x0 0x0 0x3 0x0 | (native header ?) |
int | temp file path length |
string\0 | temp file path |
int | content length |
bytes | content |
int | temp file path utf16 length |
bytes | temp file path utf16 |
int | file name utf16 length |
bytes | file name utf16 |
int | file path utf16 length |
bytes | file path utf16 |
In this sample program, I load some excel workbook and for each embedded ole object, I display its name and its content :
[STAThread] static void Main(string[] args) { var excel = new Application(); try { Workbook workbook = excel.Workbooks.Open(@"D:\Classeur1.xlsx"); foreach (Worksheet worksheet in workbook.Worksheets) { foreach (OLEObject ole in worksheet.OLEObjects()) { Console.WriteLine("name : {0}", ole.Name); if (ole.progID == "Package") { string content = ole.GetContent(); if (content != null) Console.WriteLine(content); } } } workbook.Close(); } finally { excel.Quit(); } Console.Write("Press a key..."); Console.Read(); }The progID has "Package" value when the embedded content is not standard OLE content. The GetContent extension method gets a MemoryStream from the OLEObject and loads the content from the stream :
static class OLEExtensions { public static string GetContent(this OLEObject ole) { ole.Copy(); System.Windows.Forms.IDataObject data = System.Windows.Forms.Clipboard.GetDataObject(); object obj = data.GetData("Native"); System.Windows.Forms.Clipboard.SetDataObject(""); var ms = obj as MemoryStream; if (ms != null) return ms.GetOLEContent(); return null; } }We copy the OLE object to the clipboard to get the MemoryStream. The STAThread attribute is required in Main method to avoid some NullReferenceException when calling GetData method. The GetOLEContent extension method extracts the content from the stream thanks to the reverse engineered stream structure :
static class OLEStreamExtensions { public static int ReadHeader(this MemoryStream ms) { var header = new byte[2]; int read = ms.Read(header, 0, header.Length); if (read != header.Length) throw new FormatException("End of stream while reading header"); if (header[0] != 2 || header[1] != 0) throw new FormatException("Bad header"); return read; } public static string ReadString(this MemoryStream ms) { var sb = new StringBuilder(); while (true) { int b = ms.ReadByte(); if (b == -1) throw new FormatException("End of stream while reading string"); if (b == 0) return sb.ToString(); sb.Append((char)b); } } public static int ReadInt(this MemoryStream ms) { var bytes = new byte[4]; int read = ms.Read(bytes, 0, bytes.Length); if (read != bytes.Length) throw new FormatException("End of stream while reading int"); return BitConverter.ToInt32(bytes, 0); } public static byte[] ReadBytes(this MemoryStream ms, int count) { var bytes = new byte[count]; int read = ms.Read(bytes, 0, count); if (read != count) throw new FormatException("End of stream while reading bytes"); return bytes; } public static string GetOLEContent(this MemoryStream ms) { ms.ReadHeader(); string name = ms.ReadString(); string path = ms.ReadString(); int reserved = ms.ReadInt(); if (reserved != 0x30000) throw new FormatException(string.Format("Unexpected reserved bytes : got {0} but expected {1}", reserved.ToString("x"), 0x30000.ToString("x"))); int tempLength = ms.ReadInt(); string tempPath = ms.ReadString(); if (tempPath.Length + 1 != tempLength) throw new FormatException(string.Format("Mismatch between temp length {0} and temp full path length {1}", tempLength, tempPath.Length + 1)); int contentLength = ms.ReadInt(); byte[] content = ms.ReadBytes(contentLength); int delta = sizeof(int) * 3 + (name.Length + path.Length + tempPath.Length) * 2; if (ms.Length != ms.Position + delta) throw new FormatException("Unexpected end of file"); return UTF8Encoding.UTF8.GetString(content); } }This code uses Excel but it might work with any Office document (Word, Powerpoint...). Of course, you should have installed the PIA. I have validated this code with xml and txt files. I use Excel 2007 SP3 MSO.
Edit : You can refactor this code to extract the name and the bytes of each embedded object to be able to save the contents to files :
class OLEContent { #region Fields private readonly string name; private readonly byte[] content; #endregion public OLEContent(string name, byte[] content) { this.name = name; this.content = content; } public string Name { get { return name; } } public byte[] Content { get { return content; } } } ... public static OLEContent GetOLEContent(this MemoryStream ms) { ... return new OLEContent(name, content); } ... OLEContent content = ole.GetContent(); if (content != null) File.WriteAllBytes(Path.Combine(tempPath, content.Name), content.Content); ...This new version can also save images (like jpg) to disk.
1 comment:
Great post! You saved my life.
Post a Comment