Msgxtractr Save

📇 Extract contents from Outlook '.msg' files in R

Project README

Build Status | AppVeyor Build Status | codecov

msgxtractr : Read Outlook ‘.msg’ Files

‘Microsoft’ ‘Outlook’ messages can be saved in ‘.msg’ files. Tools are provided that enable extraction of metadata, envelope, headers, body and attachments from these files.

The following functions are implemented:

  • read_msg: Read in an Outlook ‘.msg’ file
  • save_attachments: Save all attachments from a ‘msg’ object
  • tidy_msg: Turn a ‘msg’ object into a ‘tibble’

Installation

devtools::install_github("hrbrmstr/msgxtractr")

Usage

library(msgxtractr)

# current version
packageVersion("msgxtractr")
## [1] '0.3.0'
str(msg1 <- read_msg(system.file("extdata/unicode.msg", package="msgxtractr")))
## List of 8
##  $ headers         : tibble [1 × 18] (S3: tbl_df/tbl/data.frame)
##   ..$ Return-path               : chr "<[email protected]>"
##   ..$ Received                  :List of 1
##   .. ..$ : chr [1:4] "from st11p00mm-smtpin007.mac.com ([17.172.84.240])\nby ms06561.mac.com (Oracle Communications Messaging Server "| __truncated__ "from mail-vc0-f182.google.com ([209.85.220.182])\nby st11p00mm-smtpin007.mac.com\n(Oracle Communications Messag"| __truncated__ "by mail-vc0-f182.google.com with SMTP id ie18so3484487vcb.13 for\n<[email protected]>; Mon, 18 Nov 2013 00:26:25 -0800 (PST)" "by 10.58.207.196 with HTTP; Mon, 18 Nov 2013 00:26:24 -0800 (PST)"
##   ..$ Original-recipient        : chr "rfc822;[email protected]"
##   ..$ Received-SPF              : chr "pass (st11p00mm-smtpin006.mac.com: domain of [email protected]\ndesignates 209.85.220.182 as permitted sender)\"| __truncated__
##   ..$ DKIM-Signature            : chr "v=1; a=rsa-sha256; c=relaxed/relaxed;        d=gmail.com;\ns=20120113; h=mime-version:date:message-id:subject:f"| __truncated__
##   ..$ MIME-version              : chr "1.0"
##   ..$ X-Received                : chr "by 10.221.47.193 with SMTP id ut1mr14470624vcb.8.1384763184960;\nMon, 18 Nov 2013 00:26:24 -0800 (PST)"
##   ..$ Date                      : chr "Mon, 18 Nov 2013 10:26:24 +0200"
##   ..$ Message-id                : chr "<CADtJ4eNjQSkGcBtVteCiTF+YFG89+AcHxK3QZ=-Mt48xygkvdQ@mail.gmail.com>"
##   ..$ Subject                   : chr "Test for TIF files"
##   ..$ From                      : chr "Brian Zhou <[email protected]>"
##   ..$ To                        : chr "[email protected]"
##   ..$ Cc                        : chr "Brian Zhou <[email protected]>"
##   ..$ Content-type              : chr "multipart/mixed; boundary=001a113392ecbd7a5404eb6f4d6a"
##   ..$ Authentication-results    : chr "st11p00mm-smtpin007.mac.com; dkim=pass\nreason=\"2048-bit key\" header.d=gmail.com [email protected]\nheader."| __truncated__
##   ..$ x-icloud-spam-score       : chr "33322\nf=gmail.com;e=gmail.com;pp=ham;spf=pass;dkim=pass;wl=absent;pwl=absent"
##   ..$ X-Proofpoint-Virus-Version: chr "vendor=fsecure\nengine=2.50.10432:5.10.8794,1.0.14,0.0.0000\ndefinitions=2013-11-18_02:2013-11-18,2013-11-17,19"| __truncated__
##   ..$ X-Proofpoint-Spam-Details : chr "rule=notspam policy=default score=0 spamscore=0\nsuspectscore=0 phishscore=0 bulkscore=0 adultscore=0 classifie"| __truncated__
##  $ sender          :List of 2
##   ..$ sender_email: chr "[email protected]"
##   ..$ sender_name : chr "Brian Zhou"
##  $ recipients      :List of 2
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##  $ subject         : chr "Test for TIF files"
##  $ body            :List of 2
##   ..$ text: chr "This is a test email to experiment with the MS Outlook MSG Extractor\r\n\r\n\r\n-- \r\n\r\n\r\nKind regards\r\n"| __truncated__
##   ..$ html: NULL
##  $ attachments     :List of 2
##   ..$ :List of 4
##   .. ..$ filename     : chr "importOl.tif"
##   .. ..$ long_filename: chr "import OleFileIO.tif"
##   .. ..$ mime         : chr "image/tiff"
##   .. ..$ content      : raw [1:969674] 49 49 2a 00 ...
##   ..$ :List of 4
##   .. ..$ filename     : chr "raisedva.tif"
##   .. ..$ long_filename: chr "raised value error.tif"
##   .. ..$ mime         : chr "image/tiff"
##   .. ..$ content      : raw [1:1033142] 49 49 2a 00 ...
##  $ display_envelope:List of 2
##   ..$ display_cc: chr "Brian Zhou"
##   ..$ display_to: chr "[email protected]"
##  $ times           :List of 3
##   ..$ creation_time: NULL
##   ..$ last_mod_time: NULL
##   ..$ last_mod_name: NULL
##  - attr(*, "class")= chr "msg"
print(msg1)
## Mon, 18 Nov 2013 10:26:24 +0200
## From: Brian Zhou <[email protected]>
## To: [email protected]
## Subject: Test for TIF files
## Attachments: 2
str(msg2 <- read_msg(system.file("extdata/TestMessage-ansi.msg", package="msgxtractr")))
## List of 8
##  $ headers         : NULL
##  $ sender          : list()
##  $ recipients      :List of 3
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : NULL
##   .. ..$ email_address: NULL
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : NULL
##   .. ..$ email_address: NULL
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : NULL
##   .. ..$ email_address: NULL
##  $ subject         : NULL
##  $ body            :List of 2
##   ..$ text: NULL
##   ..$ html: NULL
##  $ attachments     :List of 1
##   ..$ :List of 4
##   .. ..$ filename     : NULL
##   .. ..$ long_filename: NULL
##   .. ..$ mime         : NULL
##   .. ..$ content      : raw [1:10934] 50 4b 03 04 ...
##  $ display_envelope: list()
##  $ times           :List of 3
##   ..$ creation_time: NULL
##   ..$ last_mod_time: NULL
##   ..$ last_mod_name: NULL
##  - attr(*, "class")= chr "msg"
str(msg3 <- read_msg(system.file("extdata/TestMessage-default.msg", package="msgxtractr")))
## List of 8
##  $ headers         : NULL
##  $ sender          :List of 2
##   ..$ sender_email: chr "[email protected]"
##   ..$ sender_name : chr "Sender"
##  $ recipients      :List of 3
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##  $ subject         : chr "New Message!"
##  $ body            :List of 2
##   ..$ text: chr "This is some bold html!"
##   ..$ html: chr "<HTML><HEAD>\r\n<META content=\"text/html; charset=UTF-8\" http-equiv=Content-Type>\r\n<META name=GENERATOR con"| __truncated__
##  $ attachments     :List of 1
##   ..$ :List of 4
##   .. ..$ filename     : chr "TestAttachment1.xlsx"
##   .. ..$ long_filename: chr "TestAttachment1.xlsx"
##   .. ..$ mime         : NULL
##   .. ..$ content      : raw [1:10934] 50 4b 03 04 ...
##  $ display_envelope:List of 2
##   ..$ display_cc: chr "CC1"
##   ..$ display_to: chr "Recipient 1; Recipient 2"
##  $ times           :List of 3
##   ..$ creation_time: NULL
##   ..$ last_mod_time: NULL
##   ..$ last_mod_name: NULL
##  - attr(*, "class")= chr "msg"
str(msg4 <- read_msg(system.file("extdata/TestMessage-unicode.msg", package="msgxtractr")))
## List of 8
##  $ headers         : NULL
##  $ sender          :List of 2
##   ..$ sender_email: chr "[email protected]"
##   ..$ sender_name : chr "Sender"
##  $ recipients      :List of 3
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##   ..$ :List of 3
##   .. ..$ display_name : NULL
##   .. ..$ address_type : chr "SMTP"
##   .. ..$ email_address: chr "[email protected]"
##  $ subject         : chr "New Message!"
##  $ body            :List of 2
##   ..$ text: chr "This is some bold html!"
##   ..$ html: chr "<HTML><HEAD>\r\n<META content=\"text/html; charset=UTF-8\" http-equiv=Content-Type>\r\n<META name=GENERATOR con"| __truncated__
##  $ attachments     :List of 1
##   ..$ :List of 4
##   .. ..$ filename     : chr "TestAttachment1.xlsx"
##   .. ..$ long_filename: chr "TestAttachment1.xlsx"
##   .. ..$ mime         : NULL
##   .. ..$ content      : raw [1:10934] 50 4b 03 04 ...
##  $ display_envelope:List of 2
##   ..$ display_cc: chr "CC1"
##   ..$ display_to: chr "Recipient 1; Recipient 2"
##  $ times           :List of 3
##   ..$ creation_time: NULL
##   ..$ last_mod_time: NULL
##   ..$ last_mod_name: NULL
##  - attr(*, "class")= chr "msg"
str(tidy_msg(msg1), 2)
## tibble [1 × 8] (S3: tbl_df/tbl/data.frame)
##  $ headers         :List of 1
##  $ sender          :List of 1
##  $ recipients      :List of 1
##  $ subject         : chr "Test for TIF files"
##  $ body            :List of 1
##  $ attachments     :List of 1
##  $ display_envelope:List of 1
##  $ times           :List of 1
str(tidy_msg(msg2), 2)
## tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
##  $ recipients :List of 1
##  $ body       :List of 1
##  $ attachments:List of 1
##  $ times      :List of 1
str(tidy_msg(msg3), 2)
## tibble [1 × 7] (S3: tbl_df/tbl/data.frame)
##  $ sender          :List of 1
##  $ recipients      :List of 1
##  $ subject         : chr "New Message!"
##  $ body            :List of 1
##  $ attachments     :List of 1
##  $ display_envelope:List of 1
##  $ times           :List of 1
str(tidy_msg(msg4), 2)
## tibble [1 × 7] (S3: tbl_df/tbl/data.frame)
##  $ sender          :List of 1
##  $ recipients      :List of 1
##  $ subject         : chr "New Message!"
##  $ body            :List of 1
##  $ attachments     :List of 1
##  $ display_envelope:List of 1
##  $ times           :List of 1

Code of Conduct

Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.

Open Source Agenda is not affiliated with the "Msgxtractr" project. README source: hrbrmstr/msgxtractr
Stars
48
Open Issues
7
Last Commit
1 month ago
Repository

Open Source Agenda Badge

Open Source Agenda Rating