Skip to content

Commit 8a07434

Browse files
committed
added plot method
1 parent cedcb8c commit 8a07434

8 files changed

+226
-29
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role =
66
Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
77
Description: Calculate the formality of text based on part of speech tags.
88
Depends: R (>= 3.2.2)
9-
Imports: data.table, tagger
9+
Imports: data.table, ggplot2, gridExtra, grid, tagger
1010
Suggests: testthat
1111
Date: 2015-10-13
1212
License: GPL-2

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22

33
S3method(formality,Formality)
44
S3method(formality,default)
5+
S3method(plot,Formality)
56
export(formality)
67
importFrom(data.table,":=")

R/formality.R

+134-4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
#' data(presidential_debates_2012)
4040
#' (form1 <- with(presidential_debates_2012, formality(dialogue, person)))
4141
#' with(presidential_debates_2012, formality(form1, list(person, time))) #recycle form 1 for speed
42+
#'
43+
#' plot(form1)
44+
#' plot(with(presidential_debates_2012, formality(form1, list(person, time))))
4245
formality <- function(text.var, grouping.var = NULL, order.by.formality = TRUE, ...){
4346

4447
UseMethod("formality")
@@ -83,8 +86,8 @@ formality.default <- function(text.var, grouping.var = NULL, order.by.formality
8386
}
8487
}
8588

86-
formal <- c('noun', 'adjective', 'preposition', 'article')
87-
contextual <- c('pronoun', 'verb', 'adverb', 'interjection')
89+
formal <- c('noun', 'preposition', 'adjective', 'article')
90+
contextual <- c('verb', 'pronoun', 'adverb', 'interjection')
8891

8992
## in other version this will be extracted
9093
#=============================================
@@ -166,8 +169,8 @@ formality.Formality <- function(text.var, grouping.var = NULL, order.by.formalit
166169
}
167170
}
168171

169-
formal <- c('noun', 'adjective', 'preposition', 'article')
170-
contextual <- c('pronoun', 'verb', 'adverb', 'interjection')
172+
formal <- c('noun', 'preposition', 'adjective', 'article')
173+
contextual <- c('verb', 'pronoun', 'adverb', 'interjection')
171174

172175
counts <- attributes(text.var)[["counts"]][["counts"]]
173176

@@ -191,3 +194,130 @@ formality.Formality <- function(text.var, grouping.var = NULL, order.by.formalit
191194
out
192195

193196
}
197+
198+
199+
200+
#' Plots a Formality Object
201+
#'
202+
#' Plots a Formality object.
203+
#'
204+
#' @param x The Formality object
205+
#' @param plot logical. If \code{TRUE} the output is plotted.
206+
#' @param \ldots ignored.
207+
#' @return Returns a list of the three \pkg{ggplot2} objects that make the
208+
#' combined plot.
209+
#' @importFrom data.table :=
210+
#' @method plot Formality
211+
#' @export
212+
plot.Formality <- function(x, plot = TRUE, ...){

    ## Builds three views of a Formality object -- a stacked bar of formal vs.
    ## contextual proportions, a dot plot of the F measure, and a heat map of
    ## the part of speech proportions -- optionally draws them as one figure,
    ## and invisibly returns the three ggplot2 objects.

    ## Bound to NULL because these are used below as bare column names inside
    ## data.table `[` calls (presumably to keep R CMD check quiet).
    n <- warn <- contextual <- formal <- type <- NULL

    grps <- attr(x, "group.var")
    pos <- attr(x, "pos.vars")

    ## One "_"-joined label per row, built from the grouping columns
    ## themselves rather than from parsed text so that non-syntactic column
    ## names still work.  Levels are reversed so the first row of `x` ends up
    ## at the top of the (flipped) axis; `unique()` keeps `factor()` from
    ## failing if two rows ever share a label.
    grp_labels <- do.call(paste, c(lapply(grps, function(g) x[[g]]), sep = "_"))
    grp_labels <- factor(grp_labels, levels = rev(unique(grp_labels)))

    ## Prepare the pos data: each part of speech count as a proportion of n
    pos_levels <- pos
    pos_props <- lapply(pos, function(p) as.numeric(x[[p]] / x[["n"]]))
    pos_dat <- x[, c(pos, "n"), with = FALSE][,
        (pos) := pos_props][,
        'group.vars' := grp_labels]

    pos_dat_long <- data.table::melt(pos_dat, id.vars = c("group.vars", "n"),
        variable.name = "pos", value.name = "proportion")[,
        pos := factor(pos, levels = pos_levels)]

    ## Prepare the formality data; `warn` flags groups of 300 words or fewer
    form_dat <- x[, c("n", "F"), with = FALSE][,
        'group.vars' := grp_labels][,
        c("group.vars", "n", "F"), with = FALSE][,
        warn := !(n > 300)]

    ## Prepare the contextual/formal data as proportions of n
    con_form_dat <- x[, c("contextual", "formal", "n"), with = FALSE][,
        (c("contextual", "formal")) := list(contextual/n, formal/n)][,
        'group.vars' := grp_labels]

    con_form_long <- data.table::melt(con_form_dat, id.vars = c("group.vars", "n"),
        variable.name = "type", value.name = "proportion")[,
        type := factor(type, levels = c("formal", "contextual"))]

    ## Shared axis label formatter: proportion -> whole-number percent
    as_percent <- function(x) paste0(round(x*100, 0), "%")

    ## Stacked horizontal bars of formal vs. contextual usage
    con_form_plot <- ggplot2::ggplot(con_form_long,
        ggplot2::aes_string(x = "group.vars", weight = "proportion", fill ="type")) +
        ggplot2::geom_bar() +
        ggplot2::coord_flip() +
        ggplot2::xlab(NULL) +
        ggplot2::ylab("") +
        ggplot2::theme_bw() +
        ggplot2::theme(
            panel.grid = ggplot2::element_blank(),
            #legend.position="bottom",
            legend.title = ggplot2::element_blank(),
            panel.border = ggplot2::element_blank(),
            axis.line = ggplot2::element_line(color="grey70")
        ) +
        ggplot2::scale_y_continuous(labels = as_percent, expand = c(0,0)) +
        ggplot2::scale_fill_manual(values=pals[c(2, 6), 2])

    ## Dot plot of the F measure: the translucent point is sized by text
    ## length, the small centre point is coloured by the `warn` flag.
    form_plot <- ggplot2::ggplot(form_dat,
        ggplot2::aes_string(y = "group.vars", x = "F")) +
        ggplot2::geom_point(ggplot2::aes_string(size="n"), alpha=.22) +
        ggplot2::scale_size(range=c(1, 7), name = "Text\nLength") +
        ggplot2::geom_point(ggplot2::aes_string(color="warn"), size=1.5) +
        ## Colours are named so that TRUE is always red, even when every
        ## group is flagged and TRUE is the only level present.
        ggplot2::scale_color_manual(
            values = c("FALSE" = "black", "TRUE" = "red"),
            guide = "none"
        ) +
        ggplot2::ylab(NULL) +
        ggplot2::xlab("F Measure") +
        ggplot2::theme_bw() +
        ggplot2::theme(
            #legend.position="bottom",
            axis.title.x = ggplot2::element_text(size=11),
            #legend.title = ggplot2::element_blank(),
            panel.border = ggplot2::element_blank(),
            axis.line = ggplot2::element_line(color="grey70")
        )

    ## Heat map of part of speech proportions; the legend title is blanked
    ## through the theme, so the scale itself carries no name.
    pos_heat_plot <- ggplot2::ggplot(pos_dat_long,
        ggplot2::aes_string(y = "group.vars", x = "pos", fill="proportion")) +
        ggplot2::geom_tile() +
        ggplot2::scale_fill_gradient(
            labels = as_percent,
            high="#BF812D",
            low="white",
            name = NULL
        )+
        ggplot2::ylab(NULL) +
        ggplot2::xlab("Part of Speech") +
        ggplot2::theme_bw() +
        ggplot2::theme(
            panel.grid = ggplot2::element_blank(),
            #legend.position="bottom",
            axis.title.x = ggplot2::element_text(size=11),
            legend.title = ggplot2::element_blank(),
            panel.border = ggplot2::element_rect(color="grey88")
        ) +
        ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = .5, barheight = 10)) #+
        #ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = 14, barheight = .5))

    ## Top row: bars and dot plot side by side; bottom row: heat map
    plotout1 <- gridExtra::arrangeGrob(con_form_plot, form_plot,
        widths = grid::unit(c(.5, .5), "native"), ncol=2)

    plotout2 <- gridExtra::arrangeGrob(plotout1, pos_heat_plot, ncol=1)
    if (isTRUE(plot)) gridExtra::grid.arrange(plotout2)
    invisible(list(formality = form_plot, contextual_formal = con_form_plot, pos = pos_heat_plot))
}
314+
315+
316+
## Lookup table pairing each part of speech tag (`pos`) with a hex colour
## (`cols`).  plot.Formality() indexes it by position (rows 2 and 6) to get
## the two fill colours for the formal vs. contextual bars, so row order
## matters.
pals <- data.frame(
    pos = c(
        "noun", "adjective", "preposition", "article",
        "pronoun", "verb", "adverb", "interjection"
    ),
    cols = c(
        "#8C510A", "#BF812D", "#DFC27D", "#F6E8C3",
        "#C7EAE5", "#80CDC1", "#35978F", "#01665E"
    ),
    stringsAsFactors = FALSE
)
321+
322+
323+

README.Rmd

+25-2
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,26 @@ output:
66
toc: true
77
---
88

9-
```{r, echo=FALSE}
9+
```{r, echo=FALSE, message=FALSE, warning=FALSE}
10+
library(knitr)
1011
desc <- suppressWarnings(readLines("DESCRIPTION"))
1112
regex <- "(^Version:\\s+)(\\d+\\.\\d+\\.\\d+)"
1213
loc <- grep(regex, desc)
1314
ver <- gsub(regex, "\\2", desc[loc])
1415
verbadge <- sprintf('<a href="https://img.shields.io/badge/Version-%s-orange.svg"><img src="https://img.shields.io/badge/Version-%s-orange.svg" alt="Version"/></a></p>', ver, ver)
1516
```
1617
17-
[![Project Status: Wip - Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](http://www.repostatus.org/badges/0.1.0/wip.svg)](http://www.repostatus.org/#wip)
18+
```{r, echo=FALSE}
19+
knit_hooks$set(htmlcap = function(before, options, envir) {
20+
if(!before) {
21+
paste('<p class="caption"><b><em>',options$htmlcap,"</em></b></p>",sep="")
22+
}
23+
})
24+
knitr::opts_knit$set(self.contained = TRUE, cache = FALSE)
25+
knitr::opts_chunk$set(fig.path = "inst/figure/")
26+
```
27+
28+
[![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/0.1.0/active.svg)](http://www.repostatus.org/#active)
1829
[![Build Status](https://travis-ci.org/trinker/formality.svg?branch=master)](https://travis-ci.org/trinker/formality)
1930
[![Coverage Status](https://coveralls.io/repos/trinker/formality/badge.svg?branch=master)](https://coveralls.io/r/trinker/formality?branch=master)
2031
`r verbadge`
@@ -103,4 +114,16 @@ This will take ~20 seconds because of the part of speech tagging that must be un
103114
with(presidential_debates_2012, formality(form1, list(time, person)))
104115
```
105116

117+
## Plotting
118+
119+
The generic `plot` function provides three views of the data:
106120

121+
1. A filled bar plot of formal vs. contextual usage
122+
2. A dotplot of formality\*\*
123+
3. A heatmap of the usage of the parts of speech used to calculate the formality score
124+
125+
\*\****Note*** *a red dot in the center of a point is a warning that the group contains 300 or fewer words*
126+
127+
```{r}
128+
plot(form1)
129+
```

README.md

+39-22
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ formality
22
============
33

44

5-
[![Project Status: Wip - Initial development is in progress, but there
6-
has not yet been a stable, usable release suitable for the
7-
public.](http://www.repostatus.org/badges/0.1.0/wip.svg)](http://www.repostatus.org/#wip)
5+
[![Project Status: Active - The project has reached a stable, usable
6+
state and is being actively
7+
developed.](http://www.repostatus.org/badges/0.1.0/active.svg)](http://www.repostatus.org/#active)
88
[![Build
99
Status](https://travis-ci.org/trinker/formality.svg?branch=master)](https://travis-ci.org/trinker/formality)
1010
[![Coverage
@@ -33,6 +33,7 @@ Table of Contents
3333
- [Load the Tools/Data](#load-the-toolsdata)
3434
- [Assessing Formality](#assessing-formality)
3535
- [Recycling the First Run](#recycling-the-first-run)
36+
- [Plotting](#plotting)
3637

3738
Formality Equation
3839
============
@@ -126,13 +127,13 @@ smaller text Heylighen & Dewaele (2002) state:
126127
form1 <- with(presidential_debates_2012, formality(dialogue, person))
127128
form1
128129

129-
## person noun adjective preposition article pronoun verb adverb
130-
## 1: QUESTION 155 70 91 38 77 112 26
131-
## 2: LEHRER 182 93 104 62 101 164 48
132-
## 3: SCHIEFFER 347 176 209 102 211 342 69
133-
## 4: ROMNEY 4406 2346 3178 1396 2490 4676 1315
134-
## 5: OBAMA 3993 1935 2909 1070 2418 4593 1398
135-
## 6: CROWLEY 387 135 269 104 249 405 134
130+
## person noun preposition adjective article verb pronoun adverb
131+
## 1: QUESTION 155 91 70 38 112 77 26
132+
## 2: LEHRER 182 104 93 62 164 101 48
133+
## 3: SCHIEFFER 347 209 176 102 342 211 69
134+
## 4: ROMNEY 4406 3178 2346 1396 4676 2490 1315
135+
## 5: OBAMA 3993 2909 1935 1070 4593 2418 1398
136+
## 6: CROWLEY 387 269 135 104 405 249 134
136137
## interjection formal contextual n F
137138
## 1: 4 354 219 573 61.78010
138139
## 2: 8 441 321 762 57.87402
@@ -150,17 +151,17 @@ time to a fraction of the first run.
150151

151152
with(presidential_debates_2012, formality(form1, list(time, person)))
152153

153-
## time person noun adjective preposition article pronoun verb
154-
## 1: time 2 QUESTION 155 70 91 38 77 112
155-
## 2: time 1 LEHRER 182 93 104 62 101 164
156-
## 3: time 1 ROMNEY 950 483 642 286 504 978
157-
## 4: time 3 ROMNEY 1766 958 1388 617 1029 1920
158-
## 5: time 3 SCHIEFFER 347 176 209 102 211 342
159-
## 6: time 2 ROMNEY 1690 905 1148 493 957 1778
160-
## 7: time 3 OBAMA 1546 741 1185 432 973 1799
161-
## 8: time 1 OBAMA 792 357 579 219 452 925
162-
## 9: time 2 OBAMA 1655 837 1145 419 993 1869
163-
## 10: time 2 CROWLEY 387 135 269 104 249 405
154+
## time person noun preposition adjective article verb pronoun
155+
## 1: time 2 QUESTION 155 91 70 38 112 77
156+
## 2: time 1 LEHRER 182 104 93 62 164 101
157+
## 3: time 1 ROMNEY 950 642 483 286 978 504
158+
## 4: time 3 ROMNEY 1766 1388 958 617 1920 1029
159+
## 5: time 3 SCHIEFFER 347 209 176 102 342 211
160+
## 6: time 2 ROMNEY 1690 1148 905 493 1778 957
161+
## 7: time 3 OBAMA 1546 1185 741 432 1799 973
162+
## 8: time 1 OBAMA 792 579 357 219 925 452
163+
## 9: time 2 OBAMA 1655 1145 837 419 1869 993
164+
## 10: time 2 CROWLEY 387 269 135 104 405 249
164165
## adverb interjection formal contextual n F
165166
## 1: 26 4 354 219 573 61.78010
166167
## 2: 48 8 441 321 762 57.87402
@@ -171,4 +172,20 @@ time to a fraction of the first run.
171172
## 7: 522 4 3904 3298 7202 54.20716
172173
## 8: 281 2 1947 1660 3607 53.97838
173174
## 9: 595 7 4056 3464 7520 53.93617
174-
## 10: 134 0 895 788 1683 53.17885
175+
## 10: 134 0 895 788 1683 53.17885
176+
177+
Plotting
178+
--------
179+
180+
The generic `plot` function provides three views of the data:
181+
182+
1. A filled bar plot of formal vs. contextual usage
183+
2. A dotplot of formality\*\*
184+
3. A heatmap of the usage of the parts of speech used to calculate the
185+
formality score
186+
187+
\*\****Note*** *a red dot in the center of a point is a warning that the group contains 300 or fewer words*
188+
189+
plot(form1)
190+
191+
![](inst/figure/unnamed-chunk-6-1.png)

inst/figure/unnamed-chunk-6-1.png

18.1 KB
Loading

man/formality.Rd

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ purposes of illustration" (p. 24).
4949
data(presidential_debates_2012)
5050
(form1 <- with(presidential_debates_2012, formality(dialogue, person)))
5151
with(presidential_debates_2012, formality(form1, list(person, time))) #recycle form 1 for speed
52+
53+
plot(form1)
54+
plot(with(presidential_debates_2012, formality(form1, list(person, time))))
5255
}
5356
\references{
5457
Heylighen, F. (1999). Advantages and limitations of formal expression. doi:10.1023/A:1009686703349 \cr \cr

man/plot.Formality.Rd

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
% Generated by roxygen2 (4.1.1): do not edit by hand
2+
% Please edit documentation in R/formality.R
3+
\name{plot.Formality}
4+
\alias{plot.Formality}
5+
\title{Plots a Formality Object}
6+
\usage{
7+
\method{plot}{Formality}(x, plot = TRUE, ...)
8+
}
9+
\arguments{
10+
\item{x}{The Formality object}
11+
12+
\item{plot}{logical. If \code{TRUE} the output is plotted.}
13+
14+
\item{\ldots}{ignored.}
15+
}
16+
\value{
17+
Returns a list of the three \pkg{ggplot2} objects that make the
18+
combined plot.
19+
}
20+
\description{
21+
Plots a Formality object.
22+
}
23+

0 commit comments

Comments
 (0)