Nina
0
Q:

Convert Word content to Excel in R

# Load readtext package
library(readtext)
0
## Read in Word data (.docx)
# Build the glob that matches every .docx file in the "word" folder under
# DATA_DIR, then let readtext() import all matching documents at once.
docx_glob <- paste0(DATA_DIR, "/word/*.docx")
readtext(docx_glob)
## readtext object consisting of 2 documents and 0 docvars.
## # Description: df[,2] [2 × 2]
##   doc_id                      text               
##   <chr>                       <chr>              
## 1 UK_2015_EccentricParty.docx "\"The Eccent\"..."
## 2 UK_2015_LoonyParty.docx     "\"The Offici\"..."
0
# Make some text whose page numbers appear on their own lines as "- N -"
sample_text_b <- "The quick brown fox named Seamus 
- 1 - 
jumps over the lazy dog also named Seamus, with 
- 2 - 
the newspaper from a boy named quick Seamus, in his mouth. 
- 33 - 
The quicker brown fox jumped over 2 lazy dogs."

sample_text_b
## [1] "The quick brown fox named Seamus \n- 1 - \njumps over the lazy dog also named Seamus, with \n- 2 - \nthe newspaper from a boy named quick Seamus, in his mouth. \n- 33 - \nThe quicker brown fox jumped over 2 lazy dogs."

# Remove the "- N -" page-number lines.
# Base R only, so the snippet runs without attaching any extra package.
# 1. Split the text into individual lines.
sample_text_b2 <- unlist(
  strsplit(sample_text_b, "\n", fixed = TRUE),
  use.names = FALSE
)
# 2. Drop lines that consist solely of a page marker. The pattern is anchored
#    to the whole line and requires at least one digit, so dashes and numbers
#    inside ordinary sentences (e.g. "2 - 3 - 1") are left untouched.
is_page_marker <- grepl("^\\s*- \\d+ -\\s*$", sample_text_b2, perl = TRUE)
sample_text_b2 <- sample_text_b2[!is_page_marker]
# 3. Trim surrounding whitespace; "[\\h\\v]" also covers Unicode whitespace.
sample_text_b2 <- trimws(sample_text_b2, whitespace = "[\\h\\v]")
# 4. Discard lines that are empty after trimming, then re-join the text.
sample_text_b2 <- sample_text_b2[nzchar(sample_text_b2)]
paste(sample_text_b2, collapse = "\n")
## [1] "The quick brown fox named Seamus\njumps over the lazy dog also named Seamus, with\nthe newspaper from a boy named quick Seamus, in his mouth.\nThe quicker brown fox jumped over 2 lazy dogs."
0
# Make some text with page numbers
sample_text_a <- "The quick brown fox named Seamus jumps over the lazy dog also named Seamus, 
page 1 
with the newspaper from a boy named quick Seamus, in his mouth.
page 2
The quicker brown fox jumped over 2 lazy dogs."

sample_text_a
## [1] "The quick brown fox named Seamus jumps over the lazy dog also named Seamus, \npage 1 \nwith the newspaper from a boy named quick Seamus, in his mouth.\npage 2\nThe quicker brown fox jumped over 2 lazy dogs."

# Remove "page" and respective digit
# Base R only, so the snippet runs without attaching any extra package.
# 1. Split the text into individual lines.
sample_text_a2 <- unlist(
  strsplit(sample_text_a, "\n", fixed = TRUE),
  use.names = FALSE
)
# 2. Drop lines that consist solely of "page <digits>". Anchoring to the whole
#    line and requiring a digit keeps the word "page" inside ordinary
#    sentences (e.g. "the front page of") from being stripped.
is_page_marker <- grepl("^\\s*page \\d+\\s*$", sample_text_a2, perl = TRUE)
sample_text_a2 <- sample_text_a2[!is_page_marker]
# 3. Trim surrounding whitespace; "[\\h\\v]" also covers Unicode whitespace.
sample_text_a2 <- trimws(sample_text_a2, whitespace = "[\\h\\v]")
# 4. Discard lines that are empty after trimming, then re-join the text.
sample_text_a2 <- sample_text_a2[nzchar(sample_text_a2)]
paste(sample_text_a2, collapse = "\n")
## [1] "The quick brown fox named Seamus jumps over the lazy dog also named Seamus,\nwith the newspaper from a boy named quick Seamus, in his mouth.\nThe quicker brown fox jumped over 2 lazy dogs."
0

New to Communities?

Join the community