-
Notifications
You must be signed in to change notification settings - Fork 34
Closed
Description
The Gallery (https://github.com/tdhock/animint/wiki/Gallery) contains links to rendered animints, along with links to their source code. Many of these were hosted as gists and rendered via bl.ocks.org. Recently (~Jan 2023) bl.ocks.org stopped rendering gists: it now redirects to gist.github.com, which responds with "400: Invalid request". Here is some code for downloading them to the local machine:
# Two working directories: dl.dir caches everything fetched from the web,
# dl.out is the output directory for the results.
dl.dir <- 'download-blocks'
dl.out <- 'download-blocks-out'
invisible(lapply(c(dl.dir, dl.out), dir.create, showWarnings=FALSE))
# Fetch the Gallery wiki page only once; later runs reuse the cached copy.
Gallery.html <- file.path(dl.dir, "Gallery.html")
if (!file.exists(Gallery.html)) {
  download.file(
    url="https://github.com/tdhock/animint/wiki/Gallery",
    destfile=Gallery.html)
}
# Optional trailing part of each gallery entry: a link whose text is
# "source". Its href is captured as source_url.
source.link.pattern <- list(
  '.*?<a href="',
  source_url=".*?",
  '"[^>]*>source</a>'
)
# One row per bl.ocks.org link found in the cached Gallery page, with the
# named groups (user, gist_id, title, source_url) as columns.
# NOTE(review): Gallery.html is a file path here; this presumably relies on
# nc reading the file contents when the subject is an existing file name --
# confirm against the installed nc version.
gist.dt <- nc::capture_all_str(
  Gallery.html,
  "http://bl.ocks.org/",
  user=".*?",
  "/raw/",
  gist_id=".*?",
  '/.*?>',
  title=".*?",
  "</a>",
  source.link.pattern,
  "?")
# Download every gist listed in gist.dt and unpack it into dl.out.
#
# For each row the loop
#   1. fetches the gist web page (cached as <gist_id>.html in dl.dir),
#   2. extracts the datetime and the "Download ZIP" href from that page
#      (cached as <gist_id>.csv),
#   3. downloads the ZIP archive (cached as <gist_id>.zip),
#   4. unpacks it into dl.out/<date>-<title>/ and records source_url.txt
#      and title.txt next to the animint files.
# It also adds the columns `datetime` and `dir` to gist.dt by reference.
# Every step is skipped when its output already exists, so the script can
# be re-run after an interruption.
gist.prefix <- "https://gist.github.com"
if(nrow(gist.dt) == 0){
  stop("no bl.ocks.org links found in ", Gallery.html)
}
## seq_len() rather than 1:nrow(): 1:0 would iterate over c(1, 0).
for(gist.i in seq_len(nrow(gist.dt))){
  gist.row <- gist.dt[gist.i]
  gist.csv <- file.path(dl.dir, paste0(gist.row$gist_id,".csv"))
  if(!file.exists(gist.csv)){
    gist.html <- file.path(dl.dir, paste0(gist.row$gist_id,".html"))
    if(!file.exists(gist.html)){
      gist.url <- gist.row[, paste0(gist.prefix,"/",user,"/",gist_id)]
      download.file(gist.url, gist.html)
    }
    ## First datetime="..." attribute on the page, then (lazily, across
    ## lines) the href of the "Download ZIP" link.
    zip.dt <- nc::capture_all_str(
      gist.html,
      nc::field("datetime", '="', ".*?"),
      '"',
      "(?:.*\n)*?",
      '.*? ',
      nc::field("href", '="', ".*?"),
      '">Download ZIP')
    if(nrow(zip.dt)!=1)stop("got nrow=",nrow(zip.dt))
    data.table::fwrite(zip.dt, gist.csv)
  }
  zip.dt <- data.table::fread(gist.csv)
  gist.zip <- file.path(dl.dir, paste0(gist.row$gist_id,".zip"))
  if(!file.exists(gist.zip)){
    url.zip <- paste0(gist.prefix,zip.dt$href)
    ## mode="wb": the archive is binary, never let it be written as text.
    download.file(url.zip, gist.zip, mode="wb")
  }
  gist.dt[gist.i, datetime := zip.dt$datetime]
  created <- strftime(zip.dt$datetime, "%Y-%m-%d-")
  ## Slashes and spaces in the title would break the directory name.
  dest.path <- file.path(dl.out,paste0(created,gsub("[/ ]","-",gist.row$title)))
  gist.dt[gist.i, dir := basename(dest.path)]
  if(!dir.exists(dest.path)){
    out.df <- unzip(gist.zip,list=TRUE)
    ## Top-level directory of the archive: first path component of the
    ## first entry, without a trailing slash.
    out.dir <- sub("/.*$", "", out.df$Name[1])
    cat(sprintf("%4d / %4d %s -> %s\n", gist.i, nrow(gist.dt), out.dir, dest.path))
    ## Extract next to the destination instead of into the current working
    ## directory, so nothing is left behind outside dl.out.
    unzip(gist.zip, exdir=dl.out)
    unzipped.path <- file.path(dl.out, out.dir)
    if(!file.rename(unzipped.path, dest.path)){
      stop("could not rename ", unzipped.path, " to ", dest.path)
    }
    writeLines(gist.row$source_url,file.path(dest.path,"source_url.txt"))
    writeLines(gist.row$title,file.path(dest.path,"title.txt"))
  }
  ##browseURL(normalizePath(file.path(dest.path,"index.html")))
}
# Compact overview of the parsed gallery entries (long fields truncated
# to their first 10 characters).
gist.dt[, list(
  user,
  title=substr(title, 1, 10),
  gist_id,
  src=substr(source_url, 1, 10))]
# One row per gist, ordered by datetime: a clickable preview image, the
# datetime as text, and a link to the source code labelled with the title.
out.dt <- gist.dt[order(datetime)][, list(
  Preview=sprintf(
    '<a href="%s/index.html"><img src="%s/Capture.PNG" width=400 /></a>',
    dir, dir),
  Created=paste(datetime),
  `Link to source code`=sprintf('<a href="%s">%s</a>',source_url,title))]
# Render out.dt as an HTML table and write it to index.html inside dl.out.
# NOTE(review): the text after the closing parenthesis on the last line is
# issue prose that got fused onto the code line; it is not R code.
xt <- xtable::xtable(out.dt)
print(
xt,
type="html",
file=file.path(dl.out, "index.html"),
# identity as sanitizer: cell text is passed through unchanged, so the
# <a>/<img> markup stored in out.dt is written as real HTML.
sanitize.text.function=identity,
include.rownames=TRUE)Most appear to be rendering fine. TODOs
- put each in an Rmd and re-render using the most recent animint2 (this should make it easier for users to learn, because source code and output are then on the same page).
- total size is 220 MB, which may be too large to host on GitHub Pages?
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels