Commit 5ada1905 authored by Himanshu's avatar Himanshu
Browse files

Version 2 files

parent 15a3eceb

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.
########
# -------- Download scenes for all tiles --------
# Extract tile names and extent from EU shapefile
# Use EU extent to download information for all scenes, outnames.txt
# Create tile Meta Data, assign scenes to tiles
# Each tile stores the line numbers of scenes in outnames.txt
# Using scene line numbers, for each tile download scenes using outnames.txt
#
# Data downloaded in temp folder
# For now manual move required from temp before running RF
# Only 2 parallel downloads possible (api limitation)
# Tiles already downloaded (present in completedTiles) not downloaded again
#
# Make sure the optparse library is installed
# Make sure paths are correct
# Shapefile for EU should also be provided
# Also use the latest version of the Sentinel_download.py script
#
# To Run: Rscript --vanilla All_tiles_download.R (optional --createout F)
########
# ---- Setup: environment, command-line options, and paths ----
# NOTE(review): rm(list = ls()) and setwd() in scripts are discouraged; kept
# here to preserve the original standalone-script behavior.
rm(list = ls())
library(rgdal)
library(optparse)
library(doMC)
# The API allows only 2 concurrent downloads per user, hence 2 workers.
registerDoMC(2)
# Option for downloading outnames.txt afresh or reusing an existing one.
option_list <- list(make_option(c("-c", "--createout"), action = "store", default = TRUE,
                                help = "create outnames.txt or use existing",
                                metavar = "TRUE/FALSE or T/F"))
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)
# Paths (adjust for the target machine)
setwd("/home/rus/Desktop/shared/download_script")
completedOut <- "/home/rus/Desktop/shared/MyData/completedTiles_ALL"
dataOut <- "/home/rus/Desktop/shared/download_script/Test/temp"
dataIn <- "/home/rus/Desktop/shared/MyData/RestOfWestEU"
In_dir_shp <- "./s2_tiles_eu_land_plus_ukraine_esa"
inname_shp <- "s2_tiles_eu_land"
########
# Step 1: Download scene metadata for the whole EU extent.
# Produces scsMData.txt (full downloader log) and outnames.txt (one wget
# command line per downloadable scene).
########
# Read the EU tile grid; keep attribute columns as character, not factor.
euShp <- readOGR(In_dir_shp, inname_shp, stringsAsFactors=FALSE)
euShp$ID <- seq.int(nrow(euShp))
allTiles <- euShp$Name
tLen <- length(allTiles)
# Overall extent of all tiles. sp::bbox() returns a 2x2 matrix (rows x/y,
# cols min/max), so by column-major indexing: bb[1]=xmin (lon), bb[2]=ymin
# (lat), bb[3]=xmax, bb[4]=ymax -- matching the --lonmin/--latmin flags below.
bb <- bbox(euShp)
# Download outnames.txt using the extent; skip if --createout is FALSE.
if (opt$createout) {
# Query scene metadata for the whole extent (max 30% cloud cover, S2A, L2A;
# -n presumably lists without downloading -- confirm against
# Sentinel_download.py).
system(paste("python Sentinel_download.py --latmin", bb[2], "--latmax",
bb[4], "--lonmin", bb[1], "--lonmax", bb[3],
"-a apihub.txt -m 30 -n -s S2A -l L2A -r 70000 > scsMData.txt"),
intern=FALSE, wait=TRUE)
# Keep only the wget command lines: one line per downloadable scene.
system(paste("cat scsMData.txt | grep wget > outnames.txt"))
} else{
print(paste("----Skipping outnames creation"))
}
########
# Step 2: Extract and assign scenes to each tile.
# Builds tData, one row per tile: the outnames.txt line numbers of its
# scenes, the scene count, its bounding box, and whether to skip it.
########
scenesData <- read.table("outnames.txt", stringsAsFactors = FALSE)
# The tile name (e.g. "28UGC") sits right after the "_T" marker in the scene
# name (column V6); the marker position is taken from the first row and
# assumed constant for all rows -- TODO confirm scene names are fixed-width.
tile <- substr(scenesData$V6, regexpr("_T", scenesData[1, 6])[1] + 2, regexpr("_T", scenesData[1, 6])[1] + 6)
scenesData$tile <- tile
# Preallocate the per-tile metadata columns.
indices <- character(tLen)
nscenes <- numeric(tLen)
bounds <- character(tLen)
toSkip <- logical(tLen)
tData <- data.frame("tileName" = allTiles, "lineNums" = indices,
                    "numScenes" = nscenes, "bounds" = bounds, "toSkip" = toSkip,
                    stringsAsFactors = FALSE)
# Tiles already present in any of these folders are skipped later.
completedTiles <- list.dirs(path = completedOut, full.names = FALSE, recursive = FALSE)
completedTlOut <- list.dirs(path = dataOut, full.names = FALSE, recursive = FALSE)
completedTlIn <- list.dirs(path = dataIn, full.names = FALSE, recursive = FALSE)
for (i in seq_len(tLen)) {
  tileName <- allTiles[i]
  temp <- scenesData[scenesData$tile == tileName, ]
  # Row names of the subset are the original line numbers in outnames.txt.
  inds <- paste(as.numeric(rownames(temp)), collapse = " ")
  bb <- bbox(euShp[i, ])
  bd <- paste(cbind(bb[1], bb[3], bb[2], bb[4]), collapse = " ")
  tData[i, 2] <- inds
  tData[i, 3] <- nrow(temp)
  tData[i, 4] <- bd
  # Scalar condition: short-circuit || instead of element-wise |.
  if (is.element(tileName, completedTiles) || is.element(tileName, completedTlOut)
      || is.element(tileName, completedTlIn)) {
    tData[i, 5] <- TRUE
  }
}
# write.csv(tData, file = "tileMetaData.csv")
# countT <- 0
# countN <- 0
# countB <- 0
# for (i in 1:nrow(tData)) {
# if (tData$toSkip[i]) {
# countT <- countT + 1
# }
# if (tData$numScenes[i] == 0) {
# countN <- countN + 1
# }
# if((tData$toSkip[i]) & (tData$numScenes[i] == 0)) {
# countB <- countB + 1
# }
# }
########
# Step 3: Use the tile metadata to download scenes tile by tile.
# For each non-skipped tile with scenes: download its scenes (2 in parallel,
# API limit), gather the resulting *.zip files into a per-tile folder, and
# move that folder to dataOut.
########
start <- Sys.time()
print(paste("----All download started:", start))
# Change the range if you do not want to download all tiles at once.
for (i in seq_len(nrow(tData))) {
  tile <- tData$tileName[i]
  if (tData$toSkip[i]) {
    print(paste("----", tile, "already completed, skipping it!"))
    next
  }
  if (tData$numScenes[i] == 0) {
    print(paste("----", tile, "does not have any scenes, skipping it!"))
    next
  }
  print(paste("----", tile, ", no. of scenes to download: ", tData$numScenes[i]))
  # Recover the outnames.txt line numbers of this tile's scenes.
  scLineNums <- unlist(strsplit(tData$lineNums[i], split=" "))
  start1 <- Sys.time()
  print(paste("----", tile, "download started:", start1))
  # Each worker prints line j of outnames.txt (`sed "jq;d"`) and evals the
  # wget command found there. NOTE(review): paste() inserts spaces around j,
  # producing `sed " j q;d"`; this works with the sed on the target system
  # but verify on other platforms.
  foreach (j = scLineNums) %dopar% {
    print(paste("Line", j, "downloading.."))
    system(paste("eval `sed \"", j, "q;d\" outnames.txt`"), intern=FALSE, ignore.stdout=TRUE, wait=TRUE)
  }
  # Gather the downloaded zips into a per-tile folder and move it out.
  system(paste("mkdir", tile))
  system(paste("mv *.zip", tile))
  system(paste("mv", tile, dataOut))
  end1 <- Sys.time()
  print(paste("----", tile, "all scenes dowloaded and moved", end1))
  print(end1 - start1)
}
end <- Sys.time()
print(paste("----All tiles dowloaded", end))
print(end - start)
######## Download scenes script
# Downloads scenes for tiles by extracting names from outnames.txt
# Data downloaded in tempData
# For now manual move required for tiles from tempData to ../data/inputData
# Only 2 parallel downloads possible (api limitation)
# Tiles which are already downloaded (present in folder completedTiles) are not downloaded again
########
# ---- Setup ----
# NOTE(review): rm(list = ls()) is discouraged in scripts; kept to preserve
# the original standalone behavior.
rm(list = ls())
library(doMC)
# For each user only 2 concurrent downloads are allowed by the API, hence
# only 2 workers.
registerDoMC(2)
completedOut <- "../data/completedTiles"
dataOut <- "./tempData"
# Read the wget lines; keep columns as character (consistent with the other
# download scripts) so substr() below operates on strings, not factors.
scenesData <- read.table("./outnames.txt", stringsAsFactors = FALSE)
# Tile name (e.g. "29SMA") follows the "_T" marker in the scene name (column
# V6); marker position taken from row 1 and assumed constant across rows.
tile <- substr(scenesData$V6, regexpr("_T", scenesData[1, 6])[1] + 2, regexpr("_T", scenesData[1, 6])[1] + 6)
scenesData$tile <- tile
allTiles <- unique(scenesData$tile)
allTiles <- sort(allTiles)
# Tiles already downloaded (directories under completedOut) are skipped.
completedTiles <- list.dirs(path = completedOut, full.names = FALSE, recursive = FALSE)
start <- Sys.time()
print(paste("----All download started:", start))
# Change the range if you do not want to download all tiles at once
# (e.g. seq(61, length(allTiles)) to resume from tile 61).
for (i in seq_along(allTiles)) {
  tile <- allTiles[i]
  if (is.element(tile, completedTiles)) {
    print(paste("----", tile, "already completed, skipping it!"))
    next
  }
  start1 <- Sys.time()
  print(paste("----", tile, "download started:", start1))
  # Two workers (registerDoMC(2)); each extracts line j from outnames.txt
  # via `sed "jq;d"` and evals the wget command on it. Rows belonging to
  # other tiles are skipped inside the worker.
  foreach (j = seq_len(nrow(scenesData))) %dopar% {
    if (scenesData$tile[j] == tile) {
      print(paste("Line", j, "downloading.."))
      system(paste("eval `sed \"", j, "q;d\" outnames.txt`"), intern=FALSE, ignore.stdout=TRUE, wait=TRUE)
    }
  }
  # Gather the downloaded zips into a per-tile folder and move it out.
  system(paste("mkdir", tile))
  system(paste("mv *.zip", tile))
  system(paste("mv", tile, dataOut))
  end1 <- Sys.time()
  print(paste("----", tile, "all scenes dowloaded and moved", end1))
  print(end1 - start1)
}
end <- Sys.time()
print(paste("----All tiles dowloaded", end))
print(end - start)
########
# -------- Download scenes for tiles which show 0 scenes earlier--------
# alternate way to download scenes
# scene metadata is queried per tile instead for whole region
########
# ---- Setup for the zero-scene retry script ----
# NOTE(review): rm(list = ls()) and setwd() are discouraged in scripts; kept
# to preserve the original standalone behavior.
rm(list = ls())
library(rgdal)
library(optparse)
library(doMC)
# The API allows only 2 concurrent downloads per user.
registerDoMC(2)
# Paths (adjust for the target machine); quoting normalized to double quotes
# for consistency with the rest of the file.
setwd("/home/rus/Desktop/shared/download_script")
mergedOut <- "/home/rus/Desktop/shared/MyData/MergeResults_ALL"
completedOut <- "/home/rus/Desktop/shared/MyData/completedTiles_ALL"
dataIn <- "/home/rus/Desktop/shared/MyData/RestOfWestEU"
dataOut <- "/home/rus/Desktop/shared/download_script/Test/temp"
In_dir_shp <- "./s2_tiles_eu_land_plus_ukraine_esa"
inname_shp <- "s2_tiles_eu_land"
########
# Step 1: Read tile names and geometry from the EU shapefile.
########
euShp <- readOGR(In_dir_shp, inname_shp, stringsAsFactors = FALSE)
euShp$ID <- seq.int(nrow(euShp))
allTiles <- euShp$Name
tLen <- length(allTiles)
########
# Step 2: Build skip lists and counters.
########
# Tiles whose merged result already exists are skipped in the loop below.
# NOTE: list.files() `pattern` is a regular expression, not a glob --
# "\\.tif$" matches names ending in ".tif" (the original "*.tif" accidentally
# matched any name merely containing "tif").
mergedTiles <- list.files(path = mergedOut, pattern = "\\.tif$", full.names = FALSE, recursive = FALSE)
completedTiles <- list.dirs(path = completedOut, full.names = FALSE, recursive = FALSE)
completedTlIn <- list.dirs(path = dataIn, full.names = FALSE, recursive = FALSE)
count1 <- 0  # tiles that now return scenes
count2 <- 0  # tiles marked completed that still had scenes to fetch
problemTiles <- character()
start <- Sys.time()
print(paste("----All download started:", start))
########
# Step 3: Per-tile metadata re-query and download.
# For each tile without a merged result: query scene metadata for the tile's
# own bbox (padded by 0.2 degrees), and download any scenes that turn up.
########
for (i in seq_len(tLen)) {
  tileName <- allTiles[i]
  # Skip tiles whose merged output already exists in mergedOut.
  if (is.element(paste0(tileName, "_merge.tif"), mergedTiles)) {
  #if (is.element(tileName, completedTiles) | is.element(tileName, completedTlIn)) {
    print(paste("----", tileName, "already completed, skipping it!"))
    next
  }
  # Tile bbox padded on every side. sp::bbox() is a 2x2 matrix (rows x/y,
  # cols min/max): bb[1]=xmin, bb[2]=ymin, bb[3]=xmax, bb[4]=ymax.
  bb <- bbox(euShp[i, ])
  system(paste("python Sentinel_download.py --latmin", (bb[2] - 0.2), "--latmax",
               (bb[4] + 0.2), "--lonmin", (bb[1] - 0.2), "--lonmax", (bb[3] + 0.2),
               "-a apihub.txt -m 30 -n -s S2A -l L2A -r 1000 > scsMDataTemp.txt"),
         intern=FALSE, wait=TRUE)
  # Keep only the wget command lines: one per downloadable scene.
  system(paste("cat scsMDataTemp.txt | grep wget > outnamesTemp.txt"))
  if (file.info("outnamesTemp.txt")$size == 0) {
    print(paste("----", tileName, "no scenes still, skipping it!"))
    next
  }
  count1 <- count1 + 1
  # A tile marked completed that still yields scenes is suspicious -- record it.
  if (is.element(tileName, completedTiles)) {
    problemTiles <- append(problemTiles, tileName)
    count2 <- count2 + 1
  }
  scenesData <- read.table("outnamesTemp.txt", stringsAsFactors=FALSE)
  # Tile name follows the "_T" marker in the scene name (column V6); marker
  # position taken from row 1 and assumed constant across rows.
  tile <- substr(scenesData$V6, regexpr("_T", scenesData[1,6])[1] + 2, regexpr("_T", scenesData[1,6])[1] + 6)
  scenesData$tile <- tile
  start1 <- Sys.time()
  print(paste("----", tileName, "download started:", start1))
  # Two parallel workers (API limit); each evals the wget command on line j
  # of outnamesTemp.txt (`sed 'jq;d'` prints just that line).
  foreach (j = seq_len(nrow(scenesData))) %dopar% {
    if (scenesData$tile[j] == tileName) {
      print(paste("Line", j, "downloading.."))
      system(paste("eval `sed '", j, "q;d' outnamesTemp.txt`"), intern=FALSE, ignore.stdout=TRUE, wait=TRUE)
    }
  }
  # Gather the downloaded zips into a per-tile folder and move it to dataOut.
  system(paste("mkdir", tileName))
  system(paste("mv *.zip", tileName))
  system(paste("mv", tileName, dataOut))
  end1 <- Sys.time()
  print(paste("----", tileName, "all scenes dowloaded and moved", end1))
  print(end1 - start1)
}
end <- Sys.time()
print(paste("----", count1, "tiles dowloaded", end))
print(paste("----", count2, "problem tiles dowloaded", end))
print(end - start)
# Persist the list of problem tiles for later inspection.
capture.output(print(problemTiles), file="problemTiles.txt")
This diff is collapsed.
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
#!/bin/bash
# Query Sentinel-2 scene metadata via Olivier Hagolle's script; make sure to
# change the account login/password in apihub.txt first.
# (Original shebang "#!bin/bash" was missing the leading slash and would not
# resolve; fixed.)
python Sentinel_download.py --latmin 36.1 --latmax 47.7 --lonmin 5.2 --lonmax 21.3 -a apihub.txt -m 30 -n -s S2A -l L2A -r 50000 > scsMData.txt
# Extract the wget lines for scenes with less than 30% cloud coverage
# (grep reads the file directly; no need to pipe through cat).
grep wget scsMData.txt > outnames.txt
# This step can be avoided depending on the location of download_scene.R.
mv outnames.txt Test
This diff is collapsed.
flowchart/Flowchart.jpg

180 KB | W: | H:

flowchart/Flowchart.jpg

268 KB | W: | H:

flowchart/Flowchart.jpg
flowchart/Flowchart.jpg
flowchart/Flowchart.jpg
flowchart/Flowchart.jpg
  • 2-up
  • Swipe
  • Onion skin
Loading required package: methods
Loading required package: sp
rgdal: version: 1.3-2, (SVN revision 755)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
Path to GDAL shared files: /usr/local/OTB-6.0.0-Linux64/share/gdal
GDAL binary built with GEOS: TRUE
Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
Path to PROJ.4 shared files: (autodetected)
Linking to sp version: 1.3-1
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel
Loading required package: DBI
[1] "---- 28UGC tile started: 2018-09-05 18:32:56"
[1] "Tile: 28UGC No. of scenes: 7"
[1] "6 scene unzipped & zip deleted: 2018-09-05 18:33:56"
[1] "1 scene unzipped & zip deleted: 2018-09-05 18:33:58"
[1] "3 scene unzipped & zip deleted: 2018-09-05 18:34:33"
[1] "5 scene unzipped & zip deleted: 2018-09-05 18:34:39"
[1] "4 scene unzipped & zip deleted: 2018-09-05 18:35:14"
[1] "2 scene unzipped & zip deleted: 2018-09-05 18:35:19"
[1] "7 scene unzipped & zip deleted: 2018-09-05 18:35:22"
[1] "----scenes unzipped: 2018-09-05 18:35:22"
[1] "----masking of rasters started"
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
[1] "----masking of rasters done 2018-09-05 18:47:21"
[[1]]
class : RasterLayer
dimensions : 10980, 10980, 120560400 (nrow, ncol, ncell)
resolution : 10, 10 (x, y)
extent : 699960, 809760, 5690220, 5800020 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=utm +zone=28 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
data source : /shared/MyData/RestOfWestEU/28UGC/28UGC_B02_10m.tif
names : X28UGC_B02_10m
values : 1, 14539 (min, max)
[[2]]
class : RasterLayer
dimensions : 10980, 10980, 120560400 (nrow, ncol, ncell)
resolution : 10, 10 (x, y)
extent : 699960, 809760, 5690220, 5800020 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=utm +zone=28 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
data source : /shared/MyData/RestOfWestEU/28UGC/28UGC_B03_10m.tif
names : X28UGC_B03_10m
values : 1, 11279 (min, max)
[[3]]
class : RasterLayer
dimensions : 10980, 10980, 120560400 (nrow, ncol, ncell)
resolution : 10, 10 (x, y)
extent : 699960, 809760, 5690220, 5800020 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=utm +zone=28 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
data source : /shared/MyData/RestOfWestEU/28UGC/28UGC_B04_10m.tif
names : X28UGC_B04_10m
values : 1, 12073 (min, max)
[[4]]
class : RasterLayer
dimensions : 10980, 10980, 120560400 (nrow, ncol, ncell)
resolution : 10, 10 (x, y)
extent : 699960, 809760, 5690220, 5800020 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=utm +zone=28 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
data source : /shared/MyData/RestOfWestEU/28UGC/28UGC_B08_10m.tif
names : X28UGC_B08_10m
values : 1, 11831 (min, max)
[1] "----mosaicing of rasters done 2018-09-05 18:49:49"
[1] "---- 28UGC_brick written to output: 2018-09-05 18:57:43"
[1] "---- Now creating training raster"
[1] "----training shapefile extractedfrom db 2018-09-05 19:00:16"
[1] "----transformation done 2018-09-05 19:00:17"
Warning message:
In writeOGR(obj = p, dsn = InTile, layer = tile, driver = "ESRI Shapefile") :
Field names abbreviated for ESRI Shapefile driver
[1] "---- 28UGC shapefile created, now rasterizing"
OGR data source with driver: ESRI Shapefile
Source: "/shared/MyData/RestOfWestEU/28UGC", layer: "28UGC"
with 4 features
It has 3 fields
[1] "----rasterization done 2018-09-05 19:00:21"
[1] "---- 28UGC_brick written to output: 2018-09-05 19:00:21"
Time difference of 27.41948 mins
[1] TRUE
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Loading required package: methods
Loading required package: sp
rgdal: version: 1.2-16, (SVN revision 701)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
Path to GDAL shared files: /usr/local/OTB-6.0.0-Linux64/share/gdal
GDAL binary built with GEOS: TRUE
Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
Path to PROJ.4 shared files: (autodetected)
Linking to sp version: 1.2-6
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel
Loading required package: DBI
[1] "---- 29SMA tile started: 2018-03-04 09:00:56"
[1] "Tile: 29SMA No. of scenes: 41"
[1] "2 scene unzipped & zip deleted: 2018-03-04 09:01:53"
[1] "1 scene unzipped & zip deleted: 2018-03-04 09:01:57"
[1] "5 scene unzipped & zip deleted: 2018-03-04 09:02:01"
[1][1] "4 scene unzipped & zip deleted: 2018-03-04 09:02:05"
"3 scene unzipped & zip deleted: 2018-03-04 09:02:05"
[1] "6 scene unzipped & zip deleted: 2018-03-04 09:02:09"
[1] "8 scene unzipped & zip deleted: 2018-03-04 09:02:50"
[1] "10 scene unzipped & zip deleted: 2018-03-04 09:03:08"
[1] "12 scene unzipped & zip deleted: 2018-03-04 09:03:11"
[1] "14 scene unzipped & zip deleted: 2018-03-04 09:03:50"
[1] "18 scene unzipped & zip deleted: 2018-03-04 09:04:12"
[1] "16 scene unzipped & zip deleted: 2018-03-04 09:04:12"
[1] "7 scene unzipped & zip deleted: 2018-03-04 09:05:08"
[1] "11 scene unzipped & zip deleted: 2018-03-04 09:05:11"
[1] "9 scene unzipped & zip deleted: 2018-03-04 09:05:22"
[1] "20 scene unzipped & zip deleted: 2018-03-04 09:05:42"
[1] "22 scene unzipped & zip deleted: 2018-03-04 09:06:08"
[1] "28 scene unzipped & zip deleted: 2018-03-04 09:06:16"
[1] "26 scene unzipped & zip deleted: 2018-03-04 09:06:33"
[1] "32 scene unzipped & zip deleted: 2018-03-04 09:06:38"
[1] "24 scene unzipped & zip deleted: 2018-03-04 09:06:58"
[1] "34 scene unzipped & zip deleted: 2018-03-04 09:07:03"
[1] "40 scene unzipped & zip deleted: 2018-03-04 09:07:12"
[1] "30 scene unzipped & zip deleted: 2018-03-04 09:07:37"
[1] "36 scene unzipped & zip deleted: 2018-03-04 09:07:55"
[1] "17 scene unzipped & zip deleted: 2018-03-04 09:08:01"
[1] "13 scene unzipped & zip deleted: 2018-03-04 09:08:04"
[1] "15 scene unzipped & zip deleted: 2018-03-04 09:08:04"
[1] "38 scene unzipped & zip deleted: 2018-03-04 09:08:12"
[1] "23 scene unzipped & zip deleted: 2018-03-04 09:08:27"
[1] "21 scene unzipped & zip deleted: 2018-03-04 09:08:34"
[1] "19 scene unzipped & zip deleted: 2018-03-04 09:08:36"
[1] "27 scene unzipped & zip deleted: 2018-03-04 09:08:41"
[1] "25 scene unzipped & zip deleted: 2018-03-04 09:08:58"
[1] "31 scene unzipped & zip deleted: 2018-03-04 09:09:10"
[1] "37 scene unzipped & zip deleted: 2018-03-04 09:09:35"
[1] "29 scene unzipped & zip deleted: 2018-03-04 09:09:44"
[1] "35 scene unzipped & zip deleted: 2018-03-04 09:09:48"
[1] "33 scene unzipped & zip deleted: 2018-03-04 09:09:57"
[1] "41 scene unzipped & zip deleted: 2018-03-04 09:10:10"
[1] "39 scene unzipped & zip deleted: 2018-03-04 09:10:14"
[1] "----scenes unzipped: 2018-03-04 09:10:14"
[1] "----masking of rasters started"
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
[[8]]
NULL
[[9]]
NULL
[[10]]
NULL
[[11]]
NULL
[[12]]
NULL
[[13]]
NULL
[[14]]
NULL
[[15]]
NULL
[[16]]
NULL
[[17]]
NULL
[[18]]
NULL
[[19]]
NULL
[[20]]
NULL
[[21]]
NULL
[[22]]
NULL
[[23]]
NULL
[[24]]
NULL
[[25]]
NULL
[[26]]
NULL