Same-day Landsat-8 and Sentinel-2 Pair Generation on GEE

Page content

This notebook presents a data preparation workflow that finds same-date and overlapped Landsat8 and Sentinel2 image patches and provide a way to download the processed images to the local machine.

This is a part of the ongoing research project Landsat2Sentinel.

The whole notebook is available on my Github repository.

import ee
ee.Initialize()

from IPython import display
import zipfile
import urllib
from ipywidgets import IntProgress

Function for downloading images from GEE

def download_tif(image, scale, name, folder):
    url = ee.data.makeDownloadUrl(ee.data.getDownloadId({
        'image': image.serialize(),
        'scale': '%d' % scale,
        'filePerBand': 'false',
        'name': name,
    }))
    local_zip, header = urllib.request.urlretrieve(url)
    with zipfile.ZipFile(local_zip) as local_zipfile:
        return local_zipfile.extract(name + '.tif', folder)

Functions used in the process

#===============================
def mapAddDates(img):
    '''
    The function used to map over Sentinel2 images to add a date attribute, 
    and rename the NIR band from 'B8' to 'B5' to match the naming convention of Landsat
    '''
    date_time = ee.Date(img.get('GENERATION_TIME'))
    return img.set('DATE', date_time.format('yyyy-MM-dd')).rename(['B2', 'B3', 'B4', 'B5'])

def getSameDateWithGeom(l8, sent, geom):
    '''
    Find pairs of Landsat8 and Sentinel2 images that are acquired in the same day
    and their footprints intersect with the input geometry
    '''
    l8pool = l8.filterBounds(geom).sort('DATE_ACQUIRED') \
                .filterMetadata('CLOUD_COVER', 'less_than', 10) \ # cloud cover limit set to 10%
                 .select('B[2-5]')                                # only use R,G,B,NIR bands
    sentpool = sent.filterBounds(geom).sort('GENERATION_TIME') \
                    .filterMetadata('CLOUDY_PIXEL_PERCENTAGE', 'less_than', 10) \
                     .filterMetadata('PROCESSING_BASELINE', 'not_ends_with', '2') \
                     .select('B[2-4|8]') \
                     .map(mapAddDates)

    eqfilter = ee.Filter.equals(leftField='DATE_ACQUIRED', rightField='DATE')
    innerjoin = ee.Join.inner()
    matches = innerjoin.apply(l8pool, sentpool, eqfilter)

    return matches
#===============================


def getRandomPoints(bound, num):
    '''
    Generate random points from the input boundary (for simplity, we used convex hull of the input boundary)
    '''
    points = ee.FeatureCollection.randomPoints(bound.geometry().convexHull(), num)
    return points

Main Process

##### Import imagery and boundary #####
l8toa = ee.ImageCollection('LANDSAT/LC08/C01/T1_TOA')
sent2 = ee.ImageCollection('COPERNICUS/S2')
boundary = ee.FeatureCollection('users/CSISS_LCLUC/studyarea_V4')
##### Generate square patch around random points #######
points = ee.FeatureCollection.randomPoints(boundary.geometry().convexHull(), 200, seed=10)

#===================
def mapGetPatch(pt):
    return pt.buffer(128*10).bounds()

patches = points.map(mapGetPatch)

Get the matched image pairs of Landsat8 and Sentinel2

The match_collection has the following structure (each level to the next level is a one-to-many relationship):

  • FeatureCollection (random points)
    • FeatureCollection (pairs with different date for the given point)
      • Feature {'primary': landsat8 image, 'secondary': sentinel2 image}
# Get the same date image pairs
def mapGetMatches(pts):
    def mapClipImageToPatch(f):
        l8_cand = ee.Image(f.get('primary'))
        sent_cand = ee.Image(f.get('secondary'))
        l8_clip = l8_cand.clipToBoundsAndScale(geometry=pts.geometry(), width=256, height=256)      # Fixed image size
        sent2_clip = sent_cand.clipToBoundsAndScale(geometry=pts.geometry(), width=256, height=256) 
#         intersection = l8_clip.geometry().intersection(sent2_clip.geometry())
        return f.set('primary', l8_clip).set('secondary', sent2_clip)
    
    match = getSameDateWithGeom(l8toa, sent2, pts.geometry())
    match_clip = match.map(mapClipImageToPatch)
    return match_clip
#-------------------------------------------

match_collection = patches.map(mapGetMatches)
####### Remove matches that no image pairs are found ######

match_list = match_collection.toList(match_collection.size())

#=================================
def mapDropNull(f):
    f = ee.FeatureCollection(f)
    return ee.Algorithms.If(f.size().eq(0), None, f)

pairs_list = match_list.map(mapDropNull)
#================================

pairs_list = pairs_list.removeAll(ee.List([None]))

Download Images

num_pairs = pairs_list.length().getInfo()
num_pairs
180
bar = IntProgress(min=0, max=num_pairs)
display.display(bar)

for i in range(155, num_pairs):
    pairs = ee.FeatureCollection(pairs_list.get(i))
    num_pair = pairs.size().getInfo()
    pair_list = pairs.toList(num_pair)
    for j in range(num_pair):
        pair = ee.Feature(pair_list.get(j))
        primary = ee.Image(pair.get('primary'))
        secondary = ee.Image(pair.get('secondary')).divide(10000)   # Scale down the Sentinel2 pixel values to [0,1]
        download_tif(primary, 10, 'l', 'data/'+str(i)+'_'+str(j)+'/')
        download_tif(secondary, 10, 's', 'data/'+str(i)+'_'+str(j)+'/')
    bar.value = i