# !apt-get install libgeos-3.5.0
# !apt-get install libgeos-dev
# !pip install https://github.com/matplotlib/basemap/archive/master.zip
#from google.colab import drive
#drive.mount('/content/drive')

Introduction

Using Data from OCO-2 Satellite, issued by the NASA.

//TODO: Explanation

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
#from mpl_toolkits.basemap import Basemap  #Imported directly from the github repository

Retieve Data

Sample data can be accessed freely on the NASA Database, among other open data from several NASA sattelites.

We will be using CSV aggregated by Benoit Courty here.

#data_1610 = pd.read_csv("http://courty.fr/OCO2/oco2_1610.csv", sep=";")
#data_1705 = pd.read_csv("http://courty.fr/OCO2/oco2_1705.csv", sep=";")
#data_1803 = pd.read_csv("http://courty.fr/OCO2/oco2_1803.csv", sep=";")
#data_1805 = pd.read_csv("http://courty.fr/OCO2/oco2_1805.csv", sep=";")
#data_1808 = pd.read_csv("http://courty.fr/OCO2/oco2_1808.csv", sep=";")
data_1808 = pd.read_csv("../../../datasets/OCO2/csv/oco2_1808.csv", sep=";")
#data_1809 = pd.read_csv("http://courty.fr/OCO2/oco2_1809.csv", sep=";")

data_1808.head()
sounding_id latitude longitude xco2 xco2_uncert orbit windspeed_u windspeed_v
0 2018080100462105 -33.015541 -164.508881 405.143188 0.491368 21709 3.749916 9.128431
1 2018080100462137 -32.988529 -164.553787 404.893677 0.497189 21709 3.720200 9.087859
2 2018080100462171 -32.996235 -164.435699 404.729431 0.537358 21709 3.815527 9.151507
3 2018080100462172 -32.992409 -164.455872 404.819550 0.498803 21709 3.799832 9.138914
4 2018080100462173 -32.988403 -164.476196 404.706451 0.496855 21709 3.783962 9.126184
data_1808.describe()
sounding_id latitude longitude xco2 xco2_uncert orbit windspeed_u windspeed_v
count 2.709745e+06 2.709745e+06 2.709745e+06 2.709745e+06 2.709745e+06 2.709745e+06 2.709745e+06 2.709745e+06
mean 2.018082e+15 9.176796e+00 -3.778302e+00 4.047848e+02 5.104777e-01 2.194239e+04 -1.533030e+00 2.852832e-01
std 9.141199e+08 2.900338e+01 1.112529e+02 1.868212e+00 1.293196e-01 1.332497e+02 4.292128e+00 3.490388e+00
min 2.018080e+15 -5.199191e+01 -1.799998e+02 3.895988e+02 2.017396e-03 2.170900e+04 -1.231609e+01 -1.428720e+01
25% 2.018081e+15 -1.714714e+01 -1.143912e+02 4.038899e+02 4.171058e-01 2.181300e+04 -4.930158e+00 -2.060896e+00
50% 2.018082e+15 4.388992e+00 4.388185e-01 4.052918e+02 4.851733e-01 2.195700e+04 -2.304201e+00 3.646293e-01
75% 2.018082e+15 3.448813e+01 1.007937e+02 4.060902e+02 5.803099e-01 2.205600e+04 1.599859e+00 2.797290e+00
max 2.018083e+15 8.186122e+01 1.799996e+02 4.169399e+02 1.983425e+00 2.216000e+04 1.637950e+01 1.493328e+01

To convert the sounding_id into a datetime variable data:

from datetime import datetime
def to_date(a):
    return datetime.strptime(str(a), '%Y%m%d%H%M%S%f')

# data_1610['date'] = data_1610['sounding_id'].apply(to_date)
# data_1705['date'] = data_1705['sounding_id'].apply(to_date)
# data_1803['date'] = data_1803['sounding_id'].apply(to_date)
# data_1805['date'] = data_1805['sounding_id'].apply(to_date)
data_1808['date'] = data_1808['sounding_id'].apply(to_date)
# data_1809['date'] = data_1809['sounding_id'].apply(to_date)
data_1808.head()
sounding_id latitude longitude xco2 xco2_uncert orbit windspeed_u windspeed_v date
0 2018080100462105 -33.015541 -164.508881 405.143188 0.491368 21709 3.749916 9.128431 2018-08-01 00:46:21.050
1 2018080100462137 -32.988529 -164.553787 404.893677 0.497189 21709 3.720200 9.087859 2018-08-01 00:46:21.370
2 2018080100462171 -32.996235 -164.435699 404.729431 0.537358 21709 3.815527 9.151507 2018-08-01 00:46:21.710
3 2018080100462172 -32.992409 -164.455872 404.819550 0.498803 21709 3.799832 9.138914 2018-08-01 00:46:21.720
4 2018080100462173 -32.988403 -164.476196 404.706451 0.496855 21709 3.783962 9.126184 2018-08-01 00:46:21.730

We are seaking the emission peaks taken as an example in the annexes of F. Chevallier's article Observing carbon dioxide emissions over China's cities with the Orbiting Carbon Observatory-2:

  • Over Anshan, the 17th October 2016
  • Over Baotou, the 17th May 2018
  • Over Dezhou, the 24th September 2018
  • Over Laiwu, the 25th August 2018
  • Over Nanjing, the 9th March 2018
  • Over Tangshan, the 18th May 2017

Laiwu, 25th August 2018

# We consider the August 2018 datset at the right day
data_1808_25 = data_1808[data_1808['date'] < "2018-08-26"]
data_1808_25 = data_1808_25[data_1808_25['date'] > "2018-08-25"]

#draw_map(data_1808_25)
# We consider the orgit going over East China
#data_laiwu = data_1808_25[data_1808_25['longitude'] > 110]
#data_laiwu = data_laiwu[data_laiwu['longitude'] < 125]
data_laiwu = data_1808_25[data_1808_25['orbit'] == 22061]
data_laiwu.head(3)
sounding_id latitude longitude xco2 xco2_uncert orbit windspeed_u windspeed_v date
2061319 2018082504501738 -43.749119 135.989822 403.642273 0.572042 22061 11.114212 -4.509858 2018-08-25 04:50:17.380
2061320 2018082504501777 -43.735889 136.010880 404.398529 0.466245 22061 11.103701 -4.486870 2018-08-25 04:50:17.770
2061321 2018082504501778 -43.731358 135.985352 404.115814 0.528921 22061 11.101484 -4.501010 2018-08-25 04:50:17.780

Keep only the city zone

data_laiwu=data_laiwu.query('longitude>116.5 and longitude<118.2 and latitude>35.4 and latitude<37.1')

Plot the OCO2 sensor data

data_laiwu.plot.scatter(x='longitude', y='latitude', c='xco2', colormap='viridis')
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:308: MatplotlibDeprecationWarning: 
The rowNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().rowspan.start instead.
  layout[ax.rowNum, ax.colNum] = ax.get_visible()
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:308: MatplotlibDeprecationWarning: 
The colNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().colspan.start instead.
  layout[ax.rowNum, ax.colNum] = ax.get_visible()
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:308: MatplotlibDeprecationWarning: 
The rowNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().rowspan.start instead.
  layout[ax.rowNum, ax.colNum] = ax.get_visible()
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:308: MatplotlibDeprecationWarning: 
The colNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().colspan.start instead.
  layout[ax.rowNum, ax.colNum] = ax.get_visible()
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:314: MatplotlibDeprecationWarning: 
The rowNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().rowspan.start instead.
  if not layout[ax.rowNum + 1, ax.colNum]:
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:314: MatplotlibDeprecationWarning: 
The colNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().colspan.start instead.
  if not layout[ax.rowNum + 1, ax.colNum]:
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:314: MatplotlibDeprecationWarning: 
The rowNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().rowspan.start instead.
  if not layout[ax.rowNum + 1, ax.colNum]:
/media/data-nvme/dev/anaconda3/lib/python3.7/site-packages/pandas/plotting/_tools.py:314: MatplotlibDeprecationWarning: 
The colNum attribute was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use ax.get_subplotspec().colspan.start instead.
  if not layout[ax.rowNum + 1, ax.colNum]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f8e4126a910>
data_laiwu.plot.scatter(x='sounding_id', y='xco2')
<matplotlib.axes._subplots.AxesSubplot at 0x7f8e2895f110>

Compute distance from latitude, longitude (haversine)

import math
#TODO: Change the formula to compute the distance from the trace origin
latitude_origin = data_laiwu.iloc[0]['latitude']
longitude_origin = data_laiwu.iloc[0]['longitude']
data_laiwu['distance'] = 6367 * 2 * np.arcsin(np.sqrt(np.sin((np.radians(data_laiwu['latitude'])
    - math.radians(latitude_origin))/2)**2 + math.cos(math.radians(latitude_origin))
    * np.cos(np.radians(data_laiwu['latitude'])) * np.sin((np.radians(data_laiwu['longitude'])
    - math.radians(longitude_origin))/2)**2))
data_laiwu.plot.scatter(x='distance', y='xco2')
<matplotlib.axes._subplots.AxesSubplot at 0x7f8e28a0cdd0>

Gaussian fit

scipy curve_fit

data_laiwu = data_laiwu.sort_values(by=['distance']).reindex()
data_laiwu.head(3)
sounding_id latitude longitude xco2 xco2_uncert orbit windspeed_u windspeed_v date distance xco2_enhancement
2069837 2018082505140774 35.429119 117.594986 400.076141 0.716316 22061 -2.914134 -0.235950 2018-08-25 05:14:07.740 0.000000 -1.221359
2069843 2018082505140837 35.436092 117.599686 400.578766 0.576075 22061 -2.915572 -0.225130 2018-08-25 05:14:08.370 0.884057 -0.718735
2069844 2018082505140838 35.426098 117.605133 400.573578 0.666210 22061 -2.920266 -0.238718 2018-08-25 05:14:08.380 0.978238 -0.723923
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np

d_good_temp = data_laiwu
d_good = data_laiwu
x = data_laiwu['distance']
y = data_laiwu['xco2']

# weighted arithmetic mean (corrected - check the section below)
# mean = sum(x * y) / sum(y)
# sigma = np.sqrt(sum(y * (x - mean)**2) / sum(y))

# spatial window for the detection (km)
window = 200
good_find = 0
popt_saved, pcov_saved = [], []
soundings, sigmas = [], []
def func(x, m, b, A, sig):
    return m * x + b + A / (sig * (2 * np.pi)**0.5) * np.exp(-x**2 / (2*sig**2))

try:
    for i, j in enumerate(data_laiwu.index):
        if i > 10:
            #print(i)
            if str(i)[-1] != '0':
                continue
        med_temp = np.median(data_laiwu['xco2'])
        std_temp = np.std(data_laiwu['xco2'])
        data_laiwu['xco2_enhancement'] = data_laiwu['xco2'] - med_temp
        p0 = (0.,med_temp,30*data_laiwu.loc[j,'xco2_enhancement'],10.)
        d_centered = data_laiwu['distance'] - data_laiwu.loc[j, 'distance']
        popt, pcov = curve_fit(func, d_centered, data_laiwu['xco2'], sigma = data_laiwu['xco2_uncert'], p0 = p0, maxfev=20000)
        sig = abs(popt[3])  # sigma of the Gaussian (km)
        if sig < 2 : continue  # too narrow
        if 3*sig > window / 2.: continue  # too large
        delta = popt[2]/(popt[3]*(2 * np.pi)**0.5)  # height of the peak (ppm)
        if delta < 0: continue  # depletion
        d_plume = d_good_temp[(d_centered >= -2*sig) & (d_centered <= 2*sig)]
        d_backg = d_good_temp[(d_centered < -2*sig) | (d_centered > 2*sig)]
        d_peak = d_good_temp[(d_centered >= -4*sig) & (d_centered <= 4*sig)]
        d_peak_distance = d_peak['distance'] - d_good.loc[j, 'distance']
        # we want at least 1 1-km-sounding per km on average on both sides of the peak within 2 sigmas and between 2 and 3 sigmas
        if len(d_good_temp[(d_centered >= -1*sig) & (d_centered <= 0)]) < int(sig): continue
        if len(d_good_temp[(d_centered <= 1*sig) & (d_centered >= 0)]) < int(sig): continue
        if len(d_good_temp[(d_centered >= -3*sig) & (d_centered <= -2*sig)]) < int(sig): continue
        if len(d_good_temp[(d_centered <= 3*sig) & (d_centered >= 2*sig)]) < int(sig): continue
        # check the quality of the fit
        R = np.corrcoef(func(d_peak_distance,*popt), d_peak['xco2'])
        if R[0,1]**2 < 0.25 : continue
        good_find += 1
        print('index',j, 'Number of good fit',good_find, 'Sigma:', sig, 'Ampleur de l\'émission de CO²:',delta,'Coef de coreflation',R)
        soundings.append(j)
        sigmas.append(sig)
        popt_saved.append(popt)
        pcov_saved.append(pcov)
except RuntimeError: 
  # curve_fit failed 
  print('LOST', d_good.loc[j, 'sounding_id'])
index 2070045 Number of good fit 1 Sigma: 28.23721179217691 Ampleur de l'émission de CO²: 3.3741936684058076 Coef de coreflation [[1.        0.6589488]
 [0.6589488 1.       ]]
index 2070027 Number of good fit 2 Sigma: 22.465496876615877 Ampleur de l'émission de CO²: 3.431652095420562 Coef de coreflation [[1.        0.7036994]
 [0.7036994 1.       ]]
index 2070040 Number of good fit 3 Sigma: 19.153138288248023 Ampleur de l'émission de CO²: 3.613302504102211 Coef de coreflation [[1.         0.75592095]
 [0.75592095 1.        ]]
index 2070047 Number of good fit 4 Sigma: 16.682874701268304 Ampleur de l'émission de CO²: 3.8092629415713453 Coef de coreflation [[1.         0.80218004]
 [0.80218004 1.        ]]
index 2070068 Number of good fit 5 Sigma: 14.399258196038623 Ampleur de l'émission de CO²: 4.017469731345853 Coef de coreflation [[1.         0.82426659]
 [0.82426659 1.        ]]
index 2070073 Number of good fit 6 Sigma: 14.252737704510007 Ampleur de l'émission de CO²: 4.013139681824819 Coef de coreflation [[1.         0.83091661]
 [0.83091661 1.        ]]
index 2070093 Number of good fit 7 Sigma: 15.215290807987461 Ampleur de l'émission de CO²: 3.8439169619522158 Coef de coreflation [[1.         0.80560701]
 [0.80560701 1.        ]]
index 2070099 Number of good fit 8 Sigma: 16.869101608599085 Ampleur de l'émission de CO²: 3.548502509129814 Coef de coreflation [[1.         0.75536402]
 [0.75536402 1.        ]]
index 2070106 Number of good fit 9 Sigma: 18.89063683349686 Ampleur de l'émission de CO²: 3.212305763286154 Coef de coreflation [[1.         0.70446062]
 [0.70446062 1.        ]]
index 2070113 Number of good fit 10 Sigma: 21.522340109028512 Ampleur de l'émission de CO²: 2.8763986275753473 Coef de coreflation [[1.         0.63512867]
 [0.63512867 1.        ]]
index 2070146 Number of good fit 11 Sigma: 24.689427064311754 Ampleur de l'émission de CO²: 2.644834325947664 Coef de coreflation [[1.         0.59358821]
 [0.59358821 1.        ]]
# print(x)
# print(popt)
print(func(50, *popt))
408.7106391301036
%matplotlib inline
from matplotlib.pyplot import figure
figure(num=None, figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')

index=np.argmin(sigmas)
popt=popt_saved[index]
print('Best m, b, A, sig = ', popt)
plt.scatter(x, y, c=y, s=3, label='data')
plt.plot(x, func(x = x-data_laiwu.loc[soundings[index], 'distance'], m=popt[0], b=popt[1], A=popt[2], sig=popt[3]), 'r', label='fit')
plt.legend()
plt.title('OCO 2 data')
plt.xlabel('Distance')
plt.ylabel('CO²')
plt.show()
Best m, b, A, sig =  [3.92560177e-03 4.00770662e+02 1.43374694e+02 1.42527377e+01]
%matplotlib inline
from matplotlib.pyplot import figure
figure(num=None, figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')

x = data_laiwu['distance']
y = data_laiwu['xco2']

index=np.argmin(sigmas)
popt=popt_saved[index]
print('Best m, b, A, sig = ', popt)
m=popt[0]
b=popt[1]
A=popt[2]
sig=popt[3]
y = y - m * x - b
plt.scatter(x, y, c=y, s=3, label='data')
plt.plot(x, func(x = x-data_laiwu.loc[soundings[index], 'distance'], m=0, b=0, A=popt[2], sig=popt[3]), 'r', label='fit')
plt.legend()
plt.title('OCO 2 data')
plt.xlabel('Distance')
plt.ylabel('CO²')
plt.show()
Best m, b, A, sig =  [3.92560177e-03 4.00770662e+02 1.43374694e+02 1.42527377e+01]

sklearn GaussianProcessRegressor

# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
#         Jake Vanderplas <vanderplas@astro.washington.edu>
#         Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>s
# License: BSD 3 clause

import numpy as np
from matplotlib import pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

np.random.seed(1)


# def f(x):
#     """The function to predict."""
#     return x * np.sin(x)

# # now the noisy case
# X = np.linspace(0.1, 9.9, 20)
# X = np.atleast_2d(X).T

# # Observations and noise
# y = f(X).ravel()
dy = 0.5 + 1.0 * np.random.random(y.shape)
# noise = np.random.normal(0, dy)
# y += noise

# Instantiate a Gaussian Process model
gp = GaussianProcessRegressor(kernel=kernel, alpha=dy ** 2,
                              n_restarts_optimizer=10)
#x.to_array
X = np.array(x).reshape(-1, 1)
X.shape
# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(X, return_std=True)

# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
plt.figure()
#plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label='Observations')
plt.plot(x, y_pred, 'b-', label='Prediction')
plt.fill(np.concatenate([x, x[::-1]]),
         np.concatenate([y_pred - 1.9600 * sigma,
                        (y_pred + 1.9600 * sigma)[::-1]]),
         alpha=.5, fc='b', ec='None', label='95% confidence interval')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(-10, 20)
plt.legend(loc='upper left')

plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-467b2bfcd15a> in <module>
     28 
     29 # Instantiate a Gaussian Process model
---> 30 gp = GaussianProcessRegressor(kernel=kernel, alpha=dy ** 2,
     31                               n_restarts_optimizer=10)
     32 #x.to_array

NameError: name 'kernel' is not defined

GPflow

!pip install GPflow
# https://blog.dominodatalab.com/fitting-gaussian-process-models-python/

import GPflow

k = GPflow.kernels.Matern32(1, variance=1, lengthscales=1.2)
m = GPflow.gpr.GPR(X, Y, kern=k)
m.likelihood.variance = 0.01
m.optimize()
!pip install lmfit
import numpy as np
from lmfit import Model
from lmfit.models import GaussianModel, ConstantModel
import matplotlib.pyplot as plt

xval = np.array(x)
yval = np.array(y)
#err = np.array(e)

peak = GaussianModel()
offset = ConstantModel()
model = peak + offset

pars = offset.make_params(c=np.median(y))
pars += peak.guess(yval, x=xval, amplitude=-0.5)

result = model.fit(yval, pars, x=xval) # , weights=1/err
print(result.fit_report())

plt.plot(xval, yval, 'ro', ms=6)
plt.plot(xval, result.best_fit, 'b--')
import numpy as np
from lmfit import Model
from lmfit.models import GaussianModel, ConstantModel
import matplotlib.pyplot as plt

xval = np.array(data_laiwu['distance'])
yval = np.array(data_laiwu['xco2'])
err = np.array(data_laiwu['xco2_uncert'])

peak = GaussianModel()
offset = ConstantModel()
model = peak + offset

pars = offset.make_params(c=np.median(y))
pars += peak.guess(yval, x=xval, amplitude=-5)
print(pars)
result = model.fit(yval, pars, x=xval, weights=1/err)
print(result.fit_report())

plt.plot(xval, yval, 'ro', ms=6)
plt.plot(xval, result.best_fit, 'b--')
# <examples/doc_model_gaussian.py>
import matplotlib.pyplot as plt
from numpy import exp, loadtxt, pi, sqrt

from lmfit import Model

x = data_laiwu['distance']
y = data_laiwu['xco2']

def gaussian(x, amp, cen, wid):
    """1-d gaussian: gaussian(x, amp, cen, wid)"""
    return (amp / (sqrt(2*pi) * wid)) * exp(-(x-cen)**2 / (2*wid**2))

gmodel = Model(gaussian)
print('parameter names: {}'.format(gmodel.param_names))
print('independent variables: {}'.format(gmodel.independent_vars))
result = gmodel.fit(y, x=x, amp=500, cen=402, wid=1)

print(result.fit_report())

plt.plot(x, y, 'bo')
plt.plot(x, result.init_fit, 'k--', label='initial fit')
plt.plot(x, result.best_fit, 'r-', label='best fit')
plt.legend(loc='best')
plt.show()
# We retrieve the Figure 2.A
draw_map(data_laiwu, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_laiwu, lon_min=116.5, lon_max=118.2, lat_min=35.4, lat_max=37.1, frontier=True, size_point=5)

Switch to Cartopy

# import matplotlib.pyplot as plt
# import numpy as np

# import cartopy.crs as ccrs
# import cartopy.feature as cfeature


# def sample_data(shape=(20, 30)):
#     """
#     Return ``(x, y, u, v, crs)`` of some vector data
#     computed mathematically. The returned crs will be a rotated
#     pole CRS, meaning that the vectors will be unevenly spaced in
#     regular PlateCarree space.

#     """
#     crs = ccrs.RotatedPole(pole_longitude=177.5, pole_latitude=37.5)

#     x = np.linspace(311.9, 391.1, shape[1])
#     y = np.linspace(-23.6, 24.8, shape[0])

#     x2d, y2d = np.meshgrid(x, y)
#     u = 10 * (2 * np.cos(2 * np.deg2rad(x2d) + 3 * np.deg2rad(y2d + 30)) ** 2)
#     v = 20 * np.cos(6 * np.deg2rad(x2d))

#     return x, y, u, v, crs


# def main():
#     fig = plt.figure()
#     ax = fig.add_subplot(1, 1, 1, projection=ccrs.Orthographic(-10, 45))

#     ax.add_feature(cfeature.OCEAN, zorder=0)
#     ax.add_feature(cfeature.LAND, zorder=0, edgecolor='black')

#     ax.set_global()
#     ax.gridlines()

#     x, y, u, v, vector_crs = sample_data()
#     ax.quiver(x, y, u, v, transform=vector_crs)

#     plt.show()


# if __name__ == '__main__':
#     main()

Show Data on the map

draw_map: Function to draw the map and the observations (relief style). The column names can be specified in the arguments.

Parameters:

  • (DataFrame) data: the dataset to map.
  • (string) x : the name of the longitude column. default: 'longitide'
  • (string) y: the name of the latitude column. default: 'latitude'
  • (string) c: the name of the XCO2 column (or other measure wanted to be plotted). default: 'xco2'
  • (int) lon_min : the minimum longitude. default: -180
  • (int) lon_max: the maximum longitude. default: 180
  • (int) lat_min: the minimum latitude. default: -90
  • (int) lat_max: the maximum latitude. default: 90
  • (int) size_point: size of the point to plot (useful if we zoom in). default: 1
  • (Bool) frontier: whether or not to draw the countries borders. default: False
def draw_map(data, x="longitude", y="latitude", c="xco2", lon_min=-180, lon_max=180, lat_min=-90, lat_max=90, size_point=1, frontier=False):

    plt.figure(figsize=(15, 10), edgecolor='w')
    m = Basemap(llcrnrlat=lat_min, urcrnrlat=lat_max, llcrnrlon=lon_min, urcrnrlon=lon_max)
    
    m.shadedrelief()
    
    parallels = np.arange(-80.,81,10.)
    m.drawparallels(parallels,labels=[False,True,True,False])

    meridians = np.arange(10.,351.,20.)
    m.drawmeridians(meridians,labels=[True,False,False,True])

    normal = matplotlib.colors.LogNorm(vmin=data[c].min(), vmax=data[c].max())

    m.scatter(data[x], data[y], c=data[c], cmap=plt.cm.jet, s=size_point, norm=normal)

    if (frontier):
      m.drawcountries(linewidth=0.5)
      m.drawcoastlines(linewidth=0.7)

    plt.show()