# !apt-get install libgeos-3.5.0
# !apt-get install libgeos-dev
# !pip install https://github.com/matplotlib/basemap/archive/master.zip
#from google.colab import drive
#drive.mount('/content/drive')
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
#from mpl_toolkits.basemap import Basemap #Imported directly from the github repository
Retieve Data
Sample data can be accessed freely on the NASA Database, among other open data from several NASA sattelites.
We will be using CSV aggregated by Benoit Courty here.
#data_1610 = pd.read_csv("http://courty.fr/OCO2/oco2_1610.csv", sep=";")
#data_1705 = pd.read_csv("http://courty.fr/OCO2/oco2_1705.csv", sep=";")
#data_1803 = pd.read_csv("http://courty.fr/OCO2/oco2_1803.csv", sep=";")
#data_1805 = pd.read_csv("http://courty.fr/OCO2/oco2_1805.csv", sep=";")
#data_1808 = pd.read_csv("http://courty.fr/OCO2/oco2_1808.csv", sep=";")
data_1808 = pd.read_csv("../../../datasets/OCO2/csv/oco2_1808.csv", sep=";")
#data_1809 = pd.read_csv("http://courty.fr/OCO2/oco2_1809.csv", sep=";")
data_1808.head()
data_1808.describe()
To convert the sounding_id
into a datetime variable data
:
from datetime import datetime
def to_date(a):
return datetime.strptime(str(a), '%Y%m%d%H%M%S%f')
# data_1610['date'] = data_1610['sounding_id'].apply(to_date)
# data_1705['date'] = data_1705['sounding_id'].apply(to_date)
# data_1803['date'] = data_1803['sounding_id'].apply(to_date)
# data_1805['date'] = data_1805['sounding_id'].apply(to_date)
data_1808['date'] = data_1808['sounding_id'].apply(to_date)
# data_1809['date'] = data_1809['sounding_id'].apply(to_date)
data_1808.head()
We are seaking the emission peaks taken as an example in the annexes of F. Chevallier's article Observing carbon dioxide emissions over China's cities with the Orbiting Carbon Observatory-2:
- Over Anshan, the 17th October 2016
- Over Baotou, the 17th May 2018
- Over Dezhou, the 24th September 2018
- Over Laiwu, the 25th August 2018
- Over Nanjing, the 9th March 2018
- Over Tangshan, the 18th May 2017
# We consider the August 2018 datset at the right day
data_1808_25 = data_1808[data_1808['date'] < "2018-08-26"]
data_1808_25 = data_1808_25[data_1808_25['date'] > "2018-08-25"]
#draw_map(data_1808_25)
# We consider the orgit going over East China
#data_laiwu = data_1808_25[data_1808_25['longitude'] > 110]
#data_laiwu = data_laiwu[data_laiwu['longitude'] < 125]
data_laiwu = data_1808_25[data_1808_25['orbit'] == 22061]
data_laiwu.head(3)
data_laiwu=data_laiwu.query('longitude>116.5 and longitude<118.2 and latitude>35.4 and latitude<37.1')
data_laiwu.plot.scatter(x='longitude', y='latitude', c='xco2', colormap='viridis')
data_laiwu.plot.scatter(x='sounding_id', y='xco2')
import math
#TODO: Change the formula to compute the distance from the trace origin
latitude_origin = data_laiwu.iloc[0]['latitude']
longitude_origin = data_laiwu.iloc[0]['longitude']
data_laiwu['distance'] = 6367 * 2 * np.arcsin(np.sqrt(np.sin((np.radians(data_laiwu['latitude'])
- math.radians(latitude_origin))/2)**2 + math.cos(math.radians(latitude_origin))
* np.cos(np.radians(data_laiwu['latitude'])) * np.sin((np.radians(data_laiwu['longitude'])
- math.radians(longitude_origin))/2)**2))
data_laiwu.plot.scatter(x='distance', y='xco2')
data_laiwu = data_laiwu.sort_values(by=['distance']).reindex()
data_laiwu.head(3)
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
d_good_temp = data_laiwu
d_good = data_laiwu
x = data_laiwu['distance']
y = data_laiwu['xco2']
# weighted arithmetic mean (corrected - check the section below)
# mean = sum(x * y) / sum(y)
# sigma = np.sqrt(sum(y * (x - mean)**2) / sum(y))
# spatial window for the detection (km)
window = 200
good_find = 0
popt_saved, pcov_saved = [], []
soundings, sigmas = [], []
def func(x, m, b, A, sig):
return m * x + b + A / (sig * (2 * np.pi)**0.5) * np.exp(-x**2 / (2*sig**2))
try:
for i, j in enumerate(data_laiwu.index):
if i > 10:
#print(i)
if str(i)[-1] != '0':
continue
med_temp = np.median(data_laiwu['xco2'])
std_temp = np.std(data_laiwu['xco2'])
data_laiwu['xco2_enhancement'] = data_laiwu['xco2'] - med_temp
p0 = (0.,med_temp,30*data_laiwu.loc[j,'xco2_enhancement'],10.)
d_centered = data_laiwu['distance'] - data_laiwu.loc[j, 'distance']
popt, pcov = curve_fit(func, d_centered, data_laiwu['xco2'], sigma = data_laiwu['xco2_uncert'], p0 = p0, maxfev=20000)
sig = abs(popt[3]) # sigma of the Gaussian (km)
if sig < 2 : continue # too narrow
if 3*sig > window / 2.: continue # too large
delta = popt[2]/(popt[3]*(2 * np.pi)**0.5) # height of the peak (ppm)
if delta < 0: continue # depletion
d_plume = d_good_temp[(d_centered >= -2*sig) & (d_centered <= 2*sig)]
d_backg = d_good_temp[(d_centered < -2*sig) | (d_centered > 2*sig)]
d_peak = d_good_temp[(d_centered >= -4*sig) & (d_centered <= 4*sig)]
d_peak_distance = d_peak['distance'] - d_good.loc[j, 'distance']
# we want at least 1 1-km-sounding per km on average on both sides of the peak within 2 sigmas and between 2 and 3 sigmas
if len(d_good_temp[(d_centered >= -1*sig) & (d_centered <= 0)]) < int(sig): continue
if len(d_good_temp[(d_centered <= 1*sig) & (d_centered >= 0)]) < int(sig): continue
if len(d_good_temp[(d_centered >= -3*sig) & (d_centered <= -2*sig)]) < int(sig): continue
if len(d_good_temp[(d_centered <= 3*sig) & (d_centered >= 2*sig)]) < int(sig): continue
# check the quality of the fit
R = np.corrcoef(func(d_peak_distance,*popt), d_peak['xco2'])
if R[0,1]**2 < 0.25 : continue
good_find += 1
print('index',j, 'Number of good fit',good_find, 'Sigma:', sig, 'Ampleur de l\'émission de CO²:',delta,'Coef de coreflation',R)
soundings.append(j)
sigmas.append(sig)
popt_saved.append(popt)
pcov_saved.append(pcov)
except RuntimeError:
# curve_fit failed
print('LOST', d_good.loc[j, 'sounding_id'])
# print(x)
# print(popt)
print(func(50, *popt))
%matplotlib inline
from matplotlib.pyplot import figure
figure(num=None, figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
index=np.argmin(sigmas)
popt=popt_saved[index]
print('Best m, b, A, sig = ', popt)
plt.scatter(x, y, c=y, s=3, label='data')
plt.plot(x, func(x = x-data_laiwu.loc[soundings[index], 'distance'], m=popt[0], b=popt[1], A=popt[2], sig=popt[3]), 'r', label='fit')
plt.legend()
plt.title('OCO 2 data')
plt.xlabel('Distance')
plt.ylabel('CO²')
plt.show()
%matplotlib inline
from matplotlib.pyplot import figure
figure(num=None, figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
x = data_laiwu['distance']
y = data_laiwu['xco2']
index=np.argmin(sigmas)
popt=popt_saved[index]
print('Best m, b, A, sig = ', popt)
m=popt[0]
b=popt[1]
A=popt[2]
sig=popt[3]
y = y - m * x - b
plt.scatter(x, y, c=y, s=3, label='data')
plt.plot(x, func(x = x-data_laiwu.loc[soundings[index], 'distance'], m=0, b=0, A=popt[2], sig=popt[3]), 'r', label='fit')
plt.legend()
plt.title('OCO 2 data')
plt.xlabel('Distance')
plt.ylabel('CO²')
plt.show()
# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
# Jake Vanderplas <vanderplas@astro.washington.edu>
# Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>s
# License: BSD 3 clause
import numpy as np
from matplotlib import pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
np.random.seed(1)
# def f(x):
# """The function to predict."""
# return x * np.sin(x)
# # now the noisy case
# X = np.linspace(0.1, 9.9, 20)
# X = np.atleast_2d(X).T
# # Observations and noise
# y = f(X).ravel()
dy = 0.5 + 1.0 * np.random.random(y.shape)
# noise = np.random.normal(0, dy)
# y += noise
# Instantiate a Gaussian Process model
gp = GaussianProcessRegressor(kernel=kernel, alpha=dy ** 2,
n_restarts_optimizer=10)
#x.to_array
X = np.array(x).reshape(-1, 1)
X.shape
# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)
# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(X, return_std=True)
# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
plt.figure()
#plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label='Observations')
plt.plot(x, y_pred, 'b-', label='Prediction')
plt.fill(np.concatenate([x, x[::-1]]),
np.concatenate([y_pred - 1.9600 * sigma,
(y_pred + 1.9600 * sigma)[::-1]]),
alpha=.5, fc='b', ec='None', label='95% confidence interval')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(-10, 20)
plt.legend(loc='upper left')
plt.show()
!pip install GPflow
# https://blog.dominodatalab.com/fitting-gaussian-process-models-python/
import GPflow
k = GPflow.kernels.Matern32(1, variance=1, lengthscales=1.2)
m = GPflow.gpr.GPR(X, Y, kern=k)
m.likelihood.variance = 0.01
m.optimize()
!pip install lmfit
LMfit GaussianModel
https://lmfit.github.io/lmfit-py/model.html#lmfit.model.Model.fit
import numpy as np
from lmfit import Model
from lmfit.models import GaussianModel, ConstantModel
import matplotlib.pyplot as plt
xval = np.array(x)
yval = np.array(y)
#err = np.array(e)
peak = GaussianModel()
offset = ConstantModel()
model = peak + offset
pars = offset.make_params(c=np.median(y))
pars += peak.guess(yval, x=xval, amplitude=-0.5)
result = model.fit(yval, pars, x=xval) # , weights=1/err
print(result.fit_report())
plt.plot(xval, yval, 'ro', ms=6)
plt.plot(xval, result.best_fit, 'b--')
import numpy as np
from lmfit import Model
from lmfit.models import GaussianModel, ConstantModel
import matplotlib.pyplot as plt
xval = np.array(data_laiwu['distance'])
yval = np.array(data_laiwu['xco2'])
err = np.array(data_laiwu['xco2_uncert'])
peak = GaussianModel()
offset = ConstantModel()
model = peak + offset
pars = offset.make_params(c=np.median(y))
pars += peak.guess(yval, x=xval, amplitude=-5)
print(pars)
result = model.fit(yval, pars, x=xval, weights=1/err)
print(result.fit_report())
plt.plot(xval, yval, 'ro', ms=6)
plt.plot(xval, result.best_fit, 'b--')
# <examples/doc_model_gaussian.py>
import matplotlib.pyplot as plt
from numpy import exp, loadtxt, pi, sqrt
from lmfit import Model
x = data_laiwu['distance']
y = data_laiwu['xco2']
def gaussian(x, amp, cen, wid):
"""1-d gaussian: gaussian(x, amp, cen, wid)"""
return (amp / (sqrt(2*pi) * wid)) * exp(-(x-cen)**2 / (2*wid**2))
gmodel = Model(gaussian)
print('parameter names: {}'.format(gmodel.param_names))
print('independent variables: {}'.format(gmodel.independent_vars))
result = gmodel.fit(y, x=x, amp=500, cen=402, wid=1)
print(result.fit_report())
plt.plot(x, y, 'bo')
plt.plot(x, result.init_fit, 'k--', label='initial fit')
plt.plot(x, result.best_fit, 'r-', label='best fit')
plt.legend(loc='best')
plt.show()
# We retrieve the Figure 2.A
draw_map(data_laiwu, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_laiwu, lon_min=116.5, lon_max=118.2, lat_min=35.4, lat_max=37.1, frontier=True, size_point=5)
# import matplotlib.pyplot as plt
# import numpy as np
# import cartopy.crs as ccrs
# import cartopy.feature as cfeature
# def sample_data(shape=(20, 30)):
# """
# Return ``(x, y, u, v, crs)`` of some vector data
# computed mathematically. The returned crs will be a rotated
# pole CRS, meaning that the vectors will be unevenly spaced in
# regular PlateCarree space.
# """
# crs = ccrs.RotatedPole(pole_longitude=177.5, pole_latitude=37.5)
# x = np.linspace(311.9, 391.1, shape[1])
# y = np.linspace(-23.6, 24.8, shape[0])
# x2d, y2d = np.meshgrid(x, y)
# u = 10 * (2 * np.cos(2 * np.deg2rad(x2d) + 3 * np.deg2rad(y2d + 30)) ** 2)
# v = 20 * np.cos(6 * np.deg2rad(x2d))
# return x, y, u, v, crs
# def main():
# fig = plt.figure()
# ax = fig.add_subplot(1, 1, 1, projection=ccrs.Orthographic(-10, 45))
# ax.add_feature(cfeature.OCEAN, zorder=0)
# ax.add_feature(cfeature.LAND, zorder=0, edgecolor='black')
# ax.set_global()
# ax.gridlines()
# x, y, u, v, vector_crs = sample_data()
# ax.quiver(x, y, u, v, transform=vector_crs)
# plt.show()
# if __name__ == '__main__':
# main()
draw_map: Function to draw the map and the observations (relief style). The column names can be specified in the arguments.
Parameters:
- (DataFrame) data: the dataset to map.
- (string) x : the name of the longitude column. default: 'longitide'
- (string) y: the name of the latitude column. default: 'latitude'
- (string) c: the name of the XCO2 column (or other measure wanted to be plotted). default: 'xco2'
- (int) lon_min : the minimum longitude. default: -180
- (int) lon_max: the maximum longitude. default: 180
- (int) lat_min: the minimum latitude. default: -90
- (int) lat_max: the maximum latitude. default: 90
- (int) size_point: size of the point to plot (useful if we zoom in). default: 1
- (Bool) frontier: whether or not to draw the countries borders. default: False
def draw_map(data, x="longitude", y="latitude", c="xco2", lon_min=-180, lon_max=180, lat_min=-90, lat_max=90, size_point=1, frontier=False):
plt.figure(figsize=(15, 10), edgecolor='w')
m = Basemap(llcrnrlat=lat_min, urcrnrlat=lat_max, llcrnrlon=lon_min, urcrnrlon=lon_max)
m.shadedrelief()
parallels = np.arange(-80.,81,10.)
m.drawparallels(parallels,labels=[False,True,True,False])
meridians = np.arange(10.,351.,20.)
m.drawmeridians(meridians,labels=[True,False,False,True])
normal = matplotlib.colors.LogNorm(vmin=data[c].min(), vmax=data[c].max())
m.scatter(data[x], data[y], c=data[c], cmap=plt.cm.jet, s=size_point, norm=normal)
if (frontier):
m.drawcountries(linewidth=0.5)
m.drawcoastlines(linewidth=0.7)
plt.show()