Link Search Menu Expand Document

面源資料庫轉CAMx排放nc

Table of contents

背景

  • 為消除資料庫在時間與空間的相依性,需要消除資料庫類別-縣市之維度,此處即針對面源的時間變異係數與類別-縣市之對照表(nc_fac.json)進行計算,以備未來套用。主要問題出現在環保署提供的時變係數檔,空間資料為中文名稱,與主資料庫是鄉鎮區代碼2碼編號不能對應,分3個程式說明:
    • 時間變異係數(csv)檔案前處理
    • csv檔案之產生
    • csv檔案應用到面源排放量資料庫,並展開至全年逐時之序列,存成nc_fac.json
  • 排放量整體處理原則參見處理程序總綱、針對面源之處理龐大.dbf檔案之讀取重新計算網格座標,為此處之前處理。

程式說明

引用模組

kuang@114-32-164-198 /Users/TEDS/teds10_camx/HourlyWeighted/area
$ cat -n area_YYMMinc.py 
     1	"""usage:
     2	python YYMM(year and month both in 2 digits)
     3	1.Domain is determined by the template chosen.
     4	2.One file(month) at a time. The resultant file will not be overwritten.
     5	3.nc files may be corrupted if not properly written. must be remove before redoing.
     6	4.nc_fac.json file is needed, df_Admw.py must be excuted earlier.
     7	"""
     8	import numpy as np
     9	from pandas import *
    10	from calendar import monthrange
    11	import sys, os, subprocess
    12	import netCDF4
    13	from datetime import datetime, timedelta
    14	from include2 import rd_ASnPRnCBM_A, WGS_TWD, tune_UTM
    15	from include3 import dt2jul, jul2dt, disc, add_PMS, add_VOC
    16	from mostfreqword import mostfreqword
    17	import json
    18	import warnings
    19	
    20	if not sys.warnoptions:
    21	    import warnings
    22	    warnings.simplefilter("ignore")
    23	

讀入引數、計算起訖日期

    24	#Main
    25	ym=sys.argv[1]
    26	mm=sys.argv[1][2:4]
    27	mo=int(mm)
    28	yr=2000+int(sys.argv[1][:2])
    29	if (yr-2016)%3 !=0 or 0==mo or mo>12:sys.exit('wrong ym='+ym)
    30	teds=str((yr-2016)//3+10) #TEDS version increase every 3 years
    31	P='./'
    32	
    33	#time and space initiates
    34	ntm=(monthrange(yr,mo)[1]+2)*24+1
    35	bdate=datetime(yr,mo,1)+timedelta(days=-1+8./24)
    36	edate=bdate+timedelta(days=monthrange(yr,mo)[1]+3)

nc模版之準備

  • nc模版如何形成?
    • 修改CAMx提供的範例檔案
    • 使用pncgen轉換一個既有的uamiv檔案
    • 從CMAQ排放量檔案轉換而來
    37	#prepare the template
    38	fname='fortBE.413_teds'+teds+'.area'+mm+'.nc'
    39	try:
    40	  nc = netCDF4.Dataset(fname, 'r+')
    41	except:
    42	  os.system('cp '+P+'template_d4.nc '+fname)
    43	  nc = netCDF4.Dataset(fname, 'r+')
    44	V=[list(filter(lambda x:nc.variables[x].ndim==j, [i for i in nc.variables])) for j in [1,2,3,4]]
    45	nt,nlay,nrow,ncol=nc.variables[V[3][0]].shape
    46	nv=len(V[3])
    47	nc.SDATE,nc.STIME=dt2jul(bdate)
    48	nc.EDATE,nc.ETIME=dt2jul(edate)
    49	nc.NOTE='grid Emission'
    50	nc.NOTE=nc.NOTE+(60-len(nc.NOTE))*' '
    51	#Name-names may encounter conflicts with newer versions of NCFs and PseudoNetCDFs.
    52	#nc.NAME='EMISSIONS '
    53	if 'ETFLAG' not in V[2]:
    54	  zz=nc.createVariable('ETFLAG',"i4",("TSTEP","VAR","DATE-TIME"))
  • 延長模版時間維度的長度
    55	if nt!=ntm or (nc.variables['TFLAG'][0,0,0]!=nc.SDATE and nc.variables['TFLAG'][0,0,1]!=nc.STIME):
    56	  for t in range(ntm):
    57	    sdate,stime=dt2jul(bdate+timedelta(days=t/24.))
    58	    nc.variables['TFLAG'][t,:,0]=[sdate for i in range(nv)]
    59	    nc.variables['TFLAG'][t,:,1]=[stime for i in range(nv)]
    60	    ndate,ntime=dt2jul(bdate+timedelta(days=(t+1)/24.))
    61	    nc.variables['ETFLAG'][t,:,0]=[ndate for i in range(nv)]
    62	    nc.variables['ETFLAG'][t,:,1]=[ntime for i in range(nv)]
  • 所有項目歸零。否則會出現mask導致錯誤。
      63	for v in V[3]:
      64	  nc.variables[v][:]=0.
      65	sdatetime=[jul2dt(nc.variables['TFLAG'][t,0,:]) for t in range(ntm)]
    

污染物名稱、對照表

    66	col=['PM','PM25', 'CO', 'NOX', 'NMHC', 'SOX','NH3'] #1NO   2NO2   3SO2   4NH3   5CCRS   6FCRS   7PAR
    67	cole=['EM_'+i for i in col] #1NO   2NO2   3SO2   4NH3   5CCRS   6FCRS   7PAR
    68	#define the crustals/primary sources
    69	colc=['CCRS','FCRS','CPRM','FPRM']
    70	c2v={i:i for i in colc}
    71	c2m={i:1 for i in colc}
    72	colv='OLE PAR TOL XYL FORM ALD2 ETH ISOP NR ETHA MEOH ETOH IOLE TERP ALDX PRPA BENZ ETHY ACET KET'.split()
    73	NC=len(colv)
    74	c2v.update({i:i for i in colv if i not in ['NR','ETHY']})
    75	c2m.update({i:1 for i in colv if i not in ['NR','ETHY']})
    76	c2v.update({'EM_SOX':'SO2','EM_NOX':'NO','EM_CO':'CO','EM_NH3':'NH3'})
    77	c2m.update({'EM_SOX':64,'EM_NOX':46,'EM_CO':28,'EM_NH3':17})
    78	
    79	

開啟面源及氨氣排放量資料庫

    80	#import the gridded area sources
    81	fname=P+'areagrid'+teds+'LL.csv'
    82	df = read_csv(fname)
    83	minx,miny=min(df.UTME),min(df.UTMN)
    84	df.UTME=round(df.UTME-minx,-3)
    85	df.UTMN=round(df.UTMN-miny,-3)
    86	df['YX']=np.array(df.UTMN+df.UTME/1000,dtype=int)
    87	
    88	#add nh3 separately
    89	YX_DICT=pivot_table(df,index=['YX'],values='DICT',aggfunc=mostfreqword).reset_index()
    90	YX_DICT={i:j for i,j in zip(YX_DICT.YX,YX_DICT.DICT)}
  • 氨氣排放量讀取
    91	df['EM_NH3']=0.
    92	fname=P+'nh3.csv'
    93	nh3=read_csv(fname)
    94	nh3 = tune_UTM(nh3)
    95	nh3['NSC']='nh3'
    96	nh3['NSC_SUB']='b'
    97	if 'nsc2' in df.columns: nh3['nsc2']='nh3b'
    98	nh3.UTME=round(nh3.UTME-minx,-3)
    99	nh3.UTMN=round(nh3.UTMN-miny,-3)
   100	nh3['YX']=np.array(nh3.UTMN+nh3.UTME/1000,dtype=int)
   101	nh3=nh3.loc[nh3.YX.map(lambda x:x in YX_DICT)].reset_index(drop=True)
   102	nh3['DICT']=[YX_DICT[i] for i in nh3.YX]
   103	for c in df.columns:
   104	  if c not in nh3.columns:
   105	    nh3[c]=0
   106	df=df.append(nh3,ignore_index=True)
   107	nh3=0#clean_up of mem
   108	
   109	#The two levels of the NCS are grouped as one.
   110	if 'nsc2' not in df.columns:
   111	  df.loc[df['NSC_SUB'].map(lambda x: (type(x)==float and np.isnan(x)==True) or ( x==' ')),'NSC_SUB']='b'
   112	  df['nsc2']=[str(x)+str(y) for x,y in zip(df['NSC'],df['NSC_SUB'])]
   113	df.loc[df['DICT'].map(lambda x:np.isnan(x)==True),'DICT']=5300.
   114	if 'CNTY' not in df.columns:
   115	  df['CNTY']=[str(int(s/100)) for s in list(df['DICT'])]
  • 加總縣市邊界上的重複資料
   116	coli=['CNTY', 'nsc2','YX']
   117	df=pivot_table(df,index=coli,values=cole,aggfunc=np.sum).reset_index()
   118	

粒狀物與VOCs之劃分

   119	#note the df is discarded
   120	df=add_PMS(df)
   121	#add the VOC columns and reset to zero
   122	if 'EM_NMHC' in df.columns:
   123	  nmhc=df.loc[df.EM_NMHC>0]
   124	  if len(nmhc)>0:
   125	    for c in colv:
   126	      df[c]=np.zeros(len(df))
   127	    nsc2=set(nmhc.nsc2)
   128	    for n in nsc2:
   129	      df=add_VOC(df,n)
   130	#Summed up for all the categories. The results are filled into the nc template
   131	if set(c2v).issubset(set(df.columns)):
   132	  df=pivot_table(df,index=['CNTY', 'YX', 'nsc2'],values=list(c2v),aggfunc=sum).reset_index()
   133	else:
   134	  sys.exit('not pivot')
   135	

儲存排放量矩陣TPY

   136	#df store to matrix 
   137	for c in ['CNTY','nsc2','YX']:
   138	  exec(c+'=list(set(df.'+c+'))')
   139	  exec(c+'.sort()')
   140	  exec('n'+c+'=len('+c+')')
   141	  exec('d'+c+'={'+c+'[i]:i for i in range(n'+c+')}')
   142	  exec('df["i'+c+'"]=[d'+c+'[i] for i in df.'+c+']')
   143	list_c2v=list(c2v)
   144	list_c2v.sort()
   145	NLC=len(c2v)
   146	TPY=np.zeros(shape=(NLC,nCNTY,nnsc2,nYX))
   147	for i in range(NLC):
   148	  c=list_c2v[i]
   149	  TPY[i,df.iCNTY[:],df.insc2[:],df.iYX[:]]=df[c][:]
   150	

時間變化係數矩陣fac

  • 讀取對照表json檔,將展開成為完整的DataFrame,最後形成矩陣fac以利相乘
   151	#processing all the time-variation and constant categories
   152	with open('nc_fac.json', 'r', newline='') as jsonfile:
   153	  nc_fac=json.load(jsonfile)
   154	
  • 全年的時間標籤(dts),用在序列數據的拮取
   155	yr=2016+(int(teds)-10)*3
   156	bdate0=datetime(yr,1,1)-timedelta(days=1)
   157	nd365=365
   158	if yr%4==0:nd365=366
   159	nty=(nd365+2)*24
   160	dts=[bdate0+timedelta(days=i/24.) for i in range(nty)]
   161	
  • 具有時間變化的排放類別
   162	#time-variant part of nsc2
   163	ll=list(nc_fac)
   164	df=DataFrame({'nsc2':[i.split('_')[0] for i in ll],\
   165	'CNTY':[i.split('_')[1] for i in ll],\
   166	'fac':[nc_fac[i] for i in ll]})
   167	nc_fac=0#clean_up of mem
   168	#time variation files use ??b to represent all kind of NSC_SUB
   169	for n2 in set(df.nsc2)-set(nsc2):
   170	  a=df.loc[df.nsc2==n2].reset_index(drop=True)
   171	  nns=[i for i in nsc2 if i[:-1]==n2[:-1]]
   172	  for n in nns:
   173	    a.nsc2=n
   174	    df=df.append(a,ignore_index=True)
   175	df=df.loc[df.nsc2.map(lambda x:x in nsc2)].reset_index(drop=True) #drop these surrogate nsc2
   176	df=df.loc[df.CNTY.map(lambda x:x in CNTY)].reset_index(drop=True) #drop 53
   177	nfac=len(df)
   178	var2=np.zeros(shape=(nfac,nty),dtype=int)
   179	fac =np.zeros(shape=(nCNTY,nnsc2,nty))
   180	for c in ['CNTY','nsc2']:
   181	  exec('var2[:,:]=np.array([d'+c+'[i] for i in df.'+c+'])[:,None]')
   182	  exec('i'+c+'=var2[:,:].flatten()')
   183	var2[:,:]=np.arange(nty)[None,:];    it=var2.flatten()
   184	fac1=np.array([np.array(i) for i in df.fac]).flatten()
   185	fac[iCNTY[:],insc2[:],it[:]]=fac1[:]
   186	fac1=0#clean_up of mem
   187	
  • 常數類別,其時變係數為一定值
   188	#constant part of nsc2
   189	insc2=np.array([dnsc2[n] for n in set(nsc2)-set(df.nsc2)])
   190	nnsc2=len(insc2)
   191	var3=np.zeros(shape=(nCNTY,nnsc2,nty),dtype=int)
   192	var3[:,:,:]=np.arange(nCNTY)[:,None,None];iCNTY=var3.flatten()
   193	var3[:,:,:]=           insc2[None,:,None];insc2=var3.flatten()
   194	var3[:,:,:]=  np.arange(nty)[None,None,:];it   =var3.flatten()
   195	fac[iCNTY[:],insc2[:],it[:]]=1./nty
   196	
  • 由全年序列中,按照dts拮取本月之時變係數備用
   197	#cutting for desired month from whole year time-factors
   198	ib=dts.index(bdate)
   199	dts=dts[ib:ib+ntm]
   200	fac=fac[:,:,ib:ib+ntm] #clean_up of mem
   201	
  • 採用np.tensordot矩陣內積,消除中間的2個維度(nsc2, cnty),成為單純的(物種、空間、時間)3維度矩陣
   202	#eliminate the nsc2 and CNTY dimensions (multiply-and-sum by tensordot)
   203	df,var2,var3,iCNTY,insc2,it=0,0,0,0,0,0 			#clean_up of mem
   204	aTPY = np.tensordot(TPY,fac, axes=([1,2],[0,1])) 	#in shape of (isp, nYX, ntm)
   205	TPY,fac=0,0											#clean_up of mem
   206	

網格化與存檔

   207	idx=np.where(np.sum(aTPY[:,:,:],axis=0)>0)
   208	dd,ic={},0
   209	for c in list_c2v:
   210	  dd[c]=aTPY[ic,idx[0][:],idx[1][:]]
   211	  ic+=1
   212	dd['YX']=[YX[i] for i in idx[0]]
   213	dd['idt']=idx[1]
   214	df=DataFrame(dd)
   215	df['UTME']=(df.YX%1000)*1000+minx
   216	df['UTMN']=(df.YX//1000)*1000+miny
   217	df=disc(df,nc)											#disc after tensordot
   218	
   219	fac=1000.*1000./nd365/24 #ton/yr to g/hr
   220	  
   221	#Filling to the template
   222	var3=np.zeros(shape=(ntm,nrow,ncol))
   223	for c in c2v:
   224	  if c not in df.columns:continue
   225	  if sum(df[c])==0.:continue
   226	  if c2v[c] not in V[3]:continue
   227	  #T/Y to gram/hour (gmole for SNC and VOCs)
   228	  dfc=df.loc[df[c]>0].reset_index(drop=True)
   229	  var3[:]=0.
   230	  var3[dfc.idt,dfc.IY,dfc.IX]=dfc[c]*fac/c2m[c]
   231	  nc.variables[c2v[c]][:,0,:,:]=var3[:,:,:]
   232	

程式之執行

  • 依月份呼叫即可
  • machine-dependancy
    • 如要改寫成uamiv檔案,系統必須要有pncgen程式
    • 因pandas及no.tensordot會自己啟動多工運作,同時執行3個月份node01~03尚能消化(CPU~4500%),如太多月份同時運作,系統資源將會耗盡。不但拖慢速度,結果也不正確
for m in {01..04};do sub python area_YYMM.py 19$m >&/dev/null;done
for m in {05..08};do sub python area_YYMM.py 19$m >&/dev/null;done
for m in {09..12};do sub python area_YYMM.py 19$m >&/dev/null;done

檔案下載

Reference