Skip to content

Commit

Permalink
version 2.6.0 with multi-processing gnssir2 beta version
Browse files Browse the repository at this point in the history
  • Loading branch information
kristinemlarson committed Mar 13, 2024
1 parent aef05ea commit d5797bb
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 14 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).


## 2.6.0

beta version of multi-processing for gnssir. It is called gnssir2 for now. I will
merge with gnssir after more testing.

## 2.5.4

started experimenting with parallel processing. NOT official yet.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# gnssrefl v2.5.4
# gnssrefl v2.6.0

[![PyPI Version](https://img.shields.io/pypi/v/gnssrefl.svg)](https://pypi.python.org/pypi/gnssrefl)

Expand Down
89 changes: 82 additions & 7 deletions gnssrefl/gnssir_cl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def parse_arguments():
parser.add_argument("-mmdd", default=None, type=str, help="Boolean, add columns for month,day,hour,minute")
parser.add_argument("-dec", default=1, type=int, help="decimate SNR file to this sampling rate before computing periodograms")
parser.add_argument("-newarcs", default=None, type=str, help="This no longer has any meaning")
parser.add_argument("-par", default=None, type=int, help="Number of processes ?")
parser.add_argument("-par", default=None, type=int, help="Number of processes to spawn (up to 10)")


args = parser.parse_args().__dict__
Expand All @@ -64,6 +64,12 @@ def gnssir(station: str, year: int, doy: int, snr: int = 66, plt: bool = False,
gnssir is the main driver for estimating reflector heights. The user is required to
have set up an analysis strategy using gnssir_input.
beta version of parallel processing is now onine. If you set -par to an integer >=2, it will do processing across years, i.e. you
won't see any improvement unless you are trying to analyze two years of data - in which case it will
be twice as fast, three years of data, three times as fast. I have added an option par=-99 which
only works for now on a single year. but it will create up to 10 simultaneous processes, so very slick.
Ultimately of course I will put these together and make this the official version. Huge thank you to
Aaryan Rampal for getting this up and running.
Examples
--------
Expand Down Expand Up @@ -162,6 +168,12 @@ def gnssir(station: str, year: int, doy: int, snr: int = 66, plt: bool = False,
periodograms are computed. 1 sec is default (i.e. no decimating)
newarcs : bool, optional
this input no longer has any meaning
par : int
parallel processing parameter. If an integer from 2-10, it sets up
2-10 processes for years, i.e. you should add at least the same number of
processes as you have years. For par = -99, it sets
up 10 processes for jobs within that single year. Ultimately these modes will be
merged so that the -99 option goes away.
"""

Expand Down Expand Up @@ -322,13 +334,42 @@ def gnssir(station: str, year: int, doy: int, snr: int = 66, plt: bool = False,
for year in year_list:
process_year(year, **additional_args)
else:
print('Using process year with pools')
pool = multiprocessing.Pool(processes=par)
partial_process_year = partial(process_year, **additional_args)
if par > 10:
print('For now we will only allow ten simultaneous processes. Submit again. Exiting.')
sys.exit()
if (par == -99):
print('Using pools with multiple lists of dates. For now only one year can be accommodating')
if (year != year_end):
print('Submit within one year for now')
sys.exit()
numproc = 10
# hardwire for five processes now
# try sending this instead of just args
#d = { 0: [2020, 251, 260], 1:[2020, 261, 270], 2: [2020, 271, 280], 3:[2020, 281, 290], 4:[2020,291,300] }
# may end up using fewer processes ... so return numproc for that
d,numproc=guts2.make_parallel_proc_lists(year, doy, doy_end, numproc)
print(numproc)
print(d)

index_list = list(range(numproc))

pool = multiprocessing.Pool(processes=numproc)
partial_process_yearD = partial(process_year_dictionary, args=args,datelist=d)
pool.map(partial_process_yearD,index_list)

pool.close()
pool.join()

else:
print('Using process year with pools')
pool = multiprocessing.Pool(processes=par)
partial_process_year = partial(process_year, **additional_args)
print(year_list)
type(partial_process_year)

pool.map(partial_process_year, year_list)
pool.close()
pool.join()
pool.map(partial_process_year, year_list)
pool.close()
pool.join()

t2 = time.time()
print('Time to compute ', round(t2-t1,2))
Expand Down Expand Up @@ -376,6 +417,40 @@ def process_year(year, year_end, year_st, doy, doy_end, args):
except:
warnings.warn(f'error processing {year} {doy}');

def process_year_dictionary(index,args,datelist):
"""
Code that does the processing for a specific year. Refactored to separate
function to allow for parallel processes
Parameters
----------
index: int
value from '0' to '4', telling you which element of args to use
args : dict
arguments passed into gnssir through commandline (or python)
should have the new arguments for sublists
datelist : dict
list of dates you want to analyze in simple year, doy1, doy2 format
should have up to 10 sets of dates, from 0 to 9, e.g. for five processes
dd = { 0: [2020, 251, 260], 1:[2020, 261, 270], 2: [2020, 271, 280], 3:[2020, 281, 290], 4:[2020,291,300] }
"""


year = datelist[index][0]; d1 = datelist[index][1] ; d2 = datelist[index][2]

doy_list = list(range(d1, d2+1))

# now store year and doy in args dictionary, which is somewhat silly
args['year'] = year
for doy in doy_list:
print(f'processing {year} {doy}');
args['doy']=doy

guts2.gnssir_guts_v2(**args)

# warnings.warn(f'error processing {year} {doy}');



def main():
Expand Down
55 changes: 50 additions & 5 deletions gnssrefl/gnssir_v2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import json
import matplotlib.pyplot as plt
import math
import numpy as np
import os
import scipy.interpolate
Expand Down Expand Up @@ -133,7 +134,9 @@ def gnssir_guts_v2(station,year,doy, snr_type, extension,lsp):
# rate for the receiver, so you should not assume this value is relevant to your case.
minNumPts = 20
p,T,irefr,humidity = set_refraction_params(station, dmjd, lsp)
print('Refraction parameters (press, temp, humidity, ModelNum)',np.round(p,3),np.round(T,3),np.round(humidity,3),irefr)
# removing print statement for now. should add screenstats parameter
if screenstats:
print('Refraction parameters (press, temp, humidity, ModelNum)',np.round(p,3),np.round(T,3),np.round(humidity,3),irefr)

if (irefr == 3) or (irefr == 4):
TempK = T + 273.15 # T is in celsius ... I think.
Expand Down Expand Up @@ -174,7 +177,7 @@ def gnssir_guts_v2(station,year,doy, snr_type, extension,lsp):
print('Reading from: ', obsfile)
print('Results will be written to:', fname)
minObsE = min(snrD[:,1])
print('Minimum observed elevation angle in this file ', minObsE, '/requested e1 and e2 ', e1,e2)
print('Min observed elev. angle ', station, year, doy, minObsE, '/requested e1 and e2 ', e1,e2)
# only apply this test for simple e1 and e2
if len(ellist) == 0:
if minObsE > (e1 + ediff):
Expand All @@ -196,9 +199,11 @@ def gnssir_guts_v2(station,year,doy, snr_type, extension,lsp):
ele = snrD[:,1]
else :
if irefr == 1:
print('Standard Bennett refraction correction')
if screenstats:
print('Standard Bennett refraction correction')
else:
print('Standard Bennett refraction correction, time-varying')
if screenstats:
print('Standard Bennett refraction correction, time-varying')
ele = snrD[:,1]
ele=apply_refraction_corr(lsp,ele,p,T)

Expand Down Expand Up @@ -1002,7 +1007,7 @@ def rewrite_azel(azval2):
print('Not going to allow more than four azimuth regions ...')
sys.exit()

print('Using azimuths: ', azelout)
#print('Using azimuths: ', azelout)
return azelout

def check_azim_compliance(initA,azlist):
Expand Down Expand Up @@ -1133,3 +1138,43 @@ def new_rise_set_again(elv,azm,dates, e1, e2, ediff,sat, screenstats ):
tv = np.append(tv, [newl],axis=0)

return tv

def make_parallel_proc_lists(year, doy1, doy2, nproc):
"""
make lists of dates for parallel processing to spawn multiple jobs
Parameters
==========
year : int
year of processing
doy1 : int
start day of year
doy 2 : int
end day of year
Returns
=======
datelist : dict
list of dates formatted as year doy1 doy2
numproc : int
number of datelists, thus number of processes to be used
"""
# d = { 0: [2020, 251, 260], 1:[2020, 261, 270], 2: [2020, 271, 280], 3:[2020, 281, 290], 4:[2020,291,300] }
# number of days for spacing ...
Ndays = math.ceil((doy2-doy1)/nproc)
#print(Ndays)
d = {}
i=0
for day in range(doy1, doy2+1, Ndays):
end_day = day+Ndays-1
if (end_day > doy2):
l = [year, day, doy2]
else:
l = [year, day, end_day]
d[i] = l
i=i+1

datelist = d
numproc = i
return datelist, numproc
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
]
setup(
name="gnssrefl",
version="2.5.4",
version="2.6.0",
author="Kristine Larson",
author_email="[email protected]",
description="A GNSS reflectometry software package ",
Expand Down

0 comments on commit d5797bb

Please sign in to comment.