;-------------------------------------------------------------
;+
; NAME:
;       WORDARRAY
; PURPOSE:
;       Convert text string or string array to 1-d array of words.
; CATEGORY:
; CALLING SEQUENCE:
;       wordarray, instring, outlist
; INPUTS:
;       instring = string or string array to process.  in
; KEYWORD PARAMETERS:
;       Keywords:
;         IGNORE=string of characters to ignore (array allowed).
;           These characters are removed before processing.
;           Ex: wordarray,in,out,ignore=',;()'
;               wordarray,in,out,ignore=[',',';','(',')']
;         DELIMITERS=string of word delimiter characters (array
;           allowed, same form as IGNORE).  If not given, the
;           delimiters are spaces and tabs.
;         /WHITE means include white space (spaces,tabs) along
;           with the specified delimiters.
;         NUMBER=num Number of elements in returned array.
; OUTPUTS:
;       outlist = 1-d array of words in instring.      out
; COMMON BLOCKS:
; NOTES:
;       Notes: Words are assumed delimited by given delimiters
;        (defaults are spaces and/or tabs)
;        Non-delimiters are returned as part of the words.
;        Delimiters not needed at the front and end of the strings.
;        See commalist for a near inverse.
; MODIFICATION HISTORY:
;       R. Sterner, 29 Nov, 1989
;       BLG --- Modified June 22,1991 to include tabs as delimiters
;       R. Sterner, 11 Dec, 1992 --- fixed to handle pure white space.
;       R. Sterner, 27 Jan, 1993 --- dropped reference to array.
;       R. Sterner, 1998 Apr 1 --- Added DELIMITER.  Modified IGNORE.
;       R. Sterner, 1998 Jul 31 --- Added NUMBER=nwds.
;
; Copyright (C) 1989, Johns Hopkins University/Applied Physics Laboratory
; This software may be used, copied, or redistributed as long as it is not
; sold and this copyright notice is reproduced on each copy made.  This
; routine is provided as is without any express or implied warranties
; whatsoever.  Other limitations apply as described in the file disclaimer.txt.
;-
;-------------------------------------------------------------
 
    pro wordarray, in, out, ignore=ign, delimiters=del, $
      white=white, number=nwds, help=hlp

    ;-----------------------------------------------------------------
    ;  Split a string (or string array) into a 1-d array of words.
    ;
    ;  in    = scalar string or string array to process.          in
    ;  out   = words found.  A string array when more than one    out
    ;          word is found, a scalar string for exactly one
    ;          word, and '' when no words are found.
    ;  IGNORE=ign      characters removed before splitting (string
    ;                  or string array; all elements are combined).
    ;  DELIMITERS=del  characters that separate words (same form as
    ;                  IGNORE).  Default: tab and space.
    ;  /WHITE          add tab and space to the given DELIMITERS.
    ;  NUMBER=nwds     returned number of words found, as a LONG.
    ;                  Set to 0 when no words are found (out is
    ;                  then '').
    ;  /HELP           print the help text and return.
    ;
    ;  Method: the text is converted to bytes, ignored characters are
    ;  dropped, every delimiter is replaced by one marker byte, and
    ;  word starts/ends are located from the transitions between
    ;  marker and non-marker bytes.
    ;
    ;  Note: byte values 1 and 2 are used internally as markers, so a
    ;  byte 1 in the input acts as a delimiter and a byte 2 is dropped.
    ;-----------------------------------------------------------------

    if (n_params(0) lt 2) or keyword_set(hlp) then begin
      print,' Convert text string or string array to 1-d array of words.'
      print,' wordarray, instring, outlist'
      print,'   instring = string or string array to process.  in'
      print,'   outlist = 1-d array of words in instring.      out'
      print,' Keywords:'
      print,'   IGNORE=string of characters to ignore (array allowed).'
      print,'     These characters are removed before processing.'
      print,"     Ex: wordarray,in,out,ignore=',;()'"
      print,"         wordarray,in,out,ignore=[',',';','(',')']"
      print,'   DELIMITERS = word delimiter characters, like IGNORE.'
      print,'   /WHITE means include white space (spaces,tabs) along'
      print,'     with the specified delimiters.'
      print,'   NUMBER=num Number of elements in returned array.'
      print,' Notes: Words are assumed delimited by given delimiters'
      print,'  (defaults are spaces and/or tabs)'
      print,'  Non-delimiters are returned as part of the words.'
      print,'  Delimiters not needed at the front and end of the strings.'
      print,'  See commalist for a near inverse.'
      return
    endif

    ;-----  Deal with characters to ignore  --------
    nig = n_elements(ign)
    ign2 = ''
    for i=0L,nig-1 do ign2=ign2+ign(i)    ; Want 1 string (no-op if nig=0).
    bign2 = byte(ign2)                    ; As byte array (0B if none).

    ;-----  Deal with delimiters  -------------
    ndl = n_elements(del)
    if ndl eq 0 then begin
      bdel2 = [9B,32B]                    ; Default is Tab and Space.
    endif else begin
      del2 = ''
      for i=0L,ndl-1 do del2=del2+del(i)  ; Want 1 string.
      bdel2 = byte(del2)                  ; As byte array.
      if keyword_set(white) then bdel2=[bdel2,9B,32B]   ; Add white space.
    endelse

    ;------  Set up marker characters (CTR-A, CTR-B)  --------
    mrk  = 1B               ; Marker character for delimiters.
    mrk2 = 2B               ; Marker character for ignore.
    smrk = string(mrk)      ; Delimiter marker as a string.

    ;------  Mark ends of lines  ---------
    ; Every input element gets a delimiter on both ends, so words never
    ; run together across array elements.
    t = smrk + in + smrk

    ;------  Convert to byte array and ignore line breaks  -----
    ; BYTE of a string array pads short elements with 0B; dropping the
    ; nulls also flattens everything into one 1-d byte stream.
    b = byte(t)
    w = where(b ne 0, count)          ; Find non-null chars.
    if count gt 0 then b = b(w)       ; Extract non-null characters.

    ;------  Deal with characters to be ignored  --------------
    for i=0L,n_elements(bign2)-1 do begin   ; Loop through chars to ignore.
      w = where(b eq bign2(i), c)     ; Look for i'th one.
      if c gt 0 then b(w)=mrk2        ; Mark any found.
    endfor
    w = where(b ne mrk2, c)           ; Everything that is not marked.
    if c gt 0 then b=b(w)             ; Keep only unmarked characters.

    ;------  Deal with word delimiters  ----------------------
    for i=0L,n_elements(bdel2)-1 do begin   ; Loop through delimiter chars.
      w = where(b eq bdel2(i), c)     ; Look for i'th one.
      if c gt 0 then b(w)=mrk         ; Mark any found.
    endfor

    ;------  Look for non-delimiter characters  ------------
    x = b ne mrk                      ; 1 for word chars, 0 for delimiters.
    x = [0,x,0]                       ; Tack 0s at ends (x index = b index + 1).
    if max(x) eq 0 then begin         ; Nothing but delimiters: no words.
      out = ''
      nwds = 0L                       ; Always define NUMBER for the caller.
      return
    endif

    ;-------  Find word/non-word transitions  -------------
    y = (x-shift(x,1)) eq 1           ; 1 at the first char of each word.
    z = where(shift(y,-1) eq 1, nstart)   ; Word starts as indices into b.
    y2 = (x-shift(x,-1)) eq 1         ; 1 at the last char of each word.
    z2 = where(shift(y2,1) eq 1)      ; Index just past each word end (in x).

    ;--------  Word number, locations, and lengths  --------
    nwds = nstart                     ; Total words in IN (exact LONG count).
    loc = z                           ; Word start positions.
    len = z2 - z - 1                  ; Word lengths.

    ;--------  Setup and fill in output array  ------------
    ; One row per word, zero padded to the longest word.  STRING of a
    ; byte array stops each row at its padding.
    out = bytarr(max(len), nwds)
    if nwds gt 1 then begin
      for i = 0L, nwds-1L do begin
        out(0,i) = b(loc(i):(loc(i)+len(i)-1L))
      endfor
    endif else begin
      ; Single word: out is effectively 1-d, so use a 1-d insert.
      out(0) = b(loc(0):(loc(0)+len(0)-1L))
    endelse
    out = string(out)

    return

    end