[3] | 1 | # Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
|
---|
| 2 | #
|
---|
| 3 | # This software is distributable under the terms of the GNU
|
---|
| 4 | # General Public License (GPL) v2, the text of which can be found at
|
---|
| 5 | # http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
|
---|
| 6 | # using this module constitutes acceptance of the terms of this License.
|
---|
| 7 | #
|
---|
| 8 | # Disclaimer
|
---|
| 9 | #
|
---|
| 10 | # This software is provided "as-is". There are no expressed or implied
|
---|
| 11 | # warranties of any kind, including, but not limited to, the warranties
|
---|
| 12 | # of merchantability and fittness for a given application. In no event
|
---|
| 13 | # shall Gary Strangman be liable for any direct, indirect, incidental,
|
---|
| 14 | # special, exemplary or consequential damages (including, but not limited
|
---|
| 15 | # to, loss of use, data or profits, or business interruption) however
|
---|
| 16 | # caused and on any theory of liability, whether in contract, strict
|
---|
| 17 | # liability or tort (including negligence or otherwise) arising in any way
|
---|
| 18 | # out of the use of this software, even if advised of the possibility of
|
---|
| 19 | # such damage.
|
---|
| 20 | #
|
---|
| 21 | # Comments and/or additions are welcome (send e-mail to:
|
---|
| 22 | # strang@nmr.mgh.harvard.edu).
|
---|
| 23 | #
|
---|
| 24 | """
|
---|
| 25 | pstat.py module
|
---|
| 26 |
|
---|
| 27 | #################################################
|
---|
| 28 | ####### Written by: Gary Strangman ###########
|
---|
| 29 | ####### Last modified: Jun 29, 2001 ###########
|
---|
| 30 | #################################################
|
---|
| 31 |
|
---|
| 32 | This module provides some useful list and array manipulation routines
|
---|
| 33 | modeled after those found in the |Stat package by Gary Perlman, plus a
|
---|
| 34 | number of other useful list/file manipulation functions. The list-based
|
---|
| 35 | functions include:
|
---|
| 36 |
|
---|
| 37 | abut (source,*args)
|
---|
| 38 | simpleabut (source, addon)
|
---|
| 39 | colex (listoflists,cnums)
|
---|
| 40 | collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
|
---|
| 41 | dm (listoflists,criterion)
|
---|
| 42 | flat (l)
|
---|
| 43 | linexand (listoflists,columnlist,valuelist)
|
---|
| 44 | linexor (listoflists,columnlist,valuelist)
|
---|
| 45 | linedelimited (inlist,delimiter)
|
---|
| 46 | lineincols (inlist,colsize)
|
---|
| 47 | lineincustcols (inlist,colsizes)
|
---|
| 48 | list2string (inlist)
|
---|
| 49 | makelol(inlist)
|
---|
| 50 | makestr(x)
|
---|
| 51 | printcc (lst,extra=2)
|
---|
| 52 | printincols (listoflists,colsize)
|
---|
| 53 | pl (listoflists)
|
---|
| 54 | printl(listoflists)
|
---|
| 55 | replace (lst,oldval,newval)
|
---|
| 56 | recode (inlist,listmap,cols=None)
|
---|
| 57 | remap (listoflists,criterion)
|
---|
| 58 | roundlist (inlist,num_digits_to_round_floats_to)
|
---|
| 59 | sortby(listoflists,sortcols)
|
---|
| 60 | unique (inlist)
|
---|
| 61 | duplicates(inlist)
|
---|
| 62 | writedelimited (listoflists, delimiter, file, writetype='w')
|
---|
| 63 |
|
---|
| 64 | Some of these functions have alternate versions which are defined only if
|
---|
| 65 | Numeric (NumPy) can be imported. These functions are generally named as
|
---|
| 66 | above, with an 'a' prefix.
|
---|
| 67 |
|
---|
| 68 | aabut (source, *args)
|
---|
| 69 | acolex (a,indices,axis=1)
|
---|
| 70 | acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
|
---|
| 71 | adm (a,criterion)
|
---|
| 72 | alinexand (a,columnlist,valuelist)
|
---|
| 73 | alinexor (a,columnlist,valuelist)
|
---|
| 74 | areplace (a,oldval,newval)
|
---|
| 75 | arecode (a,listmap,col='all')
|
---|
| 76 | arowcompare (row1, row2)
|
---|
| 77 | arowsame (row1, row2)
|
---|
| 78 | asortrows(a,axis=0)
|
---|
| 79 | aunique(inarray)
|
---|
| 80 | aduplicates(inarray)
|
---|
| 81 |
|
---|
| 82 | Currently, the code is all but completely un-optimized. In many cases, the
|
---|
| 83 | array versions of functions amount simply to aliases to built-in array
|
---|
| 84 | functions/methods. Their inclusion here is for function name consistency.
|
---|
| 85 | """
|
---|
| 86 |
|
---|
| 87 | ## CHANGE LOG:
|
---|
| 88 | ## ==========
|
---|
| 89 | ## 01-11-15 ... changed list2string() to accept a delimiter
|
---|
| 90 | ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
|
---|
| 91 | ## 01-05-31 ... added duplicates() and aduplicates() functions
|
---|
| 92 | ## 00-12-28 ... license made GPL, docstring and import requirements
|
---|
| 93 | ## 99-11-01 ... changed version to 0.3
|
---|
| 94 | ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
|
---|
| 95 | ## 03/27/99 ... added areplace function, made replace fcn recursive
|
---|
| 96 | ## 12/31/98 ... added writefc function for output to fixed column sizes
|
---|
| 97 | ## 12/07/98 ... fixed import problem (failed on collapse() fcn)
|
---|
| 98 | ## added __version__ variable (now 0.2)
|
---|
| 99 | ## 12/05/98 ... updated doc-strings
|
---|
| 100 | ## added features to collapse() function
|
---|
| 101 | ## added flat() function for lists
|
---|
| 102 | ## fixed a broken asortrows()
|
---|
| 103 | ## 11/16/98 ... fixed minor bug in aput for 1D arrays
|
---|
| 104 | ##
|
---|
| 105 | ## 11/08/98 ... fixed aput to output large arrays correctly
|
---|
| 106 |
|
---|
| 107 | import stats # required 3rd party module
|
---|
| 108 | import string, copy
|
---|
| 109 | from types import *
|
---|
| 110 |
|
---|
| 111 | __version__ = 0.4
|
---|
| 112 |
|
---|
| 113 | ###=========================== LIST FUNCTIONS ==========================
|
---|
| 114 | ###
|
---|
| 115 | ### Here are the list functions, DEFINED FOR ALL SYSTEMS.
|
---|
| 116 | ### Array functions (for NumPy-enabled computers) appear below.
|
---|
| 117 | ###
|
---|
| 118 |
|
---|
def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two (or more) lists
    side-by-side and returns the result.  '2D' lists are also accommodated for
    either argument (source or addon).  CAUTION: If one list is shorter, it
    will be repeated until it is as long as the longest list.  If this behavior
    is not desired, use pstat.simpleabut().

    Non-sequence arguments are treated as one-element lists.  An empty
    sequence cannot be repeated, so it is handed to simpleabut() as-is.

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    """
    def repeat_to_length(seq, length):
        # Cycle seq by appending one deep copy of it as often as needed, then
        # trim the excess (only present when length is not a multiple).
        extra = copy.deepcopy(seq)
        repeats = (length + len(seq) - 1) // len(seq)   # ceiling division
        for _ in range(repeats - 1):
            seq = seq + extra
        return seq[0:length]

    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        if 0 < len(addon) < len(source):        # addon shorter: repeat addon
            addon = repeat_to_length(addon, len(source))
        elif 0 < len(source) < len(addon):      # source shorter: repeat source
            source = repeat_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source
|
---|
| 164 |
|
---|
| 165 |
|
---|
def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES NOT
    repeat either list to make the 2 lists of equal length.  Beware of list
    pairs with different lengths ... the resulting list will be the length of
    the FIRST list passed; source rows without a partner in addon are returned
    unchanged (as deep copies).

    Whether an argument is '2D' is decided from its first element only.
    Tuples are accepted anywhere a list is; the result is always a new list
    of lists and neither argument is modified.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list of lists as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    minlen = min(len(source), len(addon))
    source_is_2d = len(source) > 0 and isinstance(source[0], (list, tuple))
    addon_is_2d = len(addon) > 0 and isinstance(addon[0], (list, tuple))

    result = []
    for i in range(minlen):
        # list(...) lets tuple rows be joined with list rows.
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        result.append(left + right)
    # Rows of source beyond the end of addon are kept, untouched.
    for leftover in source[minlen:]:
        result.append(copy.deepcopy(leftover))
    return result
|
---|
| 200 |
|
---|
| 201 |
|
---|
def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified in 'cnums'
    (cnums can be an integer, a sequence of integers, or a string-expression
    that corresponds to a slice operation on the variable x ... e.g., '[3:]'
    will colex columns 3 onward from the listoflists).

    NOTE(security): a string cnums is appended to 'x' and passed to eval(), so
    it must never come from untrusted input.

    Usage:   colex (listoflists,cnums)
    Returns: for an integer (or one-element sequence) cnums, a flat list of
             that column's values; otherwise a list-of-lists holding the
             requested columns, in the order they appear in cnums
    """
    if isinstance(cnums, (list, tuple)):        # multiple columns to get
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
        return column
    if isinstance(cnums, str):                  # an '[3:]' type expression
        extract = eval('lambda x: x' + cnums)
        return [extract(row) for row in listoflists]
    return [row[cnums] for row in listoflists]  # just 1 column to get
|
---|
| 228 |
|
---|
| 229 |
|
---|
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining the
    unique sets of values in keepcols, the mean for each.  Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
    will append those results (e.g., the sterr, N) after each calculated mean;
    if such a function raises, 'N/A' is appended instead.
    cfcn is the collapse function to apply (defaults to mean, defined here in
    the pstat module to avoid circular imports with stats.py, but harmonicmean
    or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, one entry per collapse column (the cfcn
             result, or a list of it followed by the fcn1/fcn2 results).
             Otherwise a list of lists with all unique permutations of entries
             appearing in columns ("conditions") specified by keepcols, abutted
             with the result of cfcn (if cfcn=None, defaults to the mean) of
             each column specified by collapsecols.
    """
    def collmean(inlist):
        s = 0
        for item in inlist:
            s = s + item
        return s / float(len(inlist))

    def tryfcn(fcn, data):
        # Extra statistics are best-effort: a failure becomes 'N/A'.
        try:
            return fcn(data)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists, col)
            entry = cfcn(avgcol)
            if fcn1:
                entry = [entry, tryfcn(fcn1, avgcol)]
            if fcn2:
                # Append fcn2's own result (previously len(avgcol) was always
                # appended and the fcn2 result thrown away).
                extra = tryfcn(fcn2, avgcol)
                try:
                    entry = entry + [extra]
                except TypeError:           # entry is still a bare number
                    entry = [entry, extra]
            means.append(entry)
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        if not isinstance(item, (list, tuple)):
            item = [item]
        tmprows = linexand(listoflists, keepcols, item)
        outrow = list(item)     # copy, so the condition values are not mutated
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            outrow.append(cfcn(avgcol))
            if fcn1 is not None:
                outrow.append(tryfcn(fcn1, avgcol))
            if fcn2 is not None:
                outrow.append(tryfcn(fcn2, avgcol))
        newlist.append(outrow)
    return newlist
|
---|
| 306 |
|
---|
| 307 |
|
---|
def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    NOTE(security): criterion is evaluated with eval(); never pass text that
    comes from an untrusted source.

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the criterion.
    """
    keep = eval('lambda x: ' + criterion)   # compile the test once
    return [row for row in listoflists if keep(row)]
|
---|
| 321 |
|
---|
| 322 |
|
---|
def flat(l):
    """
    Returns the flattened version of a '2D' list (one level of nesting is
    removed).  List-correlate to the a.flat() method of NumPy arrays.  Any
    iterable of iterables is accepted.

    Usage:   flat(l)
    Returns: a new flat list of the elements of each row, in order
    """
    newl = []
    for row in l:
        for item in row:
            newl.append(item)
    return newl
|
---|
| 335 |
|
---|
| 336 |
|
---|
def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist) (a shorter valuelist raises
    IndexError).  Scalars are accepted for either argument.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no expression string is built or evaluated).

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]

    def matches_all(row):
        for col, val in pairs:
            if not row[col] == val:
                return False
        return True

    return [row for row in listoflists if matches_all(row)]
|
---|
| 361 |
|
---|
| 362 |
|
---|
def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one value
    exists for columnlist but multiple values appear in valuelist, the
    valuelist values are all assumed to pertain to the same column.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no expression string is built or evaluated).

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist * len(valuelist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]

    def matches_any(row):
        for col, val in pairs:
            if row[col] == val:
                return True
        return False

    return [row for row in listoflists if matches_any(row)]
|
---|
| 391 |
|
---|
| 392 |
|
---|
def linedelimited(inlist, delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter.'  Used by function writedelimited.  Use '\\t'
    for tab-delimiting.  The delimiter may be any string, including an empty
    or multi-character one; no trailing delimiter is emitted.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the joined string ('' for an empty inlist)
    """
    return delimiter.join([str(item) for item in inlist])
|
---|
| 408 |
|
---|
| 409 |
|
---|
def lineincols(inlist, colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Elements longer than
    colsize are truncated to exactly colsize characters so that columns
    stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: the formatted string
    """
    cells = []
    for item in inlist:
        text = str(item)
        if len(text) <= colsize:
            cells.append(text.rjust(colsize))
        else:
            cells.append(text[:colsize])
    return ''.join(cells)
|
---|
| 429 |
|
---|
| 430 |
|
---|
def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist (IndexError otherwise).  Elements longer than their column are
    truncated to exactly the column width.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i, item in enumerate(inlist):
        text = str(item)
        width = colsizes[i]
        if len(text) <= width:
            cells.append(text.rjust(width))
        else:
            cells.append(text[:width])
    return ''.join(cells)
|
---|
| 455 |
|
---|
| 456 |
|
---|
def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining
    the string form of every element with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    # str() leaves strings untouched, which is exactly what makestr() does;
    # the str.join method replaces the long-deprecated string.join function.
    return delimit.join([str(item) for item in inlist])
|
---|
| 467 |
|
---|
| 468 |
|
---|
def makelol(inlist):
    """
    Turns a 1D list into a 2D list (a list-of-lists) by wrapping every
    element in its own one-item list.  Handy when a 1D list should be written
    out one item per line by a routine that expects rows.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[element] for element in inlist]
|
---|
| 481 |
|
---|
| 482 |
|
---|
def makestr(x):
    """
    Returns x itself when it is already a plain string, otherwise str(x).

    Usage:   makestr(x)
    """
    if type(x) == str:
        return x
    return str(x)
|
---|
| 487 |
|
---|
| 488 |
|
---|
def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces).  Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively.  Such marker rows (also '' and their
    one-element-list forms) are ignored when column widths are measured.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    if len(lst) == 0:
        return None
    if not isinstance(lst[0], (list, tuple)):
        lst = [lst]
    blank_rows = (['\n'], '\n', '', [''])
    dash_rows = (['dashes'], 'dashes')

    # Only genuine data rows take part in the width computation.
    data_rows = [row for row in lst
                 if row not in blank_rows and row not in dash_rows]
    maxsize = []
    if data_rows:
        for col in range(len(data_rows[0])):
            widths = [len(makestr(cell)) for cell in colex(data_rows, col)]
            maxsize.append(max(widths) + extra)

    for row in lst:
        if row in blank_rows:
            print('')
        elif row in dash_rows:
            # Underline as wide as the widest item, whatever 'extra' is.
            dashes = ['-' * (width - extra) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
|
---|
| 525 |
|
---|
| 526 |
|
---|
def printincols(listoflists, colsize):
    """
    Writes every row of listoflists to standard output, formatted by
    lineincols() into right-aligned columns that are colsize (an integer)
    characters wide.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for record in listoflists:
        formatted = lineincols(record, colsize)
        print(formatted)
    return None
|
---|
| 538 |
|
---|
| 539 |
|
---|
def pl(listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last
    element is a newline (e.g., a line of text read from a file) is written
    without an additional line break.  Empty rows are printed as-is.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    for row in listoflists:
        # len() guard: an empty row has no last element to inspect.
        if len(row) > 0 and row[-1] == '\n':
            sys.stdout.write(str(row))
        else:
            print(row)
    return None
|
---|
| 553 |
|
---|
| 554 |
|
---|
def printl(listoflists):
    """
    Alias for pl: prints listoflists one row at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
|
---|
| 559 |
|
---|
| 560 |
|
---|
def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively
    descending into nested lists and tuples.  The input is not modified;
    nested tuples come back as tuples and everything else as lists.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new sequence with the replacements made
    """
    result = []
    for item in inlst:
        if isinstance(item, (list, tuple)):
            result.append(replace(item, oldval, newval))
        elif item == oldval:
            result.append(newval)
        else:
            result.append(item)
    if isinstance(inlst, tuple):
        return tuple(result)
    return result
|
---|
| 574 |
|
---|
| 575 |
|
---|
def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  cols defaults
    to None (meaning all columns of every row are recoded).  Values that do
    not appear in the first column of listmap are left unchanged.

    Usage:   recode (inlist,listmap,cols=None)
             cols=column number(s) to recode, listmap=2D list of
             [oldvalue, newvalue] pairs
    Returns: a deep copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]     # lookup column, built once
    if cols is not None and not isinstance(cols, (list, tuple)):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))   # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:          # value has no mapping: keep it
                continue
            row[col] = listmap[idx][1]
    return lst
|
---|
| 605 |
|
---|
| 606 |
|
---|
def remap(listoflists, criterion):
    """
    Remaps values in a given column of a 2D list (listoflists).  This
    requires a criterion as a function of 'x' so that the result of the
    following is returned ... map(lambda x: 'criterion',listoflists).

    NOTE(security): criterion is evaluated with eval(); never pass text that
    comes from an untrusted source.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: a list holding the value of criterion for every row
    """
    transform = eval('lambda x: ' + criterion)  # compile the expression once
    return [transform(row) for row in listoflists]
|
---|
| 619 |
|
---|
| 620 |
|
---|
def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the
    following function to all elements of float type ...
    round(element,digits).  Other elements are passed through unchanged.
    The input is not modified.

    A 1D list (first element is a number) is treated as a single row, so the
    result is always a list of rows.

    Usage:   roundlist(inlist,digits)
    Returns: new list of rows with rounded floats ([] for an empty inlist)
    """
    if len(inlist) == 0:
        return []
    if isinstance(inlist[0], (int, float)):
        inlist = [inlist]
    rounded = []
    for row in inlist:
        if isinstance(row, str):        # a string row holds no floats
            rounded.append(row)
            continue
        newrow = []
        for item in row:
            if isinstance(item, float):
                item = round(item, digits)
            newrow.append(item)
        rounded.append(newrow)          # fresh row: caller's data untouched
    return rounded
|
---|
| 637 |
|
---|
| 638 |
|
---|
def sortby(listoflists, sortcols):
    """
    Orders the rows of a list of lists by the column(s) named in sortcols
    (a column number or a sequence of column numbers).  The key columns are
    temporarily attached in front of every row, the combined rows are sorted,
    and the key columns are stripped off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    try:
        nkeys = len(sortcols)
    except TypeError:           # a single column number was given
        nkeys = 1
    decorated = abut(colex(listoflists, sortcols), listoflists)
    decorated.sort()
    strip_keys = '[' + str(nkeys) + ':]'
    return colex(decorated, strip_keys)
|
---|
| 656 |
|
---|
| 657 |
|
---|
def unique(inlist):
    """
    Collects the distinct items of the passed list, in order of first
    appearance.  For a list-of-lists the rows themselves are compared, so
    distinct LISTS are returned (items need not be hashable).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
|
---|
| 672 |
|
---|
def duplicates(inlist):
    """
    Lists the items of the FIRST dimension of inlist that occur again
    later in the list.  An item present k times is therefore reported k-1
    times.

    Usage:   duplicates (inlist)
    Returns: list of the repeated items, in order of appearance
    """
    return [item for pos, item in enumerate(inlist)
            if item in inlist[pos + 1:]]
|
---|
| 684 |
|
---|
| 685 |
|
---|
def nonrepeats(inlist):
    """
    Picks out the items that appear exactly once in the first dimension of
    the passed list, keeping their original order.

    Usage:   nonrepeats (inlist)
    Returns: list of the items that are NOT duplicated
    """
    return [item for item in inlist if inlist.count(item) == 1]
|
---|
| 697 |
|
---|
| 698 |
|
---|
| 699 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 700 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 701 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 702 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 703 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 704 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 705 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 706 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 707 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 708 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 709 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 710 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 711 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 712 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 713 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 714 | #=================== PSTAT ARRAY FUNCTIONS =====================
|
---|
| 715 |
|
---|
| 716 | try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE
|
---|
| 717 | import Numeric
|
---|
| 718 | N = Numeric
|
---|
| 719 |
|
---|
def aabut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two (or more) arrays
    column-wise and returns the result.  1D arrays are treated as single
    columns.  CAUTION: If one array is shorter, it will be repeated until it
    is as long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing on
             the 'left', arrays in <args> attached on the 'right'.
    """
    if len(source.shape) == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        if len(addon.shape) == 1:
            # Make the addon a column of its OWN length first, so that a
            # longer addon extends source below instead of being truncated.
            addon = N.resize(addon, [addon.shape[0], 1])
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
|
---|
| 747 |
|
---|
| 748 |
|
---|
def acolex (a,indices,axis=1):
    """
    Pulls the requested indices out of the passed array along the passed
    axis (columns by default).  BEWARE:  a 1D array is presumed to be a
    column-array; indices are then ignored and the whole array comes back
    as a single column.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if len(N.shape(a)) == 1:
        # 1D input: nothing to select from, just reshape into one column
        return N.resize(a,[a.shape[0],1])
    if type(indices) not in [ListType,TupleType,N.ArrayType]:
        # a lone index is wrapped so that take() receives a sequence
        indices = [indices]
    return N.take(a,indices,axis)
| 765 |
|
---|
| 766 |
|
---|
def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining
    the unique sets of values in keepcols and the collapsed value for each.
    fcn1 and fcn2 are optional extra functions (e.g., a standard error or a
    count); each is called on every collapsed column and its result is
    appended after the collapsed value.  If such a function raises, 'N/A' is
    recorded in its place.  cfcn is the collapsing function used when
    keepcols is non-empty; it defaults to the arithmetic mean.

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the mean(s) of column(s) specified by
             collapsecols
    """
    def acollmean (inarray):
        # default collapsing function: mean over every element of inarray
        flat = N.ravel(inarray)
        return N.sum(flat)/float(len(flat))

    if cfcn is None:
        cfcn = acollmean
    if keepcols == []:
        # no grouping columns: average each requested column over ALL rows
        avgcol = acolex(a,collapsecols)
        means = N.sum(avgcol)/float(len(avgcol))
        for fcn in (fcn1,fcn2):
            if fcn is not None:
                try:
                    test = fcn(avgcol)
                except Exception:
                    # best effort: a failing extra statistic must not abort
                    # the collapse, so a placeholder column is used instead
                    test = N.array(['N/A']*len(means))
                means = aabut(means,test)
        return means

    if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
        keepcols = [keepcols]
    if type(collapsecols) not in [ListType,TupleType,N.ArrayType]:
        collapsecols = [collapsecols]   # allow a single column number
    values = colex(a,keepcols)   # so that "item" can be appended (below)
    uniques = unique(values)     # get a LIST, so .sort keeps rows intact
    uniques.sort()
    newlist = []
    for item in uniques:
        if type(item) not in [ListType,TupleType,N.ArrayType]:
            item = [item]
        # rows of a whose keepcols match this unique condition
        tmprows = alinexand(a,keepcols,item)
        for col in collapsecols:
            avgcol = acolex(tmprows,col)
            item.append(cfcn(avgcol))
            for fcn in (fcn1,fcn2):
                if fcn is not None:
                    try:
                        test = fcn(avgcol)
                    except Exception:
                        test = 'N/A'
                    item.append(test)
        newlist.append(item)
    try:
        new_a = N.array(newlist)
    except TypeError:
        # mixed content (e.g. strings next to numbers): use an Object array
        new_a = N.array(newlist,'O')
    return new_a
| 832 |
|
---|
| 833 |
|
---|
def adm (a,criterion):
    """
    Returns rows from the passed list of lists (or array) that meet the
    criteria in the passed criterion expression (a string as a function
    of x, where x is one row).

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows; an Object-typed array is used
             when the rows cannot be converted to a regular array
    """
    # SECURITY: criterion is evaluated as Python source.  Only pass trusted
    # expressions; never build it from external input.
    keep = eval('lambda x: '+criterion)
    lines = [row for row in a if keep(row)]
    try:
        lines = N.array(lines)
    except Exception:
        # rows that do not form a regular numeric array fall back to Objects
        lines = N.array(lines,'O')
    return lines
| 848 |
|
---|
| 849 |
|
---|
def isstring(x):
    """
    Tells whether x is exactly of type StringType.

    Usage:   isstring(x)
    Returns: 1 if type(x) is StringType, 0 otherwise
    """
    if type(x) != StringType:
        return 0
    return 1
| 855 |
|
---|
| 856 |
|
---|
def alinexand (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    The match is done by building a criterion expression and handing it
    to adm.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if type(value) in [StringType,type(0.0)]:
            # repr() always yields a valid literal: quotes and backslashes in
            # strings are escaped, and floats keep the digits that str() may
            # round away (which would make the == test miss)
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x['+str(columnlist[i])+']=='+critval)
    criterion = ' and '.join(tests)
    return adm(a,criterion)
| 878 |
|
---|
| 879 |
|
---|
def alinexor (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  The match is done by building a criterion expression and
    handing it to adm.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    # a single column (or single value) is paired with every item of the other
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if type(value) in [StringType,type(0.0)]:
            # repr() always yields a valid literal: quotes and backslashes in
            # strings are escaped, and floats keep the digits that str() may
            # round away (which would make the == test miss)
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x['+str(columnlist[i])+']=='+critval)
    criterion = ' or '.join(tests)
    return adm(a,criterion)
| 908 |
|
---|
| 909 |
|
---|
def areplace (a,oldval,newval):
    """
    Builds a copy of array a in which every element equal to oldval has
    been swapped for newval.  Done arithmetically with 0/1 masks, so it is
    meant for numeric arrays.

    Usage:   areplace(a,oldval,newval)
    Returns: the array with the replacements made
    """
    untouched = N.not_equal(a,oldval)*a     # zero wherever a == oldval
    swapped = N.equal(a,oldval)*newval      # newval wherever a == oldval
    return untouched+swapped
| 918 |
|
---|
| 919 |
|
---|
def arecode (a,listmap,col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on SINGLE columns, or 'all' columns at once.
    EVERY pair in listmap is applied, and each pair is matched against the
    ORIGINAL values, so a replacement is never re-mapped by a later pair.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a,col)
        work = work.flat
    original = N.array(work)   # snapshot of the values to match against
    for pair in listmap:
        if type(pair[1]) == StringType or work.typecode()=='O' or a.typecode()=='O':
            # string/Object data cannot go through where(); recode elementwise
            work = N.array(work,'O')
            a = N.array(a,'O')
            for i in range(len(work)):
                if original[i]==pair[0]:
                    work[i] = pair[1]
        else:   # must be a non-Object type array and replacement
            work = N.where(N.equal(original,pair[0]),pair[1],work)
    # rebuild the result only AFTER all pairs have been applied
    if col == 'all':
        return N.reshape(work,ashape)
    return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
| 949 |
|
---|
| 950 |
|
---|
def arowcompare(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).

    Usage:   arowcompare(row1,row2)
    Returns: an array of equal length containing 1s where the two rows had
             identical elements and 0 otherwise
    """
    # typecode must be CALLED on both rows; comparing the bound method itself
    # to 'O' is always false and would skip the Object path for row2
    if row1.typecode()=='O' or row2.typecode()=='O':
        # cmp fcn gives -1,0,1 ... so not(abs(cmp)) is 1 exactly where equal
        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2))))
    else:
        cmpvect = N.equal(row1,row2)
    return cmpvect
| 965 |
|
---|
| 966 |
|
---|
def arowsame(row1, row2):
    """
    Checks whether two rows from an array match element for element.  The
    per-element comparison is delegated to arowcompare.

    Usage:   arowsame(row1,row2)
    Returns: 1 if the two rows are identical, 0 otherwise.
    """
    return N.alltrue(arowcompare(row1,row2))
| 977 |
|
---|
| 978 |
|
---|
def asortrows(a,axis=0):
    """
    Sorts an array "by rows".  Unlike Numeric.sort(), which orders the
    elements WITHIN the given axis, this keeps each slice along the given
    axis intact and only reorders the slices relative to one another.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    swapped = (axis != 0)
    if swapped:
        # bring the requested axis to the front so its slices become "rows"
        a = N.swapaxes(a, axis, 0)
    rows = a.tolist()
    rows.sort()                 # list sort compares whole rows
    result = N.array(rows)
    if swapped:
        # put the axes back where the caller had them
        result = N.swapaxes(result, axis, 0)
    return result
| 997 |
|
---|
| 998 |
|
---|
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  Only works on arrays NOT including string
    items.  An empty input gives back an empty array.

    Usage:   aunique (inarray)
    Returns: an array holding one copy of each distinct item/row
    """
    if len(inarray) == 0:
        # nothing to seed the result with (inarray[0] would raise)
        return N.array(inarray)
    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:      # IF IT'S A 1D ARRAY
        for item in inarray[1:]:
            if N.add.reduce(N.ravel(N.equal(uniques,item))) == 0:
                # item is a scalar here, so it is wrapped before concatenating
                uniques = N.concatenate([uniques,N.array([item])])
    else:                            # IT MUST BE A 2+D ARRAY
        if inarray.typecode() != 'O':  # not an Object array
            for item in inarray[1:]:
                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
        else:   # must be an Object array, alltrue/equal functions don't work
            for item in inarray[1:]:
                newflag = 1
                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
                    test = N.sum(abs(N.array(map(cmp,item,unq))))
                    if test == 0:   # if item identical to any 1 row in uniques
                        newflag = 0 # then not a novel item to add
                        break
                if newflag == 1:
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
    return uniques
| 1038 |
|
---|
| 1039 |
|
---|
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array. Only
    works on arrays NOT including string items.  Each duplicated item is
    reported once; an input without duplicates gives an empty array.

    Usage:   aduplicates (inarray)
    Returns: an array of the items/rows that occur more than once
    """
    inarray = N.array(inarray)
    if len(inarray.shape) == 1:      # IF IT'S A 1D ARRAY
        dups = []
        inarray = inarray.tolist()
        for i in range(len(inarray)):
            if inarray[i] in inarray[i+1:]:
                dups.append(inarray[i])
        if dups:
            dups = aunique(dups)
        else:
            # aunique indexes element 0 of its input, so it must not be
            # handed an empty list
            dups = N.array(dups)
    else:                            # IT MUST BE A 2+D ARRAY
        dups = []
        aslist = inarray.tolist()
        for i in range(len(aslist)):
            if aslist[i] in aslist[i+1:]:
                dups.append(aslist[i])
        dups = unique(dups)
        dups = N.array(dups)
    return dups
| 1064 |
|
---|
| 1065 | except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
|
---|
| 1066 | pass
|
---|