Context Navigation

pstat.py

リビジョン 3, 37.5 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
2	#
3	# This software is distributable under the terms of the GNU
4	# General Public License (GPL) v2, the text of which can be found at
5	# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
6	# using this module constitutes acceptance of the terms of this License.
7	#
8	# Disclaimer
9	#
10	# This software is provided "as-is". There are no expressed or implied
11	# warranties of any kind, including, but not limited to, the warranties
12	# of merchantability and fittness for a given application. In no event
13	# shall Gary Strangman be liable for any direct, indirect, incidental,
14	# special, exemplary or consequential damages (including, but not limited
15	# to, loss of use, data or profits, or business interruption) however
16	# caused and on any theory of liability, whether in contract, strict
17	# liability or tort (including negligence or otherwise) arising in any way
18	# out of the use of this software, even if advised of the possibility of
19	# such damage.
20	#
21	# Comments and/or additions are welcome (send e-mail to:
22	# strang@nmr.mgh.harvard.edu).
23	#
24	"""
25	pstat.py module
26
27	#################################################
28	####### Written by: Gary Strangman ###########
29	####### Last modified: Jun 29, 2001 ###########
30	#################################################
31
32	This module provides some useful list and array manipulation routines
33	modeled after those found in the \|Stat package by Gary Perlman, plus a
34	number of other useful list/file manipulation functions. The list-based
35	functions include:
36
37	abut (source,*args)
38	simpleabut (source, addon)
39	colex (listoflists,cnums)
40	collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
41	dm (listoflists,criterion)
42	flat (l)
43	linexand (listoflists,columnlist,valuelist)
44	linexor (listoflists,columnlist,valuelist)
45	linedelimited (inlist,delimiter)
46	lineincols (inlist,colsize)
47	lineincustcols (inlist,colsizes)
48	list2string (inlist)
49	makelol(inlist)
50	makestr(x)
51	printcc (lst,extra=2)
52	printincols (listoflists,colsize)
53	pl (listoflists)
54	printl(listoflists)
55	replace (lst,oldval,newval)
56	recode (inlist,listmap,cols=None)
57	remap (listoflists,criterion)
58	roundlist (inlist,num_digits_to_round_floats_to)
59	sortby(listoflists,sortcols)
60	unique (inlist)
61	duplicates(inlist)
62	writedelimited (listoflists, delimiter, file, writetype='w')
63
64	Some of these functions have alternate versions which are defined only if
65	Numeric (NumPy) can be imported. These functions are generally named as
66	above, with an 'a' prefix.
67
68	aabut (source, *args)
69	acolex (a,indices,axis=1)
70	acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
71	adm (a,criterion)
72	alinexand (a,columnlist,valuelist)
73	alinexor (a,columnlist,valuelist)
74	areplace (a,oldval,newval)
75	arecode (a,listmap,col='all')
76	arowcompare (row1, row2)
77	arowsame (row1, row2)
78	asortrows(a,axis=0)
79	aunique(inarray)
80	aduplicates(inarray)
81
82	Currently, the code is all but completely un-optimized. In many cases, the
83	array versions of functions amount simply to aliases to built-in array
84	functions/methods. Their inclusion here is for function name consistency.
85	"""
86
87	## CHANGE LOG:
88	## ==========
89	## 01-11-15 ... changed list2string() to accept a delimiter
90	## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
91	## 01-05-31 ... added duplicates() and aduplicates() functions
92	## 00-12-28 ... license made GPL, docstring and import requirements
93	## 99-11-01 ... changed version to 0.3
94	## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
95	## 03/27/99 ... added areplace function, made replace fcn recursive
96	## 12/31/98 ... added writefc function for output to fixed column sizes
97	## 12/07/98 ... fixed import problem (failed on collapse() fcn)
98	## added __version__ variable (now 0.2)
99	## 12/05/98 ... updated doc-strings
100	## added features to collapse() function
101	## added flat() function for lists
102	## fixed a broken asortrows()
103	## 11/16/98 ... fixed minor bug in aput for 1D arrays
104	##
105	## 11/08/98 ... fixed aput to output large arrays correctly
106
107	import stats # required 3rd party module
108	import string, copy
109	from types import *
110
111	__version__ = 0.4
112
113	###=========================== LIST FUNCTIONS ==========================
114	###
115	### Here are the list functions, DEFINED FOR ALL SYSTEMS.
116	### Array functions (for NumPy-enabled computers) appear below.
117	###
118
def _tile_to_length(seq, length):
    """Repeat seq end-to-end until it has exactly `length` items."""
    # Ceiling division done with integer arithmetic so the repeat count is
    # an int regardless of how '/' behaves in the running interpreter.
    repeats = -(-length // len(seq))
    extra = copy.deepcopy(seq)        # repeated part is a copy, as before
    return (seq + extra * (repeats - 1))[0:length]


def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates lists side-by-side
    (as columns) and returns the result.  '2D' lists are also accommodated
    for either argument (source or addon).  CAUTION: If one list is shorter,
    it will be repeated until it is as long as the longest list.  If this
    behavior is not desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on
             the 'left', lists in <args> attached consecutively on the
             'right'.  With no extra arguments, source is returned (wrapped
             in a list if it was a scalar).
    Raises:  ZeroDivisionError if the shorter of two sequences is empty.
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        if len(addon) < len(source):          # source is longer: stretch addon
            addon = _tile_to_length(addon, len(source))
        elif len(source) < len(addon):        # addon is longer: stretch source
            source = _tile_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source
164
165
def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed; rows of source beyond the length of
    addon are returned unchanged.

    Whether an argument is treated as 1D or 2D is decided from its FIRST
    element only.  Tuples (as arguments or as rows) are accepted; the result
    and its abutted rows are always lists.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    # Work on a copy so the caller's source is never modified; list() makes
    # the copy assignable even when source was a tuple.
    result = list(copy.deepcopy(source))
    if len(source) == 0 or len(addon) == 0:
        return result                      # nothing to abut
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    for i in range(min(len(source), len(addon))):
        # list(...) lets tuple rows be concatenated with list rows
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        result[i] = left + right
    return result
200
201
def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can
    be an integer, a sequence of integers, or a string holding a subscript
    that is applied to every row x ... e.g., '[3:]' extracts columns 3 onward
    (the string is appended to 'x', so it must start with the bracket).

    Usage:   colex (listoflists,cnums)
    Returns: for an integer (or a one-item sequence), a flat list of that
             column's values; for a longer sequence, a list-of-lists with the
             columns in the order they appear in cnums; for a string, the
             result of evaluating the subscript on each row.
    """
    if isinstance(cnums, (list, tuple)):       # multiple columns to get
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
    elif isinstance(cnums, str):               # a '[3:]' type expression
        # SECURITY NOTE: cnums is evaluated as Python source.  Never pass a
        # string that comes from an untrusted origin.
        extract = eval('lambda x: x' + cnums)
        column = [extract(row) for row in listoflists]
    else:                                      # just 1 column to get
        column = [row[cnums] for row in listoflists]
    return column
228
229
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols for every
    unique combination of values found in keepcols.  Setting fcn1 and/or
    fcn2 to a function rather than None (e.g., stats.sterr, len) appends
    those results after each collapsed value; if such a function raises, the
    string 'N/A' is recorded instead.  cfcn is the collapse function to apply
    (defaults to a mean defined here in the pstat module to avoid circular
    imports with stats.py, but harmonicmean or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, a list with one entry per collapse column
             (the collapsed value, or a list of it followed by the fcn1/fcn2
             results).  Otherwise a list of lists with all unique
             combinations of entries appearing in the columns ("conditions")
             specified by keepcols, each followed by the result of cfcn (and
             fcn1/fcn2) for each column specified by collapsecols.
    """
    def collmean(inlist):
        total = 0
        for item in inlist:
            total = total + item
        return total / float(len(inlist))

    def safe_apply(fcn, values):
        # Extra statistics are best-effort: a failure is reported in-band.
        try:
            return fcn(values)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:
        means = [0] * len(collapsecols)
        for i in range(len(collapsecols)):
            avgcol = colex(listoflists, collapsecols[i])
            means[i] = cfcn(avgcol)
            if fcn1 is not None:
                means[i] = [means[i], safe_apply(fcn1, avgcol)]
            if fcn2 is not None:
                # Append the fcn2 RESULT (the old code computed it and then
                # appended len(avgcol) regardless of what fcn2 was).
                test = safe_apply(fcn2, avgcol)
                try:
                    means[i] = means[i] + [test]
                except TypeError:            # means[i] is still a bare value
                    means[i] = [means[i], test]
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        # Always work on a fresh list so results can be appended even when
        # the unique row is a tuple.
        if isinstance(item, (list, tuple)):
            item = list(item)
        else:
            item = [item]
        tmprows = linexand(listoflists, keepcols, item)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            item.append(cfcn(avgcol))
            if fcn1 is not None:
                item.append(safe_apply(fcn1, avgcol))
            if fcn2 is not None:
                item.append(safe_apply(fcn2, avgcol))
        newlist.append(item)
    return newlist
306
307
def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    SECURITY NOTE: criterion is evaluated as Python source code.  Never pass
    a string that comes from an untrusted origin.

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the criterion.
    """
    predicate = eval('lambda x: ' + criterion)
    return [row for row in listoflists if predicate(row)]
321
322
def flat(l):
    """
    Flattens a '2D' list by one level, concatenating its rows in order.
    List-correlate to the a.flat() method of NumPy arrays.

    Usage:   flat(l)
    Returns: a new list holding every element of every row of l
    """
    return [element for row in l for element in row]
335
336
def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no intermediate source-code string is built
    or evaluated).

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    Raises:  IndexError if valuelist is shorter than columnlist.
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # Pair by index (not zip) so a too-short valuelist is reported.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col, val in pairs)]
361
362
def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but multiple values appear in valuelist, the values are all
    assumed to pertain to that same column; likewise a single value is
    applied to every column when several columns are given.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no intermediate source-code string is built
    or evaluated).

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    Raises:  IndexError if valuelist is shorter than columnlist (after the
             single-item expansion described above).
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col, val in pairs)]
391
392
def linedelimited(inlist, delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter' (which may be any length, including empty).
    Non-string elements are converted with str().  Use a tab character for
    tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the joined string ('' for an empty inlist)
    """
    pieces = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        pieces.append(item)
    # join() places the delimiter only BETWEEN items, so nothing has to be
    # trimmed afterwards (trimming one character was wrong for delimiters
    # that are not exactly one character long).
    return delimiter.join(pieces)
408
409
def lineincols(inlist, colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Non-string elements are
    converted with str().  Elements longer than colsize are truncated to
    exactly colsize characters so that columns stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: the formatted string
    """
    width = max(colsize, 0)
    pieces = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        # rjust pads short items; the slice trims long ones to the column
        # width (the old slice kept colsize+1 characters).
        pieces.append(item.rjust(width)[0:width])
    return ''.join(pieces)
429
430
def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.
    The length of colsizes must be greater than or equal to the number of
    columns in inlist.  Non-string elements are converted with str().
    Elements longer than their column are truncated to exactly the column
    width so that columns stay aligned.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    Raises:  IndexError if colsizes has fewer entries than inlist.
    """
    pieces = []
    for i in range(len(inlist)):
        item = inlist[i]
        if not isinstance(item, str):
            item = str(item)
        width = max(colsizes[i], 0)
        # the old slice kept colsizes[i]+1 characters of over-long items
        pieces.append(item.rjust(width)[0:width])
    return ''.join(pieces)
455
456
def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining
    the items with 'delimit'.  Non-string items are converted with str().

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    pieces = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        pieces.append(item)
    # str.join works on every Python version; string.join() was removed in
    # Python 3.
    return delimit.join(pieces)
467
468
def makelol(inlist):
    """
    Turns a 1D list into a 2D list (a list-of-lists) by wrapping every item
    in its own one-element list.  Handy when a 1D list should be written out
    one item per line.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[entry] for entry in inlist]
481
482
def makestr(x):
    """
    Returns x unchanged if it is already a string, otherwise str(x).

    Usage:   makestr(x)
    Returns: a string
    """
    if isinstance(x, str):
        return x
    return str(x)
487
488
def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces).  Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively (either bare or wrapped in a one-item list;
    '' and [''] also give a blank line).  Such separator rows are ignored
    when column widths are computed.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    def is_blank(row):
        return row == ['\n'] or row == '\n' or row == '' or row == ['']

    def is_dashes(row):
        return row == ['dashes'] or row == 'dashes'

    if not isinstance(lst[0], (list, tuple)):
        lst = [lst]
    # Only real data rows take part in sizing the columns.  (['dashes'] used
    # to be left in, which broke the width computation.)
    datarows = [row for row in lst if not (is_blank(row) or is_dashes(row))]
    if datarows:
        ncols = len(datarows[0])
    else:
        ncols = 0                      # nothing but separator rows
    maxsize = [0] * ncols
    for col in range(ncols):
        items = [makestr(value) for value in colex(datarows, col)]
        maxsize[col] = max([len(text) for text in items]) + extra
    for row in lst:
        if is_blank(row):
            print('')
        elif is_dashes(row):
            # Dashes span the widest item of each column, i.e. the column
            # width minus the padding that 'extra' added.
            dashes = ['-' * (width - extra) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
525
526
def printincols(listoflists, colsize):
    """
    Writes each row of listoflists to standard output as one line, formatted
    by lineincols() into fixed-width columns of colsize characters (colsize
    is an integer).

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for record in listoflists:
        line = lineincols(record, colsize)
        print(line)
538
539
def pl(listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last
    element is a newline character (e.g., a string ending in a newline) is
    written without an additional line break; every other row, including an
    empty one, is followed by a newline.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    for row in listoflists:
        text = str(row)
        # len() guard: an empty row has no last element to inspect
        if len(row) > 0 and row[-1] == '\n':
            sys.stdout.write(text)
        else:
            sys.stdout.write(text + '\n')
    return None
553
554
def printl(listoflists):
    """Alias for pl: prints listoflists one row per line; returns None."""
    return pl(listoflists)
559
560
def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively
    descending into nested lists and tuples.  The input is not modified.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new sequence with the substitutions made; a tuple input (or
             nested tuple) yields a tuple, anything else yields a list
    """
    result = []
    for item in inlst:
        if isinstance(item, (list, tuple)):
            result.append(replace(item, oldval, newval))
        elif item == oldval:
            result.append(newval)
        else:
            result.append(item)
    # Tuples cannot be edited in place (the old code raised TypeError on
    # them), so the result is rebuilt and converted back.
    if isinstance(inlst, tuple):
        return tuple(result)
    return result
574
575
def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful
    when you need to recode data from (e.g.) strings to numbers).  listmap is
    a 2D list of [oldvalue, newvalue] pairs; when an old value is listed more
    than once, the first pair wins.  cols defaults to None (meaning every
    column of every row is recoded).  Values without an entry in listmap are
    left alone.  The input is not modified.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]    # built once, not per cell

    def recode_cell(row, col):
        try:
            idx = oldvals.index(lst[row][col])
        except ValueError:                     # value has no mapping
            return
        lst[row][col] = listmap[idx][1]

    if cols is not None:
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
        for col in cols:
            for row in range(len(lst)):
                recode_cell(row, col)
    else:
        for row in range(len(lst)):
            # Walk the columns of THIS row (the old code used the number of
            # rows as the column count, which only worked for square data).
            for col in range(len(lst[row])):
                recode_cell(row, col)
    return lst
605
606
def remap(listoflists, criterion):
    """
    Remaps every row of a 2D list (listoflists).  This requires a criterion
    as a function of 'x' (the row) so that the result of the following is
    returned ... map(lambda x: criterion, listoflists).

    SECURITY NOTE: criterion is evaluated as Python source code.  Never pass
    a string that comes from an untrusted origin.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: a list with the value of criterion for each row
    """
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
619
620
def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the
    following function to all elements that are floats ...
    round(element,digits).  The input is not modified.

    A list whose first element is a number is treated as 1D and is returned
    wrapped in an outer list (i.e., the result is always a list of rows).

    Usage:   roundlist(inlist,digits)
    Returns: list of rows with rounded floats ([] for an empty inlist)
    """
    if len(inlist) == 0:
        return []
    if isinstance(inlist[0], (int, float)):
        inlist = [inlist]
    result = []
    for row in inlist:
        if isinstance(row, tuple):
            # tuples cannot be assigned to, so rebuild them
            values = []
            for value in row:
                if isinstance(value, float):
                    value = round(value, digits)
                values.append(value)
            row = tuple(values)
        else:
            # Copy the row first: the old shallow copy of the outer list
            # rounded the caller's own rows in place.
            row = copy.copy(row)
            for j in range(len(row)):
                if isinstance(row[j], float):
                    row[j] = round(row[j], digits)
        result.append(row)
    return result
637
638
def sortby(listoflists, sortcols):
    """
    Orders the rows of a list of lists by the column(s) named in sortcols
    (a single column number or a sequence of them).  The sort columns are
    temporarily abutted to the front of every row, the combined rows are
    sorted, and the temporary key columns are stripped off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    keyed = abut(colex(listoflists, sortcols), listoflists)
    keyed.sort()
    try:
        keywidth = len(sortcols)
    except TypeError:                 # a bare column number has no len()
        keywidth = 1
    return colex(keyed, '[' + str(keywidth) + ':]')
656
657
def unique(inlist):
    """
    Collects the distinct items of inlist, in order of first appearance.
    When a list-of-lists is passed, whole rows (the items of the first
    dimension) are compared, so distinct LISTS are returned.

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
672
def duplicates(inlist):
    """
    Finds items in the FIRST dimension of inlist that occur again later in
    the list.  An item is reported once for every occurrence that is followed
    by another equal item, so a value present three times appears twice in
    the result.

    Usage:   duplicates (inlist)
    Returns: list of the repeated items, in order of appearance
    """
    return [entry for pos, entry in enumerate(inlist)
            if entry in inlist[pos + 1:]]
684
685
def nonrepeats(inlist):
    """
    Picks out the items that occur exactly once in the first dimension of
    inlist, preserving their original order.

    Usage:   nonrepeats (inlist)
    Returns: list of the items that are NOT duplicated
    """
    return [entry for entry in inlist if inlist.count(entry) == 1]
697
698
699	#=================== PSTAT ARRAY FUNCTIONS =====================
700	#=================== PSTAT ARRAY FUNCTIONS =====================
701	#=================== PSTAT ARRAY FUNCTIONS =====================
702	#=================== PSTAT ARRAY FUNCTIONS =====================
703	#=================== PSTAT ARRAY FUNCTIONS =====================
704	#=================== PSTAT ARRAY FUNCTIONS =====================
705	#=================== PSTAT ARRAY FUNCTIONS =====================
706	#=================== PSTAT ARRAY FUNCTIONS =====================
707	#=================== PSTAT ARRAY FUNCTIONS =====================
708	#=================== PSTAT ARRAY FUNCTIONS =====================
709	#=================== PSTAT ARRAY FUNCTIONS =====================
710	#=================== PSTAT ARRAY FUNCTIONS =====================
711	#=================== PSTAT ARRAY FUNCTIONS =====================
712	#=================== PSTAT ARRAY FUNCTIONS =====================
713	#=================== PSTAT ARRAY FUNCTIONS =====================
714	#=================== PSTAT ARRAY FUNCTIONS =====================
715
716	try: # DEFINE THESE ONLY IF NUMERIC IS AVAILABLE
717	import Numeric
718	N = Numeric
719
def aabut(source, *args):
    """
    Like the |Stat abut command.  It concatenates arrays column-wise
    and returns the result.  1D arrays are treated as single columns.
    CAUTION: If one array is shorter, it will be repeated until it is as
    long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    if len(source.shape) == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        if len(addon.shape) == 1:
            # Turn the addon into a column of ITS OWN length; resizing it to
            # the source's length here would truncate a longer addon before
            # the lengths are compared below.
            addon = N.resize(addon, [addon.shape[0], 1])
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
747
748
def acolex(a, indices, axis=1):
    """
    Pulls the given indices (a single index or a list of them) out of array
    a along the given axis (column extraction is the default).  BEWARE: a 1D
    array is presumed to be a column-array, and in that case the whole array
    is returned reshaped as a single column, whatever indices says.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if type(indices) not in (list, tuple, N.ArrayType):
        indices = [indices]
    if len(N.shape(a)) == 1:
        return N.resize(a, [a.shape[0], 1])
    return N.take(a, indices, axis)
765
766
def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols for every
    unique combination of values found in keepcols.  Setting fcn1 and/or
    fcn2 to a function rather than None appends those results after each
    collapsed value; if such a function raises, 'N/A' is recorded instead.
    cfcn is the collapse function applied per condition (defaults to the
    mean of all elements).  NOTE: when keepcols is [] the column-wise mean
    is always used and cfcn is not consulted.

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns
             specified by keepcols, abutted with the collapsed value(s) of
             column(s) specified by collapsecols
    """
    def acollmean(inarray):
        # a true mean: the old helper returned only the sum
        flattened = N.ravel(inarray)
        return N.sum(flattened) / float(len(flattened))

    def safe_apply(fcn, values, fallback):
        # Extra statistics are best-effort: a failure is reported in-band.
        try:
            return fcn(values)
        except Exception:
            return fallback

    if cfcn is None:
        cfcn = acollmean
    if keepcols == []:
        avgcol = acolex(a, collapsecols)
        means = N.sum(avgcol) / float(len(avgcol))
        if fcn1 is not None:
            test = safe_apply(fcn1, avgcol, None)
            if test is None:
                test = N.array(['N/A'] * len(means))
            means = aabut(means, test)
        if fcn2 is not None:
            test = safe_apply(fcn2, avgcol, None)
            if test is None:
                test = N.array(['N/A'] * len(means))
            means = aabut(means, test)
        return means

    if not isinstance(keepcols, (list, tuple, N.ArrayType)):
        keepcols = [keepcols]
    values = colex(a, keepcols)    # so that "item" can be appended (below)
    uniques = unique(values)       # get a LIST, so .sort keeps rows intact
    uniques.sort()
    newlist = []
    for item in uniques:
        # Always work on a fresh list so results can be appended to it.
        if isinstance(item, (list, tuple, N.ArrayType)):
            item = list(item)
        else:
            item = [item]
        tmprows = alinexand(a, keepcols, item)
        for col in collapsecols:
            avgcol = acolex(tmprows, col)
            item.append(cfcn(avgcol))          # honor the caller's cfcn
            if fcn1 is not None:
                item.append(safe_apply(fcn1, avgcol, 'N/A'))
            if fcn2 is not None:
                item.append(safe_apply(fcn2, avgcol, 'N/A'))
        newlist.append(item)
    try:
        new_a = N.array(newlist)
    except TypeError:              # mixed contents: fall back to objects
        new_a = N.array(newlist, 'O')
    return new_a
832
833
def adm(a, criterion):
    """
    Returns rows from the passed array that meet the criteria in the
    passed criterion expression (a string as a function of x, the row).

    SECURITY NOTE: criterion is evaluated as Python source code.  Never pass
    a string that comes from an untrusted origin.

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an Object array if the rows
             cannot be converted to a regular array)
    """
    predicate = eval('lambda x: ' + criterion)
    lines = [row for row in a if predicate(row)]
    try:
        return N.array(lines)
    except Exception:              # rows don't fit a regular array
        return N.array(lines, 'O')
848
849
def isstring(x):
    """
    Tells whether x is exactly a plain string.

    Usage:   isstring(x)
    Returns: 1 if type(x) is the string type, 0 otherwise
    """
    return int(type(x) == str)
855
856
def alinexand(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist) for EVERY pair.  One value is required for each column
    in columnlist.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no intermediate source-code string is built
    or evaluated).

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i (an
             Object array if the rows cannot form a regular array)
    Raises:  IndexError if valuelist is shorter than columnlist.
    """
    if not isinstance(columnlist, (list, tuple, N.ArrayType)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ArrayType)):
        valuelist = [valuelist]
    # Pair by index (not zip) so a too-short valuelist is reported.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = [row for row in a
             if all(row[col] == val for col, val in pairs)]
    try:
        return N.array(lines)
    except Exception:              # rows don't fit a regular array
        return N.array(lines, 'O')
878
879
def alinexor(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist) for ANY pair.  One value is required for each column in
    columnlist.  The exception is if either columnlist or valuelist has only
    1 value, in which case that item will be expanded to match the length of
    the other list.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no intermediate source-code string is built
    or evaluated).

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i (an
             Object array if the rows cannot form a regular array)
    Raises:  IndexError if valuelist is shorter than columnlist (after the
             single-item expansion described above).
    """
    if not isinstance(columnlist, (list, tuple, N.ArrayType)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ArrayType)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = [row for row in a
             if any(row[col] == val for col, val in pairs)]
    try:
        return N.array(lines)
    except Exception:              # rows don't fit a regular array
        return N.array(lines, 'O')
908
909
def areplace(a, oldval, newval):
    """
    Replaces all occurrences of oldval with newval in array a.  The input
    array is not modified.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array with the substitutions made
    """
    # Select element-wise instead of multiplying by 0/1 masks: the mask
    # arithmetic turned values into NaN whenever newval (or a replaced
    # element) was infinite, because 0*inf is NaN.
    return N.where(N.equal(a, oldval), newval, a)
918
919
def arecode(a, listmap, col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on a SINGLE column, or 'all' columns at
    once.  Every [old, new] pair in listmap is applied, always matching
    against the ORIGINAL values (so one replacement is never re-mapped by a
    later pair); if an old value is listed twice, the first pair wins.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a, col)
        work = work.flat
    # Object arrays are needed if the data already are objects or if any
    # replacement value is a string.
    use_objects = (work.typecode() == 'O' or a.typecode() == 'O')
    for pair in listmap:
        if isinstance(pair[1], str):
            use_objects = 1
    if use_objects:
        work = N.array(work, 'O')
        a = N.array(a, 'O')
        for i in range(len(work)):
            for pair in listmap:
                if work[i] == pair[0]:
                    work[i] = pair[1]
                    break              # first matching pair wins
    else:
        original = work
        # walk the pairs backwards so the FIRST pair is applied last and
        # therefore wins when an old value is listed more than once
        for k in range(len(listmap) - 1, -1, -1):
            pair = listmap[k]
            work = N.where(N.equal(original, pair[0]), pair[1], work)
    # The result is assembled only after ALL pairs were applied (the old
    # code returned from inside the loop after the first pair).
    if col == 'all':
        return N.reshape(work, ashape)
    return N.concatenate([a[:, 0:col], work[:, N.NewAxis], a[:, col + 1:]], 1)
949
950
def arowcompare(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).

    Usage:   arowcompare(row1,row2)
    Returns: an array of equal length containing 1s where the two rows had
             identical elements and 0 otherwise
    """
    # typecode is a METHOD: the old test 'row2.typecode=='O'' compared the
    # bound method itself with 'O' and so was never true.
    if row1.typecode() == 'O' or row2.typecode() == 'O':
        cmpvect = N.logical_not(abs(N.array(map(cmp, row1, row2))))  # cmp fcn gives -1,0,1
    else:
        cmpvect = N.equal(row1, row2)
    return cmpvect
965
966
def arowsame(row1, row2):
    """
    Tests whether two array rows match element for element, using
    arowcompare() so that both numeric arrays and arrays of python objects
    are handled.

    Usage:   arowsame(row1,row2)
    Returns: 1 if the two rows are identical, 0 otherwise.
    """
    return N.alltrue(arowcompare(row1, row2))
977
978
def asortrows(a, axis=0):
    """
    Sorts an array "by rows".  Unlike the array library's own sort(), which
    orders elements WITHIN the given axis, this keeps each slice along the
    given axis intact and only moves whole slices 'up or down' relative to
    one another (ordering them as Python lists).

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    swapped = axis != 0
    if swapped:
        a = N.swapaxes(a, axis, 0)     # bring the sort axis to the front
    rows = a.tolist()
    rows.sort()
    result = N.array(rows)
    if swapped:
        result = N.swapaxes(result, axis, 0)   # restore the axis order
    return result
997
998
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  Only works on arrays NOT including string
    items.

    Usage:   aunique (inarray)
    Returns: an array of the unique items (an empty array for empty input)
    """
    def append_row(rows, item):
        try:
            return N.concatenate([rows, item[N.NewAxis, :]])
        except TypeError:          # the item to add isn't an array
            return N.concatenate([rows, N.array([item])])

    if len(inarray) == 0:
        return N.array([])
    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:                  # IF IT'S A 1D ARRAY
        for item in inarray[1:]:
            if N.add.reduce(N.equal(uniques, item).flat) == 0:
                # (the old first attempt subscripted the N.array function
                # itself, always failed, and fell back to this statement)
                uniques = N.concatenate([uniques, N.array([item])])
    elif inarray.typecode() != 'O':              # 2+D, not an Object array
        for item in inarray[1:]:
            if not N.sum(N.alltrue(N.equal(uniques, item), 1)):
                uniques = append_row(uniques, item)
    else:   # must be an Object array, alltrue/equal functions don't work
        for item in inarray[1:]:
            newflag = 1
            for unq in uniques:    # NOTE: cmp --> 0=same, -1=<, 1=>
                test = N.sum(abs(N.array(map(cmp, item, unq))))
                if test == 0:      # item identical to a row in uniques
                    newflag = 0    # then not a novel item to add
                    break
            if newflag == 1:
                uniques = append_row(uniques, item)
    return uniques
1038
1039
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array,
    each reported once.  Only works on arrays NOT including string items.

    Usage:   aduplicates (inarray)
    Returns: an array of the duplicated items (empty if there are none)
    """
    inarray = N.array(inarray)
    aslist = inarray.tolist()
    dups = []
    for i in range(len(aslist)):
        if aslist[i] in aslist[i + 1:]:
            dups.append(aslist[i])
    if len(inarray.shape) == 1:                  # IF IT'S A 1D ARRAY
        if dups:       # aunique indexes element 0, so skip it when empty
            dups = aunique(dups)
    else:                                        # IT MUST BE A 2+D ARRAY
        dups = unique(dups)
    return N.array(dups)
1064
1065	except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
1066	pass

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx_extras/pstat.py

異なるフォーマットでダウンロード: