Context Navigation

pstat.py @ 3

リビジョン 3, 37.5 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

Rev	行番号
[3]	1	# Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
	2	#
	3	# This software is distributable under the terms of the GNU
	4	# General Public License (GPL) v2, the text of which can be found at
	5	# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
	6	# using this module constitutes acceptance of the terms of this License.
	7	#
	8	# Disclaimer
	9	#
	10	# This software is provided "as-is". There are no expressed or implied
	11	# warranties of any kind, including, but not limited to, the warranties
	12	# of merchantability and fittness for a given application. In no event
	13	# shall Gary Strangman be liable for any direct, indirect, incidental,
	14	# special, exemplary or consequential damages (including, but not limited
	15	# to, loss of use, data or profits, or business interruption) however
	16	# caused and on any theory of liability, whether in contract, strict
	17	# liability or tort (including negligence or otherwise) arising in any way
	18	# out of the use of this software, even if advised of the possibility of
	19	# such damage.
	20	#
	21	# Comments and/or additions are welcome (send e-mail to:
	22	# strang@nmr.mgh.harvard.edu).
	23	#
	24	"""
	25	pstat.py module
	26
	27	#################################################
	28	####### Written by: Gary Strangman ###########
	29	####### Last modified: Jun 29, 2001 ###########
	30	#################################################
	31
	32	This module provides some useful list and array manipulation routines
	33	modeled after those found in the \|Stat package by Gary Perlman, plus a
	34	number of other useful list/file manipulation functions. The list-based
	35	functions include:
	36
	37	abut (source,*args)
	38	simpleabut (source, addon)
	39	colex (listoflists,cnums)
	40	collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
	41	dm (listoflists,criterion)
	42	flat (l)
	43	linexand (listoflists,columnlist,valuelist)
	44	linexor (listoflists,columnlist,valuelist)
	45	linedelimited (inlist,delimiter)
	46	lineincols (inlist,colsize)
	47	lineincustcols (inlist,colsizes)
	48	list2string (inlist)
	49	makelol(inlist)
	50	makestr(x)
	51	printcc (lst,extra=2)
	52	printincols (listoflists,colsize)
	53	pl (listoflists)
	54	printl(listoflists)
	55	replace (lst,oldval,newval)
	56	recode (inlist,listmap,cols=None)
	57	remap (listoflists,criterion)
	58	roundlist (inlist,num_digits_to_round_floats_to)
	59	sortby(listoflists,sortcols)
	60	unique (inlist)
	61	duplicates(inlist)
	62	writedelimited (listoflists, delimiter, file, writetype='w')
	63
	64	Some of these functions have alternate versions which are defined only if
	65	Numeric (NumPy) can be imported. These functions are generally named as
	66	above, with an 'a' prefix.
	67
	68	aabut (source, *args)
	69	acolex (a,indices,axis=1)
	70	acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
	71	adm (a,criterion)
	72	alinexand (a,columnlist,valuelist)
	73	alinexor (a,columnlist,valuelist)
	74	areplace (a,oldval,newval)
	75	arecode (a,listmap,col='all')
	76	arowcompare (row1, row2)
	77	arowsame (row1, row2)
	78	asortrows(a,axis=0)
	79	aunique(inarray)
	80	aduplicates(inarray)
	81
	82	Currently, the code is all but completely un-optimized. In many cases, the
	83	array versions of functions amount simply to aliases to built-in array
	84	functions/methods. Their inclusion here is for function name consistency.
	85	"""
	86
	87	## CHANGE LOG:
	88	## ==========
	89	## 01-11-15 ... changed list2string() to accept a delimiter
	90	## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
	91	## 01-05-31 ... added duplicates() and aduplicates() functions
	92	## 00-12-28 ... license made GPL, docstring and import requirements
	93	## 99-11-01 ... changed version to 0.3
	94	## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
	95	## 03/27/99 ... added areplace function, made replace fcn recursive
	96	## 12/31/98 ... added writefc function for output to fixed column sizes
	97	## 12/07/98 ... fixed import problem (failed on collapse() fcn)
	98	## added __version__ variable (now 0.2)
	99	## 12/05/98 ... updated doc-strings
	100	## added features to collapse() function
	101	## added flat() function for lists
	102	## fixed a broken asortrows()
	103	## 11/16/98 ... fixed minor bug in aput for 1D arrays
	104	##
	105	## 11/08/98 ... fixed aput to output large arrays correctly
	106
	107	import stats # required 3rd party module
	108	import string, copy
	109	from types import *
	110
	111	__version__ = 0.4
	112
	113	###=========================== LIST FUNCTIONS ==========================
	114	###
	115	### Here are the list functions, DEFINED FOR ALL SYSTEMS.
	116	### Array functions (for NumPy-enabled computers) appear below.
	117	###
	118
	119	def abut (source,*args):
	120	"""
	121	Like the \|Stat abut command. It concatenates two lists side-by-side
	122	and returns the result. '2D' lists are also accomodated for either argument
	123	(source or addon). CAUTION: If one list is shorter, it will be repeated
	124	until it is as long as the longest list. If this behavior is not desired,
	125	use pstat.simpleabut().
	126
	127	Usage: abut(source, args) where args=any # of lists
	128	Returns: a list of lists as long as the LONGEST list past, source on the
	129	'left', lists in <args> attached consecutively on the 'right'
	130	"""
	131
	132	if type(source) not in [ListType,TupleType]:
	133	source = [source]
	134	for addon in args:
	135	if type(addon) not in [ListType,TupleType]:
	136	addon = [addon]
	137	if len(addon) < len(source): # is source list longer?
	138	if len(source) % len(addon) == 0: # are they integer multiples?
	139	repeats = len(source)/len(addon) # repeat addon n times
	140	origadd = copy.deepcopy(addon)
	141	for i in range(repeats-1):
	142	addon = addon + origadd
	143	else:
	144	repeats = len(source)/len(addon)+1 # repeat addon x times,
	145	origadd = copy.deepcopy(addon) # x is NOT an integer
	146	for i in range(repeats-1):
	147	addon = addon + origadd
	148	addon = addon[0:len(source)]
	149	elif len(source) < len(addon): # is addon list longer?
	150	if len(addon) % len(source) == 0: # are they integer multiples?
	151	repeats = len(addon)/len(source) # repeat source n times
	152	origsour = copy.deepcopy(source)
	153	for i in range(repeats-1):
	154	source = source + origsour
	155	else:
	156	repeats = len(addon)/len(source)+1 # repeat source x times,
	157	origsour = copy.deepcopy(source) # x is NOT an integer
	158	for i in range(repeats-1):
	159	source = source + origsour
	160	source = source[0:len(addon)]
	161
	162	source = simpleabut(source,addon)
	163	return source
	164
	165
	166	def simpleabut (source, addon):
	167	"""
	168	Concatenates two lists as columns and returns the result. '2D' lists
	169	are also accomodated for either argument (source or addon). This DOES NOT
	170	repeat either list to make the 2 lists of equal length. Beware of list pairs
	171	with different lengths ... the resulting list will be the length of the
	172	FIRST list passed.
	173
	174	Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists)
	175	Returns: a list of lists as long as source, with source on the 'left' and
	176	addon on the 'right'
	177	"""
	178	if type(source) not in [ListType,TupleType]:
	179	source = [source]
	180	if type(addon) not in [ListType,TupleType]:
	181	addon = [addon]
	182	minlen = min(len(source),len(addon))
	183	list = copy.deepcopy(source) # start abut process
	184	if type(source[0]) not in [ListType,TupleType]:
	185	if type(addon[0]) not in [ListType,TupleType]:
	186	for i in range(minlen):
	187	list[i] = [source[i]] + [addon[i]] # source/addon = column
	188	else:
	189	for i in range(minlen):
	190	list[i] = [source[i]] + addon[i] # addon=list-of-lists
	191	else:
	192	if type(addon[0]) not in [ListType,TupleType]:
	193	for i in range(minlen):
	194	list[i] = source[i] + [addon[i]] # source=list-of-lists
	195	else:
	196	for i in range(minlen):
	197	list[i] = source[i] + addon[i] # source/addon = list-of-lists
	198	source = list
	199	return source
	200
	201
	202	def colex (listoflists,cnums):
	203	"""
	204	Extracts from listoflists the columns specified in the list 'cnums'
	205	(cnums can be an integer, a sequence of integers, or a string-expression that
	206	corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
	207	columns 3 onward from the listoflists).
	208
	209	Usage: colex (listoflists,cnums)
	210	Returns: a list-of-lists corresponding to the columns from listoflists
	211	specified by cnums, in the order the column numbers appear in cnums
	212	"""
	213	global index
	214	column = 0
	215	if type(cnums) in [ListType,TupleType]: # if multiple columns to get
	216	index = cnums[0]
	217	column = map(lambda x: x[index], listoflists)
	218	for col in cnums[1:]:
	219	index = col
	220	column = abut(column,map(lambda x: x[index], listoflists))
	221	elif type(cnums) == StringType: # if an 'x[3:]' type expr.
	222	evalstring = 'map(lambda x: x'+cnums+', listoflists)'
	223	column = eval(evalstring)
	224	else: # else it's just 1 col to get
	225	index = cnums
	226	column = map(lambda x: x[index], listoflists)
	227	return column
	228
	229
	230	def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
	231	"""
	232	Averages data in collapsecol, keeping all unique items in keepcols
	233	(using unique, which keeps unique LISTS of column numbers), retaining the
	234	unique sets of values in keepcols, the mean for each. Setting fcn1
	235	and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
	236	will append those results (e.g., the sterr, N) after each calculated mean.
	237	cfcn is the collapse function to apply (defaults to mean, defined here in the
	238	pstat module to avoid circular imports with stats.py, but harmonicmean or
	239	others could be passed).
	240
	241	Usage: collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
	242	Returns: a list of lists with all unique permutations of entries appearing in
	243	columns ("conditions") specified by keepcols, abutted with the result of
	244	cfcn (if cfcn=None, defaults to the mean) of each column specified by
	245	collapsecols.
	246	"""
	247	def collmean (inlist):
	248	s = 0
	249	for item in inlist:
	250	s = s + item
	251	return s/float(len(inlist))
	252
	253	if type(keepcols) not in [ListType,TupleType]:
	254	keepcols = [keepcols]
	255	if type(collapsecols) not in [ListType,TupleType]:
	256	collapsecols = [collapsecols]
	257	if cfcn == None:
	258	cfcn = collmean
	259	if keepcols == []:
	260	means = [0]*len(collapsecols)
	261	for i in range(len(collapsecols)):
	262	avgcol = colex(listoflists,collapsecols[i])
	263	means[i] = cfcn(avgcol)
	264	if fcn1:
	265	try:
	266	test = fcn1(avgcol)
	267	except:
	268	test = 'N/A'
	269	means[i] = [means[i], test]
	270	if fcn2:
	271	try:
	272	test = fcn2(avgcol)
	273	except:
	274	test = 'N/A'
	275	try:
	276	means[i] = means[i] + [len(avgcol)]
	277	except TypeError:
	278	means[i] = [means[i],len(avgcol)]
	279	return means
	280	else:
	281	values = colex(listoflists,keepcols)
	282	uniques = unique(values)
	283	uniques.sort()
	284	newlist = []
	285	if type(keepcols) not in [ListType,TupleType]: keepcols = [keepcols]
	286	for item in uniques:
	287	if type(item) not in [ListType,TupleType]: item =[item]
	288	tmprows = linexand(listoflists,keepcols,item)
	289	for col in collapsecols:
	290	avgcol = colex(tmprows,col)
	291	item.append(cfcn(avgcol))
	292	if fcn1 <> None:
	293	try:
	294	test = fcn1(avgcol)
	295	except:
	296	test = 'N/A'
	297	item.append(test)
	298	if fcn2 <> None:
	299	try:
	300	test = fcn2(avgcol)
	301	except:
	302	test = 'N/A'
	303	item.append(test)
	304	newlist.append(item)
	305	return newlist
	306
	307
	308	def dm (listoflists,criterion):
	309	"""
	310	Returns rows from the passed list of lists that meet the criteria in
	311	the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
	312	will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
	313	with column 2 equal to the string 'N').
	314
	315	Usage: dm (listoflists, criterion)
	316	Returns: rows from listoflists that meet the specified criterion.
	317	"""
	318	function = 'filter(lambda x: '+criterion+',listoflists)'
	319	lines = eval(function)
	320	return lines
	321
	322
	323	def flat(l):
	324	"""
	325	Returns the flattened version of a '2D' list. List-correlate to the a.flat()
	326	method of NumPy arrays.
	327
	328	Usage: flat(l)
	329	"""
	330	newl = []
	331	for i in range(len(l)):
	332	for j in range(len(l[i])):
	333	newl.append(l[i][j])
	334	return newl
	335
	336
	337	def linexand (listoflists,columnlist,valuelist):
	338	"""
	339	Returns the rows of a list of lists where col (from columnlist) = val
	340	(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
	341	len(columnlist) must equal len(valuelist).
	342
	343	Usage: linexand (listoflists,columnlist,valuelist)
	344	Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
	345	"""
	346	if type(columnlist) not in [ListType,TupleType]:
	347	columnlist = [columnlist]
	348	if type(valuelist) not in [ListType,TupleType]:
	349	valuelist = [valuelist]
	350	criterion = ''
	351	for i in range(len(columnlist)):
	352	if type(valuelist[i])==StringType:
	353	critval = '\'' + valuelist[i] + '\''
	354	else:
	355	critval = str(valuelist[i])
	356	criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
	357	criterion = criterion[0:-3] # remove the "and" after the last crit
	358	function = 'filter(lambda x: '+criterion+',listoflists)'
	359	lines = eval(function)
	360	return lines
	361
	362
	363	def linexor (listoflists,columnlist,valuelist):
	364	"""
	365	Returns the rows of a list of lists where col (from columnlist) = val
	366	(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
	367	One value is required for each column in columnlist. If only one value
	368	exists for columnlist but multiple values appear in valuelist, the
	369	valuelist values are all assumed to pertain to the same column.
	370
	371	Usage: linexor (listoflists,columnlist,valuelist)
	372	Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
	373	"""
	374	if type(columnlist) not in [ListType,TupleType]:
	375	columnlist = [columnlist]
	376	if type(valuelist) not in [ListType,TupleType]:
	377	valuelist = [valuelist]
	378	criterion = ''
	379	if len(columnlist) == 1 and len(valuelist) > 1:
	380	columnlist = columnlist*len(valuelist)
	381	for i in range(len(columnlist)): # build an exec string
	382	if type(valuelist[i])==StringType:
	383	critval = '\'' + valuelist[i] + '\''
	384	else:
	385	critval = str(valuelist[i])
	386	criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
	387	criterion = criterion[0:-2] # remove the "or" after the last crit
	388	function = 'filter(lambda x: '+criterion+',listoflists)'
	389	lines = eval(function)
	390	return lines
	391
	392
	393	def linedelimited (inlist,delimiter):
	394	"""
	395	Returns a string composed of elements in inlist, with each element
	396	separated by 'delimiter.' Used by function writedelimited. Use '\t'
	397	for tab-delimiting.
	398
	399	Usage: linedelimited (inlist,delimiter)
	400	"""
	401	outstr = ''
	402	for item in inlist:
	403	if type(item) <> StringType:
	404	item = str(item)
	405	outstr = outstr + item + delimiter
	406	outstr = outstr[0:-1]
	407	return outstr
	408
	409
	410	def lineincols (inlist,colsize):
	411	"""
	412	Returns a string composed of elements in inlist, with each element
	413	right-aligned in columns of (fixed) colsize.
	414
	415	Usage: lineincols (inlist,colsize) where colsize is an integer
	416	"""
	417	outstr = ''
	418	for item in inlist:
	419	if type(item) <> StringType:
	420	item = str(item)
	421	size = len(item)
	422	if size <= colsize:
	423	for i in range(colsize-size):
	424	outstr = outstr + ' '
	425	outstr = outstr + item
	426	else:
	427	outstr = outstr + item[0:colsize+1]
	428	return outstr
	429
	430
	431	def lineincustcols (inlist,colsizes):
	432	"""
	433	Returns a string composed of elements in inlist, with each element
	434	right-aligned in a column of width specified by a sequence colsizes. The
	435	length of colsizes must be greater than or equal to the number of columns
	436	in inlist.
	437
	438	Usage: lineincustcols (inlist,colsizes)
	439	Returns: formatted string created from inlist
	440	"""
	441	outstr = ''
	442	for i in range(len(inlist)):
	443	if type(inlist[i]) <> StringType:
	444	item = str(inlist[i])
	445	else:
	446	item = inlist[i]
	447	size = len(item)
	448	if size <= colsizes[i]:
	449	for j in range(colsizes[i]-size):
	450	outstr = outstr + ' '
	451	outstr = outstr + item
	452	else:
	453	outstr = outstr + item[0:colsizes[i]+1]
	454	return outstr
	455
	456
	457	def list2string (inlist,delimit=' '):
	458	"""
	459	Converts a 1D list to a single long string for file output, using
	460	the string.join function.
	461
	462	Usage: list2string (inlist,delimit=' ')
	463	Returns: the string created from inlist
	464	"""
	465	stringlist = map(makestr,inlist)
	466	return string.join(stringlist,delimit)
	467
	468
	469	def makelol(inlist):
	470	"""
	471	Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you
	472	want to use put() to write a 1D list one item per line in the file.
	473
	474	Usage: makelol(inlist)
	475	Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
	476	"""
	477	x = []
	478	for item in inlist:
	479	x.append([item])
	480	return x
	481
	482
	483	def makestr (x):
	484	if type(x) <> StringType:
	485	x = str(x)
	486	return x
	487
	488
	489	def printcc (lst,extra=2):
	490	"""
	491	Prints a list of lists in columns, customized by the max size of items
	492	within the columns (max size of items in col, plus 'extra' number of spaces).
	493	Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
	494	respectively.
	495
	496	Usage: printcc (lst,extra=2)
	497	Returns: None
	498	"""
	499	if type(lst[0]) not in [ListType,TupleType]:
	500	lst = [lst]
	501	rowstokill = []
	502	list2print = copy.deepcopy(lst)
	503	for i in range(len(lst)):
	504	if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']:
	505	rowstokill = rowstokill + [i]
	506	rowstokill.reverse() # delete blank rows from the end
	507	for row in rowstokill:
	508	del list2print[row]
	509	maxsize = [0]*len(list2print[0])
	510	for col in range(len(list2print[0])):
	511	items = colex(list2print,col)
	512	items = map(makestr,items)
	513	maxsize[col] = max(map(len,items)) + extra
	514	for row in lst:
	515	if row == ['\n'] or row == '\n' or row == '' or row == ['']:
	516	print
	517	elif row == ['dashes'] or row == 'dashes':
	518	dashes = [0]*len(maxsize)
	519	for j in range(len(maxsize)):
	520	dashes[j] = '-'*(maxsize[j]-2)
	521	print lineincustcols(dashes,maxsize)
	522	else:
	523	print lineincustcols(row,maxsize)
	524	return None
	525
	526
	527	def printincols (listoflists,colsize):
	528	"""
	529	Prints a list of lists in columns of (fixed) colsize width, where
	530	colsize is an integer.
	531
	532	Usage: printincols (listoflists,colsize)
	533	Returns: None
	534	"""
	535	for row in listoflists:
	536	print lineincols(row,colsize)
	537	return None
	538
	539
	540	def pl (listoflists):
	541	"""
	542	Prints a list of lists, 1 list (row) at a time.
	543
	544	Usage: pl(listoflists)
	545	Returns: None
	546	"""
	547	for row in listoflists:
	548	if row[-1] == '\n':
	549	print row,
	550	else:
	551	print row
	552	return None
	553
	554
	555	def printl(listoflists):
	556	"""Alias for pl."""
	557	pl(listoflists)
	558	return
	559
	560
	561	def replace (inlst,oldval,newval):
	562	"""
	563	Replaces all occurrences of 'oldval' with 'newval', recursively.
	564
	565	Usage: replace (inlst,oldval,newval)
	566	"""
	567	lst = inlst*1
	568	for i in range(len(lst)):
	569	if type(lst[i]) not in [ListType,TupleType]:
	570	if lst[i]==oldval: lst[i]=newval
	571	else:
	572	lst[i] = replace(lst[i],oldval,newval)
	573	return lst
	574
	575
	576	def recode (inlist,listmap,cols=None):
	577	"""
	578	Changes the values in a list to a new set of values (useful when
	579	you need to recode data from (e.g.) strings to numbers. cols defaults
	580	to None (meaning all columns are recoded).
	581
	582	Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list
	583	Returns: inlist with the appropriate values replaced with new ones
	584	"""
	585	lst = copy.deepcopy(inlist)
	586	if cols != None:
	587	if type(cols) not in [ListType,TupleType]:
	588	cols = [cols]
	589	for col in cols:
	590	for row in range(len(lst)):
	591	try:
	592	idx = colex(listmap,0).index(lst[row][col])
	593	lst[row][col] = listmap[idx][1]
	594	except ValueError:
	595	pass
	596	else:
	597	for row in range(len(lst)):
	598	for col in range(len(lst)):
	599	try:
	600	idx = colex(listmap,0).index(lst[row][col])
	601	lst[row][col] = listmap[idx][1]
	602	except ValueError:
	603	pass
	604	return lst
	605
	606
	607	def remap (listoflists,criterion):
	608	"""
	609	Remaps values in a given column of a 2D list (listoflists). This requires
	610	a criterion as a function of 'x' so that the result of the following is
	611	returned ... map(lambda x: 'criterion',listoflists).
	612
	613	Usage: remap(listoflists,criterion) criterion=string
	614	Returns: remapped version of listoflists
	615	"""
	616	function = 'map(lambda x: '+criterion+',listoflists)'
	617	lines = eval(function)
	618	return lines
	619
	620
	621	def roundlist (inlist,digits):
	622	"""
	623	Goes through each element in a 1D or 2D inlist, and applies the following
	624	function to all elements of FloatType ... round(element,digits).
	625
	626	Usage: roundlist(inlist,digits)
	627	Returns: list with rounded floats
	628	"""
	629	if type(inlist[0]) in [IntType, FloatType]:
	630	inlist = [inlist]
	631	l = inlist*1
	632	for i in range(len(l)):
	633	for j in range(len(l[i])):
	634	if type(l[i][j])==FloatType:
	635	l[i][j] = round(l[i][j],digits)
	636	return l
	637
	638
	639	def sortby(listoflists,sortcols):
	640	"""
	641	Sorts a list of lists on the column(s) specified in the sequence
	642	sortcols.
	643
	644	Usage: sortby(listoflists,sortcols)
	645	Returns: sorted list, unchanged column ordering
	646	"""
	647	newlist = abut(colex(listoflists,sortcols),listoflists)
	648	newlist.sort()
	649	try:
	650	numcols = len(sortcols)
	651	except TypeError:
	652	numcols = 1
	653	crit = '[' + str(numcols) + ':]'
	654	newlist = colex(newlist,crit)
	655	return newlist
	656
	657
	658	def unique (inlist):
	659	"""
	660	Returns all unique items in the passed list. If the a list-of-lists
	661	is passed, unique LISTS are found (i.e., items in the first dimension are
	662	compared).
	663
	664	Usage: unique (inlist)
	665	Returns: the unique elements (or rows) in inlist
	666	"""
	667	uniques = []
	668	for item in inlist:
	669	if item not in uniques:
	670	uniques.append(item)
	671	return uniques
	672
	673	def duplicates(inlist):
	674	"""
	675	Returns duplicate items in the FIRST dimension of the passed list.
	676
	677	Usage: duplicates (inlist)
	678	"""
	679	dups = []
	680	for i in range(len(inlist)):
	681	if inlist[i] in inlist[i+1:]:
	682	dups.append(inlist[i])
	683	return dups
	684
	685
	686	def nonrepeats(inlist):
	687	"""
	688	Returns items that are NOT duplicated in the first dim of the passed list.
	689
	690	Usage: nonrepeats (inlist)
	691	"""
	692	nonrepeats = []
	693	for i in range(len(inlist)):
	694	if inlist.count(inlist[i]) == 1:
	695	nonrepeats.append(inlist[i])
	696	return nonrepeats
	697
	698
	699	#=================== PSTAT ARRAY FUNCTIONS =====================
	700	#=================== PSTAT ARRAY FUNCTIONS =====================
	701	#=================== PSTAT ARRAY FUNCTIONS =====================
	702	#=================== PSTAT ARRAY FUNCTIONS =====================
	703	#=================== PSTAT ARRAY FUNCTIONS =====================
	704	#=================== PSTAT ARRAY FUNCTIONS =====================
	705	#=================== PSTAT ARRAY FUNCTIONS =====================
	706	#=================== PSTAT ARRAY FUNCTIONS =====================
	707	#=================== PSTAT ARRAY FUNCTIONS =====================
	708	#=================== PSTAT ARRAY FUNCTIONS =====================
	709	#=================== PSTAT ARRAY FUNCTIONS =====================
	710	#=================== PSTAT ARRAY FUNCTIONS =====================
	711	#=================== PSTAT ARRAY FUNCTIONS =====================
	712	#=================== PSTAT ARRAY FUNCTIONS =====================
	713	#=================== PSTAT ARRAY FUNCTIONS =====================
	714	#=================== PSTAT ARRAY FUNCTIONS =====================
	715
	716	try: # DEFINE THESE ONLY IF NUMERIC IS AVAILABLE
	717	import Numeric
	718	N = Numeric
	719
	720	def aabut (source, *args):
	721	"""
	722	Like the \|Stat abut command. It concatenates two arrays column-wise
	723	and returns the result. CAUTION: If one array is shorter, it will be
	724	repeated until it is as long as the other.
	725
	726	Usage: aabut (source, args) where args=any # of arrays
	727	Returns: an array as long as the LONGEST array past, source appearing on the
	728	'left', arrays in <args> attached on the 'right'.
	729	"""
	730	if len(source.shape)==1:
	731	width = 1
	732	source = N.resize(source,[source.shape[0],width])
	733	else:
	734	width = source.shape[1]
	735	for addon in args:
	736	if len(addon.shape)==1:
	737	width = 1
	738	addon = N.resize(addon,[source.shape[0],width])
	739	else:
	740	width = source.shape[1]
	741	if len(addon) < len(source):
	742	addon = N.resize(addon,[source.shape[0],addon.shape[1]])
	743	elif len(source) < len(addon):
	744	source = N.resize(source,[addon.shape[0],source.shape[1]])
	745	source = N.concatenate((source,addon),1)
	746	return source
	747
	748
	749	def acolex (a,indices,axis=1):
	750	"""
	751	Extracts specified indices (a list) from passed array, along passed
	752	axis (column extraction is default). BEWARE: A 1D array is presumed to be a
	753	column-array (and that the whole array will be returned as a column).
	754
	755	Usage: acolex (a,indices,axis=1)
	756	Returns: the columns of a specified by indices
	757	"""
	758	if type(indices) not in [ListType,TupleType,N.ArrayType]:
	759	indices = [indices]
	760	if len(N.shape(a)) == 1:
	761	cols = N.resize(a,[a.shape[0],1])
	762	else:
	763	cols = N.take(a,indices,axis)
	764	return cols
	765
	766
	767	def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
	768	"""
	769	Averages data in collapsecol, keeping all unique items in keepcols
	770	(using unique, which keeps unique LISTS of column numbers), retaining
	771	the unique sets of values in keepcols, the mean for each. If stderror or
	772	N of the mean are desired, set either or both parameters to 1.
	773
	774	Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
	775	Returns: unique 'conditions' specified by the contents of columns specified
	776	by keepcols, abutted with the mean(s) of column(s) specified by
	777	collapsecols
	778	"""
	779	def acollmean (inarray):
	780	return N.sum(N.ravel(inarray))
	781
	782	if cfcn == None:
	783	cfcn = acollmean
	784	if keepcols == []:
	785	avgcol = acolex(a,collapsecols)
	786	means = N.sum(avgcol)/float(len(avgcol))
	787	if fcn1<>None:
	788	try:
	789	test = fcn1(avgcol)
	790	except:
	791	test = N.array(['N/A']*len(means))
	792	means = aabut(means,test)
	793	if fcn2<>None:
	794	try:
	795	test = fcn2(avgcol)
	796	except:
	797	test = N.array(['N/A']*len(means))
	798	means = aabut(means,test)
	799	return means
	800	else:
	801	if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
	802	keepcols = [keepcols]
	803	values = colex(a,keepcols) # so that "item" can be appended (below)
	804	uniques = unique(values) # get a LIST, so .sort keeps rows intact
	805	uniques.sort()
	806	newlist = []
	807	for item in uniques:
	808	if type(item) not in [ListType,TupleType,N.ArrayType]:
	809	item =[item]
	810	tmprows = alinexand(a,keepcols,item)
	811	for col in collapsecols:
	812	avgcol = acolex(tmprows,col)
	813	item.append(acollmean(avgcol))
	814	if fcn1<>None:
	815	try:
	816	test = fcn1(avgcol)
	817	except:
	818	test = 'N/A'
	819	item.append(test)
	820	if fcn2<>None:
	821	try:
	822	test = fcn2(avgcol)
	823	except:
	824	test = 'N/A'
	825	item.append(test)
	826	newlist.append(item)
	827	try:
	828	new_a = N.array(newlist)
	829	except TypeError:
	830	new_a = N.array(newlist,'O')
	831	return new_a
	832
	833
	834	def adm (a,criterion):
	835	"""
	836	Returns rows from the passed list of lists that meet the criteria in
	837	the passed criterion expression (a string as a function of x).
	838
	839	Usage: adm (a,criterion) where criterion is like 'x[2]==37'
	840	"""
	841	function = 'filter(lambda x: '+criterion+',a)'
	842	lines = eval(function)
	843	try:
	844	lines = N.array(lines)
	845	except:
	846	lines = N.array(lines,'O')
	847	return lines
	848
	849
	850	def isstring(x):
	851	if type(x)==StringType:
	852	return 1
	853	else:
	854	return 0
	855
	856
	857	def alinexand (a,columnlist,valuelist):
	858	"""
	859	Returns the rows of an array where col (from columnlist) = val
	860	(from valuelist). One value is required for each column in columnlist.
	861
	862	Usage: alinexand (a,columnlist,valuelist)
	863	Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
	864	"""
	865	if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
	866	columnlist = [columnlist]
	867	if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
	868	valuelist = [valuelist]
	869	criterion = ''
	870	for i in range(len(columnlist)):
	871	if type(valuelist[i])==StringType:
	872	critval = '\'' + valuelist[i] + '\''
	873	else:
	874	critval = str(valuelist[i])
	875	criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
	876	criterion = criterion[0:-3] # remove the "and" after the last crit
	877	return adm(a,criterion)
	878
	879
	880	def alinexor (a,columnlist,valuelist):
	881	"""
	882	Returns the rows of an array where col (from columnlist) = val (from
	883	valuelist). One value is required for each column in columnlist.
	884	The exception is if either columnlist or valuelist has only 1 value,
	885	in which case that item will be expanded to match the length of the
	886	other list.
	887
	888	Usage: alinexor (a,columnlist,valuelist)
	889	Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
	890	"""
	891	if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
	892	columnlist = [columnlist]
	893	if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
	894	valuelist = [valuelist]
	895	criterion = ''
	896	if len(columnlist) == 1 and len(valuelist) > 1:
	897	columnlist = columnlist*len(valuelist)
	898	elif len(valuelist) == 1 and len(columnlist) > 1:
	899	valuelist = valuelist*len(columnlist)
	900	for i in range(len(columnlist)):
	901	if type(valuelist[i])==StringType:
	902	critval = '\'' + valuelist[i] + '\''
	903	else:
	904	critval = str(valuelist[i])
	905	criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
	906	criterion = criterion[0:-2] # remove the "or" after the last crit
	907	return adm(a,criterion)
	908
	909
	910	def areplace (a,oldval,newval):
	911	"""
	912	Replaces all occurrences of oldval with newval in array a.
	913
	914	Usage: areplace(a,oldval,newval)
	915	"""
	916	newa = N.not_equal(a,oldval)*a
	917	return newa+N.equal(a,oldval)*newval
	918
	919
	920	def arecode (a,listmap,col='all'):
	921	"""
	922	Remaps the values in an array to a new set of values (useful when
	923	you need to recode data from (e.g.) strings to numbers as most stats
	924	packages require. Can work on SINGLE columns, or 'all' columns at once.
	925
	926	Usage: arecode (a,listmap,col='all')
	927	Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
	928	"""
	929	ashape = a.shape
	930	if col == 'all':
	931	work = a.flat
	932	else:
	933	work = acolex(a,col)
	934	work = work.flat
	935	for pair in listmap:
	936	if type(pair[1]) == StringType or work.typecode()=='O' or a.typecode()=='O':
	937	work = N.array(work,'O')
	938	a = N.array(a,'O')
	939	for i in range(len(work)):
	940	if work[i]==pair[0]:
	941	work[i] = pair[1]
	942	if col == 'all':
	943	return N.reshape(work,ashape)
	944	else:
	945	return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
	946	else: # must be a non-Object type array and replacement
	947	work = N.where(N.equal(work,pair[0]),pair[1],work)
	948	return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
	949
	950
	951	def arowcompare(row1, row2):
	952	"""
	953	Compares two rows from an array, regardless of whether it is an
	954	array of numbers or of python objects (which requires the cmp function).
	955
	956	Usage: arowcompare(row1,row2)
	957	Returns: an array of equal length containing 1s where the two rows had
	958	identical elements and 0 otherwise
	959	"""
	960	if row1.typecode()=='O' or row2.typecode=='O':
	961	cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1
	962	else:
	963	cmpvect = N.equal(row1,row2)
	964	return cmpvect
	965
	966
	967	def arowsame(row1, row2):
	968	"""
	969	Compares two rows from an array, regardless of whether it is an
	970	array of numbers or of python objects (which requires the cmp function).
	971
	972	Usage: arowsame(row1,row2)
	973	Returns: 1 if the two rows are identical, 0 otherwise.
	974	"""
	975	cmpval = N.alltrue(arowcompare(row1,row2))
	976	return cmpval
	977
	978
	979	def asortrows(a,axis=0):
	980	"""
	981	Sorts an array "by rows". This differs from the Numeric.sort() function,
	982	which sorts elements WITHIN the given axis. Instead, this function keeps
	983	the elements along the given axis intact, but shifts them 'up or down'
	984	relative to one another.
	985
	986	Usage: asortrows(a,axis=0)
	987	Returns: sorted version of a
	988	"""
	989	if axis != 0:
	990	a = N.swapaxes(a, axis, 0)
	991	l = a.tolist()
	992	l.sort() # or l.sort(_sort)
	993	y = N.array(l)
	994	if axis != 0:
	995	y = N.swapaxes(y, axis, 0)
	996	return y
	997
	998
	999	def aunique(inarray):
	1000	"""
	1001	Returns unique items in the FIRST dimension of the passed array. Only
	1002	works on arrays NOT including string items.
	1003
	1004	Usage: aunique (inarray)
	1005	"""
	1006	uniques = N.array([inarray[0]])
	1007	if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY
	1008	for item in inarray[1:]:
	1009	if N.add.reduce(N.equal(uniques,item).flat) == 0:
	1010	try:
	1011	uniques = N.concatenate([uniques,N.array[N.NewAxis,:]])
	1012	except TypeError:
	1013	uniques = N.concatenate([uniques,N.array([item])])
	1014	else: # IT MUST BE A 2+D ARRAY
	1015	if inarray.typecode() != 'O': # not an Object array
	1016	for item in inarray[1:]:
	1017	if not N.sum(N.alltrue(N.equal(uniques,item),1)):
	1018	try:
	1019	uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
	1020	except TypeError: # the item to add isn't a list
	1021	uniques = N.concatenate([uniques,N.array([item])])
	1022	else:
	1023	pass # this item is already in the uniques array
	1024	else: # must be an Object array, alltrue/equal functions don't work
	1025	for item in inarray[1:]:
	1026	newflag = 1
	1027	for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=>
	1028	test = N.sum(abs(N.array(map(cmp,item,unq))))
	1029	if test == 0: # if item identical to any 1 row in uniques
	1030	newflag = 0 # then not a novel item to add
	1031	break
	1032	if newflag == 1:
	1033	try:
	1034	uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
	1035	except TypeError: # the item to add isn't a list
	1036	uniques = N.concatenate([uniques,N.array([item])])
	1037	return uniques
	1038
	1039
	1040	def aduplicates(inarray):
	1041	"""
	1042	Returns duplicate items in the FIRST dimension of the passed array. Only
	1043	works on arrays NOT including string items.
	1044
	1045	Usage: aunique (inarray)
	1046	"""
	1047	inarray = N.array(inarray)
	1048	if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY
	1049	dups = []
	1050	inarray = inarray.tolist()
	1051	for i in range(len(inarray)):
	1052	if inarray[i] in inarray[i+1:]:
	1053	dups.append(inarray[i])
	1054	dups = aunique(dups)
	1055	else: # IT MUST BE A 2+D ARRAY
	1056	dups = []
	1057	aslist = inarray.tolist()
	1058	for i in range(len(aslist)):
	1059	if aslist[i] in aslist[i+1:]:
	1060	dups.append(aslist[i])
	1061	dups = unique(dups)
	1062	dups = N.array(dups)
	1063	return dups
	1064
	1065	except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
	1066	pass

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx_extras/pstat.py @ 3

異なるフォーマットでダウンロード: