[3] | 1 | # (c) 2005 Clark C. Evans and contributors
|
---|
| 2 | # This module is part of the Python Paste Project and is released under
|
---|
| 3 | # the MIT License: http://www.opensource.org/licenses/mit-license.php
|
---|
| 4 | # Some of this code was funded by: http://prometheusresearch.com
|
---|
| 5 | """
|
---|
| 6 | Date, Time, and Timespan Parsing Utilities
|
---|
| 7 |
|
---|
| 8 | This module contains parsing support to create "human friendly"
|
---|
| 9 | ``datetime`` object parsing. The explicit goal of these routines is
|
---|
| 10 | to provide a multi-format date/time support not unlike that found in
|
---|
| 11 | Microsoft Excel. In most approaches, the input is very "strict" to
|
---|
| 12 | prevent errors -- however, this approach is much more liberal since we
|
---|
| 13 | are assuming the user-interface is parroting back the normalized value
|
---|
| 14 | and thus the user has immediate feedback if the data is not typed in
|
---|
| 15 | correctly.
|
---|
| 16 |
|
---|
| 17 | ``parse_date`` and ``normalize_date``
|
---|
| 18 |
|
---|
| 19 | These functions take a value like '9 jan 2007' and return either a
|
---|
| 20 | ``date`` object, or an ISO 8601 formatted date value such
|
---|
| 21 | as '2007-01-09'. There is an option to provide an Oracle database
|
---|
| 22 | style output as well, ``09 Jan 2007``, but this is not the default.
|
---|
| 23 |
|
---|
| 24 | This module always treats '/' delimiters as using US date order
|
---|
| 25 | (since the author's clients are US based), hence '1/9/2007' is
|
---|
| 26 | January 9th. Since this module treats the '-' as following
|
---|
| 27 | European order this supports both modes of data-entry; together
|
---|
| 28 | with immediate parroting back the result to the screen, the author
|
---|
| 29 | has found this approach to work well in practice.
|
---|
| 30 |
|
---|
| 31 | ``parse_time`` and ``normalize_time``
|
---|
| 32 |
|
---|
| 33 | These functions take a value like '1 pm' and return either a
|
---|
| 34 | ``time`` object, or an ISO 8601 formatted 24h clock time
|
---|
| 35 | such as '13:00'. There is an option to provide for US style time
|
---|
| 36 | values, '1:00 PM', however this is not the default.
|
---|
| 37 |
|
---|
| 38 | ``parse_datetime`` and ``normalize_datetime``
|
---|
| 39 |
|
---|
| 40 | These functions take a value like '9 jan 2007 at 1 pm' and return
|
---|
| 41 | either a ``datetime`` object, or an ISO 8601 formatted
|
---|
| 42 | return (without the T) such as '2007-01-09 13:00'. There is an
|
---|
| 43 | option to provide for Oracle / US style, '09 JAN 2007 @ 1:00 PM',
|
---|
| 44 | however this is not the default.
|
---|
| 45 |
|
---|
| 46 | ``parse_timedelta`` and ``normalize_timedelta``
|
---|
| 47 |
|
---|
| 48 | These functions take a value like '1h 15m' and return either a
|
---|
| 49 | ``timedelta`` object, or a 2-decimal fixed-point
|
---|
| 50 | numerical value in hours, such as '1.25'. The rationale is to
|
---|
| 51 | support meeting or time-billing lengths, not to be an accurate
|
---|
| 52 | representation in milliseconds. As such not all valid
|
---|
| 53 | ``timedelta`` values will have a normalized representation.
|
---|
| 54 |
|
---|
| 55 | """
|
---|
| 56 | from datetime import timedelta, time, date
|
---|
| 57 | from time import localtime
|
---|
| 58 | import string
|
---|
| 59 |
|
---|
# Names exported by ``from <module> import *``.
__all__ = [
    'parse_timedelta',
    'normalize_timedelta',
    'parse_time',
    'normalize_time',
    'parse_date',
    'normalize_date',
]
|
---|
| 63 |
|
---|
def _number(val):
    """
    convert a string holding a base-10 integer to an ``int``, or None

    Anything else (``None``, an empty string, text such as 'jun')
    gives ``None`` instead of raising, so a caller can probe a token
    and fall back to treating it as a word.
    """
    try:
        # ``int(val, 10)`` mirrors the former ``string.atoi(val)``:
        # base 10 only, and a non-string argument is an error.
        return int(val, 10)
    except (TypeError, ValueError):
        return None
|
---|
| 69 |
|
---|
| 70 | #
|
---|
| 71 | # timedelta
|
---|
| 72 | #
|
---|
def parse_timedelta(val):
    """
    returns a ``timedelta`` object, or None

    Accepted forms (case-insensitive):

    * a decimal number of hours: '1.25' is 1 hour 15 minutes
    * hours and/or minutes written with unit words or a colon:
      '1h 15m', '1:15', '2 hours', '45 minutes'
    * a bare integer, which is taken as hours: '2'

    An empty value gives ``None``.  Text that cannot be read as a
    number raises ``ValueError``; a unit given without any number
    raises ``IndexError``.
    """
    if not val:
        return None
    val = val.lower()
    if "." in val:
        # fractional hours: the fraction is converted to minutes
        val = float(val)
        return timedelta(hours=int(val), minutes=60*(val % 1.0))
    has_hours = ("h" in val or ":" in val)
    has_minutes = ("m" in val or ":" in val)
    # blank out each of these characters (the letters of 'minutes' and
    # 'hour', plus ':' and parentheses) so only the numbers remain
    for noise in "minu:teshour()":
        val = val.replace(noise, ' ')
    tokens = val.split()
    hr = 0
    mi = 0
    # reversed so that pop() hands the numbers back left to right
    tokens.reverse()
    if has_hours:
        hr = int(tokens.pop())
    if has_minutes:
        mi = int(tokens.pop())
    if tokens and not hr:
        # a number without any unit is a count of hours
        hr = int(tokens.pop())
    return timedelta(hours=hr, minutes=mi)
|
---|
| 100 |
|
---|
def normalize_timedelta(val):
    """
    produces a normalized string value of the timedelta

    This function returns a normalized time span value consisting of
    the number of hours in fractional form. For example '1h 15min' is
    formatted as '1.25'.

    ``val`` may be a ``timedelta`` or a string understood by
    ``parse_timedelta``.  An empty value or a zero-length span gives
    ''.  Whole days are counted into the hours ('30.50' for one day,
    six and a half hours), left-over seconds are dropped, and the
    hundredths are truncated rather than rounded.  A negative span is
    written with a leading '-'.
    """
    if isinstance(val, str):
        val = parse_timedelta(val)
    if not val:
        return ''
    # ``seconds`` alone wraps around every 24 hours, so the days have
    # to be added in to get the real length of the span
    total = val.days * 86400 + val.seconds
    minutes = abs(total) // 60
    hr, mn = divmod(minutes, 60)
    sign = ''
    if total < 0 and minutes:
        sign = '-'
    return "%s%d.%02d" % (sign, hr, mn * 100 // 60)
|
---|
| 116 |
|
---|
| 117 | #
|
---|
| 118 | # time
|
---|
| 119 | #
|
---|
def parse_time(val):
    """
    returns a ``time`` object, or None

    Accepted forms (case-insensitive):

    * hour and minute separated by ':' or '.', optionally followed by
      an am/pm marker: '13:45', '9:30 a.m.'
    * an hour of one or two digits: '1 pm', '12 am'; without an am/pm
      marker an hour below 7 is taken to be in the afternoon
    * three or four digits read as hour then two-digit minute: '0930'
    * the words 'now' (current local time) and 'noon'

    An empty value, or one holding no number or word at all, gives
    ``None``.  Text that cannot be read as numbers raises
    ``ValueError``.
    """
    if not val:
        return None
    hr = mi = 0
    val = val.lower()
    amflag = 'a' in val  # set if AM is found
    pmflag = 'p' in val  # set if PM is found
    # drop the separators and the letters of 'am'/'pm'
    for noise in ":amp.":
        val = val.replace(noise, ' ')
    tokens = val.split()
    if not tokens:
        # only blanks and am/pm letters were given: same as no input
        return None
    if len(tokens) > 1:
        hr = int(tokens[0])
        mi = int(tokens[1])
    else:
        tok = tokens[0]
        if 'now' == tok:
            tm = localtime()
            hr = tm[3]
            mi = tm[4]
        elif 'noon' == tok:
            hr = 12
        elif len(tok) < 3:
            hr = int(tok)
            if not amflag and not pmflag and hr < 7:
                # a bare small hour is assumed to be in the afternoon
                hr += 12
        elif len(tok) < 5:
            # HMM or HHMM
            hr = int(tok[:-2])
            mi = int(tok[-2:])
        else:
            # longer tokens: only the first character is used
            hr = int(tok[:1])
    if amflag and hr >= 12:
        hr = hr - 12
    if pmflag and hr < 12:
        hr = hr + 12
    return time(hr, mi)
|
---|
| 157 |
|
---|
def normalize_time(value, ampm=False):
    """
    produces a normalized string value of the time

    ``value`` may be a ``time`` object or a string understood by
    ``parse_time``.  By default the result is a 24-hour clock value
    such as '13:00'; when ``ampm`` is true it is a 12-hour clock value
    such as '01:00 PM'.  An empty value gives ''.
    """
    # midnight is a real time even on interpreters where ``time(0, 0)``
    # tests false, so only other empty values return early
    if not value and not isinstance(value, time):
        return ''
    if isinstance(value, str):
        value = parse_time(value)
        if value is None:
            # guard in case parsing produced no time at all
            return ''
    if not ampm:
        return "%02d:%02d" % (value.hour, value.minute)
    hr = value.hour
    am = "AM"
    if hr < 1 or hr > 23:
        # hour 0 is shown as 12 AM
        hr = 12
    elif hr >= 12:
        am = "PM"
        if hr > 12:
            hr = hr - 12
    return "%02d:%02d %s" % (hr, value.minute, am)
|
---|
| 174 |
|
---|
| 175 | #
|
---|
| 176 | # Date Processing
|
---|
| 177 | #
|
---|
| 178 |
|
---|
# One-day step, used to walk forward to a named weekday.
_one_day = timedelta(days=1)

# Lower-case three-letter month abbreviation -> month number.
_str2num = dict(
    (abbrev, number) for number, abbrev in enumerate(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'), 1))
|
---|
| 183 |
|
---|
def _month(val):
    """
    return the month number for text containing a month abbreviation

    The first entry of ``_str2num`` whose key occurs anywhere inside
    ``val`` wins, so longer spellings that contain an abbreviation
    also match.  Raises ``TypeError`` when no abbreviation is found.
    """
    hits = (number for abbrev, number in _str2num.items() if abbrev in val)
    month = next(hits, None)
    if month is None:
        raise TypeError("unknown month '%s'" % val)
    return month
|
---|
| 189 |
|
---|
# Month number -> number of days (February listed with 28).
_days_in_month = dict(zip(
    range(1, 13),
    (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)))

# Month number -> capitalised three-letter abbreviation for output.
_num2str = dict(enumerate(
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1))

# Weekday abbreviations, Monday first; position + 1 is the ISO weekday.
_wkdy = (
    "mon",
    "tue",
    "wed",
    "thu",
    "fri",
    "sat",
    "sun",
)
|
---|
| 197 |
|
---|
def parse_date(val):
    """
    returns a ``date`` object, or None

    Accepted forms (case-insensitive):

    * strict 'YYYY-MM-DD', optionally followed by other text
    * 'now' / 'today', or a weekday abbreviation such as 'mon' (the
      next such day, today included)
    * any of the above followed by day offsets: 'now+3', 'fri -7'
    * three parts: '1999 6 23', '1999 Jun 23', '6 23 1999',
      '23 Jun 1999', 'Jun 23, 2000'
    * two parts: '1999 6', '1999 Jun', '6 1999', '6 23', '23 Jun',
      'Jun 2001', 'Jun 23'
    * one part: '20010623', '2004', '1202' (month and day), a month
      name, or a day of the month

    '/', '-', ',' and '*' all act as separators.  Parts that are not
    given default to the current local year, month or day.  An empty
    value gives ``None``.

    Raises ``TypeError`` for an unknown month name or when a
    four-digit year is required but missing; ``date`` itself may raise
    ``ValueError`` for values that are out of range.
    """
    if not val:
        return None
    val = val.lower()
    now = None

    # optimized check for YYYY-MM-DD
    strict = val.split("-")
    if len(strict) == 3:
        (y, m, d) = strict
        # drop a trailing '+N' offset or any text after a blank
        if "+" in d:
            d = d.split("+")[0]
        if " " in d:
            d = d.split(" ")[0]
        try:
            now = date(int(y), int(m), int(d))
            # keep whatever follows the ten date characters behind a
            # three-letter placeholder so the offset code below, which
            # skips three characters, can read it
            val = "xxx" + val[10:]
        except ValueError:
            pass

    # allow for 'now', 'mon', 'tue', etc.
    # NOTE(review): a leading weekday name always takes this path, so
    # 'mon 9 jan 2007' is read as next Monday plus offsets -- confirm
    # that this is intended
    if not now:
        chk = val[:3]
        if chk in ('now', 'tod'):
            now = date.today()
        elif chk in _wkdy:
            now = date.today()
            idx = _wkdy.index(chk) + 1
            while now.isoweekday() != idx:
                now += _one_day

    # allow dates to be modified via + or - /w number of days, so
    # that now+3 is three days from now
    if now:
        tail = val[3:].strip()
        tail = tail.replace("+", " +").replace("-", " -")
        for item in tail.split():
            try:
                days = int(item)
            except ValueError:
                # not a number of days: ignore it
                pass
            else:
                now += timedelta(days=days)
        return now

    # ok, standard parsing
    yr = mo = dy = None
    for noise in ('/', '-', ',', '*'):
        val = val.replace(noise, ' ')
    for noise in _wkdy:
        val = val.replace(noise, ' ')
    # put a blank wherever digits and non-digits touch, so that
    # '23jun1999' splits into three parts
    out = []
    last = False
    ldig = False
    for ch in val:
        if ch.isdigit():
            if last and not ldig:
                out.append(' ')
            last = ldig = True
        else:
            if ldig:
                out.append(' ')
                ldig = False
            last = True
        out.append(ch)
    val = "".join(out).split()
    # ``_number`` gives None for a word; None is checked explicitly
    # below because it cannot be ordered against an int
    if 3 == len(val):
        a = _number(val[0])
        b = _number(val[1])
        c = _number(val[2])
        if a is not None and len(val[0]) == 4:
            # four digits first: a year (a four-letter month name
            # such as 'june' must not be taken for one)
            yr = a
            if b:  # 1999 6 23
                mo = b
                dy = c
            else:  # 1999 Jun 23
                mo = _month(val[1])
                dy = c
        elif a is not None and a > 0:
            yr = c
            if len(val[2]) < 4:
                raise TypeError("four digit year required")
            if b:  # 6 23 1999
                dy = b
                mo = a
            else:  # 23 Jun 1999
                dy = a
                mo = _month(val[1])
        else:  # Jun 23, 2000
            dy = b
            yr = c
            if len(val[2]) < 4:
                raise TypeError("four digit year required")
            mo = _month(val[0])
    elif 2 == len(val):
        a = _number(val[0])
        b = _number(val[1])
        if a is not None and a > 999:
            yr = a
            dy = 1
            if b is not None and b > 0:  # 1999 6
                mo = b
            else:  # 1999 Jun
                mo = _month(val[1])
        elif a is not None and a > 0:
            if b is not None and b > 999:  # 6 1999
                mo = a
                yr = b
                dy = 1
            elif b is not None and b > 0:  # 6 23
                mo = a
                dy = b
            else:  # 23 Jun
                dy = a
                mo = _month(val[1])
        else:
            if b is not None and b > 999:  # Jun 2001
                yr = b
                dy = 1
            else:  # Jun 23
                dy = b
            mo = _month(val[0])
    elif 1 == len(val):
        val = val[0]
        if not val.isdigit():
            mo = _month(val)
            if mo is not None:
                dy = 1
        else:
            v = _number(val)
            val = str(v)
            if 8 == len(val):  # 20010623
                yr = _number(val[:4])
                mo = _number(val[4:6])
                dy = _number(val[6:])
            elif len(val) in (3, 4):
                if v is not None and v > 1300:  # 2004
                    yr = v
                    mo = 1
                    dy = 1
                else:  # 1202
                    mo = _number(val[:-2])
                    dy = _number(val[-2:])
            elif v is not None and v < 32:
                dy = v
            else:
                raise TypeError("four digit year required")
    # whatever was not given comes from today's local date
    tm = localtime()
    if mo is None:
        mo = tm[1]
    if dy is None:
        dy = tm[2]
    if yr is None:
        yr = tm[0]
    return date(yr, mo, dy)
|
---|
| 353 |
|
---|
def normalize_date(val, iso8601=True):
    """
    produces a normalized string value of the date

    ``val`` may be a ``date`` object or a string understood by
    ``parse_date``.  By default the result is an ISO 8601 date such
    as '2007-01-09'; with ``iso8601`` false it is day, month
    abbreviation and year, such as '09 Jan 2007'.  The year is always
    written with four digits.  An empty value gives ''.
    """
    if not val:
        return ''
    if isinstance(val, str):
        val = parse_date(val)
    if iso8601:
        # %04d, not %4d: years below 1000 must be zero-padded
        return "%04d-%02d-%02d" % (val.year, val.month, val.day)
    return "%02d %s %04d" % (val.day, _num2str[val.month], val.year)
|
---|