J'ai une colonne d'un DataFrame pandas appelée 'Date' dont les entrées sont au format '%Y%m%d%H%M%H%M' (le premier %H%M est l'heure locale et le second %H%M est l'heure UTC). Objectif : convertir cette colonne pandas en datetime.
Je souhaite convertir ces entrées au format %Y-%m-%d_%H%M (en ne conservant que le %H%M UTC).
obs_df = pd.read_csv(obs, names= ['WBAN','Date','Extinc Coeff', 'D/N', 'Dir 2min av wind',
'Spd 2min av wind(kts)', 'Dir max 5min av wind','Spd_max_5min_av_wind(kts)',
'Constant','Runway vis range'], usecols= ['WBAN', 'Date',
'Spd_max_5min_av_wind(kts)'],
na_filter=False)
Voici à quoi ressemble le DataFrame :
Date WBAN Spd_max_5min_av_wind(kts)
0 2014100108481348 KACK 19
1 2014100108491349 KACK 18
2 2014100108501350 KACK 20
3 2014100108511351 KACK 19
4 2014100108521352 KACK 17
Et voici ce que j'ai essayé:
import datetime as dt
obs_df['Date'] = obs_df['Date'].apply(lambda x: dt.datetime.strptime(x, '%Y%m%d%H%M%H%M'))
et
obs_df['Date'] = pd.to_datetime(obs_df['Date'], format = '%Y%m%d%H%M%H%M')
Voici l'erreur que j'ai obtenue pour les deux tentatives :
---------------------------------------------------------------------------
error Traceback (most recent call last)
<ipython-input-200-27e83cc1348d> in <module>()
----> 1 obs_df['Date'] = obs_df['Date'].apply(lambda x: dt.datetime.strptime(x, '%Y%m%d%H%M%H%M'))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
2353 else:
2354 values = self.asobject
-> 2355 mapped = lib.map_infer(values, f, convert=convert_dtype)
2356
2357 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer (pandas/_libs/lib.c:66645)()
<ipython-input-200-27e83cc1348d> in <lambda>(x)
----> 1 obs_df['Date'] = obs_df['Date'].apply(lambda x: dt.datetime.strptime(x, '%Y%m%d%H%M%H%M'))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/_strptime.py in _strptime_datetime(cls, data_string, format)
498 """Return a class cls instance based on the input string and the
499 format string."""
--> 500 tt, fraction = _strptime(data_string, format)
501 tzname, gmtoff = tt[-2:]
502 args = tt[:6] + (fraction,)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/_strptime.py in _strptime(data_string, format)
318 if not format_regex:
319 try:
--> 320 format_regex = _TimeRE_cache.compile(format)
321 # KeyError raised when a bad format is found; can be specified as
322 # \\, in which case it was a stray % but with a space after it
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/_strptime.py in compile(self, format)
266 def compile(self, format):
267 """Return a compiled re object for the format string."""
--> 268 return re_compile(self.pattern(format), IGNORECASE)
269
270 _cache_lock = _thread_allocate_lock()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/re.py in compile(pattern, flags)
222 def compile(pattern, flags=0):
223 "Compile a regular expression pattern, returning a pattern object."
--> 224 return _compile(pattern, flags)
225
226 def purge():
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/re.py in _compile(pattern, flags)
291 if not sre_compile.isstring(pattern):
292 raise TypeError("first argument must be string or compiled pattern")
--> 293 p = sre_compile.compile(pattern, flags)
294 if not (flags & DEBUG):
295 if len(_cache) >= _MAXCACHE:
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_compile.py in compile(p, flags)
534 if isstring(p):
535 pattern = p
--> 536 p = sre_parse.parse(p, flags)
537 else:
538 pattern = None
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_parse.py in parse(str, flags, pattern)
827 pattern.str = str
828
--> 829 p = _parse_sub(source, pattern, 0)
830 p.pattern.flags = fix_flags(str, p.pattern.flags)
831
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_parse.py in _parse_sub(source, state, nested)
435 start = source.tell()
436 while True:
--> 437 itemsappend(_parse(source, state))
438 if not sourcematch("|"):
439 break
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_parse.py in _parse(source, state)
772 group = state.opengroup(name)
773 except error as err:
--> 774 raise source.error(err.msg, len(name) + 1) from None
775 if condgroup:
776 p = _parse_sub_cond(source, state, condgroup)
error: redefinition of group name 'H' as group 6; was group 4 at position 127
J'ai également essayé ceci :
obs_df['Date'] = pd.to_datetime(obs_df['Date'], errors='raise', yearfirst=True, utc=True, box=False, format="%Y-%m-%d_%H%M", exact=False,
infer_datetime_format=True)
mais j'ai obtenu cette erreur :
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:44294)()
pandas/_libs/src/datetime.pxd in datetime._string_to_dts (pandas/_libs/tslib.c:98425)()
ValueError: Error parsing datetime string "2014100108481348" at position 8
During handling of the above exception, another exception occurred:
OverflowError Traceback (most recent call last)
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:44703)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.parse_datetime_string (pandas/_libs/tslib.c:35351)()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(timestr, parserinfo, **kwargs)
1181 else:
-> 1182 return DEFAULTPARSER.parse(timestr, **kwargs)
1183
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
577
--> 578 if cday > monthrange(cyear, cmonth)[1]:
579 repl['day'] = monthrange(cyear, cmonth)[1]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in monthrange(year, month)
120 raise IllegalMonthError(month)
--> 121 day1 = weekday(year, month, 1)
122 ndays = mdays[month] + (month == February and isleap(year))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in weekday(year, month, day)
112 day (1-31)."""
--> 113 return datetime.date(year, month, day).weekday()
114
OverflowError: signed integer is greater than maximum
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:44803)()
TypeError: invalid string coercion to datetime
During handling of the above exception, another exception occurred:
OverflowError Traceback (most recent call last)
<ipython-input-205-5e9ed01bf0eb> in <module>()
1 obs_df['Date'] = pd.to_datetime(obs_df['Date'], errors='raise', yearfirst=True, utc=True, box=False, format="%Y-%m-%d_%H%M", exact=False,
----> 2 infer_datetime_format=True)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
507 elif isinstance(arg, ABCSeries):
508 from pandas import Series
--> 509 values = _convert_listlike(arg._values, False, format)
510 result = Series(values, index=arg.index, name=arg.name)
511 elif isinstance(arg, (ABCDataFrame, MutableMapping)):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
433 dayfirst=dayfirst,
434 yearfirst=yearfirst,
--> 435 require_iso8601=require_iso8601
436 )
437
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:46617)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:46233)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:46122)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.parse_datetime_string (pandas/_libs/tslib.c:35351)()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(timestr, parserinfo, **kwargs)
1180 return parser(parserinfo).parse(timestr, **kwargs)
1181 else:
-> 1182 return DEFAULTPARSER.parse(timestr, **kwargs)
1183
1184
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
576 cday = default.day if res.day is None else res.day
577
--> 578 if cday > monthrange(cyear, cmonth)[1]:
579 repl['day'] = monthrange(cyear, cmonth)[1]
580
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in monthrange(year, month)
119 if not 1 <= month <= 12:
120 raise IllegalMonthError(month)
--> 121 day1 = weekday(year, month, 1)
122 ndays = mdays[month] + (month == February and isleap(year))
123 return day1, ndays
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in weekday(year, month, day)
111 """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12),
112 day (1-31)."""
--> 113 return datetime.date(year, month, day).weekday()
114
115
OverflowError: signed integer is greater than maximum
Comme vous pouvez le constater, je n'ai pas encore essayé d'ajouter les traits d'union et le tiret bas, car cela nécessite probablement une étape supplémentaire, mais toute indication à ce sujet serait grandement appréciée.