# Strict overlay format "HH:MM:SS DD-MM-YYYY": whitespace is required between
# time and date, and the date is dash-separated.
OCR_DT_RE = re.compile(
    r"(?P<h>\d{2}):(?P<m>\d{2}):(?P<s>\d{2})\s+(?P<d>\d{2})-(?P<mo>\d{2})-(?P<y>\d{4})"
)
# Lenient variant: whitespace between time and date is optional and the date
# separator may be "-" or ".".
OCR_DT_RE_FLEX = re.compile(
    r"(?P<h>\d{2}):(?P<m>\d{2}):(?P<s>\d{2})\s*(?P<d>\d{2})[-.](?P<mo>\d{2})[-.](?P<y>\d{4})"
)
# Time-only and date-only patterns, compiled once instead of on every call.
_OCR_TIME_RE = re.compile(r"(?P<h>\d{2}):(?P<m>\d{2}):(?P<s>\d{2})")
_OCR_DATE_RE = re.compile(r"(?P<d>\d{2})[-.](?P<mo>\d{2})[-.](?P<y>\d{4})")
# Length of a separator-free timestamp: HHMMSSDDMMYYYY.
_OCR_DIGITS_LEN = 14


def _build_ocr_datetime(h, m, s, d, mo, y) -> Optional[datetime]:
    """Return a naive datetime for the given fields, or None if they are invalid.

    Each field may be a digit string or an int. Out-of-range values (hour 25,
    month 13, day 31 in a 30-day month, year 0, ...) yield None instead of
    raising, so callers can simply move on to the next candidate.
    """
    try:
        return datetime(int(y), int(mo), int(d), int(h), int(m), int(s))
    except ValueError:
        return None


def _parse_ocr_datetime(*texts: str) -> datetime:
    """Extract a naive datetime from one or more OCR result strings.

    Three strategies are tried in order of decreasing strictness; each one is
    run over every text before the next strategy starts, and the first
    candidate that forms a valid calendar date/time wins:

    1. A full "HH:MM:SS DD-MM-YYYY" match (``OCR_DT_RE``), then the lenient
       variant (``OCR_DT_RE_FLEX``).
    2. The first "HH:MM:SS" and the first "DD-MM-YYYY" / "DD.MM.YYYY" found
       independently anywhere in the same text.
    3. All non-digit characters are dropped and every 14-digit window is
       tried as HHMMSSDDMMYYYY.

    Args:
        *texts: OCR outputs to inspect, in priority order.

    Returns:
        A timezone-naive ``datetime``.

    Raises:
        ValueError: If no strategy yields a valid datetime. The message
            includes the first two texts to aid debugging.
    """
    # 1) Whole timestamp in a single regex match.
    for text in texts:
        for rx in (OCR_DT_RE, OCR_DT_RE_FLEX):
            match = rx.search(text)
            if match:
                dt = _build_ocr_datetime(*match.group("h", "m", "s", "d", "mo", "y"))
                if dt is not None:
                    return dt

    # 2) Time and date located separately within the same text.
    for text in texts:
        time_match = _OCR_TIME_RE.search(text)
        date_match = _OCR_DATE_RE.search(text)
        if time_match and date_match:
            dt = _build_ocr_datetime(
                *time_match.group("h", "m", "s"),
                *date_match.group("d", "mo", "y"),
            )
            if dt is not None:
                return dt

    # 3) Digits-only fallback: slide a 14-character window over the digits.
    #    range() of a non-positive number is empty, so short inputs are skipped.
    for text in texts:
        digits = re.sub(r"\D", "", text)
        for start in range(len(digits) - _OCR_DIGITS_LEN + 1):
            chunk = digits[start:start + _OCR_DIGITS_LEN]
            dt = _build_ocr_datetime(
                chunk[0:2], chunk[2:4], chunk[4:6],
                chunk[6:8], chunk[8:10], chunk[10:14],
            )
            if dt is not None:
                return dt

    ocr1 = texts[0] if texts else ""
    ocr2 = texts[1] if len(texts) > 1 else ""
    raise ValueError(f"OCR timestamp not found. OCR1='{ocr1}' OCR2='{ocr2}'")
+ dls_raw = get("DateTimeLastSettings") + dls = None + if dls_raw: + try: + dls = datetime.strptime(dls_raw, "%H:%M:%S %d.%m.%Y").replace(tzinfo=timezone.utc) + except Exception: + dls = None cur.execute(""" INSERT INTO remote_cam.metadata( metadata_uuid, import_ts, "ImageCreationDate", "SizeInByte", "ImageFormat", "ImageNight", @@ -366,7 +405,7 @@ def insert_metadata(cur, uuid: str, meta: dict): servertime = EXCLUDED.servertime; """, ( uuid, icd, get("SizeInByte"), get("ImageFormat"), get("ImageNight"), - get("DateTimeLastSettings"), get("BatteryStatus"), get("FirmwareVersion"), + dls, get("BatteryStatus"), get("FirmwareVersion"), get("SignalStrength"), get("Temperature"), get("CoordinateSwitch"), lat_f, lon_f, get("WorkPeriod"), get("WorkStart"), get("WorkEnd"), get("ThumbnailSize"),