# OpenAPI Specification version this document is written against.
openapi: 3.1.0

# API metadata. `version` is quoted so YAML loads it as the string "2"
# rather than as an integer.
info:
  title: Handwriting OCR API
  version: '2'
  description: |
    Earlier version of the Handwriting OCR API. New integrations
    should use the [latest version](/api/docs).

# Base URL of the v2 API; the entries under `paths` are relative to it.
servers:
  - url: https://api.handwritingocr.com/v2

# Tags group operations; every operation in this file uses `Documents`.
tags:
  - name: Documents
    description: Upload, list, retrieve, and delete documents.

# Document-level security requirement: operations use the BearerAuth
# scheme unless they declare their own `security`.
security:
  - BearerAuth: []

components:
  securitySchemes:
    # HTTP bearer-token scheme; this is the scheme named by the
    # document-level `security` requirement.
    BearerAuth: { type: http, scheme: bearer }

  # Reusable parameters, pulled into operations with `$ref`.
  parameters:
    # Path parameter that fills the `{id}` segment of `/documents/{id}`.
    DocumentId:
      name: id
      in: path
      required: true
      description: The document's unique identifier.
      # The example lives on the Parameter Object: the schema-level
      # `example` keyword is deprecated in OpenAPI 3.1.
      example: abcde12345
      schema:
        type: string

paths:
  /documents:
    # Upload a file as multipart form data and queue it for processing.
    post:
      operationId: upload-document
      tags: [Documents]
      summary: Upload document
      description: |
        Upload a new document for processing. Supports PDF files and
        various image formats. The API checks the page count of the
        submitted document against your credit balance before queueing
        for processing.
      # Vendor extension carrying a ready-to-run request sample.
      x-codeSamples:
        - lang: bash
          source: |
            curl -X POST "https://api.handwritingocr.com/v2/documents" \
                 -H "Authorization: Bearer your-api-token" \
                 -H "Accept: application/json" \
                 -F "file=@/path/to/document.pdf" \
                 -F "action=transcribe" \
                 -F "delete_after=604800"
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required: [action, file]
              properties:
                action:
                  type: string
                  enum: [transcribe, tables, extractor]
                  description: |
                    What to do with the document. Options are
                    `transcribe`, `tables`, `extractor`.
                file:
                  type: string
                  format: binary
                  description: |
                    The document to process. Valid file types are PDF,
                    JPG, PNG, TIFF, HEIC, GIF. Maximum file size is 20MB.
                delete_after:
                  type: integer
                  # Bounds in seconds: 300 = 5 minutes, 1209600 = 14 days.
                  minimum: 300
                  maximum: 1209600
                  description: |
                    Seconds until auto-deletion. Overrides the
                    auto-deletion period set in your user settings.
                extractor_id:
                  type: string
                  # Encodes the "10-character alphanumeric" rule from the
                  # description so validators can enforce it.
                  minLength: 10
                  maxLength: 10
                  pattern: '^[A-Za-z0-9]{10}$'
                  description: |
                    A 10-character alphanumeric string e.g. `Ks08XVPyMd`.
                    Required when `action` is `extractor`.
              # Conditional requirement: `extractor_id` is mandatory only
              # for the `extractor` action. `required: [action]` inside
              # `if` keeps the condition from matching when `action` is
              # absent altogether.
              if:
                required: [action]
                properties:
                  action:
                    const: extractor
              then:
                required: [extractor_id]
      responses:
        '201':
          description: Document created and queued for processing.
          content:
            application/json:
              example: { id: abc123, status: queued }
        '400': { description: Bad Request — missing required fields. }
        '401': { description: Unauthorized — invalid or missing API token. }
        '403': { description: Forbidden — insufficient page credits. }
        '415': { description: Unsupported Media Type. }
        '422': { description: Validation Error — invalid parameters. }
        '429': { description: Too Many Requests — rate limited. }
        '500': { description: Server Error — file storage or processing failed. }
    # Paginated listing of the caller's documents, newest first.
    get:
      operationId: list-documents
      tags: [Documents]
      summary: List documents
      description: |
        Retrieves a paginated list of documents belonging to the
        authenticated user. Documents are sorted by creation date in
        descending order.
      parameters:
        - name: per_page
          in: query
          required: false
          schema:
            type: integer
            default: 50
            # Lower bound added so zero and negative page sizes fail
            # schema validation.
            minimum: 1
            maximum: 200
          description: Number of items per page. Default 50, maximum 200.
        - name: page
          in: query
          required: false
          schema:
            type: integer
            default: 1
            # Pages are numbered from 1 (see `from: 1` and the null
            # `prev_page_url` in the example below).
            minimum: 1
          description: The page number for pagination. Defaults to 1.
      responses:
        '200':
          description: Returns a paginated list of documents.
          content:
            application/json:
              # Example envelope: the documents sit under `data`, and the
              # remaining keys are pagination metadata and links.
              example:
                current_page: 1
                data:
                  - document_id: xyz789
                    status: processed
                    created_at: '2024-03-15T14:30:00Z'
                    updated_at: '2024-03-15T14:35:00Z'
                    automatically_deleted_at: '2024-03-22T14:30:00Z'
                    page_count: 3
                    original_file_name: business_report.pdf
                    action: transcribe
                  - document_id: abc123
                    status: queued
                    created_at: '2024-03-15T14:25:00Z'
                    updated_at: '2024-03-15T14:25:00Z'
                    automatically_deleted_at: '2024-03-22T14:25:00Z'
                    page_count: 1
                    original_file_name: receipt.jpg
                    action: tables
                first_page_url: https://api.handwritingocr.com/v2/documents?page=1
                from: 1
                last_page: 5
                last_page_url: https://api.handwritingocr.com/v2/documents?page=5
                next_page_url: https://api.handwritingocr.com/v2/documents?page=2
                path: https://api.handwritingocr.com/v2/documents
                per_page: 50
                prev_page_url: null
                to: 50
                total: 243
        '401': { description: Unauthorized — invalid or missing API token. }
        '422': { description: Validation Error — invalid parameters. }

  /documents/{id}:
    # Status lookup and result download for a single document.
    get:
      operationId: download-result
      tags: [Documents]
      summary: Download result
      # Vendor extension showing the path with its optional format
      # extension.
      x-display-path: /documents/{id}[.{format}]
      description: |
        Retrieve the status of a document, or download the processed
        results. The format extension is optional — if not provided,
        returns a JSON status response. If the format extension is
        provided, downloads the processed document in that format.

        Image thumbnail URLs are provided for each page. Requests to
        download these images must be authenticated with your API token.

        ### Webhooks

        We strongly encourage using a **webhook** instead of polling
        this endpoint repeatedly. Configure a webhook in the
        [user dashboard](https://app.handwritingocr.com/settings?tab=documents).
      parameters:
        - $ref: '#/components/parameters/DocumentId'
        # NOTE(review): declared as a query parameter, yet the description
        # and `x-display-path` say the format is a path extension
        # (`/documents/{id}.{format}`). This looks like a workaround for
        # the optional path segment — confirm that generated clients are
        # not expected to send `?format=`.
        - name: format
          in: query
          required: false
          description: |
            Output format passed as a URL extension on the path.
            Varies by action: `txt`, `docx`, `xlsx`, `csv`, `json`.
          schema:
            type: string
            enum: [txt, docx, xlsx, csv, json]
      responses:
        '200':
          description: >-
            Returns the document status as JSON or, when a format
            extension is given, the processed result in that format.
          content:
            application/json:
              # Example of the JSON status response (no format extension).
              example:
                id: abc123
                status: processed
                action: transcribe
                created_at: '2024-03-15T14:30:00Z'
                updated_at: '2024-03-15T14:35:00Z'
        '202': { description: Accepted — document is still being processed. }
        '400': { description: Bad Request — invalid format for action type. }
        '401': { description: Unauthorized — invalid or missing API token. }
        '403': { description: Forbidden — no permission to access document. }
        '404': { description: Not Found — document not found. }
        '429': { description: Too Many Requests — rate limited. }
        '500': { description: Server Error — error preparing file for download. }
    # Removes one document, addressed by the shared DocumentId path
    # parameter.
    delete:
      summary: Delete document
      operationId: delete-document
      tags:
        - Documents
      description: |
        Permanently delete a document and its associated files. This
        action cannot be undone.
      parameters:
        - $ref: '#/components/parameters/DocumentId'
      responses:
        # No `content` is declared for the success response.
        '204':
          description: Document deleted.
        '401':
          description: Unauthorized — invalid or missing API token.
        '403':
          description: Forbidden — no permission to delete document.
        '404':
          description: Not Found — document not found.
        '500':
          description: Server Error — error deleting document.
