Form Processing
Extract field-value pairs from tax forms, applications, surveys, and other structured documents. Parse identifies form fields, checkboxes, and their associated values automatically.
Form Schema
For forms with known fields, define a specific schema. For unknown forms, use a generic field-value pair approach:
Specific Form Schema (e.g., W-9 Tax Form)
{
  "name": "w9_form",
  "description": "Extract data from W-9 tax forms",
  "fields": [
    { "name": "name", "type": "string", "description": "Name as shown on income tax return" },
    { "name": "business_name", "type": "string", "description": "Business name, if different" },
    { "name": "tax_classification", "type": "string", "description": "Federal tax classification" },
    { "name": "address", "type": "string", "description": "Street address" },
    { "name": "city_state_zip", "type": "string", "description": "City, state, and ZIP code" },
    { "name": "ssn", "type": "string", "description": "Social Security Number" },
    { "name": "ein", "type": "string", "description": "Employer Identification Number" },
    { "name": "signature_date", "type": "date", "description": "Date of signature" }
  ]
}
Generic Form Schema
{
  "name": "generic_form",
  "description": "Extract all field-value pairs from any form",
  "fields": [
    { "name": "form_title", "type": "string", "description": "Title or heading of the form" },
    {
      "name": "form_fields",
      "type": "array",
      "description": "All fields found in the form",
      "items": {
        "type": "object",
        "fields": [
          { "name": "field_name", "type": "string" },
          { "name": "field_value", "type": "string" },
          { "name": "field_type", "type": "string" }
        ]
      }
    },
    {
      "name": "checkboxes",
      "type": "array",
      "description": "All checkboxes found in the form",
      "items": {
        "type": "object",
        "fields": [
          { "name": "label", "type": "string" },
          { "name": "checked", "type": "boolean" }
        ]
      }
    },
    {
      "name": "signatures",
      "type": "array",
      "description": "Signature fields detected",
      "items": {
        "type": "object",
        "fields": [
          { "name": "label", "type": "string" },
          { "name": "signed", "type": "boolean" },
          { "name": "date", "type": "date" }
        ]
      }
    }
  ]
}
Code Examples
cURL
# Upload the form as multipart/form-data together with the ID of the schema to apply.
curl -X POST https://api-parse.conversiontools.io/v1/extract \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -F "file=@application-form.pdf" \
  -F "schema_id=YOUR_FORM_SCHEMA_ID"
Python
"""Upload a form to the extract endpoint and print its fields and checkboxes."""
import requests

API_KEY = "YOUR_API_KEY"

headers = {"Authorization": f"Bearer {API_KEY}"}

# Keep the file open only for the duration of the upload.
with open("application-form.pdf", "rb") as f:
    response = requests.post(
        "https://api-parse.conversiontools.io/v1/extract",
        headers=headers,
        files={"file": f},
        data={"schema_id": "YOUR_FORM_SCHEMA_ID"},
    )

# Fail loudly on 4xx/5xx instead of hitting a KeyError on an error payload below.
response.raise_for_status()
data = response.json()

print(f"Form: {data['data']['form_title']}")

for field in data["data"]["form_fields"]:
    print(f" {field['field_name']}: {field['field_value']}")

for checkbox in data["data"]["checkboxes"]:
    status = "checked" if checkbox["checked"] else "unchecked"
    print(f" [{status}] {checkbox['label']}")
Node.js
// Upload a form to the extract endpoint and print its fields and checkboxes.
// Relies on the global fetch, FormData and Blob (available in Node.js 18+).
const { readFile } = require("node:fs/promises");

const API_KEY = "YOUR_API_KEY";
const API_URL = "https://api-parse.conversiontools.io/v1/extract";
const FILE_NAME = "application-form.pdf";

/**
 * Sends the PDF and schema ID as multipart/form-data, then logs the result.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the file cannot be read or the API responds with a non-2xx status.
 */
async function main() {
  // The built-in FormData accepts only strings and Blob/File values; a read
  // stream would be stringified instead of uploaded, so wrap the bytes in a Blob.
  const fileBytes = await readFile(FILE_NAME);
  const fileBlob = new Blob([fileBytes], { type: "application/pdf" });

  const form = new FormData();
  form.append("file", fileBlob, FILE_NAME);
  form.append("schema_id", "YOUR_FORM_SCHEMA_ID");

  // Do not set Content-Type manually: fetch adds the multipart boundary itself.
  const response = await fetch(API_URL, {
    method: "POST",
    headers: { Authorization: `Bearer ${API_KEY}` },
    body: form,
  });

  if (!response.ok) {
    const details = await response.text();
    throw new Error(`Extraction failed with HTTP ${response.status}: ${details}`);
  }

  const { data } = await response.json();

  console.log(`Form: ${data.form_title}`);

  for (const field of data.form_fields) {
    console.log(` ${field.field_name}: ${field.field_value}`);
  }

  for (const cb of data.checkboxes) {
    console.log(` [${cb.checked ? "x" : " "}] ${cb.label}`);
  }
}

// Wrapping in main() avoids top-level await, which is not allowed in CommonJS.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sample Output
{
  "success": true,
  "id": "ext_frm_001",
  "data": {
    "form_title": "Employee Onboarding Form",
    "form_fields": [
      { "field_name": "Full Name", "field_value": "Jane Smith", "field_type": "text" },
      { "field_name": "Email", "field_value": "jane.smith@email.com", "field_type": "email" },
      { "field_name": "Phone", "field_value": "(555) 123-4567", "field_type": "phone" },
      { "field_name": "Start Date", "field_value": "2024-02-01", "field_type": "date" },
      { "field_name": "Department", "field_value": "Engineering", "field_type": "text" },
      { "field_name": "Position", "field_value": "Senior Developer", "field_type": "text" },
      { "field_name": "Emergency Contact", "field_value": "John Smith - (555) 987-6543", "field_type": "text" }
    ],
    "checkboxes": [
      { "label": "I agree to the terms and conditions", "checked": true },
      { "label": "I have read the employee handbook", "checked": true },
      { "label": "I opt in to the dental plan", "checked": false },
      { "label": "I opt in to the vision plan", "checked": true }
    ],
    "signatures": [
      { "label": "Employee Signature", "signed": true, "date": "2024-01-25" }
    ]
  },
  "pages_used": 2,
  "confidence": 0.92
}
Tips for Form Processing
- Use a specific schema when you know the form layout for higher accuracy
- Checkboxes are detected automatically — both filled and empty states are captured
- Signature detection identifies whether a field has been signed, not the signature content
- Multi-page forms are supported — each page is processed and data is merged into a single result