Skip to content

Commit 77b4919

Browse files
committed
Update invoice extraction to standardized schema
1 parent 35a41f9 commit 77b4919

28 files changed

Lines changed: 2412 additions & 3383 deletions

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,6 @@ cython_debug/
564564
*Outputs.json
565565
*.pdf.json
566566
*redacted*.pdf
567-
samples/python/demo/invoices/*
568-
!samples/python/demo/invoices/*.py
569-
!samples/python/demo/invoices/*.ipynb
567+
samples/python/scenarios/invoices/*
568+
!samples/python/scenarios/invoices/*.py
569+
!samples/python/scenarios/invoices/*.ipynb
Lines changed: 77 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,83 @@
11
{
2-
"fname": "invoice_1.pdf",
3-
"expected": {
4-
"customer_name": "Sharp Consulting",
5-
"customer_address": {
6-
"street": "73 Regal Way",
7-
"city": "Leeds",
8-
"state": null,
9-
"postal_code": "LS1 5AB",
10-
"country": "UK"
2+
"fname": "invoice_1.pdf",
3+
"0_expected": {
4+
"customer_name": "Sharp Consulting",
5+
"customer_tax_id": null,
6+
"customer_address": {
7+
"street": "73 Regal Way",
8+
"city": "Leeds",
9+
"state": null,
10+
"postal_code": "LS1 5AB",
11+
"country": "UK"
12+
},
13+
"shipping_address": null,
14+
"purchase_order": "15931",
15+
"invoice_id": "3847193",
16+
"invoice_date": "2024-05-16",
17+
"due_date": "2024-05-24",
18+
"vendor_name": "NEXGEN",
19+
"vendor_address": null,
20+
"vendor_tax_id": null,
21+
"remittance_address": null,
22+
"subtotal": {
23+
"currency_code": "GBP",
24+
"amount": 293.52
25+
},
26+
"total_discount": null,
27+
"total_tax": null,
28+
"invoice_total": {
29+
"currency_code": "GBP",
30+
"amount": 293.52
31+
},
32+
"payment_term": null,
33+
"items": [
34+
{
35+
"product_code": "MA197",
36+
"description": "STRETCHWRAP ROLL",
37+
"quantity": 5,
38+
"tax": null,
39+
"unit_price": {
40+
"currency_code": "GBP",
41+
"amount": 16.62
1142
},
12-
"customer_tax_id": null,
13-
"shipping_address": null,
14-
"purchase_order": "15931",
15-
"invoice_id": "3847193",
16-
"invoice_date": "2024-05-16",
17-
"payable_by": "2024-05-24",
18-
"vendor_name": "NEXGEN",
19-
"vendor_address": null,
20-
"vendor_tax_id": null,
21-
"remittance_address": null,
22-
"subtotal": 293.52,
23-
"total_discount": null,
24-
"total_tax": null,
25-
"invoice_total": 293.52,
26-
"payment_terms": null,
27-
"items": [
28-
{
29-
"product_code": "MA197",
30-
"description": "STRETCHWRAP ROLL",
31-
"quantity": 5,
32-
"tax": null,
33-
"tax_rate": null,
34-
"unit_price": 16.62,
35-
"total": 83.1,
36-
"reason": null
37-
},
38-
{
39-
"product_code": "ST4086",
40-
"description": "BALLPOINT PEN MED.",
41-
"quantity": 10,
42-
"tax": null,
43-
"tax_rate": null,
44-
"unit_price": 2.49,
45-
"total": 24.9,
46-
"reason": null
47-
},
48-
{
49-
"product_code": "JF9912413BF",
50-
"description": "BUBBLE FILM ROLL CL.",
51-
"quantity": 12,
52-
"tax": null,
53-
"tax_rate": null,
54-
"unit_price": 15.46,
55-
"total": 185.52,
56-
"reason": null
57-
}
58-
],
59-
"total_item_quantity": 27,
60-
"items_customer_signature": {
61-
"signatory": "Sarah H",
62-
"is_signed": true
63-
},
64-
"items_vendor_signature": {
65-
"signatory": "James T",
66-
"is_signed": true
43+
"total": {
44+
"currency_code": "GBP",
45+
"amount": 83.1
46+
}
47+
},
48+
{
49+
"product_code": "ST4086",
50+
"description": "BALLPOINT PEN MED.",
51+
"quantity": 10,
52+
"tax": null,
53+
"unit_price": {
54+
"currency_code": "GBP",
55+
"amount": 2.49
6756
},
68-
"returns": [
69-
{
70-
"product_code": "MA145",
71-
"description": "POSTAL TUBE BROWN",
72-
"quantity": 1,
73-
"tax": null,
74-
"tax_rate": null,
75-
"unit_price": null,
76-
"total": null,
77-
"reason": "This item was provided in previous order as a replacement"
78-
},
79-
{
80-
"product_code": "JF7902",
81-
"description": "MAILBOX 25PK",
82-
"quantity": 1,
83-
"tax": null,
84-
"tax_rate": null,
85-
"unit_price": null,
86-
"total": null,
87-
"reason": "Not required"
88-
}
89-
],
90-
"total_return_quantity": 2,
91-
"returns_customer_signature": {
92-
"signatory": "Sarah H",
93-
"is_signed": true
57+
"total": {
58+
"currency_code": "GBP",
59+
"amount": 24.9
60+
}
61+
},
62+
{
63+
"product_code": "JF9912413BF",
64+
"description": "BUBBLE FILM ROLL CL.",
65+
"quantity": 12,
66+
"tax": null,
67+
"unit_price": {
68+
"currency_code": "GBP",
69+
"amount": 15.46
9470
},
95-
"returns_vendor_signature": {
96-
"signatory": "James T",
97-
"is_signed": true
71+
"total": {
72+
"currency_code": "GBP",
73+
"amount": 185.52
9874
}
75+
}
76+
],
77+
"customer_signature": {
78+
"signatory": "Sarah H",
79+
"date": null,
80+
"has_written_signature": true
9981
}
100-
}
82+
}
83+
}
Lines changed: 48 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,51 @@
11
{
2-
"fname": "invoice_2.pdf",
3-
"expected": {
4-
"customer_name": "White Triangle",
5-
"customer_address": {
6-
"street": "Unit 1, Fork Works",
7-
"city": "Rotherham",
8-
"state": null,
9-
"postal_code": "S1 1RT",
10-
"country": null
2+
"fname": "invoice_2.pdf",
3+
"0_expected": {
4+
"customer_name": "White Triangle",
5+
"customer_tax_id": null,
6+
"customer_address": {
7+
"street": "Unit 1, Fork Works",
8+
"city": "Rotherham",
9+
"state": null,
10+
"postal_code": "S1 1RT",
11+
"country": "UK"
12+
},
13+
"shipping_address": null,
14+
"purchase_order": "17153",
15+
"invoice_id": "4315272",
16+
"invoice_date": "2024-12-06",
17+
"due_date": null,
18+
"vendor_name": "NEXGEN",
19+
"vendor_address": null,
20+
"vendor_tax_id": null,
21+
"remittance_address": null,
22+
"subtotal": {
23+
"currency_code": "GBP",
24+
"amount": 750.0
25+
},
26+
"total_discount": null,
27+
"total_tax": null,
28+
"invoice_total": {
29+
"currency_code": "GBP",
30+
"amount": 750.0
31+
},
32+
"payment_term": null,
33+
"items": [
34+
{
35+
"product_code": "H509L",
36+
"description": null,
37+
"quantity": 150,
38+
"tax": null,
39+
"unit_price": {
40+
"currency_code": "GBP",
41+
"amount": 5.0
1142
},
12-
"customer_tax_id": null,
13-
"shipping_address": null,
14-
"purchase_order": "17153",
15-
"invoice_id": "4315272",
16-
"invoice_date": "2024-12-06",
17-
"payable_by": null,
18-
"vendor_name": "NEXGEN",
19-
"vendor_address": null,
20-
"vendor_tax_id": null,
21-
"remittance_address": null,
22-
"subtotal": 750.00,
23-
"total_discount": null,
24-
"total_tax": null,
25-
"invoice_total": 750.00,
26-
"payment_terms": null,
27-
"items": [
28-
{
29-
"product_code": "H509L",
30-
"description": null,
31-
"quantity": 150,
32-
"tax": null,
33-
"tax_rate": null,
34-
"unit_price": 5.00,
35-
"total": 750.00,
36-
"reason": null
37-
}
38-
],
39-
"total_item_quantity": 150,
40-
"items_customer_signature": {
41-
"signatory": null,
42-
"is_signed": false
43-
},
44-
"items_vendor_signature": {
45-
"signatory": null,
46-
"is_signed": false
47-
},
48-
"returns": [
49-
{
50-
"product_code": "H509M",
51-
"description": null,
52-
"quantity": 150,
53-
"tax": null,
54-
"tax_rate": null,
55-
"unit_price": null,
56-
"total": null,
57-
"reason": "Incorrect size. Too small."
58-
}
59-
],
60-
"total_return_quantity": 150,
61-
"returns_customer_signature": {
62-
"signatory": null,
63-
"is_signed": false
64-
},
65-
"returns_vendor_signature": {
66-
"signatory": null,
67-
"is_signed": false
43+
"total": {
44+
"currency_code": "GBP",
45+
"amount": 750.0
6846
}
69-
}
70-
}
47+
}
48+
],
49+
"customer_signature": null
50+
}
51+
}

0 commit comments

Comments
 (0)