diff --git a/.github/workflows/_test-cli.yml b/.github/workflows/_test-cli.yml new file mode 100644 index 000000000..bcb4e823c --- /dev/null +++ b/.github/workflows/_test-cli.yml @@ -0,0 +1,47 @@ +name: CLI Test + +on: + workflow_call: + workflow_dispatch: + +env: + MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + +jobs: + test: + name: Run Tests + timeout-minutes: 30 + strategy: + max-parallel: 4 + matrix: + os_config: + - os: "ubuntu-24.04" + rid: "linux-x64" + - os: "macos-latest" + rid: "osx-x64" + - os: "windows-latest" + rid: "win-x64" + ruby: + - "3.0" + - "4.0" + runs-on: ${{ matrix.os_config.os }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: set up Ruby ${{ matrix.ruby }} + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + + - name: Tests V2 CLI + run: | + ./spec/test_v2_cli.sh ./spec/data/file_types/pdf/blank_1.pdf ${{ matrix.os_config.rid }} + + - name: Tests V1 CLI + run: | + ./spec/test_v1_cli.sh ./spec/data/file_types/pdf/blank_1.pdf ${{ matrix.os_config.rid }} diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 75a72a712..ef025d4e8 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -7,6 +7,19 @@ on: workflow_call: workflow_dispatch: +env: + MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ 
secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + MINDEE_LOG_LEVEL: DEBUG + jobs: integration-tests: name: Run Integration Tests @@ -50,17 +63,6 @@ jobs: sudo sed -i "s/$SRC/$RPL/" /etc/ImageMagick-6/policy.xml - name: Run Rspec for integration tests - env: - MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} - WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} - MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} - MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} - MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} - MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} - MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} - MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} - MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} - MINDEE_LOG_LEVEL: DEBUG run: | bundle exec rake integration diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-smoke.yml similarity index 56% rename from .github/workflows/_test-code-samples.yml rename to .github/workflows/_test-smoke.yml index 81e76f0b3..dab35abb0 100644 --- a/.github/workflows/_test-code-samples.yml +++ b/.github/workflows/_test-smoke.yml @@ -1,4 +1,4 @@ -name: Test Code Samples +name: Smoke Test on: workflow_call: @@ -9,12 +9,13 @@ env: MINDEE_ACCOUNT_SE_TESTS: ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} MINDEE_API_KEY_SE_TESTS: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} - MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} - 
MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID }} - MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} - MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} - MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} - MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_FAILURE_WEBHOOK_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID }} + MINDEE_V2_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} jobs: test: @@ -40,14 +41,15 @@ jobs: ruby-version: ${{ matrix.ruby }} bundler-cache: true - - name: Tests V1 code samples + - name: Tests V2 code samples env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples_v1.sh + ./spec/test_v2_code_samples.sh - - name: Tests V2 code samples + - name: Tests V1 code samples env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples_v2.sh + ./spec/test_v1_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} + diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index e7f14660b..ee6755c89 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -5,6 +5,9 @@ on: - cron: '0 0 * * *' jobs: - test_code_samples: - uses: mindee/mindee-api-ruby/.github/workflows/_test-code-samples.yml@main + test-smoke: + uses: mindee/mindee-api-ruby/.github/workflows/_test-smoke.yml@main + secrets: inherit + test-cli: + uses: 
mindee/mindee-api-ruby/.github/workflows/_test-cli.yml@main secrets: inherit diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index a1178aeb3..d786c0e56 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -5,13 +5,13 @@ on: types: [ published ] jobs: - publish_docs: + publish-docs: uses: mindee/mindee-api-ruby/.github/workflows/_publish-docs.yml@main secrets: inherit - publish_guide: + publish-guide: uses: mindee/mindee-api-ruby/.github/workflows/_publish-guide.yml@main - needs: publish_docs + needs: publish-docs secrets: inherit - publish_code: + publish-code: uses: mindee/mindee-api-ruby/.github/workflows/_publish-code.yml@main secrets: inherit diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 6527968fb..656762ad9 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -8,17 +8,21 @@ permissions: pull-requests: read jobs: - static_analysis: + static-analysis: uses: ./.github/workflows/_static-analysis.yml - test_units: + test-units: uses: ./.github/workflows/_test-units.yml - needs: static_analysis + needs: static-analysis secrets: inherit - test_integrations: + test-integrations: uses: ./.github/workflows/_test-integrations.yml - needs: test_units + needs: test-units secrets: inherit - test_code_samples: - uses: ./.github/workflows/_test-code-samples.yml - needs: test_units + test-smoke: + uses: ./.github/workflows/_test-smoke.yml + needs: test-units + secrets: inherit + test-cli: + uses: ./.github/workflows/_test-cli.yml + needs: test-units secrets: inherit diff --git a/.github/workflows/push-main-branch.yml b/.github/workflows/push-main-branch.yml index 55ccabd55..812b14568 100644 --- a/.github/workflows/push-main-branch.yml +++ b/.github/workflows/push-main-branch.yml @@ -6,15 +6,15 @@ on: - main jobs: - static_analysis: + static-analysis: uses: 
mindee/mindee-api-ruby/.github/workflows/_static-analysis.yml@main - test_units: + test-units: uses: mindee/mindee-api-ruby/.github/workflows/_test-units.yml@main - needs: static_analysis + needs: static-analysis secrets: inherit tag: uses: mindee/client-lib-actions/.github/workflows/tag-version.yml@main - needs: test_units + needs: test-units release: uses: mindee/client-lib-actions/.github/workflows/create-release.yml@main needs: tag diff --git a/.rubocop.yml b/.rubocop.yml index fbb91a84e..d7fcf91c8 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -37,7 +37,8 @@ Metrics/MethodLength: Metrics/ClassLength: Max: 200 - +Metrics/ModuleLength: + Max: 200 Metrics/ParameterLists: Max: 8 diff --git a/bin/mindee.rb b/bin/mindee.rb index 6c303ef19..f6900dbed 100755 --- a/bin/mindee.rb +++ b/bin/mindee.rb @@ -3,127 +3,28 @@ require 'bundler/setup' require 'optparse' -require 'mindee' - -require_relative 'cli_products' - -options = {} - -# Initializes universal-specific options -# @param cli_parser [OptionParser] -def custom_subcommand(cli_parser, options) - cli_parser.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v| - options[:endpoint_version] = v - end - cli_parser.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v| - options[:account_name] = v - end -end - -product_parser = {} -PRODUCTS.each do |doc_key, doc_value| - product_parser[doc_key] = OptionParser.new do |opts| - opts.on('-w', '--all-words', 'Include words in response') do |v| - options[:all_words] = v - end - opts.on('-c', '--cut-pages', "Cut document pages") do |v| - options[:cut_pages] = v - end - opts.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v| - options[:api_key] = v - end - opts.on('-f', '--full', "Print the full data, including pages") do |v| - options[:print_full] = true - end - opts.on('-F', '--fix-pdf', "Attempts to fix broken PDF files before sending them to the server.") do |v| - options[:repair_pdf] = true - end 
- if doc_key != 'universal' - opts.banner = "#{doc_value[:description]}. \nUsage: \nmindee.rb universal [options] endpoint_name file\nor\nmindee.rb universal [options] endpoint_name file" - custom_subcommand(opts, options) - end - if doc_value[:async] - if doc_value[:sync] - opts.on("-A", "--async", "Call asynchronously") do |v| - options[:parse_async] = v - end - end - end +require_relative 'v1/parser' +require_relative 'v2/parser' + +def setup_main_parser + v1_parser = MindeeCLI::V1Parser.new(ARGV) + v2_parser = MindeeCLI::V2Parser.new(ARGV) + main_parser = OptionParser.new do |opts| + opts.banner = "Usage: mindee [command]" + opts.separator "Commands:" + opts.separator " v1 Use Version 1 of the Mindee API" + opts.separator " v2 Use Version 2 of the Mindee API" end -end - -global_parser = OptionParser.new do |opts| - opts.banner = "Usage: mindee.rb product [options] file" - opts.separator "Available products:" - opts.separator " #{PRODUCTS.keys.join("\n ")}" -end - -command = ARGV.shift -unless PRODUCTS.include?(command) - abort(global_parser.help) -end -doc_class = PRODUCTS[command][:doc_class] -product_parser[command].parse! + main_command = ARGV.shift -if command == 'universal' - if ARGV.length < 2 - $stderr.puts "The '#{command}' command requires both ENDPOINT_NAME and file arguments." 
- abort(product_parser[command].help) - end - endpoint_name = ARGV[0] - options[:file_path] = ARGV[1] -else - if ARGV.length < 1 - $stderr.puts "file missing" - abort(product_parser[command].help) - end - endpoint_name = nil - options[:file_path] = ARGV[0] -end - -mindee_client = Mindee::Client.new(api_key: options[:api_key]) -if options[:file_path].start_with?("https://") - input_source = mindee_client.source_from_url(options[:file_path]) -else - input_source = mindee_client.source_from_path(options[:file_path], repair_pdf: options[:repair_pdf]) -end - -if command == 'universal' - custom_endpoint = mindee_client.create_endpoint( - endpoint_name: endpoint_name, - account_name: options[:account_name], - version: options[:endpoint_version].nil? ? "1" : options[:endpoint_version] - ) -else - custom_endpoint = nil -end - -if options[:cut_pages].nil? || !options[:cut_pages].is_a?(Integer) || options[:cut_pages] < 0 - page_options = nil -else - page_options = Mindee::PageOptions.new(params: { - page_indexes: (0..options[:cut_pages].to_i).to_a, - operation: :KEEP_ONLY, - on_min_pages: 0, - }) -end - -if options[:parse_async].nil? - if !PRODUCTS[command][:sync] - options[:parse_async] = true + case main_command + when 'v1' + v1_parser.execute + when 'v2' + v2_parser.execute else - options[:parse_async] = false + abort(main_parser.help) end end -result = mindee_client.parse( - input_source, - doc_class, - options: { endpoint: custom_endpoint, - options: Mindee::ParseOptions.new(params: { page_options: page_options }), enqueue: options[:parse_async] } -) -if options[:print_full] - puts result.document -else - puts result.document.inference.prediction -end +setup_main_parser diff --git a/bin/v1/parser.rb b/bin/v1/parser.rb new file mode 100644 index 000000000..0be7bd381 --- /dev/null +++ b/bin/v1/parser.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require 'mindee' +require_relative 'products' + +module MindeeCLI + # Mindee Command Line Interface + # V1 CLI class. 
+ class V1Parser + # @return [Array] + attr_reader :arguments + + # @return [OptionParser] + attr_reader :options_parser + + # @return [Parser] + attr_reader :product_parser + + def initialize(arguments) + @arguments = arguments + @options_parser = OptionParser.new do |opts| + opts.banner = 'Usage: mindee v1 product [options] file' + opts.separator 'Available products:' + opts.separator " #{V1_PRODUCTS.keys.join("\n ")}" + end + @product_parser = init_product_parser + end + + # @param cli_parser [OptionParser] + def custom_subcommand(cli_parser) + cli_parser.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v| + @options[:endpoint_version] = v + end + cli_parser.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v| + @options[:account_name] = v + end + end + + # @return [Hash] + def init_product_parser + v1_product_parser = {} + V1_PRODUCTS.each do |doc_key, doc_value| + v1_product_parser[doc_key] = OptionParser.new do |options_parser| + options_parser.on('-w', '--all-words', 'Include words in response') { |v| @options[:all_words] = v } + options_parser.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') { |v| @options[:api_key] = v } + options_parser.on('-o FORMAT', '--output-format FORMAT', ['raw', 'full', 'summary'], + 'Format of the output (raw, full, summary). 
Default: summary') do |format| + @options[:output_format] = format + end + options_parser.on('-F', '--fix-pdf', 'Repair PDF') { @options[:repair_pdf] = true } + + if doc_key != 'universal' + options_parser.banner = "Usage: mindee v1 #{doc_key} [options] file" + custom_subcommand(options_parser) + end + + if doc_value[:async] && doc_value[:sync] + options_parser.on('-A', '--async', 'Call asynchronously') { |v| @options[:parse_async] = v } + end + end + end + v1_product_parser + end + + # @param product_command [String] + # @param endpoint_name [String, nil] + # @param options [Hash] + # @return [Mindee::Parsing::Common::ApiResponse] + def send(product_command, endpoint_name, options) + mindee_client = Mindee::Client.new(api_key: options[:api_key]) + doc_class = V1_PRODUCTS[product_command][:doc_class] + input_source = setup_input_source(mindee_client, options) + custom_endpoint = setup_endpoint(mindee_client, product_command, endpoint_name, options) + options[:parse_async] = !V1_PRODUCTS[product_command][:sync] if options[:parse_async].nil? + + mindee_client.parse( + input_source, + doc_class, + options: { endpoint: custom_endpoint, + enqueue: options[:parse_async] } + ) + end + + def print_result(result, output_format) + if output_format == :raw + puts JSON.pretty_generate(JSON.parse(result.raw_http)) + else + puts output_format == :full ? result.document : result.document.inference.prediction + end + end + + # @param product_command [String] + def execute + @options = { output_format: :summary } + product_command = @arguments.shift + + abort(@options_parser.help) unless V1_PRODUCTS.include?(product_command) + @product_parser[product_command].parse! + + if product_command == 'universal' + if @arguments.length < 2 + warn "The 'universal' command requires both ENDPOINT_NAME and file arguments." + abort(@product_parser[product_command].help) + end + endpoint_name = @arguments[0] + @options[:file_path] = @arguments[1] + else + if @arguments.empty? 
+ warn 'file missing' + abort(@product_parser[product_command].help) + end + endpoint_name = nil + @options[:file_path] = @arguments[0] + end + + result = send(product_command, endpoint_name, @options) + print_result(result, output_format) + end + + private + + # @return [Symbol] + def output_format + @options[:output_format]&.to_sym || :summary + end + + # @param mindee_client [Mindee::V1::Client] + # @param options [Hash] + # @return [Hash] + def setup_input_source(mindee_client, options) + if options[:file_path].start_with?('https://') + mindee_client.source_from_url(options[:file_path]) + else + mindee_client.source_from_path(options[:file_path], repair_pdf: options[:repair_pdf]) + end + end + + # @param mindee_client [Mindee::V1::Client] + # @param product_command [String] + # @param endpoint_name [String, nil] + # @param options [Hash] + # @return [Mindee::HTTP::Endpoint, nil] + def setup_endpoint(mindee_client, product_command, endpoint_name, options) + return unless product_command == 'universal' + + mindee_client.create_endpoint( + endpoint_name: endpoint_name, + account_name: options[:account_name], + version: options[:endpoint_version] || '1' + ) + end + end +end diff --git a/bin/cli_products.rb b/bin/v1/products.rb similarity index 63% rename from bin/cli_products.rb rename to bin/v1/products.rb index 41df18367..96cf465b9 100644 --- a/bin/cli_products.rb +++ b/bin/v1/products.rb @@ -1,166 +1,166 @@ # frozen_string_literal: true -PRODUCTS = { - "universal" => { - description: "Universal document type from API builder", +V1_PRODUCTS = { + 'universal' => { + description: 'Universal document type from API builder', doc_class: Mindee::Product::Universal::Universal, sync: true, async: true, }, - "barcode-reader" => { - description: "Barcode Reader", + 'barcode-reader' => { + description: 'Barcode Reader', doc_class: Mindee::Product::BarcodeReader::BarcodeReaderV1, sync: true, async: false, }, - "bill-of-lading" => { - description: "Bill of Lading", + 
'bill-of-lading' => { + description: 'Bill of Lading', doc_class: Mindee::Product::BillOfLading::BillOfLadingV1, sync: false, async: true, }, - "business-card" => { - description: "Business Card", + 'business-card' => { + description: 'Business Card', doc_class: Mindee::Product::BusinessCard::BusinessCardV1, sync: false, async: true, }, - "cropper" => { - description: "Cropper", + 'cropper' => { + description: 'Cropper', doc_class: Mindee::Product::Cropper::CropperV1, sync: true, async: false, }, - "delivery-note" => { - description: "Delivery note", + 'delivery-note' => { + description: 'Delivery note', doc_class: Mindee::Product::DeliveryNote::DeliveryNoteV1, sync: false, async: true, }, - "driver-license" => { - description: "Driver License", + 'driver-license' => { + description: 'Driver License', doc_class: Mindee::Product::DriverLicense::DriverLicenseV1, sync: false, async: true, }, - "financial-document" => { - description: "Financial Document", + 'financial-document' => { + description: 'Financial Document', doc_class: Mindee::Product::FinancialDocument::FinancialDocumentV1, sync: true, async: true, }, - "fr-bank-account-details" => { - description: "Bank Account Details", + 'fr-bank-account-details' => { + description: 'Bank Account Details', doc_class: Mindee::Product::FR::BankAccountDetails::BankAccountDetailsV2, sync: true, async: false, }, - "fr-bank-statement" => { - description: "Bank Statement", + 'fr-bank-statement' => { + description: 'Bank Statement', doc_class: Mindee::Product::FR::BankStatement::BankStatementV2, sync: false, async: true, }, - "fr-carte-grise" => { - description: "Carte Grise", + 'fr-carte-grise' => { + description: 'Carte Grise', doc_class: Mindee::Product::FR::CarteGrise::CarteGriseV1, sync: true, async: false, }, - "fr-energy-bill" => { - description: "Energy Bill", + 'fr-energy-bill' => { + description: 'Energy Bill', doc_class: Mindee::Product::FR::EnergyBill::EnergyBillV1, sync: false, async: true, }, - "fr-health-card" => 
{ - description: "Health Card", + 'fr-health-card' => { + description: 'Health Card', doc_class: Mindee::Product::FR::HealthCard::HealthCardV1, sync: false, async: true, }, - "fr-carte-nationale-d-identite" => { + 'fr-carte-nationale-d-identite' => { description: "Carte Nationale d'Identité", doc_class: Mindee::Product::FR::IdCard::IdCardV2, sync: true, async: false, }, - "fr-payslip" => { - description: "Payslip", + 'fr-payslip' => { + description: 'Payslip', doc_class: Mindee::Product::FR::Payslip::PayslipV3, sync: false, async: true, }, - "ind-passport-india" => { - description: "Passport - India", + 'ind-passport-india' => { + description: 'Passport - India', doc_class: Mindee::Product::IND::IndianPassport::IndianPassportV1, sync: false, async: true, }, - "international-id" => { - description: "International ID", + 'international-id' => { + description: 'International ID', doc_class: Mindee::Product::InternationalId::InternationalIdV2, sync: false, async: true, }, - "invoice" => { - description: "Invoice", + 'invoice' => { + description: 'Invoice', doc_class: Mindee::Product::Invoice::InvoiceV4, sync: true, async: true, }, - "invoice-splitter" => { - description: "Invoice Splitter", + 'invoice-splitter' => { + description: 'Invoice Splitter', doc_class: Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, sync: false, async: true, }, - "multi-receipts-detector" => { - description: "Multi Receipts Detector", + 'multi-receipts-detector' => { + description: 'Multi Receipts Detector', doc_class: Mindee::Product::MultiReceiptsDetector::MultiReceiptsDetectorV1, sync: true, async: false, }, - "nutrition-facts-label" => { - description: "Nutrition Facts Label", + 'nutrition-facts-label' => { + description: 'Nutrition Facts Label', doc_class: Mindee::Product::NutritionFactsLabel::NutritionFactsLabelV1, sync: false, async: true, }, - "passport" => { - description: "Passport", + 'passport' => { + description: 'Passport', doc_class: Mindee::Product::Passport::PassportV1, 
sync: true, async: false, }, - "receipt" => { - description: "Receipt", + 'receipt' => { + description: 'Receipt', doc_class: Mindee::Product::Receipt::ReceiptV5, sync: true, async: true, }, - "resume" => { - description: "Resume", + 'resume' => { + description: 'Resume', doc_class: Mindee::Product::Resume::ResumeV1, sync: false, async: true, }, - "us-bank-check" => { - description: "Bank Check", + 'us-bank-check' => { + description: 'Bank Check', doc_class: Mindee::Product::US::BankCheck::BankCheckV1, sync: true, async: false, }, - "us-healthcare-card" => { - description: "Healthcare Card", + 'us-healthcare-card' => { + description: 'Healthcare Card', doc_class: Mindee::Product::US::HealthcareCard::HealthcareCardV1, sync: false, async: true, }, - "us-us-mail" => { - description: "US Mail", + 'us-us-mail' => { + description: 'US Mail', doc_class: Mindee::Product::US::UsMail::UsMailV3, sync: false, async: true, }, -} +}.freeze diff --git a/bin/v2/parser.rb b/bin/v2/parser.rb new file mode 100644 index 000000000..343bfab09 --- /dev/null +++ b/bin/v2/parser.rb @@ -0,0 +1,235 @@ +# frozen_string_literal: true + +require 'mindee' +require_relative 'products' + +module MindeeCLI + # Mindee Command Line Interface + # V2 CLI class. + class V2Parser + # @return [Array] + attr_reader :arguments + + # @return [OptionParser] + attr_reader :options_parser + + # @return [Parser] + attr_reader :product_parser + + # @return [Parser] + attr_reader :search_parser + + def initialize(arguments) + @arguments = arguments + @options_parser = OptionParser.new do |opts| + opts.banner = 'Usage: mindee v2 command [options]' + end + @product_parser = init_product_parser + @search_parser = init_search_parser + end + + # Summarize and print the result of the command. + # @param command [String] + def print_result(command) + if command == 'search-models' + @search_parser.parse!(@arguments) + result = search(@options) + summarized_result = output_format == :full ? 
result.to_s : result.models.to_s + else + @product_parser[command].parse!(@arguments) + @options[:file_path] = @arguments.shift + if @options[:file_path].nil? + warn 'file missing' + abort(@product_parser[command].help) + end + result = send(command, @options) + summarized_result = output_format == :full ? result.inference.to_s : result.inference.result.to_s + end + + if output_format == :raw + puts JSON.pretty_generate(raw_payload(result.raw_http)) + else + puts summarized_result + end + end + + # Executes the command. + # @return [void] + def execute + @options = { output_format: :summary } + command = @arguments.shift + + validate_command!(command) + print_result(command) + rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e + if command == 'search-models' + abort("#{e.message}\n\n#{@search_parser.help}") + else + abort("#{e.message}\n\n#{@product_parser[command].help}") + end + end + + private + + def validate_command!(command) + return if V2_PRODUCTS.include?(command) || command == 'search-models' + + error_msg = "#{@options_parser.help}\nAvailable commands:\n" + error_msg += " #{'search-models'.ljust(50)}Search for available models for this API key\n" + + V2_PRODUCTS.each do |product_key, product_values| + error_msg += " #{product_key.to_s.ljust(50)}#{product_values[:description]}\n" + end + abort(error_msg) + end + + def init_search_parser + OptionParser.new do |options_parser| + options_parser.banner = 'Usage: mindee v2 search-models [options]' + init_common_options(options_parser) + options_parser.on('-n [NAME]', '--name [NAME]', + 'Search for partial matches in model name. Note: case insensitive') do |v| + @options[:model_name] = v + end + options_parser.on('-t [NAME]', '--type [NAME]', + 'Search for EXACT matches in model type. 
Note: case sensitive') do |v| + @options[:model_type] = v + end + end + end + + def setup_specific_options(options_parser, doc_value) + options_parser.on('-r', '--rag', 'Enable RAG') { @options[:rag] = true } if doc_value.key?(:rag) + if doc_value.key?(:raw_text) + options_parser.on('-R', '--raw-text', 'Enable Raw Text retrieval') do + @options[:raw_text] = true + end + end + if doc_value.key?(:confidence) + options_parser.on('-c', '--confidence', 'Enable confidence scores') do + @options[:confidence] = true + end + end + options_parser.on('-p', '--polygon', 'Enable polygons') { @options[:polygon] = true } if doc_value.key?(:polygon) + if doc_value.key?(:text_context) + options_parser.on('-t [TEXT CONTEXT]', '--text-context [TEXT CONTEXT]', 'Add Text Context') do |v| + @options[:text_context] = v + end + end + return unless doc_value.key?(:data_schema) + + options_parser.on('-d [DATA SCHEMA]', '--data-schema [DATA SCHEMA]', 'Add Data Schema') do |v| + @options[:data_schema] = v + end + end + + # Initialize common options for search and product commands. + # @param options_parser [OptionParser] + def init_common_options(options_parser) + options_parser.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') { |v| @options[:api_key] = v } + options_parser.on('-o FORMAT', '--output-format FORMAT', ['raw', 'full', 'summary'], + 'Format of the output (raw, full, summary). Default: summary') do |format| + @options[:output_format] = format + end + end + + # @return [Symbol] + def output_format + @options[:output_format]&.to_sym || :summary + end + + # Handles JSON payloads represented either as a string or an already-parsed hash. + # Also tolerates one extra JSON encoding layer. 
+ # @param payload [String, Hash] + # @return [Hash, Array, String] + def raw_payload(payload) + parsed_payload = payload + 2.times do + break unless parsed_payload.is_a?(String) + + parsed_payload = JSON.parse(parsed_payload) + rescue JSON::ParserError + break + end + parsed_payload + end + + # @return [Hash] + def init_product_parser + v2_product_parser = {} + V2_PRODUCTS.each do |product_key, product_values| + v2_product_parser[product_key] = OptionParser.new do |options_parser| + options_parser.banner = "Usage: mindee v2 #{product_key} [options] file" + options_parser.on('-m MODEL_ID', '--model-id MODEL_ID', 'Model ID') { |v| @options[:model_id] = v } + options_parser.on('-a ALIAS', '--alias ALIAS', 'Add a file alias to the response') do |v| + @options[:alias] = v + end + init_common_options(options_parser) + options_parser.on('-F', '--fix-pdf', 'Attempt to repair PDF before enqueueing') do + @options[:repair_pdf] = true + end + setup_specific_options(options_parser, product_values) + end + end + v2_product_parser + end + + # @return [Hash] + def setup_product_params + params = { model_id: @options[:model_id] } + @options.each_pair do |key, value| + params[key] = value if V2_PRODUCTS['extraction'].include?(key) + end + params + end + + # @param product_command [String] + # @param options [Hash] + # @return [Mindee::Parsing::Common::ApiResponse] + def send(product_command, options) + mindee_client = Mindee::ClientV2.new(api_key: options[:api_key]) + response_class = V2_PRODUCTS[product_command][:response_class] + input_source = setup_input_source(options) + params = setup_product_params + + mindee_client.enqueue_and_get_result( + response_class, + input_source, + params + ) + end + + # @param options [Hash] + # @return [Mindee::V2::Parsing::Search::SearchResponse] + def search(options) + mindee_client = Mindee::ClientV2.new(api_key: options[:api_key]) + mindee_client.search_models(options[:model_name], options[:model_type]) + end + + # @param options [Hash] + # 
@return [Mindee::Input::Source::URLInputSource, Mindee::Input::Source::PathInputSource] + # An `https://` location becomes a URL source; anything else is read as a local path. + def setup_input_source(options) + location = options[:file_path] + return Mindee::Input::Source::URLInputSource.new(location) if location.start_with?('https://') + + Mindee::Input::Source::PathInputSource.new(location, repair_pdf: options[:repair_pdf]) + end + + # Builds page options from `options[:cut_pages]`. + # @param options [Hash] + # @return [Hash, nil] `nil` unless `options[:cut_pages]` is a non-negative Integer. + def setup_page_options(options) + last_index = options[:cut_pages] + return nil unless last_index.is_a?(Integer) && !last_index.negative? + + # Inclusive range: indexes 0 through `last_index` are kept. + { + page_indexes: (0..last_index).to_a, + operation: :KEEP_ONLY, + on_min_pages: 0, + } + end + end +end diff --git a/bin/v2/products.rb b/bin/v2/products.rb new file mode 100644 index 000000000..be9320cd4 --- /dev/null +++ b/bin/v2/products.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'mindee/v2/product' + +# NOTE: keep product names as string instead of symbols due to kebab-case. + +V2_PRODUCTS = { + 'classification' => { + description: 'Classification Utility', + response_class: Mindee::V2::Product::Classification::Classification, + }, + 'extraction' => { + description: 'Extraction Inference', + response_class: Mindee::V2::Product::Extraction::Extraction, + rag: true, + polygon: true, + confidence: true, + raw_text: true, + text_context: true, + data_schema: true, + }, + 'crop' => { + description: 'Crop Utility', + response_class: Mindee::V2::Product::Crop::Crop, + }, + 'ocr' => { + description: 'OCR Utility', + response_class: Mindee::V2::Product::Ocr::Ocr, + }, + 'split' => { + description: 'Split Utility', + response_class: Mindee::V2::Product::Split::Split, + }, +}.freeze diff --git a/docs/code_samples/v2_extraction.txt b/docs/code_samples/v2_extraction.txt index 3c9d57e2c..aeb416b96 100644 --- a/docs/code_samples/v2_extraction.txt +++ b/docs/code_samples/v2_extraction.txt @@ -11,7 +11,7 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -inference_params = {
+extraction_params = { # ID of the model, required. model_id: model_id, @@ -35,7 +35,7 @@ input_source = Mindee::Input::Source::PathInputSource.new(input_path) response = mindee_client.enqueue_and_get_result( Mindee::V2::Product::Extraction::Extraction, input_source, - inference_params + extraction_params ) # Print a brief summary of the parsed data diff --git a/docs/code_samples/v2_extraction_webhook.txt b/docs/code_samples/v2_extraction_webhook.txt index a68f31eb7..dcbbc57e3 100644 --- a/docs/code_samples/v2_extraction_webhook.txt +++ b/docs/code_samples/v2_extraction_webhook.txt @@ -10,7 +10,7 @@ model_id = 'MY_MODEL_ID' # Init a new client mindee_client = Mindee::ClientV2.new(api_key: api_key) -inference_params = { +extraction_params = { # ID of the model, required. model_id: model_id, # Add any number of webhook IDs here. @@ -36,7 +36,7 @@ input_source = Mindee::Input::Source::PathInputSource.new(input_path) response = mindee_client.enqueue( Mindee::V2::Product::Extraction::Extraction, input_source, - inference_params + extraction_params ) # Print the job ID diff --git a/lib/mindee.rb b/lib/mindee.rb index d05a94203..a9020a0f4 100644 --- a/lib/mindee.rb +++ b/lib/mindee.rb @@ -90,6 +90,13 @@ module V2 # Product-specific module. module Product end + + # V2 parsing module. + module Parsing + # V2 search module. 
+ module Search + end + end end end diff --git a/lib/mindee/client.rb b/lib/mindee/client.rb index 39ef0dec8..cf21f03ce 100644 --- a/lib/mindee/client.rb +++ b/lib/mindee/client.rb @@ -162,7 +162,7 @@ def parse_sync(input_source, product_class, endpoint, options) options ) - Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_s) + Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http) end # Enqueue a document for async parsing @@ -200,7 +200,7 @@ def enqueue(input_source, product_class, endpoint: nil, options: {}) input_source, opts ) - Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json) + Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http) end # Parses a queued document @@ -215,7 +215,7 @@ def parse_queued(job_id, product_class, endpoint: nil) endpoint = initialize_endpoint(product_class) if endpoint.nil? logger.debug("Fetching queued document as '#{endpoint.url_root}'") prediction, raw_http = endpoint.parse_async(job_id) - Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json) + Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http) end # Enqueue a document for async parsing and automatically try to retrieve it diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index 39def57cc..d591fe08d 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -19,47 +19,27 @@ def initialize(api_key: '') @mindee_api = Mindee::HTTP::MindeeApiV2.new(api_key: api_key) end - # Retrieves an inference. - # @param inference_id [String] - # @return [Mindee::Parsing::V2::InferenceResponse] - def get_inference(inference_id) - @mindee_api.req_get_inference(inference_id) - end - # Retrieves a result from a given queue or URL to the result. # @param product [Class] The return class. # @param resource [String] ID of the inference or URL to the result. 
- # @return [Mindee::Parsing::V2::BaseResponse] + # @return [Mindee::V2::Parsing::BaseResponse] def get_result(product, resource) @mindee_api.req_get_result(product, resource) end # Retrieves an inference from a given queue or URL to the job. # @param job_id [String] ID of the job. - # @return [Mindee::Parsing::V2::JobResponse] + # @return [Mindee::V2::Parsing::JobResponse] def get_job(job_id) @mindee_api.req_get_job(job_id) end - # Enqueue a document for async parsing. - # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # The source of the input document (local file or URL). - # @param params [Hash, InferenceParameters] - # @return [Mindee::Parsing::V2::JobResponse] - def enqueue_inference(input_source, params, disable_redundant_warnings: false) - unless disable_redundant_warnings - warn '[DEPRECATION] `enqueue_inference` is deprecated; use `enqueue` instead.', uplevel: 1 - end - normalized_params = normalize_parameters(Input::InferenceParameters, params) - enqueue(Mindee::Parsing::V2::Inference, input_source, normalized_params) - end - # Enqueue a document for async parsing. # @param product [Class] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). - # @param params [Hash, InferenceParameters] Parameters for the inference. - # @return [Mindee::Parsing::V2::JobResponse] + # @param params [Hash, Input::BaseParameters] Parameters for the inference. + # @return [Mindee::V2::Parsing::JobResponse] def enqueue( product, input_source, @@ -77,8 +57,8 @@ def enqueue( # @param product [Class] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). - # @param params [Hash, InferenceParameters] Parameters for the inference. 
- # @return [Mindee::Parsing::Common::ApiResponse] + # @param params [Hash, Input::BaseParameters] Parameters for the inference. + # @return [Mindee::V2::Parsing::BaseResponse] def enqueue_and_get_result( product, input_source, @@ -130,26 +110,17 @@ def enqueue_and_get_result( "Asynchronous parsing request timed out after #{sec_count} seconds" end - # Enqueue a document for async parsing and automatically try to retrieve it. - # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # The source of the input document (local file or URL). - # @param params [Hash, InferenceParameters] Parameters for the inference. - # @return [Mindee::Parsing::V2::InferenceResponse] - def enqueue_and_get_inference(input_source, params, disable_redundant_warnings: false) - unless disable_redundant_warnings - warn '[DEPRECATION] `enqueue_and_get_inference` is deprecated; use `enqueue_and_get_result` instead.', - uplevel: 1 - end - - response = enqueue_and_get_result(Mindee::Parsing::V2::Inference, input_source, params) - unless response.is_a?(Mindee::Parsing::V2::InferenceResponse) - raise TypeError, "Invalid response type \"#{response.class}\"" - end - - response + # Searches for a list of available models for the given API key. + # @param model_name [String, nil] Optional name filter, passed through to the HTTP layer. + # @param model_type [String, nil] Optional model-type filter, passed through to the HTTP layer. + # @return [Mindee::V2::Parsing::Search::SearchResponse] + def search_models(model_name, model_type) + @mindee_api.search_models(model_name, model_type) end - # If needed, converts the parsing options provided as a hash into a proper InferenceParameters object. + private + + # If needed, converts the parsing options provided as a hash into a proper BaseParameters subclass object. # @param params [Hash, Class] Params.
# @return [BaseParameters] def normalize_parameters(param_class, params) diff --git a/lib/mindee/errors/mindee_http_error_v2.rb b/lib/mindee/errors/mindee_http_error_v2.rb index 90af794ab..a482d1df9 100644 --- a/lib/mindee/errors/mindee_http_error_v2.rb +++ b/lib/mindee/errors/mindee_http_error_v2.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require_relative 'mindee_error' -require_relative '../parsing/v2/error_item' +require_relative '../v2/parsing/error_item' module Mindee module Errors @@ -18,9 +18,9 @@ class MindeeHTTPErrorV2 < MindeeError # @return [Array] A list of explicit error details. attr_reader :errors - # @param http_error [Hash, Mindee::Parsing::V2::ErrorResponse] + # @param http_error [Hash, Mindee::V2::Parsing::ErrorResponse] def initialize(http_error) - if http_error.is_a?(Parsing::V2::ErrorResponse) + if http_error.is_a?(V2::Parsing::ErrorResponse) http_error = { 'detail' => http_error.detail, 'status' => http_error.status, 'title' => http_error.title, @@ -33,7 +33,7 @@ def initialize(http_error) @code = http_error['code'] @errors = if http_error.key?('errors') http_error['errors'].map do |error| - Mindee::Parsing::V2::ErrorItem.new(error) + Mindee::V2::Parsing::ErrorItem.new(error) end else [] diff --git a/lib/mindee/http/api_settings_v2.rb b/lib/mindee/http/api_settings_v2.rb index 2ae2dcc08..c3b59e895 100644 --- a/lib/mindee/http/api_settings_v2.rb +++ b/lib/mindee/http/api_settings_v2.rb @@ -14,7 +14,7 @@ class ApiSettingsV2 # V2 base URL default environment key name. MINDEE_V2_BASE_URL_ENV_NAME = 'MINDEE_V2_BASE_URL' # V2 base URL default value. - MINDEE_V2_BASE_URL_DEFAULT = 'https://api-v2.mindee.net/v2' + MINDEE_V2_BASE_URL_DEFAULT = 'https://api-v2.mindee.net' # HTTP request timeout default environment key name. 
MINDEE_V2_REQUEST_TIMEOUT_ENV_NAME = 'MINDEE_V2_REQUEST_TIMEOUT' diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 1c1d4b7ef..a7dcbffc2 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -3,7 +3,6 @@ require_relative 'api_settings_v2' require_relative '../input' require_relative '../errors' -require_relative '../parsing/v2' module Mindee module HTTP @@ -17,11 +16,11 @@ def initialize(api_key: nil) @settings = ApiSettingsV2.new(api_key: api_key) end - # Sends a file to the inference queue. + # Sends a file to the queue. # # @param input_source [Input::Source::LocalInputSource, Input::Source::URLInputSource] # @param params [Input::BaseParameters] - # @return [Mindee::Parsing::V2::JobResponse] + # @return [Mindee::V2::Parsing::JobResponse] # @raise [Mindee::Errors::MindeeHttpErrorV2] def req_post_enqueue(input_source, params) @settings.check_api_key @@ -29,21 +28,13 @@ def req_post_enqueue(input_source, params) input_source, params ) - Mindee::Parsing::V2::JobResponse.new(process_response(response)) - end - - # Retrieves a queued inference. - # - # @param inference_id [String] - # @return [Mindee::Parsing::V2::InferenceResponse] - def req_get_inference(inference_id) - req_get_result(Parsing::V2::Inference, inference_id) + Mindee::V2::Parsing::JobResponse.new(process_response(response)) end # Retrieves a result from a given queue. # @param product [Class] The return class. # @param resource [String] ID of the inference or URL to the result. - # @return [Mindee::Parsing::V2::BaseResponse] + # @return [Mindee::V2::Parsing::BaseResponse] def req_get_result(product, resource) return req_get_result_url(product.response_type, resource) if uri?(resource) @@ -58,48 +49,80 @@ def req_get_result(product, resource) # Retrieves a queued job. # # @param job_id [String] ID of the job or URL to the job. 
- # @return [Mindee::Parsing::V2::JobResponse] + # @return [Mindee::V2::Parsing::JobResponse] def req_get_job(job_id) @settings.check_api_key - response = inference_job_req_get( - job_id - ) - Mindee::Parsing::V2::JobResponse.new(process_response(response)) + response = poll("#{@settings.base_url}/v2/jobs/#{job_id}") + Mindee::V2::Parsing::JobResponse.new(process_response(response)) + end + + # Retrieves a list of models. + # @param model_name [String, nil] + # @param model_type [String, nil] + # @return [Mindee::V2::Parsing::Search::SearchResponse] + def search_models(model_name, model_type) + Mindee::V2::Parsing::Search::SearchResponse.new(process_response(req_get_search_models(model_name, model_type))) end private + # Retrieves a list of models. + # @param model_name [String, nil] + # @param model_type [String, nil] + # @return [Net::HTTPResponse] + def req_get_search_models(model_name, model_type) + url = "#{@settings.base_url}/v2/search/models" + uri = URI(url) + + query_params = {} + query_params[:name] = model_name if model_name + query_params[:model_type] = model_type if model_type + uri.query = URI.encode_www_form(query_params) unless query_params.empty? + + headers = { + 'Authorization' => @settings.api_key, + 'User-Agent' => @settings.user_agent, + } + req = Net::HTTP::Get.new(uri, headers) + req['Transfer-Encoding'] = 'chunked' + + Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @settings.request_timeout) do |http| + return http.request(req) + end + raise Mindee::Errors::MindeeError, 'Could not resolve server response.' + end + + # @param resource [String] Resource to check; a plain `http` URL raises Mindee::Errors::MindeeError. + # @return [Boolean] `true` for an `https` URL, `false` for any other or unparsable value. + def uri?(resource) + uri = URI.parse(resource) + raise Mindee::Errors::MindeeError, 'HTTP is not supported.' if uri.scheme == 'http' + uri.scheme == 'https' + rescue URI::BadURIError, URI::InvalidURIError + false + end + # Retrieves a queued job.
# # @param url [String] - # @return [Mindee::Parsing::V2::JobResponse] + # @return [Mindee::V2::Parsing::JobResponse] def req_get_job_url(url) @settings.check_api_key response = poll(url) - Mindee::Parsing::V2::JobResponse.new(process_response(response)) + Mindee::V2::Parsing::JobResponse.new(process_response(response)) end # Retrieves a queued job. # - # @param result_class [Mindee::V2::Parsing::BaseResponse] + # @param result_class [Class] # @param url [String] - # @return [Mindee::Parsing::V2::JobResponse] + # @return [Mindee::V2::Parsing::BaseResponse] def req_get_result_url(result_class, url) @settings.check_api_key response = poll(url) result_class.new(process_response(response)) end - # @param resource [String] Resource to check. - # @return [Boolean] - def uri?(resource) - uri = URI.parse(resource) - throw Mindee::Errors::MindeeError, 'HTTP is not supported.' if uri.scheme == 'http' - uri.scheme == 'https' - rescue URI::BadURIError, URI::InvalidURIError - false - end - # Converts an HTTP response to a parsed response object. # # @param response [Net::HTTPResponse, nil] @@ -137,45 +160,29 @@ def poll(url) raise Mindee::Errors::MindeeError, 'Could not resolve server response.' end - # Polls the API for the status of a job. - # - # @param job_id [String] ID of the job. - # @return [Net::HTTPResponse] - def inference_job_req_get(job_id) - poll("#{@settings.base_url}/jobs/#{job_id}") - end - - # Polls the API for the result of an inference. - # - # @param queue_id [String] ID of the queue. - # @return [Net::HTTPResponse] - def inference_result_req_get(queue_id) - poll("#{@settings.base_url}/inferences/#{queue_id}") - end - # Polls the API for the result of an inference. # # @param queue_id [String] ID of the queue. # @param product [Class] The return class. 
# @return [Net::HTTPResponse] def result_req_get(queue_id, product) - poll("#{@settings.base_url}/products/#{product.slug}/results/#{queue_id}") + poll("#{@settings.base_url}/v2/products/#{product.slug}/results/#{queue_id}") end # Handle parameters for the enqueue form # @param form_data [Array] Array of form fields - # @param params [Input::InferenceParameters] Inference options. + # @param params [V2::Product::Extraction::Params::ExtractionParameters] Inference options. def enqueue_form_options(form_data, params) - # deal with optional features form_data.push(['rag', params.rag.to_s]) unless params.rag.nil? form_data.push(['raw_text', params.raw_text.to_s]) unless params.raw_text.nil? form_data.push(['polygon', params.polygon.to_s]) unless params.polygon.nil? form_data.push(['confidence', params.confidence.to_s]) unless params.confidence.nil? - form_data.push ['file_alias', params.file_alias] if params.file_alias form_data.push ['text_context', params.text_context] if params.text_context form_data.push ['data_schema', params.data_schema.to_s] if params.data_schema unless params.webhook_ids.nil? || params.webhook_ids.empty? - form_data.push ['webhook_ids', params.webhook_ids.join(',')] + params.webhook_ids.each do |webhook_id| + form_data.push ['webhook_ids[]', webhook_id] + end end form_data end @@ -184,15 +191,19 @@ def enqueue_form_options(form_data, params) # @param params [Input::BaseParameters] Inference options. 
# @return [Net::HTTPResponse, nil] def enqueue(input_source, params) - uri = URI("#{@settings.base_url}/products/#{params.slug}/enqueue") + uri = URI("#{@settings.base_url}/v2/products/#{params.slug}/enqueue") form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) - [['url', input_source.url]] # : Array[untyped] + [['url', input_source.url]] # : Array[Array[untyped]] else file_data, file_metadata = input_source.read_contents(close: params.close_file) - [['file', file_data, file_metadata]] # : Array[untyped] + [['file', file_data, file_metadata]] # : Array[Array[untyped]] end form_data.push(['model_id', params.model_id]) + form_data.push ['file_alias', params.file_alias] if params.file_alias + if params.is_a?(V2::Product::Extraction::Params::ExtractionParameters) + form_data = enqueue_form_options(form_data, params) + end form_data = params.append_form_data(form_data) diff --git a/lib/mindee/input.rb b/lib/mindee/input.rb index 9bbf38a87..ea85e6fd7 100644 --- a/lib/mindee/input.rb +++ b/lib/mindee/input.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true require_relative 'input/data_schema' -require_relative 'input/inference_parameters' require_relative 'input/polling_options' require_relative 'input/sources' diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb deleted file mode 100644 index a377d052c..000000000 --- a/lib/mindee/input/inference_parameters.rb +++ /dev/null @@ -1,119 +0,0 @@ -# frozen_string_literal: true - -require_relative 'data_schema' -require_relative '../input/base_parameters' - -module Mindee - module Input - # Parameters to set when sending a file for inference. - class InferenceParameters < Mindee::Input::BaseParameters - # @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation. - attr_reader :rag - - # @return [Boolean, nil] Extract the full text content from the document as strings, - # and fill the raw_text` attribute. 
- attr_reader :raw_text - - # @return [Boolean, nil] Calculate bounding box polygons for all fields, - # and fill their `locations` attribute. - attr_reader :polygon - - # @return [Boolean, nil] Boost the precision and accuracy of all extractions. - # Calculate confidence scores for all fields, and fill their confidence attribute. - attr_reader :confidence - - # @return [String, nil] Additional text context used by the model during inference. - # Not recommended, for specific use only. - attr_reader :text_context - - # @return [DataSchemaField] - attr_reader :data_schema - - # @return [String] Slug for the endpoint. - def self.slug - 'extraction' - end - - # rubocop:disable Metrics/ParameterLists - # @param [String] model_id ID of the model - # @param [Boolean, nil] rag Whether to enable RAG. - # @param [Boolean, nil] raw_text Whether to enable rax text. - # @param [Boolean, nil] polygon Whether to enable polygons. - # @param [Boolean, nil] confidence Whether to enable confidence scores. - # @param [String, nil] file_alias File alias, if applicable. - # @param [Array, nil] webhook_ids - # @param [String, nil] text_context - # @param [Hash, nil] polling_options - # @param [Boolean, nil] close_file - # @param [DataSchemaField, String, Hash nil] data_schema - def initialize( - model_id, - rag: nil, - raw_text: nil, - polygon: nil, - confidence: nil, - file_alias: nil, - webhook_ids: nil, - text_context: nil, - polling_options: nil, - close_file: true, - data_schema: nil - ) - super( - model_id, - file_alias: file_alias, - webhook_ids: webhook_ids, - polling_options: polling_options, - close_file: close_file - ) - - @rag = rag - @raw_text = raw_text - @polygon = polygon - @confidence = confidence - @text_context = text_context - @data_schema = DataSchema.new(data_schema) unless data_schema.nil? - # rubocop:enable Metrics/ParameterLists - end - - # Appends inference-specific form data to the provided array. 
- # @param [Array] form_data Array of form fields - # @return [Array] - def append_form_data(form_data) - new_form_data = super - - new_form_data.push(['rag', @rag.to_s]) unless @rag.nil? - new_form_data.push(['raw_text', @raw_text.to_s]) unless @raw_text.nil? - new_form_data.push(['polygon', @polygon.to_s]) unless @polygon.nil? - new_form_data.push(['confidence', @confidence.to_s]) unless @confidence.nil? - new_form_data.push(['text_context', @text_context]) if @text_context - new_form_data.push(['data_schema', @data_schema.to_s]) if @data_schema - - new_form_data - end - - # Loads a prediction from a Hash. - # @param [Hash] params Parameters to provide as a hash. - # @return [InferenceParameters] - def self.from_hash(params: {}) - rag = params.fetch(:rag, nil) - raw_text = params.fetch(:raw_text, nil) - polygon = params.fetch(:polygon, nil) - confidence = params.fetch(:confidence, nil) - base_params = load_from_hash(params: params) - new_params = base_params.merge(rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence) - model_id = new_params.fetch(:model_id) - - InferenceParameters.new( - model_id, rag: rag, - raw_text: raw_text, - polygon: polygon, - confidence: confidence, - file_alias: params.fetch(:file_alias, nil), - webhook_ids: params.fetch(:webhook_ids, nil), - close_file: params.fetch(:close_file, true) - ) - end - end - end -end diff --git a/lib/mindee/input/local_response.rb b/lib/mindee/input/local_response.rb index 5e6c14400..b32711e5e 100644 --- a/lib/mindee/input/local_response.rb +++ b/lib/mindee/input/local_response.rb @@ -70,12 +70,10 @@ def valid_hmac_signature?(secret_key, signature) end # Deserializes a loaded response - # @param response_class [Parsing::V2::CommonResponse] class to return. - # @return [Parsing::V2::JobResponse, Mindee::V2::Parsing::CommonResponse] + # @param response_class [Class] class to return. 
+ # @return [V2::Parsing::JobResponse, Mindee::V2::Parsing::BaseResponse] def deserialize_response(response_class) - response_class.new(as_hash) # : Mindee::Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse - rescue StandardError - raise Errors::MindeeInputError, 'Invalid response provided.' + response_class.new(as_hash) # : Mindee::V2::Parsing::JobResponse | Mindee::V2::Parsing::BaseResponse end end end diff --git a/lib/mindee/logging/logger.rb b/lib/mindee/logging/logger.rb index ec60b5eb6..2bc2c62d3 100644 --- a/lib/mindee/logging/logger.rb +++ b/lib/mindee/logging/logger.rb @@ -5,8 +5,16 @@ module Mindee # Mindee logging module. module Logging - @logger = Logger.new($stdout) log_level = ENV.fetch('MINDEE_LOG_LEVEL', 'WARN') + log_output = ENV.fetch('MINDEE_LOG_OUTPUT', 'stderr') + @logger = if log_output == 'stderr' + Logger.new($stderr) + elsif log_output == 'stdout' + Logger.new($stdout) + else + warn "Invalid MINDEE_LOG_OUTPUT='#{log_output}', defaulting to 'stderr'" + Logger.new($stderr) + end @logger.level = Logger.const_get(log_level) class << self diff --git a/lib/mindee/parsing/common/api_response.rb b/lib/mindee/parsing/common/api_response.rb index 7e9f47349..5689de2e9 100644 --- a/lib/mindee/parsing/common/api_response.rb +++ b/lib/mindee/parsing/common/api_response.rb @@ -39,10 +39,10 @@ class ApiResponse # @param product_class [Mindee::Inference] # @param http_response [Hash] - # @param raw_http [Hash] + # @param raw_http [String] def initialize(product_class, http_response, raw_http) logger.debug('Handling API response') - @raw_http = raw_http.to_s + @raw_http = raw_http raise Errors::MindeeAPIError, 'Invalid response format.' 
unless http_response.key?('api_request') @api_request = Mindee::Parsing::Common::ApiRequest.new(http_response['api_request']) diff --git a/lib/mindee/parsing/v2.rb b/lib/mindee/parsing/v2.rb deleted file mode 100644 index d5ae4ebb8..000000000 --- a/lib/mindee/parsing/v2.rb +++ /dev/null @@ -1,16 +0,0 @@ -# frozen_string_literal: true - -require_relative 'v2/common_response' -require_relative 'v2/error_response' -require_relative 'v2/field' -require_relative 'v2/inference' -require_relative 'v2/inference_file' -require_relative 'v2/inference_model' -require_relative 'v2/inference_response' -require_relative 'v2/inference_result' -require_relative 'v2/inference_active_options' -require_relative 'v2/job' -require_relative 'v2/job_response' -require_relative 'v2/job_webhook' -require_relative 'v2/raw_text' -require_relative 'v2/raw_text_page' diff --git a/lib/mindee/parsing/v2/inference.rb b/lib/mindee/parsing/v2/inference.rb deleted file mode 100644 index 328015640..000000000 --- a/lib/mindee/parsing/v2/inference.rb +++ /dev/null @@ -1,44 +0,0 @@ -# frozen_string_literal: true - -require_relative 'inference_job' -require_relative 'inference_model' -require_relative 'inference_file' -require_relative 'inference_result' -require_relative 'inference_active_options' -require_relative '../../v2/parsing/base_inference' - -module Mindee - module Parsing - module V2 - # Complete data returned by an inference request. - class Inference < Mindee::V2::Parsing::BaseInference - # @return [InferenceActiveOptions] Options which were activated during the inference. - attr_reader :active_options - # @return [InferenceResult] Result contents. - attr_reader :result - - @params_type = Input::InferenceParameters - @slug = 'extraction' - @response_type = InferenceResponse - - # @param server_response [Hash] Hash representation of the JSON returned by the service. 
- def initialize(server_response) - super - @active_options = InferenceActiveOptions.new(server_response['active_options']) - @result = InferenceResult.new(server_response['result']) - end - - # String representation. - # @return [String] - def to_s - [ - super, - @active_options.to_s, - @result.to_s, - '', - ].join("\n") - end - end - end - end -end diff --git a/lib/mindee/parsing/v2/inference_response.rb b/lib/mindee/parsing/v2/inference_response.rb deleted file mode 100644 index 7911c33ba..000000000 --- a/lib/mindee/parsing/v2/inference_response.rb +++ /dev/null @@ -1,33 +0,0 @@ -# frozen_string_literal: true - -require_relative 'common_response' -require_relative 'inference' -require_relative '../../v2/parsing/base_response' - -module Mindee - module Parsing - module V2 - # HTTP response wrapper that embeds a V2 Inference. - class InferenceResponse < Mindee::V2::Parsing::BaseResponse - # @return [Inference] Parsed inference payload. - attr_reader :inference - - @slug = 'extraction' - @_params_type = Input::InferenceParameters - - # @param server_response [Hash] Hash parsed from the API JSON response. - def initialize(server_response) - super - - @inference = Inference.new(server_response['inference']) - end - - # String representation. - # @return [String] - def to_s - @inference.to_s - end - end - end - end -end diff --git a/lib/mindee/parsing/v2/inference_result.rb b/lib/mindee/parsing/v2/inference_result.rb deleted file mode 100644 index 8ee1bd1c2..000000000 --- a/lib/mindee/parsing/v2/inference_result.rb +++ /dev/null @@ -1,42 +0,0 @@ -# frozen_string_literal: true - -require_relative 'field/inference_fields' -require_relative 'raw_text' -require_relative 'rag_metadata' - -module Mindee - module Parsing - module V2 - # Wrapper for the result of a V2 inference request. - class InferenceResult - # @return [Mindee::Parsing::V2::Field::InferenceFields] Fields produced by the model. 
- attr_reader :fields - # @return [Mindee::Parsing::V2::RawText, nil] Optional extra data. - attr_reader :raw_text - # @return [Mindee::Parsing::V2::RAGMetadata, nil] Optional RAG metadata. - attr_reader :rag - - # @param server_response [Hash] Hash version of the JSON returned by the API. - def initialize(server_response) - raise ArgumentError, 'server_response must be a Hash' unless server_response.is_a?(Hash) - - @fields = Field::InferenceFields.new(server_response['fields']) - - @raw_text = server_response['raw_text'] ? RawText.new(server_response['raw_text']) : nil - @rag = (V2::RAGMetadata.new(server_response['rag']) if server_response.key?('rag') && server_response['rag']) - end - - # String representation. - # @return [String] - def to_s - parts = [ - 'Fields', - '======', - @fields.to_s, - ] - parts.join("\n") - end - end - end - end -end diff --git a/lib/mindee/pdf/pdf_processor.rb b/lib/mindee/pdf/pdf_processor.rb index 4f37116f4..4796c0e3c 100644 --- a/lib/mindee/pdf/pdf_processor.rb +++ b/lib/mindee/pdf/pdf_processor.rb @@ -61,6 +61,11 @@ def self.indexes_from_remove(page_indexes, all_pages) # @param io_stream [StringIO] # @return [Origami::PDF] def self.open_pdf(io_stream) + unless PDFTools.pdf_header?(io_stream) + raise Origami::InvalidPDFError, + 'Input stream does not contain a PDF header.' + end + pdf_parser = Origami::PDF::LinearParser.new({ verbosity: Origami::Parser::VERBOSE_QUIET }) io_stream.seek(0) pdf_parser.parse(io_stream) diff --git a/lib/mindee/pdf/pdf_tools.rb b/lib/mindee/pdf/pdf_tools.rb index c3e6bb549..e2ee8bf6e 100644 --- a/lib/mindee/pdf/pdf_tools.rb +++ b/lib/mindee/pdf/pdf_tools.rb @@ -53,10 +53,28 @@ def self.stream_has_text?(stream) text_operators.any? { |op| data.include?(op) } end + # Checks whether a stream contains a PDF header near the beginning. + # @param [StringIO] io_stream Binary-encoded stream. + # @param [Integer] maximum_offset Maximum allowed offset to find '%PDF-'. 
+ # @return [bool] `true` when the stream appears to be a PDF. + def self.pdf_header?(io_stream, maximum_offset: 500) + initial_pos = nil + initial_pos = io_stream.pos if io_stream.respond_to?(:pos) + io_stream.seek(0) + head = io_stream.gets('%PDF-', [maximum_offset, 0].max) # bounded read; a negative limit would mean "no limit" + !head.nil? && head.end_with?('%PDF-') && !io_stream.eof? + rescue TypeError, IOError, SystemCallError + false + ensure + io_stream.seek(initial_pos) if !initial_pos.nil? && io_stream.respond_to?(:seek) + end + # Checks whether the file has source_text. Sends false if the file isn't a PDF. # @param [StringIO] pdf_data Abinary-encoded stream representing the PDF file. # @return [bool] `true` if the pdf has source text, false otherwise. def self.source_text?(pdf_data) + return false unless pdf_header?(pdf_data) + begin pdf_data.rewind pdf = Origami::PDF.read(pdf_data) diff --git a/lib/mindee/v2.rb b/lib/mindee/v2.rb index 6950e5f93..59d6e4157 100644 --- a/lib/mindee/v2.rb +++ b/lib/mindee/v2.rb @@ -1,4 +1,4 @@ # frozen_string_literal: true -require_relative 'parsing' -require_relative 'product' +require_relative 'v2/parsing' +require_relative 'v2/product' diff --git a/lib/mindee/v2/parsing.rb b/lib/mindee/v2/parsing.rb new file mode 100644 index 000000000..46fd19e4b --- /dev/null +++ b/lib/mindee/v2/parsing.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require_relative 'parsing/common_response' +require_relative 'parsing/error_response' +require_relative 'parsing/field' +require_relative 'parsing/inference_file' +require_relative 'parsing/inference_job' +require_relative 'parsing/inference_model' +require_relative 'parsing/inference_active_options' +require_relative 'parsing/job' +require_relative 'parsing/job_response' +require_relative 'parsing/job_webhook' +require_relative 'parsing/rag_metadata' +require_relative 'parsing/raw_text' +require_relative 'parsing/raw_text_page' +require_relative 'parsing/search' diff --git a/lib/mindee/v2/parsing/base_inference.rb b/lib/mindee/v2/parsing/base_inference.rb index
31cbe87cf..8e62cf022 100644 --- a/lib/mindee/v2/parsing/base_inference.rb +++ b/lib/mindee/v2/parsing/base_inference.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require_relative '../product/base_product' -require_relative '../../parsing/v2/inference_response' +require_relative '../parsing' module Mindee module V2 @@ -10,9 +10,9 @@ module Parsing class BaseInference < Mindee::V2::Product::BaseProduct # @return [InferenceJob] Metadata about the job. attr_reader :job - # @return [Parsing::V2::InferenceModel] Model info for the inference. + # @return [V2::Parsing::InferenceModel] Model info for the inference. attr_reader :model - # @return [Parsing::V2::InferenceFile] File info for the inference. + # @return [V2::Parsing::InferenceFile] File info for the inference. attr_reader :file # @return [String] ID of the inference. attr_reader :id @@ -21,10 +21,10 @@ def initialize(http_response) raise ArgumentError, 'Server response must be a Hash' unless http_response.is_a?(Hash) super() - @model = Mindee::Parsing::V2::InferenceModel.new(http_response['model']) - @file = Mindee::Parsing::V2::InferenceFile.new(http_response['file']) + @model = Mindee::V2::Parsing::InferenceModel.new(http_response['model']) + @file = Mindee::V2::Parsing::InferenceFile.new(http_response['file']) @id = http_response['id'] - @job = Mindee::Parsing::V2::InferenceJob.new(http_response['job']) if http_response.key?('job') + @job = Mindee::V2::Parsing::InferenceJob.new(http_response['job']) if http_response.key?('job') end # String representation. diff --git a/lib/mindee/v2/parsing/base_response.rb b/lib/mindee/v2/parsing/base_response.rb index a604e1564..126baeec5 100644 --- a/lib/mindee/v2/parsing/base_response.rb +++ b/lib/mindee/v2/parsing/base_response.rb @@ -1,10 +1,12 @@ # frozen_string_literal: true +require_relative 'common_response' + module Mindee module V2 module Parsing # Base class for V2 inference responses. 
- class BaseResponse < Mindee::Parsing::V2::CommonResponse + class BaseResponse < Mindee::V2::Parsing::CommonResponse # @return [BaseInference] The inference result for a split utility request attr_reader :inference end diff --git a/lib/mindee/parsing/v2/common_response.rb b/lib/mindee/v2/parsing/common_response.rb similarity index 69% rename from lib/mindee/parsing/v2/common_response.rb rename to lib/mindee/v2/parsing/common_response.rb index b28938042..bcdb7223a 100644 --- a/lib/mindee/parsing/v2/common_response.rb +++ b/lib/mindee/v2/parsing/common_response.rb @@ -1,16 +1,18 @@ # frozen_string_literal: true +require 'json' + module Mindee - module Parsing - module V2 + module V2 + module Parsing # Base class for inference and job responses on the V2 API. class CommonResponse - # @return [Hash] + # @return [String] attr_reader :raw_http # @param http_response [Hash] def initialize(http_response) - @raw_http = http_response + @raw_http = JSON.generate(http_response) end end end diff --git a/lib/mindee/parsing/v2/error_item.rb b/lib/mindee/v2/parsing/error_item.rb similarity index 94% rename from lib/mindee/parsing/v2/error_item.rb rename to lib/mindee/v2/parsing/error_item.rb index bc81da6a0..d77475783 100644 --- a/lib/mindee/parsing/v2/error_item.rb +++ b/lib/mindee/v2/parsing/error_item.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Individual error item. class ErrorItem # @return [String, nil] A JSON Pointer to the location of the body property. 
diff --git a/lib/mindee/parsing/v2/error_response.rb b/lib/mindee/v2/parsing/error_response.rb similarity index 98% rename from lib/mindee/parsing/v2/error_response.rb rename to lib/mindee/v2/parsing/error_response.rb index 6745603e3..cec156101 100644 --- a/lib/mindee/parsing/v2/error_response.rb +++ b/lib/mindee/v2/parsing/error_response.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Encapsulates information returned by the API when an error occurs. class ErrorResponse # @return [Integer] The HTTP status code returned by the server. diff --git a/lib/mindee/parsing/v2/field.rb b/lib/mindee/v2/parsing/field.rb similarity index 100% rename from lib/mindee/parsing/v2/field.rb rename to lib/mindee/v2/parsing/field.rb diff --git a/lib/mindee/parsing/v2/field/base_field.rb b/lib/mindee/v2/parsing/field/base_field.rb similarity index 98% rename from lib/mindee/parsing/v2/field/base_field.rb rename to lib/mindee/v2/parsing/field/base_field.rb index 1256c6425..89913d73e 100644 --- a/lib/mindee/parsing/v2/field/base_field.rb +++ b/lib/mindee/v2/parsing/field/base_field.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Field submodule for V2. module Field # Base class for V2 fields. diff --git a/lib/mindee/parsing/v2/field/field_confidence.rb b/lib/mindee/v2/parsing/field/field_confidence.rb similarity index 99% rename from lib/mindee/parsing/v2/field/field_confidence.rb rename to lib/mindee/v2/parsing/field/field_confidence.rb index 0570b7a90..647196742 100644 --- a/lib/mindee/parsing/v2/field/field_confidence.rb +++ b/lib/mindee/v2/parsing/field/field_confidence.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # Confidence level of a field as returned by the V2 API. 
class FieldConfidence diff --git a/lib/mindee/parsing/v2/field/field_location.rb b/lib/mindee/v2/parsing/field/field_location.rb similarity index 96% rename from lib/mindee/parsing/v2/field/field_location.rb rename to lib/mindee/v2/parsing/field/field_location.rb index d5869b57a..5b1e49bcd 100644 --- a/lib/mindee/parsing/v2/field/field_location.rb +++ b/lib/mindee/v2/parsing/field/field_location.rb @@ -3,8 +3,8 @@ require_relative '../../../geometry/polygon' module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # Location of a field. class FieldLocation diff --git a/lib/mindee/parsing/v2/field/inference_fields.rb b/lib/mindee/v2/parsing/field/inference_fields.rb similarity index 99% rename from lib/mindee/parsing/v2/field/inference_fields.rb rename to lib/mindee/v2/parsing/field/inference_fields.rb index 8d1094c90..16aa7fc8e 100644 --- a/lib/mindee/parsing/v2/field/inference_fields.rb +++ b/lib/mindee/v2/parsing/field/inference_fields.rb @@ -3,8 +3,8 @@ require_relative 'base_field' module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # Represents a hash-like collection of inference fields, providing methods for # retrieval and string representation. diff --git a/lib/mindee/parsing/v2/field/list_field.rb b/lib/mindee/v2/parsing/field/list_field.rb similarity index 98% rename from lib/mindee/parsing/v2/field/list_field.rb rename to lib/mindee/v2/parsing/field/list_field.rb index 33bb5470e..ab8c36a1c 100644 --- a/lib/mindee/parsing/v2/field/list_field.rb +++ b/lib/mindee/v2/parsing/field/list_field.rb @@ -3,8 +3,8 @@ require_relative 'base_field' module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # Represents a field that contains a list of items. 
class ListField < BaseField diff --git a/lib/mindee/parsing/v2/field/object_field.rb b/lib/mindee/v2/parsing/field/object_field.rb similarity index 99% rename from lib/mindee/parsing/v2/field/object_field.rb rename to lib/mindee/v2/parsing/field/object_field.rb index 39a1f548b..a0ce89a49 100644 --- a/lib/mindee/parsing/v2/field/object_field.rb +++ b/lib/mindee/v2/parsing/field/object_field.rb @@ -4,8 +4,8 @@ require_relative 'inference_fields' module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # A field containing a nested set of inference fields. class ObjectField < BaseField diff --git a/lib/mindee/parsing/v2/field/simple_field.rb b/lib/mindee/v2/parsing/field/simple_field.rb similarity index 98% rename from lib/mindee/parsing/v2/field/simple_field.rb rename to lib/mindee/v2/parsing/field/simple_field.rb index f5e1903e2..2bc8f675c 100644 --- a/lib/mindee/parsing/v2/field/simple_field.rb +++ b/lib/mindee/v2/parsing/field/simple_field.rb @@ -3,8 +3,8 @@ require_relative 'base_field' module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # A simple field with a scalar value. class SimpleField < BaseField diff --git a/lib/mindee/parsing/v2/inference_active_options.rb b/lib/mindee/v2/parsing/inference_active_options.rb similarity index 98% rename from lib/mindee/parsing/v2/inference_active_options.rb rename to lib/mindee/v2/parsing/inference_active_options.rb index 47d884730..32db887a3 100644 --- a/lib/mindee/parsing/v2/inference_active_options.rb +++ b/lib/mindee/v2/parsing/inference_active_options.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Data schema options activated during the inference. 
class DataSchemaActiveOption # @return [Boolean] diff --git a/lib/mindee/parsing/v2/inference_file.rb b/lib/mindee/v2/parsing/inference_file.rb similarity index 97% rename from lib/mindee/parsing/v2/inference_file.rb rename to lib/mindee/v2/parsing/inference_file.rb index 78dda706b..6758c6984 100644 --- a/lib/mindee/parsing/v2/inference_file.rb +++ b/lib/mindee/v2/parsing/inference_file.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Information about a file returned in an inference. class InferenceFile # @return [String] File name. diff --git a/lib/mindee/parsing/v2/inference_job.rb b/lib/mindee/v2/parsing/inference_job.rb similarity index 94% rename from lib/mindee/parsing/v2/inference_job.rb rename to lib/mindee/v2/parsing/inference_job.rb index e6b6f4b6a..c6e3c4764 100644 --- a/lib/mindee/parsing/v2/inference_job.rb +++ b/lib/mindee/v2/parsing/inference_job.rb @@ -4,8 +4,8 @@ require_relative 'job' module Mindee - module Parsing - module V2 + module V2 + module Parsing # HTTP response wrapper that embeds a V2 job. class InferenceJob # @return [String] UUID of the Job. diff --git a/lib/mindee/parsing/v2/inference_model.rb b/lib/mindee/v2/parsing/inference_model.rb similarity index 95% rename from lib/mindee/parsing/v2/inference_model.rb rename to lib/mindee/v2/parsing/inference_model.rb index 85cb4eb8d..fa6b19f9d 100644 --- a/lib/mindee/parsing/v2/inference_model.rb +++ b/lib/mindee/v2/parsing/inference_model.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # ID of the model that produced the inference. class InferenceModel # @return [String] Identifier of the model. 
diff --git a/lib/mindee/parsing/v2/job.rb b/lib/mindee/v2/parsing/job.rb similarity index 99% rename from lib/mindee/parsing/v2/job.rb rename to lib/mindee/v2/parsing/job.rb index 3185397f4..41b091c3a 100644 --- a/lib/mindee/parsing/v2/job.rb +++ b/lib/mindee/v2/parsing/job.rb @@ -5,8 +5,8 @@ require_relative 'job_webhook' module Mindee - module Parsing - module V2 + module V2 + module Parsing # Metadata returned when polling a job (asynchronous request). class Job # @return [String] Unique job identifier. diff --git a/lib/mindee/parsing/v2/job_response.rb b/lib/mindee/v2/parsing/job_response.rb similarity index 95% rename from lib/mindee/parsing/v2/job_response.rb rename to lib/mindee/v2/parsing/job_response.rb index 70aab84ac..5515a11ba 100644 --- a/lib/mindee/parsing/v2/job_response.rb +++ b/lib/mindee/v2/parsing/job_response.rb @@ -4,8 +4,8 @@ require_relative 'job' module Mindee - module Parsing - module V2 + module V2 + module Parsing # HTTP response wrapper that embeds a V2 job. class JobResponse < CommonResponse # @return [Job] Parsed job payload. diff --git a/lib/mindee/parsing/v2/job_webhook.rb b/lib/mindee/v2/parsing/job_webhook.rb similarity index 95% rename from lib/mindee/parsing/v2/job_webhook.rb rename to lib/mindee/v2/parsing/job_webhook.rb index ac291a0b3..15aae54ad 100644 --- a/lib/mindee/parsing/v2/job_webhook.rb +++ b/lib/mindee/v2/parsing/job_webhook.rb @@ -1,12 +1,11 @@ -# lib/mindee/parsing/v2/job_response_webhook.rb # frozen_string_literal: true require 'date' require_relative 'error_response' module Mindee - module Parsing - module V2 + module V2 + module Parsing # Information about a webhook created for a job response. class JobWebhook # @return [String] Identifier of the webhook. 
diff --git a/lib/mindee/parsing/v2/rag_metadata.rb b/lib/mindee/v2/parsing/rag_metadata.rb similarity index 92% rename from lib/mindee/parsing/v2/rag_metadata.rb rename to lib/mindee/v2/parsing/rag_metadata.rb index 5bebc6033..e9682413d 100644 --- a/lib/mindee/parsing/v2/rag_metadata.rb +++ b/lib/mindee/v2/parsing/rag_metadata.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Metadata about the RAG operation. class RAGMetadata # The UUID of the matched document used during the RAG operation. diff --git a/lib/mindee/parsing/v2/raw_text.rb b/lib/mindee/v2/parsing/raw_text.rb similarity index 85% rename from lib/mindee/parsing/v2/raw_text.rb rename to lib/mindee/v2/parsing/raw_text.rb index 9fa829f88..033d7f262 100644 --- a/lib/mindee/parsing/v2/raw_text.rb +++ b/lib/mindee/v2/parsing/raw_text.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Raw text extracted from all pages in the document. class RawText - # @return [Array[Mindee::Parsing::V2::RawTextPage]] List of pages with their extracted text content. + # @return [Array[Mindee::V2::Parsing::RawTextPage]] List of pages with their extracted text content. attr_reader :pages # @param server_response [Hash] Raw JSON parsed into a Hash. diff --git a/lib/mindee/parsing/v2/raw_text_page.rb b/lib/mindee/v2/parsing/raw_text_page.rb similarity index 94% rename from lib/mindee/parsing/v2/raw_text_page.rb rename to lib/mindee/v2/parsing/raw_text_page.rb index 3b952b626..bf34ee429 100644 --- a/lib/mindee/parsing/v2/raw_text_page.rb +++ b/lib/mindee/v2/parsing/raw_text_page.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Mindee - module Parsing - module V2 + module V2 + module Parsing # Raw text extracted from a single page. class RawTextPage # @return [String] Text content of the page as a single string. '\n' is used to separate lines. 
diff --git a/lib/mindee/v2/parsing/search.rb b/lib/mindee/v2/parsing/search.rb new file mode 100644 index 000000000..4972c44e8 --- /dev/null +++ b/lib/mindee/v2/parsing/search.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +require_relative 'search/pagination_metadata' +require_relative 'search/search_model' +require_relative 'search/search_models' +require_relative 'search/search_response' diff --git a/lib/mindee/v2/parsing/search/pagination_metadata.rb b/lib/mindee/v2/parsing/search/pagination_metadata.rb new file mode 100644 index 000000000..207a068a4 --- /dev/null +++ b/lib/mindee/v2/parsing/search/pagination_metadata.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + module Search + # Pagination Metadata data associated with model search. + class PaginationMetadata + # @return [Integer] Number of items per page. + attr_reader :per_page + + # @return [Integer] 1-indexed page number. + attr_reader :page + + # @return [Integer] Total items. + attr_reader :total_items + + # @return [Integer] Total number of pages. + attr_reader :total_pages + + # @param raw_response [Hash] The parsed JSON payload mapping to pagination metadata. + def initialize(raw_response) + @per_page = raw_response['per_page'] + @page = raw_response['page'] + @total_items = raw_response['total_items'] + @total_pages = raw_response['total_pages'] + end + + # String representation of the pagination metadata. 
+ # @return [String] + def to_s + [ + ":Per Page: #{@per_page}", + ":Page: #{@page}", + ":Total Items: #{@total_items}", + ":Total Pages: #{@total_pages}", + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/parsing/search/search_model.rb b/lib/mindee/v2/parsing/search/search_model.rb new file mode 100644 index 000000000..ee3ec5edc --- /dev/null +++ b/lib/mindee/v2/parsing/search/search_model.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + module Search + # Individual model information. + class SearchModel + # @return [String] ID of the model. + attr_reader :id + + # @return [String] Name of the model. + attr_reader :name + + # @return [String] Type of the model. + attr_reader :model_type + + # @param payload [Hash] The parsed JSON payload mapping to the search model. + def initialize(payload) + @id = payload['id'] + @name = payload['name'] + @model_type = payload['model_type'] + end + + # String representation of the model. + # @return [String] + def to_s + [ + ":Name: #{@name}", + ":ID: #{@id}", + ":Model Type: #{@model_type}", + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/parsing/search/search_models.rb b/lib/mindee/v2/parsing/search/search_models.rb new file mode 100644 index 000000000..9e90f0d62 --- /dev/null +++ b/lib/mindee/v2/parsing/search/search_models.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + module Search + # Array of search models. + class SearchModels < Array + def initialize(prediction) + super(prediction.map { |entry| SearchModel.new(entry) }) + end + + # Default string representation. + # @return [String] + def to_s + return "\n" if empty? 
+ + lines = flat_map do |model| + [ + "* :Name: #{model.name}", + " :ID: #{model.id}", + " :Model Type: #{model.model_type}", + ] + end + + # Joins all lines with a newline and appends a final newline + # to perfectly match the C# StringBuilder output. + "#{lines.join("\n")}\n" + end + end + end + end + end +end diff --git a/lib/mindee/v2/parsing/search/search_response.rb b/lib/mindee/v2/parsing/search/search_response.rb new file mode 100644 index 000000000..4ab081d2e --- /dev/null +++ b/lib/mindee/v2/parsing/search/search_response.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + module Search + # Models search response. + class SearchResponse < CommonResponse + # @return [Search::SearchModels] Parsed list of models. + attr_reader :models + # @return [Search::PaginationMetadata] Pagination metadata. + attr_reader :pagination_metadata + + def initialize(server_response) + super + + @models = Search::SearchModels.new(server_response['models']) + @pagination_metadata = PaginationMetadata.new(server_response['pagination']) + end + + def to_s + [ + 'Models', + '######', + models.to_s, + 'Pagination Metadata', + '###################', + pagination_metadata.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/base_product.rb b/lib/mindee/v2/product/base_product.rb index a11e0eb1a..d6633a707 100644 --- a/lib/mindee/v2/product/base_product.rb +++ b/lib/mindee/v2/product/base_product.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative '../parsing/base_response' +require_relative '../../input/base_parameters' module Mindee module V2 diff --git a/lib/mindee/v2/product/classification/classification_inference.rb b/lib/mindee/v2/product/classification/classification_inference.rb index 837c19c27..da7042e5a 100644 --- a/lib/mindee/v2/product/classification/classification_inference.rb +++ b/lib/mindee/v2/product/classification/classification_inference.rb @@ -1,6 +1,7 @@ # frozen_string_literal:
true require_relative 'classification_result' +require_relative '../../parsing/base_inference' module Mindee module V2 diff --git a/lib/mindee/v2/product/crop/crop_item.rb b/lib/mindee/v2/product/crop/crop_item.rb index 036b8c433..592fc999d 100644 --- a/lib/mindee/v2/product/crop/crop_item.rb +++ b/lib/mindee/v2/product/crop/crop_item.rb @@ -8,13 +8,13 @@ module Crop class CropItem # @return [String] Type or classification of the detected object. attr_reader :object_type - # @return [Parsing::V2::Field::FieldLocation] Coordinates of the detected object on the document. + # @return [V2::Parsing::Field::FieldLocation] Coordinates of the detected object on the document. attr_reader :location # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) @object_type = server_response['object_type'] - @location = Mindee::Parsing::V2::Field::FieldLocation.new(server_response['location']) + @location = Mindee::V2::Parsing::Field::FieldLocation.new(server_response['location']) end # String representation. diff --git a/lib/mindee/v2/product/extraction/extraction.rb b/lib/mindee/v2/product/extraction/extraction.rb index c01afee31..289a78da5 100644 --- a/lib/mindee/v2/product/extraction/extraction.rb +++ b/lib/mindee/v2/product/extraction/extraction.rb @@ -9,7 +9,8 @@ module Product module Extraction # Extraction product. # Note: currently a placeholder for the `Inference` class. 
- class Extraction < Mindee::Parsing::V2::Inference + class Extraction < Product::BaseProduct + @slug = 'extraction' @params_type = Product::Extraction::Params::ExtractionParameters @response_type = Product::Extraction::ExtractionResponse end diff --git a/lib/mindee/v2/product/extraction/extraction_inference.rb b/lib/mindee/v2/product/extraction/extraction_inference.rb index 08f66d173..e8cc017bb 100644 --- a/lib/mindee/v2/product/extraction/extraction_inference.rb +++ b/lib/mindee/v2/product/extraction/extraction_inference.rb @@ -1,22 +1,37 @@ # frozen_string_literal: true -require_relative '../../../parsing/v2/inference' +require_relative 'extraction_response' require_relative 'extraction_result' +require_relative 'params/extraction_parameters' module Mindee module V2 module Product module Extraction # Extraction inference. - class ExtractionInference < Mindee::Parsing::V2::Inference - # @return [ExtractionResult] Parsed inference payload. + class ExtractionInference < Mindee::V2::Parsing::BaseInference + # @return [InferenceActiveOptions] Options which were activated during the inference. + attr_reader :active_options + # @return [ExtractionResult] Result contents. attr_reader :result # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) super - @result = Mindee::V2::Product::Extraction::ExtractionResult.new(server_response['result']) + @active_options = V2::Parsing::InferenceActiveOptions.new(server_response['active_options']) + @result = ExtractionResult.new(server_response['result']) + end + + # String representation. 
+ # @return [String] + def to_s + [ + super, + @active_options.to_s, + @result.to_s, + '', + ].join("\n") end end end diff --git a/lib/mindee/v2/product/extraction/extraction_response.rb b/lib/mindee/v2/product/extraction/extraction_response.rb index 5b9f29b7f..af85cad30 100644 --- a/lib/mindee/v2/product/extraction/extraction_response.rb +++ b/lib/mindee/v2/product/extraction/extraction_response.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require_relative '../../../parsing/v2/inference_response' require_relative 'extraction_inference' module Mindee @@ -8,14 +7,24 @@ module V2 module Product module Extraction # HTTP response wrapper that embeds a V2 Inference. - class ExtractionResponse < Mindee::Parsing::V2::InferenceResponse + class ExtractionResponse < Mindee::V2::Parsing::BaseResponse # @return [ExtractionInference] Parsed inference payload. attr_reader :inference + @slug = 'extraction' + @_params_type = Params::ExtractionParameters + def initialize(server_response) super + @inference = Mindee::V2::Product::Extraction::ExtractionInference.new(server_response['inference']) end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end end end end diff --git a/lib/mindee/v2/product/extraction/extraction_result.rb b/lib/mindee/v2/product/extraction/extraction_result.rb index ac080ea3d..132052433 100644 --- a/lib/mindee/v2/product/extraction/extraction_result.rb +++ b/lib/mindee/v2/product/extraction/extraction_result.rb @@ -1,13 +1,42 @@ # frozen_string_literal: true -require_relative '../../../parsing/v2/inference_result' +require_relative '../../parsing' module Mindee module V2 module Product module Extraction # Result of an extraction utility inference. - class ExtractionResult < Mindee::Parsing::V2::InferenceResult + class ExtractionResult + # @return [Mindee::V2::Parsing::Field::InferenceFields] Fields produced by the model. + attr_reader :fields + # @return [Mindee::V2::Parsing::RawText, nil] Optional extra data. 
+ attr_reader :raw_text + # @return [Mindee::V2::Parsing::RAGMetadata, nil] Optional RAG metadata. + attr_reader :rag + + # @param server_response [Hash] Hash version of the JSON returned by the API. + def initialize(server_response) + raise ArgumentError, 'server_response must be a Hash' unless server_response.is_a?(Hash) + + @fields = Parsing::Field::InferenceFields.new(server_response['fields']) + + @raw_text = server_response['raw_text'] ? Parsing::RawText.new(server_response['raw_text']) : nil + return unless server_response.key?('rag') && server_response['rag'] + + @rag = Parsing::RAGMetadata.new(server_response['rag']) + end + + # String representation. + # @return [String] + def to_s + parts = [ + 'Fields', + '======', + @fields.to_s, + ] + parts.join("\n") + end end end end diff --git a/lib/mindee/v2/product/extraction/params/extraction_parameters.rb b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb index daabbc72a..dc5218aec 100644 --- a/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +++ b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative '../../../../input/inference_parameters' +require_relative '../../../../input/base_parameters' module Mindee module V2 @@ -8,8 +8,114 @@ module Product module Extraction module Params # Parameters accepted by the extraction v2 endpoint. - # Currently a placeholder for the InferenceParameters class. - class ExtractionParameters < Input::InferenceParameters + class ExtractionParameters < Mindee::Input::BaseParameters + # @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation. + attr_reader :rag + + # @return [Boolean, nil] Extract the full text content from the document as strings, + # and fill the `raw_text` attribute. + attr_reader :raw_text + + # @return [Boolean, nil] Calculate bounding box polygons for all fields, + # and fill their `locations` attribute.
+ attr_reader :polygon + + # @return [Boolean, nil] Boost the precision and accuracy of all extractions. + # Calculate confidence scores for all fields, and fill their confidence attribute. + attr_reader :confidence + + # @return [String, nil] Additional text context used by the model during inference. + # Not recommended, for specific use only. + attr_reader :text_context + + # @return [Input::DataSchema, nil] + attr_reader :data_schema + + # @return [String] Slug for the endpoint. + def self.slug + 'extraction' + end + + # rubocop:disable Metrics/ParameterLists + # @param [String] model_id ID of the model + # @param [Boolean, nil] rag Whether to enable RAG. + # @param [Boolean, nil] raw_text Whether to enable raw text. + # @param [Boolean, nil] polygon Whether to enable polygons. + # @param [Boolean, nil] confidence Whether to enable confidence scores. + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids + # @param [String, nil] text_context + # @param [Hash, nil] polling_options + # @param [Boolean, nil] close_file + # @param [DataSchemaField, String, Hash, nil] data_schema + def initialize( + model_id, + rag: nil, + raw_text: nil, + polygon: nil, + confidence: nil, + file_alias: nil, + webhook_ids: nil, + text_context: nil, + polling_options: nil, + close_file: true, + data_schema: nil + ) + super( + model_id, + file_alias: file_alias, + webhook_ids: webhook_ids, + polling_options: polling_options, + close_file: close_file + ) + + @rag = rag + @raw_text = raw_text + @polygon = polygon + @confidence = confidence + @text_context = text_context + @data_schema = Input::DataSchema.new(data_schema) unless data_schema.nil? + # rubocop:enable Metrics/ParameterLists + end + + # Appends inference-specific form data to the provided array. + # @param [Array] form_data Array of form fields + # @return [Array] + def append_form_data(form_data) + new_form_data = super + + new_form_data.push(['rag', @rag.to_s]) unless @rag.nil?
+ new_form_data.push(['raw_text', @raw_text.to_s]) unless @raw_text.nil? + new_form_data.push(['polygon', @polygon.to_s]) unless @polygon.nil? + new_form_data.push(['confidence', @confidence.to_s]) unless @confidence.nil? + new_form_data.push(['text_context', @text_context]) if @text_context + new_form_data.push(['data_schema', @data_schema.to_s]) if @data_schema + + new_form_data + end + + # Builds extraction parameters from a Hash; absent optional keys fall back to the constructor defaults. + # @param [Hash] params Parameters to provide as a hash. + # @return [ExtractionParameters] + def self.from_hash(params: {}) + rag = params.fetch(:rag, nil) + raw_text = params.fetch(:raw_text, nil) + polygon = params.fetch(:polygon, nil) + confidence = params.fetch(:confidence, nil) + base_params = load_from_hash(params: params) + new_params = base_params.merge(rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence) + model_id = new_params.fetch(:model_id) + + ExtractionParameters.new( + model_id, rag: rag, raw_text: raw_text, + polygon: polygon, confidence: confidence, + text_context: params.fetch(:text_context, nil), + data_schema: params.fetch(:data_schema, nil), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end end end end diff --git a/lib/mindee/version.rb b/lib/mindee/version.rb index 548479647..ab6545c56 100644 --- a/lib/mindee/version.rb +++ b/lib/mindee/version.rb @@ -3,7 +3,7 @@ # Mindee module Mindee # Current version. - VERSION = '4.13.0' + VERSION = '5.0.0-alpha.1' # Finds and return the current platform. # @return [Symbol, Hash[String | Symbol, Regexp], Nil?]
diff --git a/mindee.gemspec b/mindee.gemspec index eeba5d4e2..af6cc1bc9 100644 --- a/mindee.gemspec +++ b/mindee.gemspec @@ -22,7 +22,9 @@ Gem::Specification.new do |spec| `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(.github|spec|features)/}) } end spec.bindir = 'bin' - spec.executables = Dir['bin/*'].map { |f| File.basename(f) }.reject { |f| f == 'cli_products.rb' } + spec.executables = Dir.children('bin') + .select { |f| File.file?(File.join('bin', f)) } + .reject { |f| f == 'products.rb' } spec.require_paths = ['lib'] spec.required_ruby_version = Gem::Requirement.new('>= 3.0') diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index 66b5409c2..14290e9e9 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -7,14 +7,13 @@ module Mindee def logger: () -> Logger def initialize: (?api_key: String) -> void - def get_inference: (String inference_id) -> Parsing::V2::InferenceResponse def get_result: [T] (HTTP::_ProductClass[T] product, String resource) -> T - def get_result_url: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T - def get_job: (String job_id) -> Parsing::V2::JobResponse - def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::JobResponse - def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Parsing::V2::JobResponse + def get_job: (String job_id) -> V2::Parsing::JobResponse + def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> V2::Parsing::JobResponse def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, 
Hash[String | Symbol, untyped] | Input::BaseParameters params) -> T - def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::InferenceResponse + + def search_models: (String?, String?) -> Mindee::V2::Parsing::Search::SearchResponse + def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Input::BaseParameters end diff --git a/sig/mindee/errors/mindee_http_error_v2.rbs b/sig/mindee/errors/mindee_http_error_v2.rbs index 02e403a18..0c4381e1f 100644 --- a/sig/mindee/errors/mindee_http_error_v2.rbs +++ b/sig/mindee/errors/mindee_http_error_v2.rbs @@ -7,9 +7,9 @@ module Mindee attr_reader status: Integer attr_reader code: String attr_reader title: String - attr_reader errors: Array[Parsing::V2::ErrorItem] + attr_reader errors: Array[V2::Parsing::ErrorItem] - def initialize: (Hash[String, untyped] | Mindee::Parsing::V2::ErrorResponse) -> void + def initialize: (Hash[String, untyped] | Mindee::V2::Parsing::ErrorResponse) -> void end end end diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index c67b09b3a..83c350b95 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -18,21 +18,25 @@ module Mindee def req_get_result: [T] (_ProductClass[T] product, String resource) -> T - def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Parsing::V2::JobResponse - def req_get_inference: (String) -> Parsing::V2::InferenceResponse - def req_get_job: (String) -> Parsing::V2::JobResponse + def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> V2::Parsing::JobResponse + def req_get_job: 
(String) -> V2::Parsing::JobResponse def process_response: (Net::HTTPResponse?) -> Hash[String | Symbol, untyped] def poll: (String) -> Net::HTTPResponse - def inference_job_req_get: (String) -> Net::HTTPResponse - def inference_result_req_get: (String) -> Net::HTTPResponse def result_req_get: [T] (String, _ProductClass[T] product) -> Net::HTTPResponse def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Net::HTTPResponse? + def search_models: (String?, String?) -> Mindee::V2::Parsing::Search::SearchResponse + private - def req_get_job_url: (String) -> Parsing::V2::JobResponse + + def enqueue_form_options: (Array[Array[untyped]], V2::Product::Extraction::Params::ExtractionParameters) -> Array[Array[untyped]] + + def req_get_job_url: (String) -> V2::Parsing::JobResponse def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T + + def req_get_search_models: (String?, String?) -> Net::HTTPResponse + def uri?: (String) -> bool - def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] end end end diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs deleted file mode 100644 index b88d8ef27..000000000 --- a/sig/mindee/input/inference_parameters.rbs +++ /dev/null @@ -1,32 +0,0 @@ -# lib/mindee/input/inference_parameters.rb -module Mindee - module Input - class InferenceParameters < BaseParameters - def self.slug: -> String - - attr_reader confidence: bool? - attr_reader polygon: bool? - attr_reader rag: bool? - attr_reader raw_text: bool? - attr_reader text_context: String? - attr_reader data_schema: DataSchema? 
- - def initialize: ( - String, - ?rag: bool?, - ?raw_text: bool?, - ?polygon: bool?, - ?confidence: bool?, - ?file_alias: String?, - ?text_context: String?, - ?webhook_ids: Array[String]?, - ?polling_options: Hash[Symbol | String, untyped] | PollingOptions?, - ?close_file: bool?, - ?data_schema: DataSchema|String|Hash[Symbol | String, untyped]? - ) -> void - - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters - def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] - end - end -end diff --git a/sig/mindee/input/local_response.rbs b/sig/mindee/input/local_response.rbs index 4f0468fba..6b384f018 100644 --- a/sig/mindee/input/local_response.rbs +++ b/sig/mindee/input/local_response.rbs @@ -8,7 +8,7 @@ module Mindee def self.process_secret_key: (String) -> String def get_hmac_signature: (String) -> String def valid_hmac_signature?: (String, String) -> bool - def deserialize_response: (singleton(Parsing::V2::CommonResponse))-> (Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse) + def deserialize_response: (singleton(V2::Parsing::CommonResponse))-> (V2::Parsing::CommonResponse) end end end diff --git a/sig/mindee/parsing/common/api_response.rbs b/sig/mindee/parsing/common/api_response.rbs index 126363bd0..78651d04f 100644 --- a/sig/mindee/parsing/common/api_response.rbs +++ b/sig/mindee/parsing/common/api_response.rbs @@ -15,11 +15,12 @@ module Mindee class ApiResponse + attr_reader document: Parsing::Common::Document? + attr_reader job: Parsing::Common::Job? + attr_reader api_request: Parsing::Common::ApiRequest + attr_reader raw_http: String + def logger: () -> Logger - def document: -> Parsing::Common::Document? - def job: -> Parsing::Common::Job? - def api_request: -> Parsing::Common::ApiRequest? 
- def raw_http: -> String def initialize: (singleton(Parsing::Common::Inference), Hash[String | Symbol, untyped] | Net::HTTPResponse, String) -> void end end diff --git a/sig/mindee/parsing/v2/common_response.rbs b/sig/mindee/parsing/v2/common_response.rbs deleted file mode 100644 index 8c94189a8..000000000 --- a/sig/mindee/parsing/v2/common_response.rbs +++ /dev/null @@ -1,11 +0,0 @@ -# lib/mindee/parsing/v2/common_response.rb -module Mindee - module Parsing - module V2 - class CommonResponse - attr_reader raw_http: Hash[String | Symbol, untyped] - def initialize: (Hash[String | Symbol, untyped]) -> void - end - end - end -end diff --git a/sig/mindee/parsing/v2/inference.rbs b/sig/mindee/parsing/v2/inference.rbs deleted file mode 100644 index b6584c95a..000000000 --- a/sig/mindee/parsing/v2/inference.rbs +++ /dev/null @@ -1,18 +0,0 @@ -# lib/mindee/parsing/v2/inference.rb -module Mindee - module Parsing - module V2 - class Inference < Mindee::V2::Parsing::BaseInference - attr_reader self.params_type: singleton(Input::InferenceParameters) - attr_reader self.response_type: singleton(InferenceResponse) - attr_reader self.slug: String - - attr_reader active_options: InferenceActiveOptions - attr_reader result: InferenceResult - - def initialize: (Hash[String | Symbol, untyped]) -> void - def to_s: -> String - end - end - end -end diff --git a/sig/mindee/parsing/v2/inference_response.rbs b/sig/mindee/parsing/v2/inference_response.rbs deleted file mode 100644 index 051387484..000000000 --- a/sig/mindee/parsing/v2/inference_response.rbs +++ /dev/null @@ -1,21 +0,0 @@ -# lib/mindee/parsing/v2/inference_response.rb -module Mindee - module Parsing - module V2 - class InferenceResponse < Mindee::V2::Parsing::BaseResponse[Mindee::Parsing::V2::Inference] - - self.@slug: String - self.@_params_type: singleton(Input::BaseParameters) - - attr_reader inference: Mindee::Parsing::V2::Inference - def initialize: (Hash[String | Symbol, untyped]) -> void - - def _params_type: -> 
singleton(Input::InferenceParameters) - - def to_s: -> String - def self._params_type: () -> singleton(Input::InferenceParameters) - def self.slug: () -> String - end - end - end -end diff --git a/sig/mindee/parsing/v2/inference_result.rbs b/sig/mindee/parsing/v2/inference_result.rbs deleted file mode 100644 index a83427809..000000000 --- a/sig/mindee/parsing/v2/inference_result.rbs +++ /dev/null @@ -1,15 +0,0 @@ -# lib/mindee/parsing/v2/inference_result.rb -module Mindee - module Parsing - module V2 - class InferenceResult - attr_reader fields: Field::InferenceFields - attr_reader raw_text: RawText? - attr_reader rag: RAGMetadata? - - def initialize: (Hash[String | Symbol, untyped]) -> void - def to_s: -> String - end - end - end -end diff --git a/sig/mindee/pdf/pdf_tools.rbs b/sig/mindee/pdf/pdf_tools.rbs index dc8ae8e9d..a456ff4c4 100644 --- a/sig/mindee/pdf/pdf_tools.rbs +++ b/sig/mindee/pdf/pdf_tools.rbs @@ -8,6 +8,8 @@ module Mindee Height: Integer | Float Width: Integer | Float + def self.pdf_header?: (StringIO | File) -> bool + def to_io_stream: (?Hash[Symbol, untyped]) -> StringIO def intents_as_pdfa1: () -> void def delinearize!: () -> void diff --git a/sig/mindee/v2/parsing/base_inference.rbs b/sig/mindee/v2/parsing/base_inference.rbs index 4f6b03968..b9ddd9e25 100644 --- a/sig/mindee/v2/parsing/base_inference.rbs +++ b/sig/mindee/v2/parsing/base_inference.rbs @@ -4,10 +4,10 @@ module Mindee module V2 module Parsing class BaseInference - attr_reader job: Mindee::Parsing::V2::InferenceJob - attr_reader file: Mindee::Parsing::V2::InferenceFile + attr_reader job: Mindee::V2::Parsing::InferenceJob + attr_reader file: Mindee::V2::Parsing::InferenceFile attr_reader id: String - attr_reader model: Mindee::Parsing::V2::InferenceModel + attr_reader model: Mindee::V2::Parsing::InferenceModel def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/v2/parsing/base_response.rbs b/sig/mindee/v2/parsing/base_response.rbs index 
8d1c7814c..803afaf20 100644 --- a/sig/mindee/v2/parsing/base_response.rbs +++ b/sig/mindee/v2/parsing/base_response.rbs @@ -3,7 +3,7 @@ module Mindee module V2 module Parsing - class BaseResponse[T] < Mindee::Parsing::V2::CommonResponse + class BaseResponse[T] < Mindee::V2::Parsing::CommonResponse attr_reader inference: T end end diff --git a/sig/mindee/v2/parsing/common_response.rbs b/sig/mindee/v2/parsing/common_response.rbs new file mode 100644 index 000000000..cb1d6df42 --- /dev/null +++ b/sig/mindee/v2/parsing/common_response.rbs @@ -0,0 +1,11 @@ +# lib/mindee/v2/parsing/common_response.rb +module Mindee + module V2 + module Parsing + class CommonResponse + attr_reader raw_http: String + def initialize: (Hash[String | Symbol, untyped]) -> void + end + end + end +end diff --git a/sig/mindee/parsing/v2/error_item.rbs b/sig/mindee/v2/parsing/error_item.rbs similarity index 74% rename from sig/mindee/parsing/v2/error_item.rbs rename to sig/mindee/v2/parsing/error_item.rbs index 04b4e6756..05d2d7d22 100644 --- a/sig/mindee/parsing/v2/error_item.rbs +++ b/sig/mindee/v2/parsing/error_item.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/error_item.rb +# lib/mindee/v2/parsing/error_item.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class ErrorItem attr_reader pointer: String attr_reader detail: String|nil diff --git a/sig/mindee/parsing/v2/error_response.rbs b/sig/mindee/v2/parsing/error_response.rbs similarity index 84% rename from sig/mindee/parsing/v2/error_response.rbs rename to sig/mindee/v2/parsing/error_response.rbs index 7c0fdf55a..334ca8b34 100644 --- a/sig/mindee/parsing/v2/error_response.rbs +++ b/sig/mindee/v2/parsing/error_response.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/error_response.rb +# lib/mindee/v2/parsing/error_response.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class ErrorResponse attr_reader detail: String attr_reader status: Integer diff --git 
a/sig/mindee/parsing/v2/field/base_field.rbs b/sig/mindee/v2/parsing/field/base_field.rbs similarity index 85% rename from sig/mindee/parsing/v2/field/base_field.rbs rename to sig/mindee/v2/parsing/field/base_field.rbs index 5c401ab1a..b05002072 100644 --- a/sig/mindee/parsing/v2/field/base_field.rbs +++ b/sig/mindee/v2/parsing/field/base_field.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/base_field.rb +# lib/mindee/v2/parsing/field/base_field.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field class BaseField attr_reader indent_level: Integer diff --git a/sig/mindee/parsing/v2/field/field_confidence.rbs b/sig/mindee/v2/parsing/field/field_confidence.rbs similarity index 91% rename from sig/mindee/parsing/v2/field/field_confidence.rbs rename to sig/mindee/v2/parsing/field/field_confidence.rbs index 77ca6b682..e1c72817a 100644 --- a/sig/mindee/parsing/v2/field/field_confidence.rbs +++ b/sig/mindee/v2/parsing/field/field_confidence.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/field_confidence.rb +# lib/mindee/v2/parsing/field/field_confidence.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field class FieldConfidence attr_reader value: String diff --git a/sig/mindee/parsing/v2/field/field_location.rbs b/sig/mindee/v2/parsing/field/field_location.rbs similarity index 78% rename from sig/mindee/parsing/v2/field/field_location.rbs rename to sig/mindee/v2/parsing/field/field_location.rbs index 0c400ee08..d8cb365cc 100644 --- a/sig/mindee/parsing/v2/field/field_location.rbs +++ b/sig/mindee/v2/parsing/field/field_location.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/field_location.rb +# lib/mindee/v2/parsing/field/field_location.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field class FieldLocation attr_reader page: Integer diff --git a/sig/mindee/parsing/v2/field/inference_fields.rbs b/sig/mindee/v2/parsing/field/inference_fields.rbs similarity 
index 87% rename from sig/mindee/parsing/v2/field/inference_fields.rbs rename to sig/mindee/v2/parsing/field/inference_fields.rbs index 203f06aaa..4c29c6123 100644 --- a/sig/mindee/parsing/v2/field/inference_fields.rbs +++ b/sig/mindee/v2/parsing/field/inference_fields.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/inference_fields.rb +# lib/mindee/v2/parsing/field/inference_fields.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field class InferenceFields < Hash[String, ListField | ObjectField | SimpleField?] attr_reader indent_level: Integer diff --git a/sig/mindee/parsing/v2/field/list_field.rbs b/sig/mindee/v2/parsing/field/list_field.rbs similarity index 89% rename from sig/mindee/parsing/v2/field/list_field.rbs rename to sig/mindee/v2/parsing/field/list_field.rbs index 1f0295dab..ff17adc83 100644 --- a/sig/mindee/parsing/v2/field/list_field.rbs +++ b/sig/mindee/v2/parsing/field/list_field.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/list_field.rb +# lib/mindee/v2/parsing/field/list_field.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field class ListField < BaseField include Enumerable[BaseField] diff --git a/sig/mindee/parsing/v2/field/object_field.rbs b/sig/mindee/v2/parsing/field/object_field.rbs similarity index 91% rename from sig/mindee/parsing/v2/field/object_field.rbs rename to sig/mindee/v2/parsing/field/object_field.rbs index 149dd23cd..1aa3557aa 100644 --- a/sig/mindee/parsing/v2/field/object_field.rbs +++ b/sig/mindee/v2/parsing/field/object_field.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/object_field.rb +# lib/mindee/v2/parsing/field/object_field.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field # A field containing a nested set of inference fields. 
class ObjectField < BaseField diff --git a/sig/mindee/parsing/v2/field/simple_field.rbs b/sig/mindee/v2/parsing/field/simple_field.rbs similarity index 81% rename from sig/mindee/parsing/v2/field/simple_field.rbs rename to sig/mindee/v2/parsing/field/simple_field.rbs index facf0b121..b0a35b29e 100644 --- a/sig/mindee/parsing/v2/field/simple_field.rbs +++ b/sig/mindee/v2/parsing/field/simple_field.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/field/simple_field.rb +# lib/mindee/v2/parsing/field/simple_field.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing module Field class SimpleField < BaseField attr_reader value: String | Integer | Float | bool | nil diff --git a/sig/mindee/parsing/v2/inference_active_options.rbs b/sig/mindee/v2/parsing/inference_active_options.rbs similarity index 94% rename from sig/mindee/parsing/v2/inference_active_options.rbs rename to sig/mindee/v2/parsing/inference_active_options.rbs index f0cc0296e..f0d690343 100644 --- a/sig/mindee/parsing/v2/inference_active_options.rbs +++ b/sig/mindee/v2/parsing/inference_active_options.rbs @@ -1,6 +1,6 @@ module Mindee - module Parsing - module V2 + module V2 + module Parsing class DataSchemaActiveOption attr_reader replace: bool diff --git a/sig/mindee/parsing/v2/inference_file.rbs b/sig/mindee/v2/parsing/inference_file.rbs similarity index 81% rename from sig/mindee/parsing/v2/inference_file.rbs rename to sig/mindee/v2/parsing/inference_file.rbs index 43e313115..ce8c56f35 100644 --- a/sig/mindee/parsing/v2/inference_file.rbs +++ b/sig/mindee/v2/parsing/inference_file.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/inference_file.rb +# lib/mindee/v2/parsing/inference_file.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class InferenceFile attr_reader name: String attr_reader file_alias: String? 
diff --git a/sig/mindee/parsing/v2/inference_job.rbs b/sig/mindee/v2/parsing/inference_job.rbs similarity index 86% rename from sig/mindee/parsing/v2/inference_job.rbs rename to sig/mindee/v2/parsing/inference_job.rbs index 029576818..d763e4af9 100644 --- a/sig/mindee/parsing/v2/inference_job.rbs +++ b/sig/mindee/v2/parsing/inference_job.rbs @@ -1,6 +1,6 @@ module Mindee - module Parsing - module V2 + module V2 + module Parsing class InferenceJob attr_reader id: String diff --git a/sig/mindee/parsing/v2/inference_model.rbs b/sig/mindee/v2/parsing/inference_model.rbs similarity index 69% rename from sig/mindee/parsing/v2/inference_model.rbs rename to sig/mindee/v2/parsing/inference_model.rbs index 07a9be237..6b0946968 100644 --- a/sig/mindee/parsing/v2/inference_model.rbs +++ b/sig/mindee/v2/parsing/inference_model.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/inference_model.rb +# lib/mindee/v2/parsing/inference_model.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class InferenceModel attr_reader id: String def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/parsing/v2/job.rbs b/sig/mindee/v2/parsing/job.rbs similarity index 90% rename from sig/mindee/parsing/v2/job.rbs rename to sig/mindee/v2/parsing/job.rbs index 1b60d013f..3b7e2bcc9 100644 --- a/sig/mindee/parsing/v2/job.rbs +++ b/sig/mindee/v2/parsing/job.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/job.rb +# lib/mindee/v2/parsing/job.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class Job attr_reader alias: String attr_reader created_at: Time diff --git a/sig/mindee/parsing/v2/job_response.rbs b/sig/mindee/v2/parsing/job_response.rbs similarity index 74% rename from sig/mindee/parsing/v2/job_response.rbs rename to sig/mindee/v2/parsing/job_response.rbs index 1ecf8683a..1277fdca8 100644 --- a/sig/mindee/parsing/v2/job_response.rbs +++ b/sig/mindee/v2/parsing/job_response.rbs @@ -1,7 +1,7 @@ -# 
lib/mindee/parsing/v2/job_response.rb +# lib/mindee/v2/parsing/job_response.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class JobResponse < CommonResponse attr_reader job: Job def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/parsing/v2/job_webhook.rbs b/sig/mindee/v2/parsing/job_webhook.rbs similarity index 83% rename from sig/mindee/parsing/v2/job_webhook.rbs rename to sig/mindee/v2/parsing/job_webhook.rbs index 5d9ea9201..5d3e3017d 100644 --- a/sig/mindee/parsing/v2/job_webhook.rbs +++ b/sig/mindee/v2/parsing/job_webhook.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/job_webhook.rb +# lib/mindee/v2/parsing/job_webhook.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class JobWebhook attr_reader created_at: DateTime? attr_reader error: ErrorResponse? diff --git a/sig/mindee/parsing/v2/rag_metadata.rbs b/sig/mindee/v2/parsing/rag_metadata.rbs similarity index 72% rename from sig/mindee/parsing/v2/rag_metadata.rbs rename to sig/mindee/v2/parsing/rag_metadata.rbs index 2995de66e..bbb6c70ca 100644 --- a/sig/mindee/parsing/v2/rag_metadata.rbs +++ b/sig/mindee/v2/parsing/rag_metadata.rbs @@ -1,8 +1,8 @@ -# lib/mindee/parsing/v2/rag_metadata.rb +# lib/mindee/v2/parsing/rag_metadata.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class RAGMetadata attr_accessor retrieved_document_id: string | nil diff --git a/sig/mindee/parsing/v2/raw_text.rbs b/sig/mindee/v2/parsing/raw_text.rbs similarity index 72% rename from sig/mindee/parsing/v2/raw_text.rbs rename to sig/mindee/v2/parsing/raw_text.rbs index b5959005c..a8e0a9f73 100644 --- a/sig/mindee/parsing/v2/raw_text.rbs +++ b/sig/mindee/v2/parsing/raw_text.rbs @@ -1,7 +1,7 @@ -# lib/mindee/parsing/v2/raw_text.rb +# lib/mindee/v2/parsing/raw_text.rb module Mindee - module Parsing - module V2 + module V2 + module Parsing class RawText attr_reader pages: Array[RawTextPage] diff --git 
a/sig/mindee/parsing/v2/raw_text_page.rbs b/sig/mindee/v2/parsing/raw_text_page.rbs similarity index 84% rename from sig/mindee/parsing/v2/raw_text_page.rbs rename to sig/mindee/v2/parsing/raw_text_page.rbs index 536d2c484..ce87f51c9 100644 --- a/sig/mindee/parsing/v2/raw_text_page.rbs +++ b/sig/mindee/v2/parsing/raw_text_page.rbs @@ -1,6 +1,6 @@ module Mindee - module Parsing - module V2 + module V2 + module Parsing class RawTextPage attr_reader content: String diff --git a/sig/mindee/v2/parsing/search/pagination_metadata.rbs b/sig/mindee/v2/parsing/search/pagination_metadata.rbs new file mode 100644 index 000000000..5c43bb624 --- /dev/null +++ b/sig/mindee/v2/parsing/search/pagination_metadata.rbs @@ -0,0 +1,20 @@ +# lib/mindee/v2/parsing/search/pagination_metadata.rb + +module Mindee + module V2 + module Parsing + module Search + class PaginationMetadata + attr_reader page: Integer + attr_reader per_page: Integer + attr_reader total_items: Integer + attr_reader total_pages: Integer + + def initialize: (Hash[String|Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/parsing/search/search_model.rbs b/sig/mindee/v2/parsing/search/search_model.rbs new file mode 100644 index 000000000..e0c02de63 --- /dev/null +++ b/sig/mindee/v2/parsing/search/search_model.rbs @@ -0,0 +1,19 @@ +# lib/mindee/v2/parsing/search/search_model.rb + +module Mindee + module V2 + module Parsing + module Search + class SearchModel + attr_reader id: String + attr_reader model_type: String + attr_reader name: String + + def initialize: (Hash[String|Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/parsing/search/search_response.rbs b/sig/mindee/v2/parsing/search/search_response.rbs new file mode 100644 index 000000000..14c0bfdae --- /dev/null +++ b/sig/mindee/v2/parsing/search/search_response.rbs @@ -0,0 +1,17 @@ +# lib/mindee/v2/parsing/search/search_response.rb +module Mindee + module V2 +
module Parsing + module Search + class SearchResponse + attr_reader models: Array[SearchModel] + attr_reader pagination_metadata: PaginationMetadata + + def initialize: (Hash[String|Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/parsing/search_models.rbs b/sig/mindee/v2/parsing/search_models.rbs new file mode 100644 index 000000000..ba9d391e4 --- /dev/null +++ b/sig/mindee/v2/parsing/search_models.rbs @@ -0,0 +1,14 @@ +# lib/mindee/v2/parsing/search/search_models.rb +module Mindee + module V2 + module Parsing + module Search + class SearchModels < Array[SearchModel] + def initialize: (Array[untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification_response.rbs b/sig/mindee/v2/product/classification/classification_response.rbs index ee34fbc12..6e43050cd 100644 --- a/sig/mindee/v2/product/classification/classification_response.rbs +++ b/sig/mindee/v2/product/classification/classification_response.rbs @@ -4,7 +4,7 @@ module Mindee module V2 module Product module Classification - class ClassificationResponse + class ClassificationResponse < Parsing::BaseResponse[ClassificationInference] self.@slug: String self.@_params_type: singleton(Params::ClassificationParameters) diff --git a/sig/mindee/v2/product/crop/crop_item.rbs b/sig/mindee/v2/product/crop/crop_item.rbs index 1e3e79986..f9594bd5e 100644 --- a/sig/mindee/v2/product/crop/crop_item.rbs +++ b/sig/mindee/v2/product/crop/crop_item.rbs @@ -4,7 +4,7 @@ module Mindee module Crop class CropItem attr_reader object_type: String - attr_reader location: Mindee::Parsing::V2::Field::FieldLocation + attr_reader location: Mindee::V2::Parsing::Field::FieldLocation def initialize: (Hash[String | Symbol, untyped]) -> void def to_s: -> String diff --git a/sig/mindee/v2/product/crop/crop_response.rbs b/sig/mindee/v2/product/crop/crop_response.rbs index 4b0c638f7..773045a92 100644 ---
a/sig/mindee/v2/product/crop/crop_response.rbs +++ b/sig/mindee/v2/product/crop/crop_response.rbs @@ -4,7 +4,7 @@ module Mindee module V2 module Product module Crop - class CropResponse + class CropResponse < Parsing::BaseResponse[CropInference] self.@slug: String self.@_params_type: singleton(Params::CropParameters) diff --git a/sig/mindee/v2/product/extraction/extraction.rbs b/sig/mindee/v2/product/extraction/extraction.rbs index c747c7b8d..197a5f7e5 100644 --- a/sig/mindee/v2/product/extraction/extraction.rbs +++ b/sig/mindee/v2/product/extraction/extraction.rbs @@ -1,3 +1,5 @@ +# lib/mindee/v2/product/extraction/extraction.rb + module Mindee module V2 module Product @@ -5,6 +7,7 @@ module Mindee class Extraction self.@params_type: singleton(Mindee::V2::Product::Extraction::Params::ExtractionParameters) self.@response_type: singleton(Mindee::V2::Product::Extraction::ExtractionResponse) + self.@slug: String end end end diff --git a/sig/mindee/v2/product/extraction/extraction_inference.rbs b/sig/mindee/v2/product/extraction/extraction_inference.rbs index ffefa44bf..2facf4f77 100644 --- a/sig/mindee/v2/product/extraction/extraction_inference.rbs +++ b/sig/mindee/v2/product/extraction/extraction_inference.rbs @@ -3,6 +3,11 @@ module Mindee module Product module Extraction class ExtractionInference + attr_reader self.params_type: singleton(Params::ExtractionParameters) + attr_reader self.response_type: singleton(ExtractionResponse) + attr_reader self.slug: String + + attr_reader active_options: Mindee::V2::Parsing::InferenceActiveOptions attr_reader result: ExtractionResult def initialize: (Hash[String | Symbol, untyped]) -> void diff --git a/sig/mindee/v2/product/extraction/extraction_response.rbs b/sig/mindee/v2/product/extraction/extraction_response.rbs index 000d7f822..0b5cec064 100644 --- a/sig/mindee/v2/product/extraction/extraction_response.rbs +++ b/sig/mindee/v2/product/extraction/extraction_response.rbs @@ -1,16 +1,22 @@ +# 
lib/mindee/v2/product/extraction/extraction_response.rb + module Mindee module V2 module Product module Extraction - class ExtractionResponse + class ExtractionResponse < Parsing::BaseResponse[ExtractionInference] self.@_params_type: singleton(Params::ExtractionParameters) - attr_reader inference: Mindee::V2::Product::Extraction::ExtractionInference + self.@slug: String + + attr_reader inference: ExtractionInference def initialize: (Hash[String | Symbol, untyped]) -> void def _params_type: -> singleton(Params::ExtractionParameters) def self._params_type: () -> singleton(Params::ExtractionParameters) + + def to_s: -> String end end end diff --git a/sig/mindee/v2/product/extraction/extraction_result.rbs b/sig/mindee/v2/product/extraction/extraction_result.rbs index 975d364c1..c8e0ea00d 100644 --- a/sig/mindee/v2/product/extraction/extraction_result.rbs +++ b/sig/mindee/v2/product/extraction/extraction_result.rbs @@ -1,8 +1,16 @@ +# lib/mindee/v2/product/extraction/extraction_result.rb + module Mindee module V2 module Product module Extraction - class ExtractionResult < Mindee::Parsing::V2::InferenceResult + class ExtractionResult + attr_reader fields: Parsing::Field::InferenceFields + attr_reader raw_text: Parsing::RawText? + attr_reader rag: Parsing::RAGMetadata? + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String end end end diff --git a/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs b/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs index f93bcdaec..8cb96595e 100644 --- a/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +++ b/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs @@ -3,7 +3,33 @@ module Mindee module Product module Extraction module Params - class ExtractionParameters < Input::InferenceParameters + class ExtractionParameters < Input::BaseParameters + + def self.slug: -> String + + attr_reader confidence: bool? + attr_reader polygon: bool? 
+ attr_reader rag: bool? + attr_reader raw_text: bool? + attr_reader text_context: String? + attr_reader data_schema: Input::DataSchema? + + def initialize: ( + String, + ?rag: bool?, + ?raw_text: bool?, + ?polygon: bool?, + ?confidence: bool?, + ?file_alias: String?, + ?text_context: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool?, + ?data_schema: Input::DataSchema|String|Hash[Symbol | String, untyped]? + ) -> void + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> ExtractionParameters + def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] end end end diff --git a/sig/mindee/v2/product/ocr/ocr_response.rbs b/sig/mindee/v2/product/ocr/ocr_response.rbs index 609aef0e2..f9d96875d 100644 --- a/sig/mindee/v2/product/ocr/ocr_response.rbs +++ b/sig/mindee/v2/product/ocr/ocr_response.rbs @@ -4,7 +4,7 @@ module Mindee module V2 module Product module Ocr - class OcrResponse + class OcrResponse < Parsing::BaseResponse[OcrInference] self.@slug: String self.@_params_type: singleton(Params::OcrParameters) diff --git a/sig/mindee/v2/product/split/split_response.rbs b/sig/mindee/v2/product/split/split_response.rbs index 9068eeb57..9a0232517 100644 --- a/sig/mindee/v2/product/split/split_response.rbs +++ b/sig/mindee/v2/product/split/split_response.rbs @@ -4,7 +4,7 @@ module Mindee module V2 module Product module Split - class SplitResponse + class SplitResponse < Parsing::BaseResponse[SplitInference] self.@slug: String self.@_params_type: singleton(Params::SplitParameters) diff --git a/sig/mindee/v2/search/search_response.rbs b/sig/mindee/v2/search/search_response.rbs new file mode 100644 index 000000000..e69de29bb diff --git a/spec/bin/cli_integration.rb b/spec/bin/cli_integration.rb new file mode 100644 index 000000000..ede5c4af6 --- /dev/null +++ b/spec/bin/cli_integration.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require 'open3' +require 
'rbconfig' +require_relative '../data' + +describe 'Mindee CLI V2', :integration, :v2, order: :defined do + let(:findoc_model_id) { ENV.fetch('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID') } + let(:classification_model_id) { ENV.fetch('MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID') } + let(:crop_model_id) { ENV.fetch('MINDEE_V2_SE_TESTS_CROP_MODEL_ID') } + let(:ocr_model_id) { ENV.fetch('MINDEE_V2_SE_TESTS_OCR_MODEL_ID') } + let(:split_model_id) { ENV.fetch('MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID') } + let(:blank_pdf_url) { ENV.fetch('MINDEE_V2_SE_TESTS_BLANK_PDF_URL') } + let(:test_file) { File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf') } + let(:cli_path) { File.expand_path('../../bin/mindee.rb', __dir__) } + + def run_cli(*args) + Open3.capture3(RbConfig.ruby, cli_path, *args) + end + + context 'search-models command' do + ['classification', 'crop', 'extraction', 'ocr', 'split'].each do |model_type| + it "returns model list for type #{model_type}" do + stdout, stderr, status = run_cli('v2', 'search-models', '-t', model_type) + expect(status.success?).to eq(true), stderr + expect(stdout.strip).not_to be_empty + end + end + + it 'returns no models for non-existent name' do + stdout, stderr, status = run_cli('v2', 'search-models', '-n', 'supercalifragilisticexpialidocious') + expect(status.success?).to eq(true), stderr + expect(stdout.strip).to eq('') + end + + it 'returns models for name filter' do + stdout, stderr, status = run_cli('v2', 'search-models', '-n', 'findoc') + expect(status.success?).to eq(true), stderr + expect(stdout.strip).not_to be_empty + end + + it 'returns models for name and model_type filters' do + stdout, stderr, status = run_cli('v2', 'search-models', '-n', 'findoc', '-t', 'extraction') + expect(status.success?).to eq(true), stderr + expect(stdout.strip).not_to be_empty + end + + it 'returns HTTP 422 on invalid model type' do + stdout, stderr, status = run_cli('v2', 'search-models', '-t', 'invalid') + expect(status.success?).to eq(false) + 
expect("#{stdout}\n#{stderr}").to include('HTTP 422') + end + end + + context 'product commands' do + it 'runs extraction from an URL source' do + stdout, stderr, status = run_cli('v2', 'extraction', '-m', findoc_model_id, blank_pdf_url) + expect(status.success?).to eq(true), stderr + expect(stdout.strip).not_to be_empty + end + + { + 'classification' => -> { classification_model_id }, + 'crop' => -> { crop_model_id }, + 'ocr' => -> { ocr_model_id }, + 'split' => -> { split_model_id }, + }.each do |command, model_id_proc| + it "runs #{command} with default args" do + stdout, stderr, status = run_cli('v2', command, '-m', instance_exec(&model_id_proc), test_file) + expect(status.success?).to eq(true), stderr + expect(stdout.strip).not_to be_empty + end + end + end + + context 'extraction options' do + [ + ['-a', 'toto'], + ['-r'], + ['-c'], + ['-p'], + ['-t', 'toto'], + ].each do |option_args| + it "runs extraction with #{option_args.join(' ')}" do + stdout, stderr, status = run_cli('v2', 'extraction', '-m', findoc_model_id, test_file, *option_args) + expect(status.success?).to eq(true), stderr + expect(stdout.strip).not_to be_empty + end + end + end +end diff --git a/spec/pdf/pdf_compressor_spec.rb b/spec/pdf/pdf_compressor_spec.rb index 13f6ab588..e9b50adb8 100644 --- a/spec/pdf/pdf_compressor_spec.rb +++ b/spec/pdf/pdf_compressor_spec.rb @@ -20,6 +20,7 @@ it 'should not detect text pdf in an image file.' do image_input = Mindee::Input::Source::PathInputSource.new("#{FILE_TYPES_DIR}/receipt.jpg") + expect(Origami::PDF).not_to receive(:read) expect(Mindee::PDF::PDFTools.source_text?(image_input.io_stream)).to be(false) end end diff --git a/spec/test_code_samples_v2.sh b/spec/test_code_samples_v2.sh deleted file mode 100755 index 16a6e30c7..000000000 --- a/spec/test_code_samples_v2.sh +++ /dev/null @@ -1,45 +0,0 @@ -#! 
/bin/sh -set -e - -OUTPUT_FILE='./_test_v2.rb' - -for f in $(find ./docs/code_samples -maxdepth 1 -name "v2_*.txt" | sort -h) -do - echo - echo "###############################################" - echo "${f}" - echo "###############################################" - echo - - cat "${f}" > $OUTPUT_FILE - sed -i "s/MY_API_KEY/${MINDEE_V2_API_KEY}/" $OUTPUT_FILE - sed -i "s/MY_WEBHOOK_ID/${MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID}/" $OUTPUT_FILE - sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE - - if echo "${f}" | grep -q "v2_extraction.*.txt" - then - sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}/" $OUTPUT_FILE - fi - - if echo "${f}" | grep -q "v2_classification.txt" - then - sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID}/" $OUTPUT_FILE - fi - - if echo "${f}" | grep -q "v2_crop.txt" - then - sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_CROP_MODEL_ID}/" $OUTPUT_FILE - fi - - if echo "${f}" | grep -q "v2_ocr.txt" - then - sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_OCR_MODEL_ID}/" $OUTPUT_FILE - fi - - if echo "${f}" | grep -q "v2_split.txt" - then - sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}/" $OUTPUT_FILE - fi - - bundle exec ruby $OUTPUT_FILE -done diff --git a/spec/test_v1_cli.sh b/spec/test_v1_cli.sh new file mode 100755 index 000000000..166d9e7d7 --- /dev/null +++ b/spec/test_v1_cli.sh @@ -0,0 +1,56 @@ +#!/bin/sh +set -e + +# Initialize rbenv +export PATH="$HOME/.rbenv/bin:$PATH" +eval "$($HOME/.rbenv/bin/rbenv init -)" + +TEST_FILE=$1 +RID=$2 + +if [ -z "$TEST_FILE" ]; then + echo "Error: no sample file provided" + exit 1 +fi + +if [ -z "$RID" ]; then + OS_NAME="$(uname -s)" + case "$OS_NAME" in + Linux*) RID="linux-x64" ;; + Darwin*) RID="osx-x64" ;; + CYGWIN*|MINGW*|MSYS*) RID="win-x64" ;; + *) + echo "" + echo "Error: Could not determine default Runtime Identifier (RID) for OS type '$OS_NAME'." + echo "Please provide one manually. 
Available: 'linux-x64', 'osx-x64', 'win-x64'" + exit 1 + ;; + esac + echo "Warning: Runtime Identifier (RID) not provided, defaulting to $RID" +fi + +WD="$(basename "$PWD")" +if [ "$WD" = "spec" ]; then + CLI_PATH="../bin/mindee.rb" +else + CLI_PATH="./bin/mindee.rb" +fi + +if [ "$RID" = "win-x64" ]; then + CLI_PATH="${CLI_PATH}.exe" +fi + +PRODUCTS="financial-document receipt invoice invoice-splitter" +PRODUCTS_SIZE=4 +i=1 + +for product in $PRODUCTS +do + echo "--- Test $product with Summary Output ($i/$PRODUCTS_SIZE) ---" + SUMMARY_OUTPUT=$(ruby "$CLI_PATH" v1 "$product" "$TEST_FILE") + echo "$SUMMARY_OUTPUT" + echo "" + echo "" + sleep 0.5 + i=$((i + 1)) +done diff --git a/spec/test_code_samples_v1.sh b/spec/test_v1_code_samples.sh similarity index 100% rename from spec/test_code_samples_v1.sh rename to spec/test_v1_code_samples.sh diff --git a/spec/test_v2_cli.sh b/spec/test_v2_cli.sh new file mode 100755 index 000000000..89c4b406e --- /dev/null +++ b/spec/test_v2_cli.sh @@ -0,0 +1,54 @@ +#!/bin/sh +set -e + +TEST_FILE=$1 +RID=$2 + +export PATH="$HOME/.rbenv/bin:$PATH" +eval "$($HOME/.rbenv/bin/rbenv init -)" + +if [ -z "$TEST_FILE" ]; then + echo "Error: no sample file provided" + exit 1 +fi + +if [ -z "$RID" ]; then + OS_NAME="$(uname -s)" + case "$OS_NAME" in + Linux*) RID="linux-x64" ;; + Darwin*) RID="osx-x64" ;; + CYGWIN*|MINGW*|MSYS*) RID="win-x64" ;; + *) + echo "" + echo "Error: Could not determine default Runtime Identifier (RID) for OS type '$OS_NAME'." + echo "Please provide one manually. 
Available: 'linux-x64', 'osx-x64', 'win-x64'" + exit 1 + ;; + esac + echo "Warning: Runtime Identifier (RID) not provided, defaulting to $RID" +fi + +WD="$(basename "$PWD")" +if [ "$WD" = "spec" ]; then + CLI_PATH="../bin/mindee.rb" +else + CLI_PATH="./bin/mindee.rb" +fi + +echo "--- Test model list retrieval (all models)" +MODELS=$("$CLI_PATH" v2 search-models) +if [ -z "$MODELS" ]; then + echo "Error: no models found" + exit 1 +else + echo "Models retrieval OK" +fi + +echo "--- Test extraction with no additional args" +SUMMARY_OUTPUT=$("$CLI_PATH" v2 extraction -m "$MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID" "$TEST_FILE") +if [ -z "$SUMMARY_OUTPUT" ]; then + echo "Error: no extraction output" + exit 1 +else + echo "Extraction retrieval OK" +fi diff --git a/spec/test_v2_code_samples.sh b/spec/test_v2_code_samples.sh new file mode 100755 index 000000000..4a2917023 --- /dev/null +++ b/spec/test_v2_code_samples.sh @@ -0,0 +1,45 @@ +#! /bin/sh +set -e + +OUTPUT_FILE='./_test_v2.rb' + +for f in $(find ./docs/code_samples -maxdepth 1 -name "v2_*.txt" | sort -h) +do + echo + echo "###############################################" + echo "${f}" + echo "###############################################" + echo + + cat "${f}" > $OUTPUT_FILE + sed -i "s/MY_API_KEY/${MINDEE_V2_API_KEY}/" $OUTPUT_FILE + sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE + + if echo "${f}" | grep -q "v2_extraction" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_FINDOC_MODEL_ID}/" $OUTPUT_FILE + sed -i "s/MY_WEBHOOK_ID/${MINDEE_V2_FAILURE_WEBHOOK_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_classification" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_CLASSIFICATION_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_crop" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_CROP_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_ocr" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_OCR_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q 
"v2_split" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SPLIT_MODEL_ID}/" $OUTPUT_FILE + fi + + bundle exec ruby $OUTPUT_FILE +done diff --git a/spec/v1/api_response_spec.rb b/spec/v1/api_response_spec.rb index ef744bb60..b59efc682 100644 --- a/spec/v1/api_response_spec.rb +++ b/spec/v1/api_response_spec.rb @@ -8,12 +8,13 @@ context 'An Invoice document' do it 'should be properly created from an ApiResponse' do response = load_json(V1_PRODUCT_DATA_DIR, 'invoices/response_v4/complete.json') + raw_response = JSON.generate(response) rst_response = read_file(V1_PRODUCT_DATA_DIR, 'invoices/response_v4/summary_full.rst') parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::Invoice::InvoiceV4, - response, response.to_s) + response, raw_response) expect(parsed_response.document.inference).to be_a Mindee::Product::Invoice::InvoiceV4 expect(parsed_response.document.inference.prediction).to be_a Mindee::Product::Invoice::InvoiceV4Document - expect(parsed_response.raw_http).to eq(response.to_s) + expect(parsed_response.raw_http).to eq(raw_response) expect(parsed_response.document.n_pages).to eq(1) expect(parsed_response.document.inference.pages.length).to eq(1) expect(parsed_response.document.to_s).to eq(rst_response.to_s) diff --git a/spec/v1/async_rseponse_spec.rb b/spec/v1/async_rseponse_spec.rb index 9a264a57f..9a20af42a 100644 --- a/spec/v1/async_rseponse_spec.rb +++ b/spec/v1/async_rseponse_spec.rb @@ -13,14 +13,14 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(true) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response.to_json) + response, fake_response.body) expect(parsed_response.job.status).to eq(Mindee::Parsing::Common::JobStatus::WAITING) expect(parsed_response.job.id).to eq('76c90710-3a1b-4b91-8a39-31a6543e347c') expect(parsed_response.job.status).to_not respond_to(:available_at) 
expect(parsed_response.job.status).to_not respond_to(:millisecs_taken) expect(parsed_response.api_request.error).to eq({}) - expect(parsed_response.raw_http).to eq(response.to_json) + expect(parsed_response.raw_http).to eq(fake_response.body) end it 'should not be able to be sent on incompatible endpoints' do @@ -29,7 +29,7 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(false) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response.to_json) + response, fake_response.body) expect(parsed_response.job).to be(nil) end @@ -39,14 +39,14 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(true) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response.to_json) + response, fake_response.body) expect(parsed_response.job.issued_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2023-03-16T12:33:49.602947') expect(parsed_response.job.status).to eq(Mindee::Parsing::Common::JobStatus::PROCESSING) expect(parsed_response.job.id).to eq('76c90710-3a1b-4b91-8a39-31a6543e347c') expect(parsed_response.job.status).to_not respond_to(:available_at) expect(parsed_response.job.status).to_not respond_to(:millisecs_taken) expect(parsed_response.api_request.error['code']).to eq(nil) - expect(parsed_response.raw_http).to eq(response.to_json) + expect(parsed_response.raw_http).to eq(fake_response.body) end it 'should be able to poll a completed queue' do @@ -55,7 +55,7 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(true) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response.to_json) + response, fake_response.body) expect(parsed_response.job.issued_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to 
eq('2023-03-21T13:52:56.326107') expect(parsed_response.job.status).to eq(Mindee::Parsing::Common::JobStatus::COMPLETED) expect(parsed_response.job.id).to eq('b6caf9e8-9bcc-4412-bcb7-f5b416678f0d') @@ -63,7 +63,7 @@ expect(parsed_response.job.millisecs_taken).to eq(4664) expect(parsed_response.document).to_not be(nil) expect(parsed_response.api_request.error['code']).to eq(nil) - expect(parsed_response.raw_http).to eq(response.to_json) + expect(parsed_response.raw_http).to eq(fake_response.body) end it 'should retrieve a failed job' do @@ -72,7 +72,7 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(false) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response.to_json) + response, fake_response.body) expect(parsed_response.job.issued_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2024-02-20T10:31:06.878599') expect(parsed_response.job.available_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2024-02-20T10:31:06.878599') expect(parsed_response.api_request.status).to eq(Mindee::Parsing::Common::RequestStatus::SUCCESS) diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index 598a72b94..6affbdd77 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -1,6 +1,10 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/product' + +using Mindee::V2::Product::Extraction::Params +using Mindee::V2::Product::Extraction describe 'Mindee::ClientV2 – integration tests (V2)', :integration, order: :defined do let(:api_key) { ENV.fetch('MINDEE_V2_API_KEY') } @@ -19,7 +23,7 @@ max_retries: 80 ) - inference_params = Mindee::Input::InferenceParameters.new( + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( model_id, rag: false, raw_text: true, @@ -30,25 +34,25 @@ text_context: 'this is a test' ) - response = 
client.enqueue_and_get_inference(input, inference_params) + response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params) expect(response).not_to be_nil expect(response.inference).not_to be_nil file = response.inference.file expect(file).not_to be_nil - expect(file).to be_a(Mindee::Parsing::V2::InferenceFile) + expect(file).to be_a(Mindee::V2::Parsing::InferenceFile) expect(file.name).to eq('multipage_cut-2.pdf') expect(file.page_count).to eq(2) model = response.inference.model expect(model).not_to be_nil - expect(model).to be_a(Mindee::Parsing::V2::InferenceModel) + expect(model).to be_a(Mindee::V2::Parsing::InferenceModel) expect(model.id).to eq(model_id) active_options = response.inference.active_options expect(active_options).not_to be_nil - expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions) + expect(active_options).to be_a(Mindee::V2::Parsing::InferenceActiveOptions) expect(active_options.raw_text).to eq(true) expect(active_options.polygon).to eq(false) expect(active_options.confidence).to eq(false) @@ -68,7 +72,7 @@ src_path = File.join(V1_PRODUCT_DATA_DIR, 'financial_document', 'default_sample.jpg') input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'default_sample.jpg') - inference_params = Mindee::Input::InferenceParameters.new( + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( model_id, raw_text: false, polygon: false, @@ -77,23 +81,23 @@ file_alias: 'rb_integration_test' ) - response = client.enqueue_and_get_inference(input, inference_params) + response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params) expect(response).not_to be_nil file = response.inference.file expect(file).not_to be_nil - expect(file).to be_a(Mindee::Parsing::V2::InferenceFile) + expect(file).to be_a(Mindee::V2::Parsing::InferenceFile) expect(file.name).to eq('default_sample.jpg') 
expect(file.page_count).to eq(1) model = response.inference.model expect(model).not_to be_nil - expect(model).to be_a(Mindee::Parsing::V2::InferenceModel) + expect(model).to be_a(Mindee::V2::Parsing::InferenceModel) expect(model.id).to eq(model_id) active_options = response.inference.active_options expect(active_options).not_to be_nil - expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions) + expect(active_options).to be_a(Mindee::V2::Parsing::InferenceActiveOptions) expect(active_options.raw_text).to eq(false) expect(active_options.polygon).to eq(false) expect(active_options.confidence).to eq(false) @@ -116,10 +120,10 @@ src_path = File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf') input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'blank_1.pdf') - inference_params = Mindee::Input::InferenceParameters.new('INVALID_MODEL_ID') + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new('INVALID_MODEL_ID') expect do - client.enqueue_inference(input, inference_params) + client.enqueue(Mindee::V2::Product::Extraction::Extraction, input, inference_params) end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(422) } end @@ -127,13 +131,13 @@ src_path = File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf') input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'blank_1.pdf') - params = Mindee::Input::InferenceParameters.new( + params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( model_id, webhook_ids: ['INVALID_WEBHOOK_ID'] ) expect do - client.enqueue_inference(input, params) + client.enqueue(Mindee::V2::Product::Extraction::Extraction, input, params) end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(422) expect(e.code).to start_with('422-') @@ -148,13 +152,13 @@ src_path = File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf') input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 
'blank_1.pdf') - inference_params = Mindee::Input::InferenceParameters.new( + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( model_id, webhook_ids: ['fc405e37-4ba4-4d03-aeba-533a8d1f0f21', 'fc405e37-4ba4-4d03-aeba-533a8d1f0f21'] ) expect do - client.enqueue_inference(input, inference_params) + client.enqueue(Mindee::V2::Product::Extraction::Extraction, input, inference_params) end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(422) expect(e.code).to start_with('422-') @@ -162,13 +166,13 @@ expect(e.title).to_not be_nil expect(e.errors).to be_an_instance_of(Array) expect(e.errors.count).to be_positive - expect(e.errors[0]).to be_an_instance_of(Mindee::Parsing::V2::ErrorItem) + expect(e.errors[0]).to be_an_instance_of(Mindee::V2::Parsing::ErrorItem) } end it 'raises MindeeHTTPErrorV2 on invalid job id' do expect do - client.get_inference('INVALID_JOB_ID') + client.get_result(Mindee::V2::Product::Extraction::Extraction, 'INVALID_JOB_ID') end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(422) expect(e.code).to start_with('422-') @@ -187,7 +191,7 @@ 'default_sample.jpg' ) - inference_params = Mindee::Input::InferenceParameters.new( + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( 'fc405e37-4ba4-4d03-aeba-533a8d1f0f21', raw_text: false, polygon: false, @@ -195,7 +199,7 @@ rag: false, file_alias: 'rb_integration_test' ) - client.enqueue_and_get_inference(input, inference_params) + client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params) end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(404) expect(e.code).to start_with('404-') @@ -210,9 +214,9 @@ it 'parses an URL input source without errors' do url_input = Mindee::Input::Source::URLInputSource.new(blank_pdf_url) - inference_params = Mindee::Input::InferenceParameters.new(model_id) + inference_params = 
Mindee::V2::Product::Extraction::Params::ExtractionParameters.new(model_id) - response = client.enqueue_and_get_inference(url_input, inference_params) + response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, url_input, inference_params) expect(response).not_to be_nil expect(response.inference).not_to be_nil @@ -225,7 +229,7 @@ 'data_schema_replace_param.json')) input = Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')) - inference_params = Mindee::Input::InferenceParameters.new( + inference_params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( model_id, raw_text: false, polygon: false, @@ -235,17 +239,17 @@ data_schema: data_schema_replace ) - response = client.enqueue_and_get_inference(input, inference_params) + response = client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input, inference_params) expect(response).not_to be_nil model = response.inference.model expect(model).not_to be_nil - expect(model).to be_a(Mindee::Parsing::V2::InferenceModel) + expect(model).to be_a(Mindee::V2::Parsing::InferenceModel) expect(model.id).to eq(model_id) active_options = response.inference.active_options expect(active_options).not_to be_nil - expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions) + expect(active_options).to be_a(Mindee::V2::Parsing::InferenceActiveOptions) expect(active_options.raw_text).to eq(false) expect(active_options.polygon).to eq(false) expect(active_options.confidence).to eq(false) diff --git a/spec/v2/client_v2_spec.rb b/spec/v2/client_v2_spec.rb index 57c3c2fa0..46f532239 100644 --- a/spec/v2/client_v2_spec.rb +++ b/spec/v2/client_v2_spec.rb @@ -2,8 +2,12 @@ require 'json' require 'mindee' +require 'mindee/v2/product' require_relative '../http/mock_http_response' # <- the original helper +using Mindee::V2::Product::Extraction::Params +using Mindee::V2::Product::Extraction + describe Mindee::ClientV2 do 
let(:input_doc) { Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank.pdf')) } let(:base_url) { 'https://dummy-url' } @@ -45,12 +49,12 @@ def stub_next_request_with(method, hash:, status_code: 0) it 'enqueue(path) raises MindeeHTTPErrorV2 on 4xx' do expect do stub_next_request_with(:enqueue, hash: JSON.generate(json400)) - inference_params = Mindee::Input::InferenceParameters.new( + params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( 'dummy-model', raw_text: false, text_context: 'Hello my name is mud.' ) - client.enqueue_inference(input_doc, inference_params) + client.enqueue(Mindee::V2::Product::Extraction::Extraction, input_doc, params) end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(400) expect(e.detail).to eq('Unsupported content.') @@ -60,8 +64,8 @@ def stub_next_request_with(method, hash:, status_code: 0) it 'enqueue_and_get_inference(path) raises MindeeHTTPErrorV2 on 4xx' do expect do stub_next_request_with(:enqueue, hash: JSON.generate(json400)) - inference_params = Mindee::Input::InferenceParameters.new('dummy-model') - client.enqueue_and_get_inference(input_doc, inference_params) + params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new('dummy-model') + client.enqueue_and_get_result(Mindee::V2::Product::Extraction::Extraction, input_doc, params) end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(400) expect(e.detail).to eq('Unsupported content.') @@ -73,8 +77,8 @@ def stub_next_request_with(method, hash:, status_code: 0) expect do stub_next_request_with(:enqueue, hash: JSON.generate(error_hash)) - inference_params = Mindee::Input::InferenceParameters.new('dummy-model') - client.enqueue_inference(input_doc, inference_params) + params = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new('dummy-model') + client.enqueue(Mindee::V2::Product::Extraction::Extraction, input_doc, params) end.to 
raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e| expect(e.status).to eq(413) expect(e.detail).to include('File exceeds size limit') @@ -84,10 +88,11 @@ def stub_next_request_with(method, hash:, status_code: 0) it 'get_job(job_id) returns a fully-formed JobResponse' do json_path = File.join(V2_DATA_DIR, 'job', 'ok_processing.json') parsed = File.read(json_path) - stub_next_request_with(:inference_job_req_get, hash: parsed, status_code: 200) + stub_next_request_with(:poll, hash: parsed, status_code: 200) resp = client.get_job('123e4567-e89b-12d3-a456-426614174000') - expect(resp).to be_a(Mindee::Parsing::V2::JobResponse) + expect(resp).to be_a(Mindee::V2::Parsing::JobResponse) + expect(resp.raw_http).to eq(JSON.generate(JSON.parse(parsed))) expect(resp.job.status).to eq('Processing') expect( resp.job.created_at.strftime('%Y-%m-%dT%H:%M:%S.%6N') @@ -98,10 +103,11 @@ def stub_next_request_with(method, hash:, status_code: 0) it 'should deserialize a job properly' do json_path = File.join(V2_DATA_DIR, 'job', 'ok_processed_webhooks_ok.json') parsed = File.read(json_path) - stub_next_request_with(:inference_job_req_get, hash: parsed, status_code: 200) + stub_next_request_with(:poll, hash: parsed, status_code: 200) resp = client.get_job('123e4567-e89b-12d3-a456-426614174000') - expect(resp).to be_a(Mindee::Parsing::V2::JobResponse) + expect(resp).to be_a(Mindee::V2::Parsing::JobResponse) + expect(resp.raw_http).to eq(JSON.generate(JSON.parse(parsed))) expect(resp.job.status).to eq('Processed') expect(resp.job.model_id).to eq('87654321-4321-4321-4321-CBA987654321') expect(resp.job.filename).to eq('default_sample.jpg') diff --git a/spec/v2/input/local_response_v2_spec.rb b/spec/v2/input/local_response_v2_spec.rb index ee5dd549d..7cf1d54bd 100644 --- a/spec/v2/input/local_response_v2_spec.rb +++ b/spec/v2/input/local_response_v2_spec.rb @@ -11,8 +11,8 @@ def assert_local_response(local_response) dummy_secret_key, 'invalid signature' )).to be(false) 
expect(local_response.get_hmac_signature(dummy_secret_key)).to eq(signature) - inference_response = local_response.deserialize_response(Mindee::Parsing::V2::InferenceResponse) - expect(inference_response).to be_a(Mindee::Parsing::V2::InferenceResponse) + inference_response = local_response.deserialize_response(Mindee::V2::Product::Extraction::ExtractionResponse) + expect(inference_response).to be_a(Mindee::V2::Product::Extraction::ExtractionResponse) expect(inference_response).not_to be_nil expect(inference_response.inference).not_to be_nil end diff --git a/spec/v2/parser_spec.rb b/spec/v2/parser_spec.rb new file mode 100644 index 000000000..97d2dc812 --- /dev/null +++ b/spec/v2/parser_spec.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +require 'json' +require_relative '../../bin/v2/parser' + +describe MindeeCLI::V2Parser do + subject(:parser) { described_class.new([]) } + + it 'keeps already parsed raw payloads unchanged' do + payload = { 'api_request' => { 'status' => 'success' } } + expect(parser.__send__(:raw_payload, payload)).to eq(payload) + end + + it 'parses JSON string payloads' do + payload = { 'api_request' => { 'status' => 'success' } } + expect(parser.__send__(:raw_payload, JSON.generate(payload))).to eq(payload) + end + + it 'parses double-encoded JSON string payloads once more' do + payload = { 'api_request' => { 'status' => 'success' } } + double_encoded = JSON.generate(JSON.generate(payload)) + expect(parser.__send__(:raw_payload, double_encoded)).to eq(payload) + end +end diff --git a/spec/v2/parsing/common_response_spec.rb b/spec/v2/parsing/common_response_spec.rb new file mode 100644 index 000000000..530c4dc23 --- /dev/null +++ b/spec/v2/parsing/common_response_spec.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee' +require 'mindee/v2/parsing/common_response' + +describe Mindee::V2::Parsing::CommonResponse do + it 'stores raw_http as a JSON string' do + server_response = { + 'api_request' => { 
'status' => 'success' }, + 'job' => { 'status' => 'Processing' }, + } + + response = described_class.new(server_response) + expect(response.raw_http).to be_a(String) + expect(JSON.parse(response.raw_http)).to eq(server_response) + end +end diff --git a/spec/v2/parsing/inference_spec.rb b/spec/v2/parsing/inference_spec.rb index 31f2c2b04..d0a4fe1e6 100644 --- a/spec/v2/parsing/inference_spec.rb +++ b/spec/v2/parsing/inference_spec.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/parsing/field' describe 'inference' do let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } @@ -19,13 +20,13 @@ def load_v2_inference(resource_path) local_response = Mindee::Input::LocalResponse.new(resource_path) - local_response.deserialize_response(Mindee::Parsing::V2::InferenceResponse) + local_response.deserialize_response(Mindee::V2::Product::Extraction::ExtractionResponse) end - simple_field = Mindee::Parsing::V2::Field::SimpleField - object_field = Mindee::Parsing::V2::Field::ObjectField - list_field = Mindee::Parsing::V2::Field::ListField - field_confidence = Mindee::Parsing::V2::Field::FieldConfidence + simple_field = Mindee::V2::Parsing::Field::SimpleField + object_field = Mindee::V2::Parsing::Field::ObjectField + list_field = Mindee::V2::Parsing::Field::ListField + field_confidence = Mindee::V2::Parsing::Field::FieldConfidence describe 'simple' do it 'loads a blank inference with valid properties' do @@ -33,7 +34,7 @@ def load_v2_inference(resource_path) fields = response.inference.result.fields expect(fields).not_to be_empty - expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + expect(fields).to be_a(Mindee::V2::Parsing::Field::InferenceFields) expect(fields.size).to eq(21) expect(fields).to have_key('taxes') @@ -64,7 +65,7 @@ def load_v2_inference(resource_path) inference = response.inference job = inference.job expect(job).not_to be_nil - expect(job).to 
be_a(Mindee::Parsing::V2::InferenceJob) + expect(job).to be_a(Mindee::V2::Parsing::InferenceJob) expect(job.id).to eq('12345678-1234-1234-1234-jobid1234567') expect(inference).not_to be_nil @@ -191,7 +192,7 @@ def load_standard_fields expect(active_options.raw_text).to eq(true) fields = response.inference.result.fields - expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + expect(fields).to be_a(Mindee::V2::Parsing::Field::InferenceFields) fields end @@ -291,13 +292,13 @@ def load_standard_fields raw_text = response.inference.result.raw_text expect(raw_text).not_to be_nil - expect(raw_text).to be_a(Mindee::Parsing::V2::RawText) + expect(raw_text).to be_a(Mindee::V2::Parsing::RawText) expect(raw_text.to_s).to eq(File.read(raw_text_str_path, encoding: 'UTF-8')) expect(raw_text.pages.length).to eq(2) first = raw_text.pages.first - expect(first).to be_a(Mindee::Parsing::V2::RawTextPage) + expect(first).to be_a(Mindee::V2::Parsing::RawTextPage) expect(first.content).to eq('This is the raw text of the first page...') raw_text.pages.each do |page| diff --git a/spec/v2/parsing/job_webhook_spec.rb b/spec/v2/parsing/job_webhook_spec.rb index b07e62635..3fbead365 100644 --- a/spec/v2/parsing/job_webhook_spec.rb +++ b/spec/v2/parsing/job_webhook_spec.rb @@ -1,8 +1,9 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/parsing/job_webhook' -describe Mindee::Parsing::V2::JobWebhook do +describe Mindee::V2::Parsing::JobWebhook do describe '#initialize' do context 'when error key is present but value is nil' do it 'does not raise an error and sets @error to nil' do @@ -52,7 +53,7 @@ expect(webhook.id).to eq('12345678-1234-1234-1234-123456789012') expect(webhook.status).to eq('Failed') - expect(webhook.error).to be_a(Mindee::Parsing::V2::ErrorResponse) + expect(webhook.error).to be_a(Mindee::V2::Parsing::ErrorResponse) expect(webhook.error.status).to eq(500) expect(webhook.error.detail).to eq('Internal server error') end diff --git 
a/spec/v2/input/inference_parameter_spec.rb b/spec/v2/product/extraction/extraction_parameter_spec.rb similarity index 72% rename from spec/v2/input/inference_parameter_spec.rb rename to spec/v2/product/extraction/extraction_parameter_spec.rb index bd4ae299c..59289cffc 100644 --- a/spec/v2/input/inference_parameter_spec.rb +++ b/spec/v2/product/extraction/extraction_parameter_spec.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -require 'mindee/input/inference_parameters' require 'mindee/input/data_schema' +require 'mindee/v2/product/extraction/params/extraction_parameters' -describe Mindee::Input::InferenceParameters do +describe Mindee::V2::Product::Extraction::Params::ExtractionParameters do let(:extracted_schema_content) do File.read(File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) end @@ -14,14 +14,14 @@ describe 'Data Schema' do describe "shouldn't replace when unset" do it 'should initialize with a data schema' do - param = Mindee::Input::InferenceParameters.new( + param = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( 'dummy-model' ) expect(param.data_schema).to be_nil end it 'should initialize with string' do - param = Mindee::Input::InferenceParameters.new( + param = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( 'dummy-model', data_schema: extracted_schema_str ) @@ -29,7 +29,7 @@ end it 'should initialize with hash' do - param = Mindee::Input::InferenceParameters.new( + param = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( 'dummy-model', data_schema: extracted_schema_hash ) @@ -37,7 +37,7 @@ end it 'should initialize with DataSchema object' do - param = Mindee::Input::InferenceParameters.new( + param = Mindee::V2::Product::Extraction::Params::ExtractionParameters.new( 'dummy-model', data_schema: extracted_schema_object ) diff --git a/spec/v2/product/extraction/extraction_spec.rb b/spec/v2/product/extraction/extraction_spec.rb index 352a822f8..4a676f192 100644 --- 
a/spec/v2/product/extraction/extraction_spec.rb +++ b/spec/v2/product/extraction/extraction_spec.rb @@ -3,6 +3,11 @@ require 'mindee' require 'mindee/input/local_response' require 'mindee/v2/product' +require 'mindee/v2/parsing/field' +require 'mindee/v2/product/extraction/extraction_response' + +using Mindee::V2::Product::Extraction +using Mindee::V2::Parsing::Field describe 'extraction' do let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } @@ -24,10 +29,10 @@ def load_v2_extraction_inference(resource_path) local_response.deserialize_response(Mindee::V2::Product::Extraction::ExtractionResponse) end - simple_field = Mindee::Parsing::V2::Field::SimpleField - object_field = Mindee::Parsing::V2::Field::ObjectField - list_field = Mindee::Parsing::V2::Field::ListField - field_confidence = Mindee::Parsing::V2::Field::FieldConfidence + simple_field = Mindee::V2::Parsing::Field::SimpleField + object_field = Mindee::V2::Parsing::Field::ObjectField + list_field = Mindee::V2::Parsing::Field::ListField + field_confidence = Mindee::V2::Parsing::Field::FieldConfidence describe 'simple' do it 'loads a blank extraction inference with valid properties' do @@ -35,7 +40,7 @@ def load_v2_extraction_inference(resource_path) fields = response.inference.result.fields expect(fields).not_to be_empty - expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + expect(fields).to be_a(Mindee::V2::Parsing::Field::InferenceFields) expect(fields.size).to eq(21) expect(fields).to have_key('taxes') @@ -66,7 +71,7 @@ def load_v2_extraction_inference(resource_path) inference = response.inference job = inference.job expect(job).not_to be_nil - expect(job).to be_a(Mindee::Parsing::V2::InferenceJob) + expect(job).to be_a(Mindee::V2::Parsing::InferenceJob) expect(job.id).to eq('12345678-1234-1234-1234-jobid1234567') expect(inference).not_to be_nil @@ -193,7 +198,7 @@ def load_standard_fields expect(active_options.raw_text).to eq(true) fields = 
response.inference.result.fields - expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + expect(fields).to be_a(Mindee::V2::Parsing::Field::InferenceFields) fields end @@ -293,13 +298,13 @@ def load_standard_fields raw_text = response.inference.result.raw_text expect(raw_text).not_to be_nil - expect(raw_text).to be_a(Mindee::Parsing::V2::RawText) + expect(raw_text).to be_a(Mindee::V2::Parsing::RawText) expect(raw_text.to_s).to eq(File.read(raw_text_str_path, encoding: 'UTF-8')) expect(raw_text.pages.length).to eq(2) first = raw_text.pages.first - expect(first).to be_a(Mindee::Parsing::V2::RawTextPage) + expect(first).to be_a(Mindee::V2::Parsing::RawTextPage) expect(first.content).to eq('This is the raw text of the first page...') raw_text.pages.each do |page|