"""
    parse_double_header(s::AbstractString)

Classify `s` as one of the section keywords that `parse_header` looks for when
a line contains exactly two items.

Return `HEADER_OBJSENSE`, `HEADER_QCMATRIX`, or `HEADER_QSECTION` when `s`
matches `OBJSENSE`, `QCMATRIX`, or `QSECTION` (ignoring case), and
`HEADER_UNKNOWN` otherwise.
"""
function parse_double_header(s::AbstractString)
    lead = first(s)
    # Every keyword recognised here is exactly eight characters long.
    if length(s) != 8
        return HEADER_UNKNOWN
    end
    # Look at the first character before calling `uppercase`, so the
    # conversion is only performed for plausible candidates.
    if lead == 'O' || lead == 'o'
        if uppercase(s) == "OBJSENSE"
            return HEADER_OBJSENSE
        end
    elseif lead == 'Q' || lead == 'q'
        upper = uppercase(s)
        if startswith(upper, "QCMATRIX")
            return HEADER_QCMATRIX
        elseif startswith(upper, "QSECTION")
            return HEADER_QSECTION
        end
    end
    return HEADER_UNKNOWN
end
"""
    LineToItems(line::String)

Split `line` on any whitespace characters. We can't split only on `' '` because
at least one model in MIPLIB has `\\t` as a separator.

This decision assumes that we are parsing a free MPS file, where whitespace is
disallowed in names. If this ever becomes a problem, we could change to the
fixed MPS format, where the files are split at the usual offsets.

The struct stores the original `line`, the total number of
whitespace-separated fields in `nfields`, and the index ranges of (at most) the
first five fields in `fields`. Each range runs from the string index of the
first character of the field to the string index of its last character, so it
can be passed directly to `SubString`. Unused entries of `fields` are `0:0`.

This type is a more performant version of:
```julia
LineToItems(line::String) = split(line, r"\\s"; keepempty = false)
```
"""
struct LineToItems
    line::String
    nfields::Int
    fields::NTuple{5,UnitRange{Int}}

    function LineToItems(line::String)
        nfields, f1, f2, f3, f4, f5 = 0, 0:0, 0:0, 0:0, 0:0, 0:0
        # `start` and `stop` are the string indices of the first and the most
        # recent character of the field that is currently being scanned.
        start, stop, in_field = -1, -1, false
        # Iterate over valid character indices, not code units: indexing a
        # `String` at the continuation byte of a multi-byte UTF-8 character
        # throws a `StringIndexError`.
        for i in eachindex(line)
            if isspace(line[i])
                if in_field
                    # A whitespace character terminates the current field.
                    nfields += 1
                    if nfields == 1
                        f1 = start:stop
                    elseif nfields == 2
                        f2 = start:stop
                    elseif nfields == 3
                        f3 = start:stop
                    elseif nfields == 4
                        f4 = start:stop
                    elseif nfields == 5
                        f5 = start:stop
                    end
                    in_field = false
                end
            else
                if !in_field
                    start = i
                    in_field = true
                end
                stop = i
            end
        end
        if in_field
            # The line ended without trailing whitespace; close the last field.
            nfields += 1
            if nfields == 1
                f1 = start:stop
            elseif nfields == 2
                f2 = start:stop
            elseif nfields == 3
                f3 = start:stop
            elseif nfields == 4
                f4 = start:stop
            elseif nfields == 5
                f5 = start:stop
            end
        end
        return new(line, nfields, (f1, f2, f3, f4, f5))
    end
end
# The number of whitespace-separated fields found on the line. This may be
# larger than five, even though only the first five fields can be indexed.
Base.length(x::LineToItems) = x.nfields

# `getindex` always returns a `SubString` of the stored `String`, so declare
# that as the element type. Without this, `collect` builds a `Vector{Any}`.
Base.eltype(::Type{LineToItems}) = SubString{String}

"""
    Base.getindex(x::LineToItems, i::Int)

Return the `i`-th field of `x` as a `SubString` of the original line.

Throws a `BoundsError` if `i` is less than `1`, greater than the number of
fields on the line, or greater than `5` (only five field ranges are stored).
"""
function Base.getindex(x::LineToItems, i::Int)
    if !(1 <= i <= min(5, x.nfields))
        throw(BoundsError(x, i))
    end
    return SubString(x.line, x.fields[i])
end

Base.iterate(x::LineToItems) = iterate(x, 1)

# The iteration state `i` is the index of the next field to return.
function Base.iterate(x::LineToItems, i)
    if i > x.nfields
        return nothing
    end
    return x[i], i + 1
end

"""
    parse_header(s::LineToItems)

Return the header that the line `s` declares, or `HEADER_UNKNOWN` if the line
is not a header line.

A line with one item is classified by `parse_single_header`, a line with two
items is classified by `parse_double_header` applied to the first item, and any
other number of items yields `HEADER_UNKNOWN`.
"""
# `parse_header` gets called a lot (on every line), so we try very hard to be
# efficient.
function parse_header(s::LineToItems)
    if length(s) == 1
        return parse_single_header(s[1])
    elseif length(s) == 2
        return parse_double_header(s[1])
    end
    return HEADER_UNKNOWN
end
- items = line_to_items(line) if header == HEADER_NAME - parse_name_line(data, line) + parse_name_line(data) elseif header == HEADER_OBJSENSE sense = uppercase(only(items)) if !(sense in ("MIN", "MAX")) @@ -490,8 +577,8 @@ end # NAME # ============================================================================== -function parse_name_line(data::TempMPSModel, line) - m = match(r"^\s*NAME(.*)"i, line) +function parse_name_line(data::TempMPSModel) + m = match(r"^\s*NAME(.*)"i, data.contents) if m === nothing _throw_parse_error( data, @@ -506,7 +593,7 @@ end # ROWS # ============================================================================== -function parse_rows_line(data::TempMPSModel{T}, items::Vector) where {T} +function parse_rows_line(data::TempMPSModel{T}, items) where {T} if length(items) < 2 _throw_parse_error( data, @@ -619,7 +706,7 @@ function _set_intorg(data::TempMPSModel{T}, column, column_name) where {T} return end -function parse_columns_line(data::TempMPSModel{T}, items::Vector) where {T} +function parse_columns_line(data::TempMPSModel{T}, items) where {T} if length(items) == 3 # [column name] [row name] [value] column_name, row_name, value = items @@ -657,7 +744,7 @@ end # RHS # ============================================================================== -function parse_single_rhs(data, row_name, value, items::Vector) +function parse_single_rhs(data, row_name, value, items) if row_name == data.obj_name data.obj_constant = value return @@ -688,7 +775,7 @@ function parse_single_rhs(data, row_name, value, items::Vector) end # TODO: handle multiple RHS vectors. -function parse_rhs_line(data::TempMPSModel{T}, items::Vector) where {T} +function parse_rhs_line(data::TempMPSModel{T}, items) where {T} if length(items) == 3 # [rhs name] [row name] [value] rhs_name, row_name, value = items @@ -744,7 +831,7 @@ function parse_single_range(data, row_name, value) end # TODO: handle multiple RANGES vectors. 
# Tabs, and mixtures of tabs and spaces, must be accepted as field separators.
function test_issue_2797_tab()
    two_items = ["a", "b"]
    for line in ["a b", " a b", "a\tb", "a\tb", "a\t b"]
        @test collect(MPS.LineToItems(line)) == two_items
    end
    @test collect(MPS.LineToItems(" a \t b c ")) == ["a", "b", "c"]
    return
end

# `LineToItems` must agree with `split` for one to five fields, report the true
# number of fields, and reject out-of-range indices.
function test_LineToItems()
    lines = [
        "a",
        " a ",
        "a b",
        " a b ",
        "a b c",
        " a b c ",
        "a b c d",
        " a b c d ",
        "a b c d e",
        " a b c d e ",
    ]
    for line in lines
        reference = split(line, ' '; keepempty = false)
        @test collect(MPS.LineToItems(line)) == reference
    end
    # Seven fields are counted, but only the first five can be indexed.
    seven = MPS.LineToItems("a b c d e f g")
    @test length(seven) == 7
    @test_throws BoundsError seven[0]
    @test seven[1] == "a"
    @test_throws BoundsError seven[6]
    two = MPS.LineToItems("a b")
    @test length(two) == 2
    @test_throws BoundsError two[3]
    return
end

# Every header keyword must be recognised in both upper and lower case, and
# anything else must map to `HEADER_UNKNOWN`.
function test_parse_header()
    cases = [
        "OBJSENSE" => MPS.HEADER_OBJSENSE,
        "OBJSENSE MAX" => MPS.HEADER_OBJSENSE,
        "ROWS" => MPS.HEADER_ROWS,
        "COLUMNS" => MPS.HEADER_COLUMNS,
        "RHS" => MPS.HEADER_RHS,
        "RANGES" => MPS.HEADER_RANGES,
        "BOUNDS" => MPS.HEADER_BOUNDS,
        "SOS" => MPS.HEADER_SOS,
        "ENDATA" => MPS.HEADER_ENDATA,
        "QUADOBJ" => MPS.HEADER_QUADOBJ,
        "QMATRIX" => MPS.HEADER_QMATRIX,
        "QCMATRIX c" => MPS.HEADER_QCMATRIX,
        "QSECTION c" => MPS.HEADER_QSECTION,
        "INDICATORS" => MPS.HEADER_INDICATORS,
        "" => MPS.HEADER_UNKNOWN,
        "FOO" => MPS.HEADER_UNKNOWN,
        "RHS X" => MPS.HEADER_UNKNOWN,
        "QDMATRIX X" => MPS.HEADER_UNKNOWN,
        "RHS X 1" => MPS.HEADER_UNKNOWN,
    ]
    for (line, header) in cases
        for text in (line, lowercase(line))
            @test header == MPS.parse_header(MPS.LineToItems(text))
        end
    end
    return
end