Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 128 additions & 41 deletions src/FileFormats/MPS/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,7 @@ end
HEADER_INDICATORS,
)

# `Headers` gets called _a lot_ (on every line), so we try very hard to be
# efficient.
function Headers(s)
function parse_single_header(s::AbstractString)
N = length(s)
x = first(s)
if N == 3
Expand All @@ -180,7 +178,7 @@ function Headers(s)
return HEADER_ENDATA
end
elseif N == 7
if (x == 'C' || x == 'c') && (uppercase(s) == "COLUMNS")
if (x == 'C' || x == 'c') && uppercase(s) == "COLUMNS"
return HEADER_COLUMNS
elseif (x == 'Q' || x == 'q')
header = uppercase(s)
Expand All @@ -190,34 +188,127 @@ function Headers(s)
return HEADER_QMATRIX
end
end
elseif N >= 8
if (x == 'O' || x == 'o') && startswith(uppercase(s), "OBJSENSE")
elseif N == 8
if (x == 'O' || x == 'o') && uppercase(s) == "OBJSENSE"
return HEADER_OBJSENSE
elseif (x == 'Q' || x == 'q')
header = uppercase(s)
if startswith(header, "QCMATRIX")
return HEADER_QCMATRIX
elseif startswith(header, "QSECTION")
return HEADER_QSECTION
end
elseif N == 10
if (x == 'I' || x == 'i') && uppercase(s) == "INDICATORS"
return HEADER_INDICATORS
end
end
return HEADER_UNKNOWN
end

"""
    parse_double_header(s::AbstractString)

Classify `s`, the first field of a two-field line, as one of the headers that
may be followed by an argument on the same line.

The comparison is case-insensitive. Return `HEADER_OBJSENSE`,
`HEADER_QCMATRIX`, or `HEADER_QSECTION` on a match, and `HEADER_UNKNOWN`
otherwise.
"""
function parse_double_header(s::AbstractString)
    len = length(s)
    c = first(s)
    # Every header recognised here is exactly eight characters long.
    if len != 8
        return HEADER_UNKNOWN
    end
    # Look at the first character before paying for `uppercase`.
    if c == 'O' || c == 'o'
        if uppercase(s) == "OBJSENSE"
            return HEADER_OBJSENSE
        end
    elseif c == 'Q' || c == 'q'
        upper = uppercase(s)
        if startswith(upper, "QCMATRIX")
            return HEADER_QCMATRIX
        elseif startswith(upper, "QSECTION")
            return HEADER_QSECTION
        end
    end
    return HEADER_UNKNOWN
end

"""
LineToItems(line::String)

Split on any whitespace characters. We can't split only on `' '` because at
least one model in MIPLIB has `\t` as a separator.

This decision assumes that we are parsing a free MPS file, where whitespace is
disallowed in names. If this ever becomes a problem, we could change to the
fixed MPS format, where the files are split at the usual offsets.

This function is a more performant version of:
```julia
LineToItems(line::String) = split(line, r"\\s"; keepempty = false)
```
"""
struct LineToItems
    # The original line. The ranges in `fields` index into this string.
    line::String
    # Total number of whitespace-delimited fields found in `line`. This can be
    # larger than 5, even though only the first five ranges are stored.
    nfields::Int
    # Index ranges of the first five fields. Unused slots are left as `0:0`.
    fields::NTuple{5,UnitRange{Int}}

    function LineToItems(line::String)
        nfields, f1, f2, f3, f4, f5 = 0, 0:0, 0:0, 0:0, 0:0, 0:0
        start, stop, in_field = -1, -1, false
        # Walk the characters, not the code units: `line[i]` throws a
        # `StringIndexError` when `i` points inside a multi-byte character, and
        # `SubString` requires every range to end on the first code unit of a
        # character. `stop` therefore tracks the index of the last
        # non-whitespace character seen in the current field.
        for (i, c) in pairs(line)
            if isspace(c)
                if in_field
                    # Whitespace terminates the current field.
                    nfields += 1
                    if nfields == 1
                        f1 = start:stop
                    elseif nfields == 2
                        f2 = start:stop
                    elseif nfields == 3
                        f3 = start:stop
                    elseif nfields == 4
                        f4 = start:stop
                    elseif nfields == 5
                        f5 = start:stop
                    end
                    in_field = false
                end
            else
                if !in_field
                    start = i
                    in_field = true
                end
                stop = i
            end
        end
        if in_field
            # The line ended without trailing whitespace: close the last field.
            nfields += 1
            if nfields == 1
                f1 = start:stop
            elseif nfields == 2
                f2 = start:stop
            elseif nfields == 3
                f3 = start:stop
            elseif nfields == 4
                f4 = start:stop
            elseif nfields == 5
                f5 = start:stop
            end
        end
        return new(line, nfields, (f1, f2, f3, f4, f5))
    end
end

function line_to_items(line)
# Split on any whitespace characters. We can't split only on `' '` because
# at least one model in MIPLIB has `\t` as a separator.
#
# This decision assumes that we are parsing a free MPS file, where
# whitespace is disallowed in names. If this ever becomes a problem, we
# could change to the fixed MPS format, where the files are split at the
# usual offsets.
return split(line, r"\s"; keepempty = false)
# The number of whitespace-delimited fields found on the line. This is the
# stored `nfields` count, which may be larger than the number of fields that
# can be retrieved by indexing.
function Base.length(x::LineToItems)
    return x.nfields
end

# Return the `i`-th field of the line as a `SubString` of `x.line`.
#
# Only the first five fields are stored, so a `BoundsError` is thrown unless
# `1 <= i <= min(5, x.nfields)`.
function Base.getindex(x::LineToItems, i::Int)
    upper = min(5, x.nfields)
    if i < 1 || i > upper
        throw(BoundsError(x, i))
    end
    field = x.fields[i]
    return SubString(x.line, field)
end

# Iterate over the fields of the line in order. The state is the index of the
# next field to return; iteration starts at 1 and stops once the state exceeds
# `x.nfields`.
function Base.iterate(x::LineToItems, i = 1)
    return i > x.nfields ? nothing : (x[i], i + 1)
end

# `parse_header` gets called _a lot_ (on every line), so we try very hard to be
# efficient.
#
# A line with a single field is checked against the stand-alone headers, a line
# with two fields is checked against the headers that take an argument, and
# anything else is `HEADER_UNKNOWN`.
function parse_header(s::LineToItems)
    n = length(s)
    if n == 1
        return parse_single_header(s[1])
    end
    if n == 2
        return parse_double_header(s[1])
    end
    return HEADER_UNKNOWN
end

"""
Expand All @@ -237,13 +328,12 @@ function Base.read!(io::IO, model::Model{T}) where {T}
if startswith(data.contents, '*')
continue # Lines starting with `*` are comments
end
line = string(strip(data.contents))
if isempty(line)
items = LineToItems(data.contents)
if length(items) == 0
continue # Skip blank lines
end
h = Headers(line)
h = parse_header(items)
if h == HEADER_OBJSENSE
items = line_to_items(line)
if length(items) == 2
sense = uppercase(items[2])
if !(sense in ("MIN", "MAX"))
Expand All @@ -258,7 +348,6 @@ function Base.read!(io::IO, model::Model{T}) where {T}
end
continue
elseif h == HEADER_QCMATRIX || h == HEADER_QSECTION
items = line_to_items(line)
if length(items) != 2
_throw_parse_error(
data,
Expand All @@ -274,10 +363,8 @@ function Base.read!(io::IO, model::Model{T}) where {T}
continue
end
# Otherwise, carry on with the previous header
# TODO: split into hard fields based on column indices.
items = line_to_items(line)
if header == HEADER_NAME
parse_name_line(data, line)
parse_name_line(data)
elseif header == HEADER_OBJSENSE
sense = uppercase(only(items))
if !(sense in ("MIN", "MAX"))
Expand Down Expand Up @@ -490,8 +577,8 @@ end
# NAME
# ==============================================================================

function parse_name_line(data::TempMPSModel, line)
m = match(r"^\s*NAME(.*)"i, line)
function parse_name_line(data::TempMPSModel)
m = match(r"^\s*NAME(.*)"i, data.contents)
if m === nothing
_throw_parse_error(
data,
Expand All @@ -506,7 +593,7 @@ end
# ROWS
# ==============================================================================

function parse_rows_line(data::TempMPSModel{T}, items::Vector) where {T}
function parse_rows_line(data::TempMPSModel{T}, items) where {T}
if length(items) < 2
_throw_parse_error(
data,
Expand Down Expand Up @@ -619,7 +706,7 @@ function _set_intorg(data::TempMPSModel{T}, column, column_name) where {T}
return
end

function parse_columns_line(data::TempMPSModel{T}, items::Vector) where {T}
function parse_columns_line(data::TempMPSModel{T}, items) where {T}
if length(items) == 3
# [column name] [row name] [value]
column_name, row_name, value = items
Expand Down Expand Up @@ -657,7 +744,7 @@ end
# RHS
# ==============================================================================

function parse_single_rhs(data, row_name, value, items::Vector)
function parse_single_rhs(data, row_name, value, items)
if row_name == data.obj_name
data.obj_constant = value
return
Expand Down Expand Up @@ -688,7 +775,7 @@ function parse_single_rhs(data, row_name, value, items::Vector)
end

# TODO: handle multiple RHS vectors.
function parse_rhs_line(data::TempMPSModel{T}, items::Vector) where {T}
function parse_rhs_line(data::TempMPSModel{T}, items) where {T}
if length(items) == 3
# [rhs name] [row name] [value]
rhs_name, row_name, value = items
Expand Down Expand Up @@ -744,7 +831,7 @@ function parse_single_range(data, row_name, value)
end

# TODO: handle multiple RANGES vectors.
function parse_ranges_line(data::TempMPSModel{T}, items::Vector) where {T}
function parse_ranges_line(data::TempMPSModel{T}, items) where {T}
if length(items) == 3
# [rhs name] [row name] [value]
_, row_name, value = items
Expand Down Expand Up @@ -859,7 +946,7 @@ function _parse_single_bound(
end
end

function parse_bounds_line(data::TempMPSModel{T}, items::Vector) where {T}
function parse_bounds_line(data::TempMPSModel{T}, items) where {T}
if length(items) == 3
bound_type, _, column_name = items
_parse_single_bound(data, column_name, bound_type)
Expand Down
74 changes: 66 additions & 8 deletions test/FileFormats/MPS/test_MPS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1126,11 +1126,12 @@ function test_parse_name_line()
" NAME foo" => "foo",
"" => nothing,
)
data.contents = line
data.name = "_"
if name === nothing
@test_throws MPS.ParseError MPS.parse_name_line(data, line)
@test_throws MPS.ParseError MPS.parse_name_line(data)
else
MPS.parse_name_line(data, line)
MPS.parse_name_line(data)
@test data.name == name
end
end
Expand Down Expand Up @@ -1702,12 +1703,12 @@ function test_issue_2792()
end

# Check that fields separated by spaces, tabs, or a mix of both are split into
# the same items, and that leading and trailing whitespace is ignored.
function test_issue_2797_tab()
# Assertions written against `MPS.line_to_items`.
# NOTE(review): this block exercises both `MPS.line_to_items` and
# `MPS.LineToItems` -- confirm that both are still defined in `MPS`.
@test MPS.line_to_items("a b") == ["a", "b"]
@test MPS.line_to_items(" a b") == ["a", "b"]
@test MPS.line_to_items("a\tb") == ["a", "b"]
@test MPS.line_to_items("a\tb") == ["a", "b"]
@test MPS.line_to_items("a\t b") == ["a", "b"]
@test MPS.line_to_items(" a \t b c ") == ["a", "b", "c"]
# The same inputs, split with `MPS.LineToItems` and materialized with `collect`.
@test MPS.LineToItems("a b") |> collect == ["a", "b"]
@test MPS.LineToItems(" a b") |> collect == ["a", "b"]
@test MPS.LineToItems("a\tb") |> collect == ["a", "b"]
@test MPS.LineToItems("a\tb") |> collect == ["a", "b"]
@test MPS.LineToItems("a\t b") |> collect == ["a", "b"]
@test MPS.LineToItems(" a \t b c ") |> collect == ["a", "b", "c"]
return
end

Expand All @@ -1728,6 +1729,63 @@ function test_unsupported_objectives()
return
end

# Check `MPS.LineToItems` against `split` for lines with one to five fields,
# then check `length` and the bounds checking of `getindex`.
function test_LineToItems()
    lines = [
        "a",
        " a ",
        "a b",
        " a b ",
        "a b c",
        " a b c ",
        "a b c d",
        " a b c d ",
        "a b c d e",
        " a b c d e ",
    ]
    for line in lines
        expected = split(line, ' '; keepempty = false)
        @test collect(MPS.LineToItems(line)) == expected
    end
    # With more than five fields, `length` reports all of them but only indices
    # 1 through 5 can be read.
    long = MPS.LineToItems("a b c d e f g")
    @test length(long) == 7
    @test_throws BoundsError long[0]
    @test long[1] == "a"
    @test_throws BoundsError long[6]
    # With fewer than five fields, the upper bound is the number of fields.
    short = MPS.LineToItems("a b")
    @test length(short) == 2
    @test_throws BoundsError short[3]
    return
end

# Check that `MPS.parse_header` classifies each line as the expected header,
# both as written and in lowercase.
function test_parse_header()
    # Each pair maps a raw line to the header it should be classified as.
    cases = [
        "OBJSENSE" => MPS.HEADER_OBJSENSE,
        "OBJSENSE MAX" => MPS.HEADER_OBJSENSE,
        "ROWS" => MPS.HEADER_ROWS,
        "COLUMNS" => MPS.HEADER_COLUMNS,
        "RHS" => MPS.HEADER_RHS,
        "RANGES" => MPS.HEADER_RANGES,
        "BOUNDS" => MPS.HEADER_BOUNDS,
        "SOS" => MPS.HEADER_SOS,
        "ENDATA" => MPS.HEADER_ENDATA,
        "QUADOBJ" => MPS.HEADER_QUADOBJ,
        "QMATRIX" => MPS.HEADER_QMATRIX,
        "QCMATRIX c" => MPS.HEADER_QCMATRIX,
        "QSECTION c" => MPS.HEADER_QSECTION,
        "INDICATORS" => MPS.HEADER_INDICATORS,
        "" => MPS.HEADER_UNKNOWN,
        "FOO" => MPS.HEADER_UNKNOWN,
        "RHS X" => MPS.HEADER_UNKNOWN,
        "QDMATRIX X" => MPS.HEADER_UNKNOWN,
        "RHS X 1" => MPS.HEADER_UNKNOWN,
    ]
    for (line, header) in cases
        for text in (line, lowercase(line))
            @test header == MPS.parse_header(MPS.LineToItems(text))
        end
    end
    return
end

end # TestMPS

TestMPS.runtests()
Loading