| | 1 | | """ |
| | 2 | | JSONWorksheet |
| | 3 | |
|
| | 4 | | construct 'Array{OrderedDict, 1}' for each row from Worksheet |
| | 5 | |
|
| | 6 | | # Constructors |
| | 7 | | ```julia |
| | 8 | | JSONWorksheet("Example.xlsx", "Sheet1") |
| | 9 | | JSONWorksheet("Example.xlsx", 1) |
| | 10 | |
|
| | 11 | | ``` |
| | 12 | | # Arguments |
| | 13 | | - `row_oriented` : if 'true'(the default) it will look for colum names in '1:1', if `false` it will look for colum names |
| | 14 | | - `start_line` : starting index of position of columnname. |
| | 15 | | - `squeeze` : squeezes all rows of Worksheet to a singe row. |
| | 16 | | - `delim` : a String or Regrex that of deliminator for converting single cell to array. |
| | 17 | |
|
| | 18 | | """ |
| | 19 | | mutable struct JSONWorksheet |
| 45 | 20 | | xlsxpath::String |
| | 21 | | pointer::Array{Pointer,1} |
| | 22 | | data::Array{T,1} where T |
| | 23 | | sheetname::String |
| | 24 | | end |
| | 25 | | function JSONWorksheet(xlsxpath, sheet, arr; |
| | 26 | | delim=";", squeeze=false) |
| 100 | 27 | | arr = dropemptyrange(arr) |
| 52 | 28 | | @assert !isempty(arr) "'$(xlsxpath)!$(sheet)' don't have valid column names, try change optional argument'start_line |
| | 29 | |
|
| 96 | 30 | | pointer = _column_to_pointer.(arr[1, :]) |
| 48 | 31 | | real_keys = map(el -> el.tokens, pointer) |
| | 32 | | # TODO more robust key validity check |
| 48 | 33 | | if !allunique(real_keys) |
| 3 | 34 | | throw(AssertionError("column names must be unique, check for duplication $(arr[1, :])")) |
| | 35 | | end |
| | 36 | |
|
| 45 | 37 | | if squeeze |
| 1 | 38 | | data = squeezerow_to_jsonarray(arr, pointer, delim) |
| | 39 | | else |
| 44 | 40 | | data = eachrow_to_jsonarray(arr, pointer, delim) |
| | 41 | | end |
| 43 | 42 | | JSONWorksheet(normpath(xlsxpath), pointer, data, String(sheet)) |
| | 43 | | end |
| | 44 | | function JSONWorksheet(xf::XLSX.XLSXFile, sheet; |
| | 45 | | start_line=1, |
| | 46 | | row_oriented=true, |
| | 47 | | delim=";", squeeze=false) |
| 86 | 48 | | ws = isa(sheet, Symbol) ? xf[String(sheet)] : xf[sheet] |
| 43 | 49 | | sheet = ws.name |
| | 50 | | # orientation handling |
| 0 | 51 | | ws = begin |
| 86 | 52 | | rg = XLSX.get_dimension(ws) |
| 43 | 53 | | if row_oriented |
| 42 | 54 | | rg = XLSX.CellRange(XLSX.CellRef(start_line, rg.start.column_number), rg.stop) |
| 42 | 55 | | dt = ws[rg] |
| | 56 | | else |
| 1 | 57 | | rg = XLSX.CellRange(XLSX.CellRef(rg.start.row_number, start_line), rg.stop) |
| 44 | 58 | | dt = permutedims(ws[rg]) |
| | 59 | | end |
| | 60 | | end |
| | 61 | |
|
| 43 | 62 | | JSONWorksheet(xf.filepath, sheet, ws; delim=delim, squeeze=squeeze) |
| | 63 | | end |
| | 64 | | function JSONWorksheet(xlsxpath, sheet; kwargs...) |
| 32 | 65 | | xf = XLSX.readxlsx(xlsxpath) |
| 16 | 66 | | x = JSONWorksheet(xf, sheet; kwargs...) |
| 9 | 67 | | close(xf) |
| 9 | 68 | | return x |
| | 69 | | end |
| | 70 | |
|
| | 71 | | function eachrow_to_jsonarray(data::Array{T,2}, pointers, delim) where T |
| 44 | 72 | | json = Array{OrderedDict,1}(undef, size(data, 1) - 1) |
| 88 | 73 | | Threads.@threads for i in 1:length(json) |
| 134 | 74 | | json[i] = row_to_jsonarray(data[i + 1, :], pointers, delim) |
| | 75 | | end |
| 42 | 76 | | return json |
| | 77 | | end |
| | 78 | |
|
| | 79 | | function row_to_jsonarray(row, pointers, delim) |
| 134 | 80 | | x = OrderedDict{String,Any}() |
| 268 | 81 | | for (col, p) in enumerate(pointers) |
| 1140 | 82 | | x[p] = collect_cell(p, row[col], delim) |
| | 83 | | end |
| 132 | 84 | | return x |
| | 85 | | end |
| | 86 | |
|
| | 87 | | function squeezerow_to_jsonarray(data::Array{T,2}, pointers, delim) where T |
| 1 | 88 | | arr_pointer = map(p -> begin |
| 4 | 89 | | U = Vector{eltype(p)}; Pointer{U}(p.tokens) |
| | 90 | | end, pointers) |
| | 91 | |
|
| 1 | 92 | | squzzed_json = OrderedDict{String, Any}() |
| 2 | 93 | | @inbounds for (col, p) in enumerate(pointers) |
| 202 | 94 | | val = map(i -> collect_cell(p, data[i + 1, :][col], delim), 1:size(data, 1) - 1) |
| 3 | 95 | | squzzed_json[arr_pointer[col]] = val |
| | 96 | | end |
| 1 | 97 | | return [squzzed_json] |
| | 98 | | end |
| | 99 | |
|
| | 100 | | @inline function dropemptyrange(arr::Array{T,2}) where T |
| 50 | 101 | | cols = falses(size(arr, 2)) |
| 100 | 102 | | @inbounds for c in 1:size(arr, 2) |
| | 103 | | # There must be a column name, or it's a commet line |
| 464 | 104 | | if !ismissing(arr[1, c]) |
| 460 | 105 | | for r in 1:size(arr, 1) |
| 457 | 106 | | if !ismissing(arr[r, c]) |
| 230 | 107 | | cols[c] = true |
| 417 | 108 | | break |
| | 109 | | end |
| | 110 | | end |
| | 111 | | end |
| | 112 | | end |
| | 113 | |
|
| 50 | 114 | | arr = arr[:, cols] |
| 50 | 115 | | rows = falses(size(arr, 1)) |
| 100 | 116 | | @inbounds for r in 1:size(arr, 1) |
| 588 | 117 | | for c in 1:size(arr, 2) |
| 587 | 118 | | if !ismissing(arr[r, c]) |
| 287 | 119 | | rows[r] = true |
| 550 | 120 | | break |
| | 121 | | end |
| | 122 | | end |
| | 123 | | end |
| 50 | 124 | | return arr[rows, :] |
| | 125 | | end |
| | 126 | |
|
| | 127 | | function collect_cell(p::Pointer{T}, cell, delim) where T |
| 837 | 128 | | if ismissing(cell) |
| 89 | 129 | | val = JSONPointer._null_value(p) |
| | 130 | | else |
| 748 | 131 | | if T <: AbstractArray |
| 167 | 132 | | if isa(cell, AbstractString) |
| 153 | 133 | | val = split(cell, delim) |
| 153 | 134 | | isempty(val[end]) && pop!(val) |
| 153 | 135 | | if eltype(T) <: Real |
| 260 | 136 | | val = parse.(eltype(T), val) |
| 23 | 137 | | elseif eltype(T) <: AbstractString |
| 159 | 138 | | val = string.(val) |
| | 139 | | end |
| | 140 | | else |
| 14 | 141 | | val = cell |
| 14 | 142 | | if eltype(T) <: Real |
| 6 | 143 | | if isa(cell, AbstractString) |
| 6 | 144 | | val = parse(eltype(T), cell) |
| | 145 | | end |
| 8 | 146 | | elseif eltype(T) <: AbstractString |
| 3 | 147 | | if !isa(cell, AbstractString) |
| 3 | 148 | | val = string(cell) |
| | 149 | | end |
| | 150 | | end |
| 175 | 151 | | val = convert(T, [val]) |
| | 152 | | end |
| | 153 | | else |
| 581 | 154 | | val = cell |
| | 155 | | end |
| | 156 | | end |
| 837 | 157 | | return val |
| | 158 | | end |
| | 159 | |
|
| 34 | 160 | | data(jws::JSONWorksheet) = getfield(jws, :data) |
| 1 | 161 | | xlsxpath(jws::JSONWorksheet) = getfield(jws, :xlsxpath) |
| 42 | 162 | | sheetnames(jws::JSONWorksheet) = getfield(jws, :sheetname) |
| 47 | 163 | | Base.keys(jws::JSONWorksheet) = jws.pointer |
| | 164 | | function Base.haskey(jws::JSONWorksheet, key::Pointer) |
| 25 | 165 | | t = key.tokens |
| 25 | 166 | | for el in getfield.(keys(jws), :tokens) |
| 67 | 167 | | if el == key.tokens |
| 12 | 168 | | return true |
| 55 | 169 | | elseif length(el) > length(t) |
| 21 | 170 | | if el[1:length(t)] == t |
| 13 | 171 | | return true |
| | 172 | | end |
| | 173 | | end |
| | 174 | | end |
| 10 | 175 | | return false |
| | 176 | | end |
| | 177 | |
|
| 5 | 178 | | Base.iterate(jws::JSONWorksheet) = iterate(data(jws)) |
| 9 | 179 | | Base.iterate(jws::JSONWorksheet, i) = iterate(data(jws), i) |
| | 180 | |
|
| 1 | 181 | | Base.size(jws::JSONWorksheet) = (length(jws.data), length(jws.pointer)) |
| 0 | 182 | | function Base.size(jws::JSONWorksheet, d) |
| 0 | 183 | | d == 1 ? length(jws.data) : |
| | 184 | | d == 2 ? length(jws.pointer) : throw(DimensionMismatch("only 2 dimensions of `JSONWorksheets` object are measurable" |
| | 185 | | end |
| 10 | 186 | | Base.length(jws::JSONWorksheet) = length(data(jws)) |
| | 187 | |
|
| | 188 | | ############################################################################## |
| | 189 | | ## |
| | 190 | | ## getindex() definitions |
| | 191 | | ## |
| | 192 | | ############################################################################## |
| 111 | 193 | | Base.getindex(jws::JSONWorksheet, i) = getindex(jws.data, i) |
| 1 | 194 | | Base.getindex(jws::JSONWorksheet, ::Colon, ::Colon) = getindex(jws, eachindex(jws.data), eachindex(jws.pointer)) |
| 0 | 195 | | Base.getindex(jws::JSONWorksheet, row_ind, ::Colon) = getindex(jws, row_ind, eachindex(jws.pointer)) |
| | 196 | |
|
| 1 | 197 | | Base.firstindex(jws::JSONWorksheet) = firstindex(jws.data) |
| 1 | 198 | | Base.lastindex(jws::JSONWorksheet) = lastindex(jws.data) |
| | 199 | | function Base.lastindex(jws::JSONWorksheet, i::Integer) |
| 6 | 200 | | i == 1 ? lastindex(jws.data) : |
| | 201 | | i == 2 ? lastindex(jws.pointer) : |
| | 202 | | throw(DimensionMismatch("JSONWorksheet only has two dimensions")) |
| | 203 | | end |
| | 204 | |
|
| | 205 | | function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::Integer) |
| 8 | 206 | | p = keys(jws)[col_ind] |
| | 207 | |
|
| 7 | 208 | | jws[row_ind, p] |
| | 209 | | end |
| | 210 | | function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::AbstractArray) |
| 4 | 211 | | pointers = keys(jws)[col_ind] |
| | 212 | |
|
| 17 | 213 | | permutedims(map(p -> jws[row_ind, p], pointers)) |
| | 214 | | end |
| | 215 | | @inline function Base.getindex(jws::JSONWorksheet, row_inds::AbstractArray, col_ind::AbstractArray) |
| 7 | 216 | | pointers = keys(jws)[col_ind] |
| 4 | 217 | | rows = jws[row_inds] |
| | 218 | |
|
| | 219 | | # v = vcat(map(el -> jws[el, col_ind], row_inds)...) |
| 4 | 220 | | v = Array{Any,2}(undef, length(rows), length(pointers)) |
| 4 | 221 | | @inbounds for (r, _row) in enumerate(rows) |
| 16 | 222 | | for (c, _col) in enumerate(pointers) |
| 28 | 223 | | v[r, c] = if haskey(_row, _col) |
| 28 | 224 | | _row[_col] |
| | 225 | | else |
| 48 | 226 | | missing |
| | 227 | | end |
| | 228 | | end |
| | 229 | | end |
| | 230 | |
|
| 4 | 231 | | return v |
| | 232 | | end |
| | 233 | |
|
| | 234 | | function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::Pointer) |
| 24 | 235 | | row = jws[row_ind] |
| | 236 | |
|
| 23 | 237 | | return row[col_ind] |
| | 238 | | end |
| | 239 | | @inline function Base.getindex(jws::JSONWorksheet, row_inds, p::Pointer) |
| 24 | 240 | | map(row -> row[p], jws[row_inds]) |
| | 241 | | end |
| | 242 | | @inline function Base.getindex(jws::JSONWorksheet, row_inds, col_ind::Integer) |
| 2 | 243 | | p = keys(jws)[col_ind] |
| | 244 | |
|
| 2 | 245 | | getindex(jws, row_inds, p) |
| | 246 | | end |
| | 247 | |
|
| | 248 | | function Base.setindex!(jws::JSONWorksheet, value::Vector, p::Pointer) |
| 4 | 249 | | if length(jws) != length(value) |
| 1 | 250 | | throw(ArgumentError("New column must have the same length as old columns")) |
| | 251 | | end |
| 6 | 252 | | @inbounds for (i, row) in enumerate(jws) |
| 7 | 253 | | row[p] = value[i] |
| | 254 | | end |
| 2 | 255 | | if !haskey(jws, p) |
| 1 | 256 | | push!(jws.pointer, p) |
| | 257 | | end |
| 2 | 258 | | return jws |
| | 259 | | end |
| | 260 | | function Base.setindex!(jws::JSONWorksheet, value, i::Integer, p::Pointer) |
| 2 | 261 | | jws[i][p] = value |
| | 262 | | end |
| | 263 | |
|
| | 264 | | """ |
| | 265 | | merge(a::JSONWorksheet, b::JSONWorksheet, bykey::AbstractString) |
| | 266 | |
|
| | 267 | | Construct a merged JSONWorksheet from the given JSONWorksheets. |
| | 268 | | If the same Pointer is present in another collection, the value for that key will be the |
| | 269 | | value it has in the last collection listed. |
| | 270 | | """ |
| | 271 | | function Base.merge(a::JSONWorksheet, b::JSONWorksheet, key::AbstractString) |
| 3 | 272 | | merge(a::JSONWorksheet, b::JSONWorksheet, Pointer(key)) |
| | 273 | | end |
| | 274 | | function Base.merge(a::JSONWorksheet, b::JSONWorksheet, key::Pointer) |
| 4 | 275 | | @assert haskey(a, key) "$key is not found in the JSONWorksheet(\"$(a.sheetname)\")" |
| 2 | 276 | | @assert haskey(b, key) "$key is not found in the JSONWorksheet(\"$(b.sheetname)\")" |
| | 277 | |
|
| 2 | 278 | | pointers = unique([a.pointer; b.pointer]) |
| | 279 | |
|
| 10 | 280 | | keyvalues_a = map(el -> el[key], a.data) |
| 10 | 281 | | keyvalues_b = map(el -> el[key], b.data) |
| 2 | 282 | | ind = indexin(keyvalues_b, keyvalues_a) |
| | 283 | |
|
| 2 | 284 | | data = deepcopy(a.data) |
| 2 | 285 | | for (i, _b) in enumerate(b.data) |
| 8 | 286 | | j = ind[i] |
| 14 | 287 | | if isnothing(j) |
| 2 | 288 | | _a = deepcopy(_b) |
| 2 | 289 | | for p in a.pointer |
| 10 | 290 | | _a[p] = JSONPointer._null_value(p) |
| | 291 | | end |
| 2 | 292 | | push!(data, _a) |
| | 293 | | else |
| 6 | 294 | | _a = data[j] |
| | 295 | | end |
| 8 | 296 | | for p in b.pointer |
| 32 | 297 | | _a[p] = _b[p] |
| | 298 | | end |
| | 299 | | end |
| 2 | 300 | | JSONWorksheet(a.xlsxpath, pointers, data, a.sheetname) |
| | 301 | | end |
| | 302 | | function Base.append!(a::JSONWorksheet, b::JSONWorksheet) |
| 6 | 303 | | ak = map(el -> el.tokens, keys(a)) |
| 6 | 304 | | bk = map(el -> el.tokens, keys(b)) |
| | 305 | |
|
| 2 | 306 | | if sort(ak) != sort(bk) |
| 1 | 307 | | throw(AssertionError("""Column names must be same for append! |
| | 308 | | $(setdiff(collect(ak), collect(bk)))""")) |
| | 309 | | end |
| | 310 | |
|
| 1 | 311 | | append!(a.data, b.data) |
| | 312 | | end |
| | 313 | |
|
| 0 | 314 | | function Base.sort!(jws::JSONWorksheet, key; kwargs...) |
| 0 | 315 | | sort!(jws, Pointer(key); kwargs...) |
| | 316 | | end |
| | 317 | | function Base.sort!(jws::JSONWorksheet, pointer::Pointer; kwargs...) |
| 9 | 318 | | sorted_idx = sortperm(map(el -> el[pointer], data(jws)); kwargs...) |
| 1 | 319 | | jws.data = data(jws)[sorted_idx] |
| 1 | 320 | | return jws |
| | 321 | | end |
| | 322 | |
|
| 0 | 323 | | function Base.summary(io::IO, jws::JSONWorksheet) |
| 0 | 324 | | @printf("%d×%d %s - %s!%s\n", size(jws)..., "JSONWorksheet", |
| | 325 | | basename(xlsxpath(jws)), sheetnames(jws)) |
| | 326 | | end |
| 0 | 327 | | function Base.show(io::IO, jws::JSONWorksheet) |
| 0 | 328 | | summary(io, jws) |
| | 329 | | # TODO truncate based on screen size |
| 0 | 330 | | x = data(jws) |
| 0 | 331 | | print(io, "row 1 => ") |
| 0 | 332 | | print(io, JSON.json(x[1], 1)) |
| 0 | 333 | | if length(x) > 1 |
| 0 | 334 | | print("...") |
| 0 | 335 | | print(io, "row $(length(x)) => ") |
| 0 | 336 | | print(io, JSON.json(x[end])) |
| | 337 | | end |
| | 338 | | end |