|  |  | 1 |  | """ | 
|  |  | 2 |  |     JSONWorksheet | 
|  |  | 3 |  |  | 
|  |  | 4 |  | construct 'Array{OrderedDict, 1}' for each row from Worksheet | 
|  |  | 5 |  |  | 
|  |  | 6 |  | # Constructors | 
|  |  | 7 |  | ```julia | 
|  |  | 8 |  | JSONWorksheet("Example.xlsx", "Sheet1") | 
|  |  | 9 |  | JSONWorksheet("Example.xlsx", 1) | 
|  |  | 10 |  |  | 
|  |  | 11 |  | ``` | 
|  |  | 12 |  | # Arguments | 
|  |  | 13 |  | - `row_oriented` : if 'true'(the default) it will look for colum names in '1:1', if `false` it will look for colum names | 
|  |  | 14 |  | - `start_line` : starting index of position of columnname. | 
|  |  | 15 |  | - `squeeze` : squeezes all rows of Worksheet to a singe row. | 
|  |  | 16 |  | - `delim` : a String or Regrex that of deliminator for converting single cell to array. | 
|  |  | 17 |  |  | 
|  |  | 18 |  | """ | 
|  |  | 19 |  | mutable struct JSONWorksheet | 
|  | 45 | 20 |  |     xlsxpath::String | 
|  |  | 21 |  |     pointer::Array{Pointer,1} | 
|  |  | 22 |  |     data::Array{T,1} where T | 
|  |  | 23 |  |     sheetname::String | 
|  |  | 24 |  | end | 
|  |  | 25 |  | function JSONWorksheet(xlsxpath, sheet, arr; | 
|  |  | 26 |  |                         delim=";", squeeze=false) | 
|  | 100 | 27 |  |     arr = dropemptyrange(arr) | 
|  | 52 | 28 |  |     @assert !isempty(arr) "'$(xlsxpath)!$(sheet)' don't have valid column names, try change optional argument'start_line | 
|  |  | 29 |  |  | 
|  | 96 | 30 |  |     pointer = _column_to_pointer.(arr[1, :]) | 
|  | 48 | 31 |  |     real_keys = map(el -> el.tokens, pointer) | 
|  |  | 32 |  |     # TODO more robust key validity check | 
|  | 48 | 33 |  |     if !allunique(real_keys) | 
|  | 3 | 34 |  |         throw(AssertionError("column names must be unique, check for duplication $(arr[1, :])")) | 
|  |  | 35 |  |     end | 
|  |  | 36 |  |  | 
|  | 45 | 37 |  |     if squeeze | 
|  | 1 | 38 |  |         data = squeezerow_to_jsonarray(arr, pointer, delim) | 
|  |  | 39 |  |     else | 
|  | 44 | 40 |  |         data = eachrow_to_jsonarray(arr, pointer, delim) | 
|  |  | 41 |  |     end | 
|  | 43 | 42 |  |     JSONWorksheet(normpath(xlsxpath), pointer, data, String(sheet)) | 
|  |  | 43 |  | end | 
|  |  | 44 |  | function JSONWorksheet(xf::XLSX.XLSXFile, sheet; | 
|  |  | 45 |  |                        start_line=1, | 
|  |  | 46 |  |                        row_oriented=true, | 
|  |  | 47 |  |                        delim=";", squeeze=false) | 
|  | 86 | 48 |  |     ws = isa(sheet, Symbol) ? xf[String(sheet)] : xf[sheet] | 
|  | 43 | 49 |  |     sheet = ws.name | 
|  |  | 50 |  |     # orientation handling | 
|  | 0 | 51 |  |     ws = begin | 
|  | 86 | 52 |  |         rg = XLSX.get_dimension(ws) | 
|  | 43 | 53 |  |         if row_oriented | 
|  | 42 | 54 |  |             rg = XLSX.CellRange(XLSX.CellRef(start_line, rg.start.column_number), rg.stop) | 
|  | 42 | 55 |  |             dt = ws[rg] | 
|  |  | 56 |  |         else | 
|  | 1 | 57 |  |             rg = XLSX.CellRange(XLSX.CellRef(rg.start.row_number, start_line), rg.stop) | 
|  | 44 | 58 |  |             dt = permutedims(ws[rg]) | 
|  |  | 59 |  |         end | 
|  |  | 60 |  |     end | 
|  |  | 61 |  |  | 
|  | 43 | 62 |  |     JSONWorksheet(xf.filepath, sheet, ws; delim=delim, squeeze=squeeze) | 
|  |  | 63 |  | end | 
|  |  | 64 |  | function JSONWorksheet(xlsxpath, sheet; kwargs...) | 
|  | 32 | 65 |  |     xf = XLSX.readxlsx(xlsxpath) | 
|  | 16 | 66 |  |     x = JSONWorksheet(xf, sheet; kwargs...) | 
|  | 9 | 67 |  |     close(xf) | 
|  | 9 | 68 |  |     return x | 
|  |  | 69 |  | end | 
|  |  | 70 |  |  | 
|  |  | 71 |  | function eachrow_to_jsonarray(data::Array{T,2}, pointers, delim) where T | 
|  | 44 | 72 |  |     json = Array{OrderedDict,1}(undef, size(data, 1) - 1) | 
|  | 88 | 73 |  |     Threads.@threads for i in 1:length(json) | 
|  | 134 | 74 |  |         json[i] = row_to_jsonarray(data[i + 1, :], pointers, delim) | 
|  |  | 75 |  |     end | 
|  | 42 | 76 |  |     return json | 
|  |  | 77 |  | end | 
|  |  | 78 |  |  | 
|  |  | 79 |  | function row_to_jsonarray(row, pointers, delim) | 
|  | 134 | 80 |  |     x = OrderedDict{String,Any}() | 
|  | 268 | 81 |  |     for (col, p) in enumerate(pointers) | 
|  | 1140 | 82 |  |         x[p] = collect_cell(p, row[col], delim) | 
|  |  | 83 |  |     end | 
|  | 132 | 84 |  |     return x | 
|  |  | 85 |  | end | 
|  |  | 86 |  |  | 
|  |  | 87 |  | function squeezerow_to_jsonarray(data::Array{T,2}, pointers, delim) where T | 
|  | 1 | 88 |  |     arr_pointer = map(p -> begin | 
|  | 4 | 89 |  |         U = Vector{eltype(p)}; Pointer{U}(p.tokens) | 
|  |  | 90 |  |     end, pointers) | 
|  |  | 91 |  |  | 
|  | 1 | 92 |  |     squzzed_json = OrderedDict{String, Any}() | 
|  | 2 | 93 |  |     @inbounds for (col, p) in enumerate(pointers) | 
|  | 202 | 94 |  |         val = map(i -> collect_cell(p, data[i + 1, :][col], delim), 1:size(data, 1) - 1) | 
|  | 3 | 95 |  |         squzzed_json[arr_pointer[col]] = val | 
|  |  | 96 |  |     end | 
|  | 1 | 97 |  |     return [squzzed_json] | 
|  |  | 98 |  | end | 
|  |  | 99 |  |  | 
|  |  | 100 |  | @inline function dropemptyrange(arr::Array{T,2}) where T | 
|  | 50 | 101 |  |     cols = falses(size(arr, 2)) | 
|  | 100 | 102 |  |     @inbounds for c in 1:size(arr, 2) | 
|  |  | 103 |  |         # There must be a column name, or it's a commet line | 
|  | 464 | 104 |  |         if !ismissing(arr[1, c]) | 
|  | 460 | 105 |  |             for r in 1:size(arr, 1) | 
|  | 457 | 106 |  |                 if !ismissing(arr[r, c]) | 
|  | 230 | 107 |  |                     cols[c] = true | 
|  | 417 | 108 |  |                     break | 
|  |  | 109 |  |                 end | 
|  |  | 110 |  |             end | 
|  |  | 111 |  |         end | 
|  |  | 112 |  |     end | 
|  |  | 113 |  |  | 
|  | 50 | 114 |  |     arr = arr[:, cols] | 
|  | 50 | 115 |  |     rows = falses(size(arr, 1)) | 
|  | 100 | 116 |  |     @inbounds for r in 1:size(arr, 1) | 
|  | 588 | 117 |  |         for c in 1:size(arr, 2) | 
|  | 587 | 118 |  |             if !ismissing(arr[r, c]) | 
|  | 287 | 119 |  |                 rows[r] = true | 
|  | 550 | 120 |  |                 break | 
|  |  | 121 |  |             end | 
|  |  | 122 |  |         end | 
|  |  | 123 |  |     end | 
|  | 50 | 124 |  |     return arr[rows, :] | 
|  |  | 125 |  | end | 
|  |  | 126 |  |  | 
|  |  | 127 |  | function collect_cell(p::Pointer{T}, cell, delim) where T | 
|  | 837 | 128 |  |     if ismissing(cell) | 
|  | 89 | 129 |  |         val = JSONPointer._null_value(p) | 
|  |  | 130 |  |     else | 
|  | 748 | 131 |  |         if T <: AbstractArray | 
|  | 167 | 132 |  |             if isa(cell, AbstractString) | 
|  | 153 | 133 |  |                 val = split(cell, delim) | 
|  | 153 | 134 |  |                 isempty(val[end]) && pop!(val) | 
|  | 153 | 135 |  |                 if eltype(T) <: Real | 
|  | 260 | 136 |  |                     val = parse.(eltype(T), val) | 
|  | 23 | 137 |  |                 elseif eltype(T) <: AbstractString | 
|  | 159 | 138 |  |                     val = string.(val) | 
|  |  | 139 |  |                 end | 
|  |  | 140 |  |             else | 
|  | 14 | 141 |  |                 val = cell | 
|  | 14 | 142 |  |                 if eltype(T) <: Real | 
|  | 6 | 143 |  |                     if isa(cell, AbstractString) | 
|  | 6 | 144 |  |                         val = parse(eltype(T), cell) | 
|  |  | 145 |  |                     end | 
|  | 8 | 146 |  |                 elseif eltype(T) <: AbstractString | 
|  | 3 | 147 |  |                     if !isa(cell, AbstractString) | 
|  | 3 | 148 |  |                         val = string(cell) | 
|  |  | 149 |  |                     end | 
|  |  | 150 |  |                 end | 
|  | 175 | 151 |  |                 val = convert(T, [val]) | 
|  |  | 152 |  |             end | 
|  |  | 153 |  |         else | 
|  | 581 | 154 |  |             val = cell | 
|  |  | 155 |  |         end | 
|  |  | 156 |  |     end | 
|  | 837 | 157 |  |     return val | 
|  |  | 158 |  | end | 
|  |  | 159 |  |  | 
|  | 34 | 160 |  | data(jws::JSONWorksheet) = getfield(jws, :data) | 
|  | 1 | 161 |  | xlsxpath(jws::JSONWorksheet) = getfield(jws, :xlsxpath) | 
|  | 42 | 162 |  | sheetnames(jws::JSONWorksheet) = getfield(jws, :sheetname) | 
|  | 47 | 163 |  | Base.keys(jws::JSONWorksheet) = jws.pointer | 
|  |  | 164 |  | function Base.haskey(jws::JSONWorksheet, key::Pointer) | 
|  | 25 | 165 |  |     t = key.tokens | 
|  | 25 | 166 |  |     for el in getfield.(keys(jws), :tokens) | 
|  | 67 | 167 |  |         if el == key.tokens | 
|  | 12 | 168 |  |             return true | 
|  | 55 | 169 |  |         elseif length(el) > length(t) | 
|  | 21 | 170 |  |             if el[1:length(t)] == t | 
|  | 13 | 171 |  |                 return true | 
|  |  | 172 |  |             end | 
|  |  | 173 |  |         end | 
|  |  | 174 |  |     end | 
|  | 10 | 175 |  |     return false | 
|  |  | 176 |  | end | 
|  |  | 177 |  |  | 
|  | 5 | 178 |  | Base.iterate(jws::JSONWorksheet) = iterate(data(jws)) | 
|  | 9 | 179 |  | Base.iterate(jws::JSONWorksheet, i) = iterate(data(jws), i) | 
|  |  | 180 |  |  | 
|  | 1 | 181 |  | Base.size(jws::JSONWorksheet) = (length(jws.data), length(jws.pointer)) | 
|  | 0 | 182 |  | function Base.size(jws::JSONWorksheet, d) | 
|  | 0 | 183 |  |     d == 1 ? length(jws.data) : | 
|  |  | 184 |  |     d == 2 ? length(jws.pointer) : throw(DimensionMismatch("only 2 dimensions of `JSONWorksheets` object are measurable" | 
|  |  | 185 |  | end | 
|  | 10 | 186 |  | Base.length(jws::JSONWorksheet) = length(data(jws)) | 
|  |  | 187 |  |  | 
|  |  | 188 |  | ############################################################################## | 
|  |  | 189 |  | ## | 
|  |  | 190 |  | ## getindex() definitions | 
|  |  | 191 |  | ## | 
|  |  | 192 |  | ############################################################################## | 
|  | 111 | 193 |  | Base.getindex(jws::JSONWorksheet, i) = getindex(jws.data, i) | 
|  | 1 | 194 |  | Base.getindex(jws::JSONWorksheet, ::Colon, ::Colon) = getindex(jws, eachindex(jws.data), eachindex(jws.pointer)) | 
|  | 0 | 195 |  | Base.getindex(jws::JSONWorksheet, row_ind, ::Colon) = getindex(jws, row_ind, eachindex(jws.pointer)) | 
|  |  | 196 |  |  | 
|  | 1 | 197 |  | Base.firstindex(jws::JSONWorksheet) = firstindex(jws.data) | 
|  | 1 | 198 |  | Base.lastindex(jws::JSONWorksheet) = lastindex(jws.data) | 
|  |  | 199 |  | function Base.lastindex(jws::JSONWorksheet, i::Integer) | 
|  | 6 | 200 |  |     i == 1 ? lastindex(jws.data) : | 
|  |  | 201 |  |     i == 2 ? lastindex(jws.pointer) : | 
|  |  | 202 |  |     throw(DimensionMismatch("JSONWorksheet only has two dimensions")) | 
|  |  | 203 |  | end | 
|  |  | 204 |  |  | 
|  |  | 205 |  | function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::Integer) | 
|  | 8 | 206 |  |     p = keys(jws)[col_ind] | 
|  |  | 207 |  |  | 
|  | 7 | 208 |  |     jws[row_ind, p] | 
|  |  | 209 |  | end | 
|  |  | 210 |  | function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::AbstractArray) | 
|  | 4 | 211 |  |     pointers = keys(jws)[col_ind] | 
|  |  | 212 |  |  | 
|  | 17 | 213 |  |     permutedims(map(p -> jws[row_ind, p], pointers)) | 
|  |  | 214 |  | end | 
|  |  | 215 |  | @inline function Base.getindex(jws::JSONWorksheet, row_inds::AbstractArray, col_ind::AbstractArray) | 
|  | 7 | 216 |  |     pointers = keys(jws)[col_ind] | 
|  | 4 | 217 |  |     rows = jws[row_inds] | 
|  |  | 218 |  |  | 
|  |  | 219 |  |     # v = vcat(map(el -> jws[el, col_ind], row_inds)...) | 
|  | 4 | 220 |  |     v = Array{Any,2}(undef, length(rows), length(pointers)) | 
|  | 4 | 221 |  |     @inbounds for (r, _row) in enumerate(rows) | 
|  | 16 | 222 |  |         for (c, _col) in enumerate(pointers) | 
|  | 28 | 223 |  |             v[r, c] = if haskey(_row, _col) | 
|  | 28 | 224 |  |                 _row[_col] | 
|  |  | 225 |  |             else | 
|  | 48 | 226 |  |                 missing | 
|  |  | 227 |  |             end | 
|  |  | 228 |  |         end | 
|  |  | 229 |  |     end | 
|  |  | 230 |  |  | 
|  | 4 | 231 |  |     return v | 
|  |  | 232 |  | end | 
|  |  | 233 |  |  | 
|  |  | 234 |  | function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::Pointer) | 
|  | 24 | 235 |  |     row = jws[row_ind] | 
|  |  | 236 |  |  | 
|  | 23 | 237 |  |     return row[col_ind] | 
|  |  | 238 |  | end | 
|  |  | 239 |  | @inline function Base.getindex(jws::JSONWorksheet, row_inds, p::Pointer) | 
|  | 24 | 240 |  |     map(row -> row[p], jws[row_inds]) | 
|  |  | 241 |  | end | 
|  |  | 242 |  | @inline function Base.getindex(jws::JSONWorksheet, row_inds, col_ind::Integer) | 
|  | 2 | 243 |  |     p = keys(jws)[col_ind] | 
|  |  | 244 |  |  | 
|  | 2 | 245 |  |     getindex(jws, row_inds, p) | 
|  |  | 246 |  | end | 
|  |  | 247 |  |  | 
|  |  | 248 |  | function Base.setindex!(jws::JSONWorksheet, value::Vector, p::Pointer) | 
|  | 4 | 249 |  |     if length(jws) != length(value) | 
|  | 1 | 250 |  |         throw(ArgumentError("New column must have the same length as old columns")) | 
|  |  | 251 |  |     end | 
|  | 6 | 252 |  |     @inbounds for (i, row) in enumerate(jws) | 
|  | 7 | 253 |  |         row[p] = value[i] | 
|  |  | 254 |  |     end | 
|  | 2 | 255 |  |     if !haskey(jws, p) | 
|  | 1 | 256 |  |         push!(jws.pointer, p) | 
|  |  | 257 |  |     end | 
|  | 2 | 258 |  |     return jws | 
|  |  | 259 |  | end | 
|  |  | 260 |  | function Base.setindex!(jws::JSONWorksheet, value, i::Integer, p::Pointer) | 
|  | 2 | 261 |  |     jws[i][p] = value | 
|  |  | 262 |  | end | 
|  |  | 263 |  |  | 
|  |  | 264 |  | """ | 
|  |  | 265 |  |     merge(a::JSONWorksheet, b::JSONWorksheet, bykey::AbstractString) | 
|  |  | 266 |  |  | 
|  |  | 267 |  | Construct a merged JSONWorksheet from the given JSONWorksheets. | 
|  |  | 268 |  | If the same Pointer is present in another collection, the value for that key will be the | 
|  |  | 269 |  | value it has in the last collection listed. | 
|  |  | 270 |  | """ | 
|  |  | 271 |  | function Base.merge(a::JSONWorksheet, b::JSONWorksheet, key::AbstractString) | 
|  | 3 | 272 |  |     merge(a::JSONWorksheet, b::JSONWorksheet, Pointer(key)) | 
|  |  | 273 |  | end | 
|  |  | 274 |  | function Base.merge(a::JSONWorksheet, b::JSONWorksheet, key::Pointer) | 
|  | 4 | 275 |  |     @assert haskey(a, key) "$key is not found in the JSONWorksheet(\"$(a.sheetname)\")" | 
|  | 2 | 276 |  |     @assert haskey(b, key) "$key is not found in the JSONWorksheet(\"$(b.sheetname)\")" | 
|  |  | 277 |  |  | 
|  | 2 | 278 |  |     pointers = unique([a.pointer; b.pointer]) | 
|  |  | 279 |  |  | 
|  | 10 | 280 |  |     keyvalues_a = map(el -> el[key], a.data) | 
|  | 10 | 281 |  |     keyvalues_b = map(el -> el[key], b.data) | 
|  | 2 | 282 |  |     ind = indexin(keyvalues_b, keyvalues_a) | 
|  |  | 283 |  |  | 
|  | 2 | 284 |  |     data = deepcopy(a.data) | 
|  | 2 | 285 |  |     for (i, _b) in enumerate(b.data) | 
|  | 8 | 286 |  |         j = ind[i] | 
|  | 14 | 287 |  |         if isnothing(j) | 
|  | 2 | 288 |  |             _a = deepcopy(_b) | 
|  | 2 | 289 |  |             for p in a.pointer | 
|  | 10 | 290 |  |                 _a[p] = JSONPointer._null_value(p) | 
|  |  | 291 |  |             end | 
|  | 2 | 292 |  |             push!(data, _a) | 
|  |  | 293 |  |         else | 
|  | 6 | 294 |  |             _a = data[j] | 
|  |  | 295 |  |         end | 
|  | 8 | 296 |  |         for p in b.pointer | 
|  | 32 | 297 |  |             _a[p] = _b[p] | 
|  |  | 298 |  |         end | 
|  |  | 299 |  |     end | 
|  | 2 | 300 |  |     JSONWorksheet(a.xlsxpath, pointers, data, a.sheetname) | 
|  |  | 301 |  | end | 
|  |  | 302 |  | function Base.append!(a::JSONWorksheet, b::JSONWorksheet) | 
|  | 6 | 303 |  |     ak = map(el -> el.tokens, keys(a)) | 
|  | 6 | 304 |  |     bk = map(el -> el.tokens, keys(b)) | 
|  |  | 305 |  |  | 
|  | 2 | 306 |  |     if sort(ak) != sort(bk) | 
|  | 1 | 307 |  |         throw(AssertionError("""Column names must be same for append! | 
|  |  | 308 |  |          $(setdiff(collect(ak), collect(bk)))""")) | 
|  |  | 309 |  |     end | 
|  |  | 310 |  |  | 
|  | 1 | 311 |  |     append!(a.data, b.data) | 
|  |  | 312 |  | end | 
|  |  | 313 |  |  | 
|  | 0 | 314 |  | function Base.sort!(jws::JSONWorksheet, key; kwargs...) | 
|  | 0 | 315 |  |     sort!(jws, Pointer(key); kwargs...) | 
|  |  | 316 |  | end | 
|  |  | 317 |  | function Base.sort!(jws::JSONWorksheet, pointer::Pointer; kwargs...) | 
|  | 9 | 318 |  |     sorted_idx = sortperm(map(el -> el[pointer], data(jws)); kwargs...) | 
|  | 1 | 319 |  |     jws.data = data(jws)[sorted_idx] | 
|  | 1 | 320 |  |     return jws | 
|  |  | 321 |  | end | 
|  |  | 322 |  |  | 
|  | 0 | 323 |  | function Base.summary(io::IO, jws::JSONWorksheet) | 
|  | 0 | 324 |  |     @printf("%d×%d %s - %s!%s\n", size(jws)..., "JSONWorksheet", | 
|  |  | 325 |  |         basename(xlsxpath(jws)), sheetnames(jws)) | 
|  |  | 326 |  | end | 
|  | 0 | 327 |  | function Base.show(io::IO, jws::JSONWorksheet) | 
|  | 0 | 328 |  |     summary(io, jws) | 
|  |  | 329 |  |     # TODO truncate based on screen size | 
|  | 0 | 330 |  |     x = data(jws) | 
|  | 0 | 331 |  |     print(io, "row 1 => ") | 
|  | 0 | 332 |  |     print(io, JSON.json(x[1], 1)) | 
|  | 0 | 333 |  |     if length(x) > 1 | 
|  | 0 | 334 |  |         print("...") | 
|  | 0 | 335 |  |         print(io, "row $(length(x)) => ") | 
|  | 0 | 336 |  |         print(io, JSON.json(x[end])) | 
|  |  | 337 |  |     end | 
|  |  | 338 |  | end |