< Summary

Class:	src/worksheet.jl
Assembly:	Default
File(s):	src/worksheet.jl
Covered lines:	148
Uncovered lines:	17
Coverable lines:	165
Total lines:	338
Line coverage:	89.6% (148 of 165)
Covered branches:	0
Total branches:	0
Tag:	43_456648716

File(s)

src/worksheet.jl

#	Line	Line coverage
	`1`	`"""`
	`2`	`JSONWorksheet`
	`3`
	`4`	`construct 'Array{OrderedDict, 1}' for each row from Worksheet`
	`5`
	`6`	`# Constructors`
	`7`	```julia
	`8`	`JSONWorksheet("Example.xlsx", "Sheet1")`
	`9`	`JSONWorksheet("Example.xlsx", 1)`
	`10`
	`11`	```
	`12`	`# Arguments`
	`13`	- `row_oriented` : if 'true'(the default) it will look for colum names in '1:1', if `false` it will look for colum names
	`14`	- `start_line` : starting index of position of columnname.
	`15`	- `squeeze` : squeezes all rows of Worksheet to a singe row.
	`16`	- `delim` : a String or Regrex that of deliminator for converting single cell to array.
	`17`
	`18`	`"""`
	`19`	`mutable struct JSONWorksheet`
45	`20`	`xlsxpath::String`
	`21`	`pointer::Array{Pointer,1}`
	`22`	`data::Array{T,1} where T`
	`23`	`sheetname::String`
	`24`	`end`
	`25`	`function JSONWorksheet(xlsxpath, sheet, arr;`
	`26`	`delim=";", squeeze=false)`
100	`27`	`arr = dropemptyrange(arr)`
52	`28`	`@assert !isempty(arr) "'$(xlsxpath)!$(sheet)' don't have valid column names, try change optional argument'start_line`
	`29`
96	`30`	`pointer = _column_to_pointer.(arr[1, :])`
48	`31`	`real_keys = map(el -> el.tokens, pointer)`
	`32`	`# TODO more robust key validity check`
48	`33`	`if !allunique(real_keys)`
3	`34`	`throw(AssertionError("column names must be unique, check for duplication $(arr[1, :])"))`
	`35`	`end`
	`36`
45	`37`	`if squeeze`
1	`38`	`data = squeezerow_to_jsonarray(arr, pointer, delim)`
	`39`	`else`
44	`40`	`data = eachrow_to_jsonarray(arr, pointer, delim)`
	`41`	`end`
43	`42`	`JSONWorksheet(normpath(xlsxpath), pointer, data, String(sheet))`
	`43`	`end`
	`44`	`function JSONWorksheet(xf::XLSX.XLSXFile, sheet;`
	`45`	`start_line=1,`
	`46`	`row_oriented=true,`
	`47`	`delim=";", squeeze=false)`
86	`48`	`ws = isa(sheet, Symbol) ? xf[String(sheet)] : xf[sheet]`
43	`49`	`sheet = ws.name`
	`50`	`# orientation handling`
0	`51`	`ws = begin`
86	`52`	`rg = XLSX.get_dimension(ws)`
43	`53`	`if row_oriented`
42	`54`	`rg = XLSX.CellRange(XLSX.CellRef(start_line, rg.start.column_number), rg.stop)`
42	`55`	`dt = ws[rg]`
	`56`	`else`
1	`57`	`rg = XLSX.CellRange(XLSX.CellRef(rg.start.row_number, start_line), rg.stop)`
44	`58`	`dt = permutedims(ws[rg])`
	`59`	`end`
	`60`	`end`
	`61`
43	`62`	`JSONWorksheet(xf.filepath, sheet, ws; delim=delim, squeeze=squeeze)`
	`63`	`end`
	`64`	`function JSONWorksheet(xlsxpath, sheet; kwargs...)`
32	`65`	`xf = XLSX.readxlsx(xlsxpath)`
16	`66`	`x = JSONWorksheet(xf, sheet; kwargs...)`
9	`67`	`close(xf)`
9	`68`	`return x`
	`69`	`end`
	`70`
	`71`	`function eachrow_to_jsonarray(data::Array{T,2}, pointers, delim) where T`
44	`72`	`json = Array{OrderedDict,1}(undef, size(data, 1) - 1)`
88	`73`	`Threads.@threads for i in 1:length(json)`
134	`74`	`json[i] = row_to_jsonarray(data[i + 1, :], pointers, delim)`
	`75`	`end`
42	`76`	`return json`
	`77`	`end`
	`78`
	`79`	`function row_to_jsonarray(row, pointers, delim)`
134	`80`	`x = OrderedDict{String,Any}()`
268	`81`	`for (col, p) in enumerate(pointers)`
1140	`82`	`x[p] = collect_cell(p, row[col], delim)`
	`83`	`end`
132	`84`	`return x`
	`85`	`end`
	`86`
	`87`	`function squeezerow_to_jsonarray(data::Array{T,2}, pointers, delim) where T`
1	`88`	`arr_pointer = map(p -> begin`
4	`89`	`U = Vector{eltype(p)}; Pointer{U}(p.tokens)`
	`90`	`end, pointers)`
	`91`
1	`92`	`squzzed_json = OrderedDict{String, Any}()`
2	`93`	`@inbounds for (col, p) in enumerate(pointers)`
202	`94`	`val = map(i -> collect_cell(p, data[i + 1, :][col], delim), 1:size(data, 1) - 1)`
3	`95`	`squzzed_json[arr_pointer[col]] = val`
	`96`	`end`
1	`97`	`return [squzzed_json]`
	`98`	`end`
	`99`
	`100`	`@inline function dropemptyrange(arr::Array{T,2}) where T`
50	`101`	`cols = falses(size(arr, 2))`
100	`102`	`@inbounds for c in 1:size(arr, 2)`
	`103`	`# There must be a column name, or it's a commet line`
464	`104`	`if !ismissing(arr[1, c])`
460	`105`	`for r in 1:size(arr, 1)`
457	`106`	`if !ismissing(arr[r, c])`
230	`107`	`cols[c] = true`
417	`108`	`break`
	`109`	`end`
	`110`	`end`
	`111`	`end`
	`112`	`end`
	`113`
50	`114`	`arr = arr[:, cols]`
50	`115`	`rows = falses(size(arr, 1))`
100	`116`	`@inbounds for r in 1:size(arr, 1)`
588	`117`	`for c in 1:size(arr, 2)`
587	`118`	`if !ismissing(arr[r, c])`
287	`119`	`rows[r] = true`
550	`120`	`break`
	`121`	`end`
	`122`	`end`
	`123`	`end`
50	`124`	`return arr[rows, :]`
	`125`	`end`
	`126`
	`127`	`function collect_cell(p::Pointer{T}, cell, delim) where T`
837	`128`	`if ismissing(cell)`
89	`129`	`val = JSONPointer._null_value(p)`
	`130`	`else`
748	`131`	`if T <: AbstractArray`
167	`132`	`if isa(cell, AbstractString)`
153	`133`	`val = split(cell, delim)`
153	`134`	`isempty(val[end]) && pop!(val)`
153	`135`	`if eltype(T) <: Real`
260	`136`	`val = parse.(eltype(T), val)`
23	`137`	`elseif eltype(T) <: AbstractString`
159	`138`	`val = string.(val)`
	`139`	`end`
	`140`	`else`
14	`141`	`val = cell`
14	`142`	`if eltype(T) <: Real`
6	`143`	`if isa(cell, AbstractString)`
6	`144`	`val = parse(eltype(T), cell)`
	`145`	`end`
8	`146`	`elseif eltype(T) <: AbstractString`
3	`147`	`if !isa(cell, AbstractString)`
3	`148`	`val = string(cell)`
	`149`	`end`
	`150`	`end`
175	`151`	`val = convert(T, [val])`
	`152`	`end`
	`153`	`else`
581	`154`	`val = cell`
	`155`	`end`
	`156`	`end`
837	`157`	`return val`
	`158`	`end`
	`159`
34	`160`	`data(jws::JSONWorksheet) = getfield(jws, :data)`
1	`161`	`xlsxpath(jws::JSONWorksheet) = getfield(jws, :xlsxpath)`
42	`162`	`sheetnames(jws::JSONWorksheet) = getfield(jws, :sheetname)`
47	`163`	`Base.keys(jws::JSONWorksheet) = jws.pointer`
	`164`	`function Base.haskey(jws::JSONWorksheet, key::Pointer)`
25	`165`	`t = key.tokens`
25	`166`	`for el in getfield.(keys(jws), :tokens)`
67	`167`	`if el == key.tokens`
12	`168`	`return true`
55	`169`	`elseif length(el) > length(t)`
21	`170`	`if el[1:length(t)] == t`
13	`171`	`return true`
	`172`	`end`
	`173`	`end`
	`174`	`end`
10	`175`	`return false`
	`176`	`end`
	`177`
5	`178`	`Base.iterate(jws::JSONWorksheet) = iterate(data(jws))`
9	`179`	`Base.iterate(jws::JSONWorksheet, i) = iterate(data(jws), i)`
	`180`
1	`181`	`Base.size(jws::JSONWorksheet) = (length(jws.data), length(jws.pointer))`
0	`182`	`function Base.size(jws::JSONWorksheet, d)`
0	`183`	`d == 1 ? length(jws.data) :`
	`184`	d == 2 ? length(jws.pointer) : throw(DimensionMismatch("only 2 dimensions of `JSONWorksheets` object are measurable"
	`185`	`end`
10	`186`	`Base.length(jws::JSONWorksheet) = length(data(jws))`
	`187`
	`188`	`##############################################################################`
	`189`	`##`
	`190`	`## getindex() definitions`
	`191`	`##`
	`192`	`##############################################################################`
111	`193`	`Base.getindex(jws::JSONWorksheet, i) = getindex(jws.data, i)`
1	`194`	`Base.getindex(jws::JSONWorksheet, ::Colon, ::Colon) = getindex(jws, eachindex(jws.data), eachindex(jws.pointer))`
0	`195`	`Base.getindex(jws::JSONWorksheet, row_ind, ::Colon) = getindex(jws, row_ind, eachindex(jws.pointer))`
	`196`
1	`197`	`Base.firstindex(jws::JSONWorksheet) = firstindex(jws.data)`
1	`198`	`Base.lastindex(jws::JSONWorksheet) = lastindex(jws.data)`
	`199`	`function Base.lastindex(jws::JSONWorksheet, i::Integer)`
6	`200`	`i == 1 ? lastindex(jws.data) :`
	`201`	`i == 2 ? lastindex(jws.pointer) :`
	`202`	`throw(DimensionMismatch("JSONWorksheet only has two dimensions"))`
	`203`	`end`
	`204`
	`205`	`function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::Integer)`
8	`206`	`p = keys(jws)[col_ind]`
	`207`
7	`208`	`jws[row_ind, p]`
	`209`	`end`
	`210`	`function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::AbstractArray)`
4	`211`	`pointers = keys(jws)[col_ind]`
	`212`
17	`213`	`permutedims(map(p -> jws[row_ind, p], pointers))`
	`214`	`end`
	`215`	`@inline function Base.getindex(jws::JSONWorksheet, row_inds::AbstractArray, col_ind::AbstractArray)`
7	`216`	`pointers = keys(jws)[col_ind]`
4	`217`	`rows = jws[row_inds]`
	`218`
	`219`	`# v = vcat(map(el -> jws[el, col_ind], row_inds)...)`
4	`220`	`v = Array{Any,2}(undef, length(rows), length(pointers))`
4	`221`	`@inbounds for (r, _row) in enumerate(rows)`
16	`222`	`for (c, _col) in enumerate(pointers)`
28	`223`	`v[r, c] = if haskey(_row, _col)`
28	`224`	`_row[_col]`
	`225`	`else`
48	`226`	`missing`
	`227`	`end`
	`228`	`end`
	`229`	`end`
	`230`
4	`231`	`return v`
	`232`	`end`
	`233`
	`234`	`function Base.getindex(jws::JSONWorksheet, row_ind::Integer, col_ind::Pointer)`
24	`235`	`row = jws[row_ind]`
	`236`
23	`237`	`return row[col_ind]`
	`238`	`end`
	`239`	`@inline function Base.getindex(jws::JSONWorksheet, row_inds, p::Pointer)`
24	`240`	`map(row -> row[p], jws[row_inds])`
	`241`	`end`
	`242`	`@inline function Base.getindex(jws::JSONWorksheet, row_inds, col_ind::Integer)`
2	`243`	`p = keys(jws)[col_ind]`
	`244`
2	`245`	`getindex(jws, row_inds, p)`
	`246`	`end`
	`247`
	`248`	`function Base.setindex!(jws::JSONWorksheet, value::Vector, p::Pointer)`
4	`249`	`if length(jws) != length(value)`
1	`250`	`throw(ArgumentError("New column must have the same length as old columns"))`
	`251`	`end`
6	`252`	`@inbounds for (i, row) in enumerate(jws)`
7	`253`	`row[p] = value[i]`
	`254`	`end`
2	`255`	`if !haskey(jws, p)`
1	`256`	`push!(jws.pointer, p)`
	`257`	`end`
2	`258`	`return jws`
	`259`	`end`
	`260`	`function Base.setindex!(jws::JSONWorksheet, value, i::Integer, p::Pointer)`
2	`261`	`jws[i][p] = value`
	`262`	`end`
	`263`
	`264`	`"""`
	`265`	`merge(a::JSONWorksheet, b::JSONWorksheet, bykey::AbstractString)`
	`266`
	`267`	`Construct a merged JSONWorksheet from the given JSONWorksheets.`
	`268`	`If the same Pointer is present in another collection, the value for that key will be the`
	`269`	`value it has in the last collection listed.`
	`270`	`"""`
	`271`	`function Base.merge(a::JSONWorksheet, b::JSONWorksheet, key::AbstractString)`
3	`272`	`merge(a::JSONWorksheet, b::JSONWorksheet, Pointer(key))`
	`273`	`end`
	`274`	`function Base.merge(a::JSONWorksheet, b::JSONWorksheet, key::Pointer)`
4	`275`	`@assert haskey(a, key) "$key is not found in the JSONWorksheet(\"$(a.sheetname)\")"`
2	`276`	`@assert haskey(b, key) "$key is not found in the JSONWorksheet(\"$(b.sheetname)\")"`
	`277`
2	`278`	`pointers = unique([a.pointer; b.pointer])`
	`279`
10	`280`	`keyvalues_a = map(el -> el[key], a.data)`
10	`281`	`keyvalues_b = map(el -> el[key], b.data)`
2	`282`	`ind = indexin(keyvalues_b, keyvalues_a)`
	`283`
2	`284`	`data = deepcopy(a.data)`
2	`285`	`for (i, _b) in enumerate(b.data)`
8	`286`	`j = ind[i]`
14	`287`	`if isnothing(j)`
2	`288`	`_a = deepcopy(_b)`
2	`289`	`for p in a.pointer`
10	`290`	`_a[p] = JSONPointer._null_value(p)`
	`291`	`end`
2	`292`	`push!(data, _a)`
	`293`	`else`
6	`294`	`_a = data[j]`
	`295`	`end`
8	`296`	`for p in b.pointer`
32	`297`	`_a[p] = _b[p]`
	`298`	`end`
	`299`	`end`
2	`300`	`JSONWorksheet(a.xlsxpath, pointers, data, a.sheetname)`
	`301`	`end`
	`302`	`function Base.append!(a::JSONWorksheet, b::JSONWorksheet)`
6	`303`	`ak = map(el -> el.tokens, keys(a))`
6	`304`	`bk = map(el -> el.tokens, keys(b))`
	`305`
2	`306`	`if sort(ak) != sort(bk)`
1	`307`	`throw(AssertionError("""Column names must be same for append!`
	`308`	`$(setdiff(collect(ak), collect(bk)))"""))`
	`309`	`end`
	`310`
1	`311`	`append!(a.data, b.data)`
	`312`	`end`
	`313`
0	`314`	`function Base.sort!(jws::JSONWorksheet, key; kwargs...)`
0	`315`	`sort!(jws, Pointer(key); kwargs...)`
	`316`	`end`
	`317`	`function Base.sort!(jws::JSONWorksheet, pointer::Pointer; kwargs...)`
9	`318`	`sorted_idx = sortperm(map(el -> el[pointer], data(jws)); kwargs...)`
1	`319`	`jws.data = data(jws)[sorted_idx]`
1	`320`	`return jws`
	`321`	`end`
	`322`
0	`323`	`function Base.summary(io::IO, jws::JSONWorksheet)`
0	`324`	`@printf("%d×%d %s - %s!%s\n", size(jws)..., "JSONWorksheet",`
	`325`	`basename(xlsxpath(jws)), sheetnames(jws))`
	`326`	`end`
0	`327`	`function Base.show(io::IO, jws::JSONWorksheet)`
0	`328`	`summary(io, jws)`
	`329`	`# TODO truncate based on screen size`
0	`330`	`x = data(jws)`
0	`331`	`print(io, "row 1 => ")`
0	`332`	`print(io, JSON.json(x[1], 1))`
0	`333`	`if length(x) > 1`
0	`334`	`print("...")`
0	`335`	`print(io, "row $(length(x)) => ")`
0	`336`	`print(io, JSON.json(x[end]))`
	`337`	`end`
	`338`	`end`

< Summary

File(s)

src/worksheet.jl

Methods/Properties