if not modules then modules = { } end modules ['lpdf-tag'] = { version = 1.001, comment = "companion to lpdf-tag.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -- todo ; timing -- We do support tagged pdf but that bit is the worst part of the in itself -- impressive 'standard'. It's just a mess. The possible nesting and boundary -- conditions are probaly derived from some application with limitations or weird -- assumptions about structure. And it didn't get any better over time. Now, it -- might be that it's ConTeXt that is complicating matters because - at least that -- is the suggestion coming from comments - that other macro packages are pretty -- happy with it (even claiming that context is not doing it right and that there -- are no issues at all). That said: we have to deal with it somehow, keeping in -- mind that the standard changes, even with arguments that stuff gets dropped -- because the applications that deal with pdf can't handle for instance nesting (of -- e.g. headings). Validators and standards are instable and we don't expect much -- from applications that have to deal with it, or we'd already seen that over time. -- instead of taglist we can have a backpointer to the parent but then -- we also need to adapt the export, it might be a bit slower -- -- maybe also make specifications a two dimensional table -- Updated july/august 2025 to match changed in specifations, validation -- etc. Musical timestamp: (video) Brad Mehldau performs Elliott Smith's -- "Colorbars" (with others), a demonstration that there can still come -- something good from that country in free-fall. local next, type, tonumber, rawget = next, type, tonumber, rawget local format, match, gmatch, find, gsub = string.format, string.match, string.gmatch, string.find, string.gsub local concat, sortedhash, sortedkeys, setmetatableindex = table.concat, table.sortedhash, table.sortedkeys, table.setmetatableindex local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C local settings_to_hash, settings_to_array = utilities.parsers.settings_to_hash, utilities.parsers.settings_to_array local formatters = string.formatters local trace_tags = false local trace_info = false local trace_attribute = false local trace_math = false local trace_blobs = false local trace_internals = false local trace_suspects = false local trace_paragraphs = false local trace_objects = false local trace_tree = false trackers.register("structures.tags", function(v) trace_tags = v end) trackers.register("structures.tags.showtree", function(v) trace_tree = v end) trackers.register("structures.tags.blobs", function(v) trace_blobs = v end) -- These are the visualized ones: local trace_visualized = false -- once we set one of those below we also check more trackers.register("structures.tags.info", function(v) trace_visualized = true ; trace_info = v end) trackers.register("structures.tags.attribute", function(v) trace_visualized = true ; trace_attribute = v end) trackers.register("structures.tags.math", function(v) trace_visualized = true ; trace_math = v attributes.viewerlayers.enable() end) -- somehow has to happen trackers.register("structures.tags.internals", function(v) trace_visualized = true ; trace_internals = v end) trackers.register("structures.tags.suspects", function(v) trace_visualized = true ; trace_suspects = v end) trackers.register("structures.tags.paragraphs",function(v) trace_visualized = true ; trace_paragraphs = v end) trackers.register("structures.tags.objects", function(v) trace_visualized = true ; trace_objects = v end) local detailedmath = false local actualtexts = { } local handlelinks = false local checklinks = true local c_realpageno = tex.iscount("realpageno") directives.register("structures.tags.math.detail", function(v) detailedmath = v end) local report = logs.reporter("backend","tags") local pdfbackend = backends.registered.pdf local nodeinjections = pdfbackend.nodeinjections local codeinjections = pdfbackend.codeinjections local enableaction = nodes.tasks.enableaction local disableaction = nodes.tasks.disableaction local lpdf = lpdf local pdfdictionary = lpdf.dictionary local pdfarray = lpdf.array local pdfboolean = lpdf.boolean local pdfconstant = lpdf.constant local pdfreference = lpdf.reference local pdfunicode = lpdf.unicode local pdfmakenametree = lpdf.makenametree local addtocatalog = lpdf.addtocatalog local addtopageattributes = lpdf.addtopageattributes local pdfflushobject = lpdf.flushobject local pdfreserveobject = lpdf.reserveobject local pdfpagereference = lpdf.pagereference local version = 1 local texgetcount = tex.getcount local nodes = nodes local nodecodes = nodes.nodecodes local par_code = nodecodes.par local hlist_code = nodecodes.hlist local vlist_code = nodecodes.vlist local glyph_code = nodecodes.glyph local rule_code = nodecodes.rule local glue_code = nodecodes.glue local leaders_code = nodes.gluecodes.leaders local empty_rule_code = nodes.rulecodes.empty local a_tagged = attributes.private('tagged') local a_image = attributes.private('image') local a_mathblob = attributes.private('mathblob') local a_taggedpar = attributes.private("taggedpar") local nuts = nodes.nuts local nodepool = nuts.pool local setstate = nodepool.setstate local register = nodepool.register local getid = nuts.getid local getattr = nuts.getattr local getattrs = nuts.getattrs local getprev = nuts.getprev local getnext = nuts.getnext local getlist = nuts.getlist local getchar = nuts.getchar local getleader = nuts.getleader local getlanguage = nuts.getlanguage local getruledimensions = nuts.getruledimensions local tailoflist = nuts.tail local setlink = nuts.setlink local setlist = nuts.setlist local copy_node = nuts.copy local tosequence = nuts.tosequence ----- nextnode = nuts.traversers.node ----- nextcontent = nuts.traversers.content local nextpossible = nuts.traversers.possible local structure_kids -- delayed local structure_ref -- delayed local parent_ref -- delayed local root -- delayed local names = { } local tree = { } local firstintree = false local lastintree = false local elements = { } local elementsorder = { } local nofelements = 0 local structurestags = structures.tags local taglist = structurestags.taglist local specifications = structurestags.specifications local usedlabels = structurestags.labels local properties = structurestags.properties local usewithcare = structurestags.usewithcare local pushtag = structurestags.push local poptag = structurestags.pop local starttag = structurestags.start local stoptag = structurestags.stop local usedmapping = { } local destinations = { } local references = { } local pdffallback = "Artifact" structurestags.where = { -- only in 1.7 Art = "ua1", BlockQuote = "ua1", TOC = "ua1", TOCI = "ua1", Index = "ua1", Private = "ua1", Quote = "ua1", Note = "ua1", Reference = "ua1", BibEntry = "ua1", Code = "ua1", H1 = "ua1", H2 = "ua1", H3 = "ua1", H4 = "ua1", H5 = "ua1", H6 = "ua1", -- only in 2.0 DocumentFragment = "ua2", Aside = "ua2", H7 = "ua2", H8 = "ua2", H9 = "ua2", Title = "ua2", FENote = "ua2", Sub = "ua2", Em = "ua2", Strong = "ua2", Artifact = "ua2", -- rest depends on version } structurestags.unsupported = { -- only in 1.7 Art = true, -- BlockQuote = true, TOC = true, TOCI = true, Index = true, Quote = true, Note = true, Reference = true,-- internal BibEntry = true, H = true, -- only in 2.0 DocumentFragment = true, Title = true, -- FENote = true, Em = true, Strong = true, -- in both Caption = true, Document = true, -- internal Form = true, Annot = true, Link = true, -- internal } experiments.register("structures.tags.lr",function() structurestags.unsupported.Link = false structurestags.unsupported.Reference = false end) structurestags.modes = { H1 = "inline", H2 = "inline", H3 = "inline", H4 = "inline", H5 = "inline", H6 = "inline", H7 = "inline", H8 = "inline", H9 = "inline", Lbl = "inline", -- Em = "inline", -- Strong = "inline", P = "display", FENote = "display", BlockQuote = "display", NonStruct = "mixed", Sect = "display", Part = "display", } setmetatableindex(structurestags.where,function(t,k) -- if version 1 then if from 2 -> artifact .. todo local v = version > 1 and "ua2" or "ua1" t[k] = v return v end) ----- tagsplitter = structurestags.patterns.splitter -- We used to have a way to embed mathml and bib blobs independent of tagging but -- that was actually never really used. It used attachments and text notes but is -- was not really supported by viewers so we never advertized it. local embeddedtags = false local embeddedfilelist = pdfarray() local blobfunctions = { } local tagtracers = { } local indirectlocalkids = false local indirectglobalkids = false do local done = false experiments.register("structures.tags.indirect",function(v) if done then -- we only permit this once elseif v == "global" then indirectglobalkids = true elseif v == "local" then indirectlocalkids = true elseif v then indirectglobalkids = true indirectlocalkids = true end done = true end) end -- When we are generating more pages this will be invalid but there will be an extra -- run anyway due to storing the final value. -- -- For some reason links need a struct parent and destinations an object ... smells like -- application driven stuff. local pagenumindices = { } local usedpages = false -- internal -> structure local pagenum = 0 local linknumoffset = false local linknumindex = 0 local linknumindices = { } local linknumentries = setmetatableindex(function(t,refatt) if not linknumoffset then local tagging = job.variables.collected.tagging if tagging then linknumoffset = tagging.basetreesize else -- linknumoffset = structures.counters.record("realpage")["last"] + 100 linknumoffset = 1000 end end local v = linknumindex + linknumoffset linknumindex = linknumindex + 1 linknumindices[linknumindex] = false -- attribute -> parentindex t[refatt] = v return v end) local function setlinkstructureparent(refatt,objref) local p = linknumentries[refatt] linknumindices[p] = objref end function codeinjections.getlinkstructureparent(refatt) return linknumentries[refatt] end local referencenumindices = { } -- attribute -> structure local useddestinations = false -- internal -> structure function codeinjections.getreferencestructureobject(internal,page) if not useddestinations then local tagging = job.variables.collected.tagging useddestinations = tagging and tagging.destinations or { } usedpages = tagging and tagging.pages or { } end return (internal and useddestinations[internal]) or (page and usedpages[page]) -- or usedpages[1] end -- local function embedsupportedtags(str) if not embeddedtags then embeddedtags = { } end if str then for tag in gmatch(str,"([^, ]+)") do embeddedtags[tag] = true end else for tag in next, blobfunctions do embeddedtags[tag] = true end end end directives.register("structures.tags.embed",function(v) embedsupportedtags(type(v) == "string" and v or nil) end) function codeinjections.maptag(original,target,kind) mapping[original] = { target, kind or "inline" } end local detailmapping = { } local parentmapping = { } local directmapping = { } local warnedmapping = { } local usernamespaces = { } local usernamecount = 0 -- local namespaces = false local namespaces = { mathml = { url = "http://www.w3.org/1998/Math/MathML", force = false, known = true, map = { }, mod = { }, cap = { }, }, context = { url = "http://www.contextgarden.net/pdf/context", map = { }, mod = { }, cap = { }, }, recovery = { url = "http://www.contextgarden.net/pdf/recovery", map = { }, mod = { }, cap = { }, }, user = { url = "http://www.contextgarden.net/pdf/user", map = { }, mod = { }, cap = { }, }, ua1 = { url = "http://iso.org/pdf/ssn", mod = { }, cap = { }, force = false, known = true, }, ua2 = { url = "http://iso.org/pdf2/ssn", mod = { }, cap = { }, force = false, known = true, }, } local missingua = setmetatableindex(function(t,k) local v = pdffallback t[k] = v return v end) statistics.register("pdf tags", function() local k, v = next(missingua) if k then return format("unknown tags mapped to %s: % t",v,sortedkeys(missingua)) end end) local function concattags(tags) local t = tags.taglist if t then local l = { } -- table.new local n = #t for i=1,n do l[i] = t[i] or "ERROR" end return concat(l," ",1,n) else return "" end end local modes = structurestags.modes local where = structurestags.where local unsupported = structurestags.unsupported local function registernamespace(ns,tagnameused) -- user or user-XXXX if ns then local nsp = namespaces[ns] local map local mod local cap if nsp then map = nsp.map mod = nsp.mod cap = nsp.cap else local r = pdfreserveobject() map = { } mod = { } cap = { } nsp = { url = "http://www.contextgarden.net/pdf/user", map = map, mod = mod, cap = cap, num = r, ref = pdfreference(r), } namespaces[ns] = nsp end local u = map[tagnameused] local m = mod[tagnameused] local c = cap[tagnameused] if not u then local uns = usernamespaces[ns] local upd = uns and uns[tagnameused] if upd then local ua = namespaces[where[upd]] ua.force = true u = pdfarray { pdfconstant(upd), ua.ref } m = modes[upd] c = upd map[tagnameused] = u mod[tagnameused] = m cap[tagnameused] = c else -- todo : make more efficient local prp = properties[tagnameused] if prp then ns = prp.namespace or "context" upd = prp.pdf uns = namespaces[ns] if not uns then ns = "recovery" upd = missingua[tagnameused] uns = namespaces.recovery end else report("missing definition and/or mapping for tag %a",tagnameused) ns = "recovery" upd = missingua[tagnameused] uns = namespaces.recovery end m = modes[upd] c = upd local ua = namespaces[where[upd]] ua.force = true u = pdfarray { pdfconstant(upd), ua.ref } uns.map[tagnameused] = u uns.mod[tagnameused] = m uns.cap[tagnameused] = c end end return ns, m, c end end local problems = setmetatableindex("table") statistics.register("problematic tags", function() if next(problems) then report() local maxp = 0 local maxa = 0 for page, specification in sortedhash(problems) do for i=1,#specification do local s = specification[i] local p = s.parnumber local a = s.attribute if a > maxa then maxa = a end if p > maxp then maxp = p end end end maxp = (2 + maxp//100) * 2 maxa = (2 + maxa//100) * 2 local f = formatters["%" .. maxp .. "i %" .. maxa .. "i : %s : %s"] for page, specification in sortedhash(problems) do for i=1,#specification do local s = specification[i] specification[i] = f(s.parnumber,s.attribute,s.tagname,s.prevtag) end table.sort(specification) local last = false report("page %s:",page) report() for i=1,#specification do local s = specification[i] if s ~= last then last = s report(s) end end report() report("check with tracker: structures.trackers.[info|attribute|paragraph]") report() end end end) local function checknamespace(tagname,tagnameused,detail,specification) if detail then local dmap = detailmapping[tagnameused] if dmap then local ns = dmap[detail] if ns then return registernamespace(ns,tagnameused) end end end local pmap = parentmapping[tagnameused] if pmap and specification then local l = specification.taglist if l then local p = l[#l-1] if p then local s = specifications[p] local d = s.detail local ns = pmap[d] if ns then return registernamespace(ns,tagnameused) end end end end local ns = directmapping[tagnameused] if ns then if warnedmapping[tagnameused] then local p = problems[pagenum] p[#p+1] = specification end return registernamespace(ns,tagnameused) end return registernamespace("context",tagnameused) end local isrolemapped = false local forcerolemap = false directives.register("structures.tags.rolemapped",function(v) isrolemapped = v end) directives.register("structures.tags.rolemap", function(v) forcerolemap = v end) local function hasendpoints(kids,size) for i=1,size do if type(kids[i]) == "number" then return true end end return false end local function finishstructure() if root and #structure_kids > 0 then local nums = pdfarray() local n = 0 if indirectglobalkids then for i=firstintree,lastintree do local ti = tree[i] if ti then n = n + 1 ; nums[n] = i - 1 n = n + 1 ; nums[n] = pdfreference(pdfflushobject(ti)) else report("beware: missing page %i in tree", i) end end else for i=firstintree,lastintree do local ti = tree[i] if not ti then report("beware: missing page %i in tree", i) elseif #ti > 0 then n = n + 1 ; nums[n] = i - 1 n = n + 1 ; nums[n] = ti end end end if isrolemapped then local usedrolemap = nil local usednamespaces = nil local originals = { } if version == 1 or forcerolemap then for k, v in next, usedmapping do local k = usedlabels[k] or k local p = properties[k] if p then local pdf = p.pdf if not pdf then pdf = missingua[pdf] end local r = pdfconstant(pdf) if usedrolemap then usedrolemap[k] = r else usedrolemap = pdfdictionary { [k] = r } end else report("beware: missing property %s", k) end end end if version > 1 then for k, v in next, usedmapping do local k = usedlabels[k] or k local p = properties[k] if p then local s = p.namespace local n = namespaces[s] if n then local pdf = p.pdf if not pdf then pdf = missingua[pdf] end if not usednamespaces then usednamespaces = pdfarray() end local original = p.original if original then local o = originals[s] if not o then o = pdfdictionary() originals[s] = o end o[k] = pdfdictionary { S = pdfconstant(original[1]), -- T = pdfstring(original[2]), T = pdfunicode(original[2]), } end -- else report("beware: missing namespace %s", s) end else report("beware: missing property %s", k) end end if usednamespaces then for k, v in sortedhash(namespaces) do local o = originals[k] if o then o = pdfreference(pdfflushobject(o)) end if v.force then local n = pdfdictionary { Type = pdfconstant("Namespace"), NS = pdfunicode(v.url), LMTX_NameSpace = pdfconstant(k), LMTX_Originals = o, } pdfflushobject(v.num,n) usednamespaces[#usednamespaces+1] = v.ref else local map = v.map if map and next(map) then local m = pdfreference(pdfflushobject(pdfdictionary(map))) local n = pdfdictionary { Type = pdfconstant("Namespace"), NS = pdfunicode(v.url), RoleMapNS = m, LMTX_NameSpace = pdfconstant(k), LMTX_Originals = o, } pdfflushobject(v.num,n) usednamespaces[#usednamespaces+1] = v.ref end end end end end end -- we can split the loop: with and without links if indirectlocalkids then for i=1,nofelements do local fulltag = elementsorder[i] local element = elements[fulltag] local kids = element.kids local knum = element.knum if handlelinks and checklinks then local tag = element.tag if tag == "link" then local ref = element.ref if ref then setlinkstructureparent(ref,element.pref) end elseif tag == "reference" then local des = element.des if des then referencenumindices[des] = element.dnum end end end -- if handlelinks and checklinks then -- for i=1,#kids do -- local d = kids[i] -- if type(d) == "table" then -- local refatt = element.refatt -- if refatt then -- local refobj = codeinjections.getrefobj(refatt) -- if refobj then -- d.Obj = pdfreference(refobj) -- end -- end -- end -- end -- end pdfflushobject(knum,kids) end else for i=1,nofelements do local fulltag = elementsorder[i] local element = elements[fulltag] local dict = element.dict if dict then local dnum = element.dnum local kids = element.kids local size = #kids if handlelinks and checklinks then local tag = element.tag if tag == "link" then local ref = element.ref if ref then setlinkstructureparent(ref,element.pref) end elseif tag == "reference" then local des = element.des if des then referencenumindices[des] = dnum end elseif tag == "navigationpage" then pagenumindices[element.pnum] = dnum end end -- if handlelinks and checklinks then -- for i=1,size do -- local d = kids[i] -- if type(d) == "table" then -- local refatt = element.refatt -- if refatt then -- local refobj = codeinjections.getrefobj(refatt) -- if refobj then -- d.Obj = pdfreference(refobj) -- end -- end -- end -- end -- end if size == 1 then local t = type(kids[1]) if t == "number" then dict.K = kids[1] -- elseif t == "table" then -- -- this needs testing on a complex document -- dict.K = kids[1] else dict.K = kids[1] dict.Pg = nil end elseif not hasendpoints(kids,size) then dict.Pg = nil end pdfflushobject(dnum,dict) end end end -- local getinternalreference = structures.references.getinternalreference local destinations = { } for k, v in next, referencenumindices do destinations[getinternalreference(k)] = v end job.variables.tobesaved.tagging = { basetreesize = lastintree, destinations = destinations, pages = pagenumindices, } -- setmetatableindex(linknumindices) for k, v in sortedhash(linknumindices) do if v then n = n + 1 ; nums[n] = k n = n + 1 ; nums[n] = v end end -- local parenttree = pdfdictionary { Nums = nums } local idtree = pdfmakenametree(names) -- local structuretree = pdfdictionary { Type = pdfconstant("StructTreeRoot"), K = pdfreference(pdfflushobject(structure_kids)), ParentTree = pdfreference(pdfflushobject(parent_ref,parenttree)), IDTree = idtree, RoleMap = usedrolemap, Namespaces = usednamespaces, -- Experiment with some control (screen readers) but it didn't work out reliable -- so forget about it. -- ClassMap = pdfdictionary { -- Display = pdfdictionary { O = pdfconstant("Layout"), Placement = pdfconstant("Block") }, -- Inline = pdfdictionary { O = pdfconstant("Layout"), Placement = pdfconstant("Inline") }, -- }, } pdfflushobject(structure_ref,structuretree) addtocatalog("StructTreeRoot",pdfreference(structure_ref)) -- -- So out of a sudden (sometime 2025) this is now mandate when a document is tagged. Documents -- that validated before didn't after updating the validator. Well ... but how about those older -- documents that were found okay before? -- -- if version == 1 then -- always was optional anyway local markinfo = pdfdictionary { Marked = pdfboolean(true) or nil, -- UserProperties = pdfboolean(true), -- maybe some day -- Suspects = pdfboolean(true) or nil, -- AF = #embeddedfilelist > 0 and pdfreference(pdfflushobject(embeddedfilelist)) or nil, } addtocatalog("MarkInfo",pdfreference(pdfflushobject(markinfo))) -- end end if trace_tree then local p = false local n = tonumber(trace_tree) or true for i=1,nofelements do local fulltag = elementsorder[i] local element = elements[fulltag] local specification = specifications[fulltag] local pagenumber = element.pnum if n == true and i > 1 and p ~= pagenumber then report("") end p = pagenumber if n == true or n == p then report("% 5i %3i %s (%s)",i,pagenumber,concattags(specification),(element.des and "destination") or (element.ref and "reference") or "content") end end end if trace_blobs then for k, v in sortedhash(actualtexts) do local shared = v[1] and "-" or "+" local index = v[2] local text = v[3] report("blob %s : %5i : %04X : %s",shared,index,index,text) end end end lpdf.registerdocumentfinalizer(finishstructure,"document structure") -- pagenum has been moved up local index = 0 local pageref = nil local list = nil local namespaced = false local pdf_mcr = pdfconstant("MCR") local pdf_struct_element = pdfconstant("StructElem") local pdf_s = pdfconstant("S") local pdf_objr = pdfconstant("OBJR") local function initializepage() index = 0 pagenum = texgetcount(c_realpageno) pageref = pdfreference(pdfpagereference(pagenum)) list = pdfarray() if not namespaced then for k, v in sortedhash(namespaces) do v.num = pdfreserveobject() v.ref = pdfreference(v.num) end namespaced = true end -- hm, can be later than 1 if not firstintree then if pagenum > 1 then report("beware: first page in tree is %i", pagenum) end firstintree = pagenum lastintree = pagenum end if pagenum > lastintree then lastintree = pagenum else -- report("beware: page order problem in tree at page %i", pagenum) end tree[pagenum] = list -- we can flush after done, todo -- end local function finishpage() -- flush what can be flushed addtopageattributes("StructParents",pagenum-1) end -- here we can flush and free elements that are finished local pdf_userproperties = pdfconstant("UserProperties") -- /O /Table -- /Headers [ ] local function makeattribute(t) if t and next(t) then local properties = pdfarray() for k, v in sortedhash(t) do -- easier on comparing pdf properties[#properties+1] = pdfdictionary { N = pdfunicode(k), V = pdfunicode(v), } end return pdfdictionary { O = pdf_userproperties, P = properties, } end end local visualizetags = nil local visualizespecial = nil local visualizeblobs = nil local visualizesuspects = nil local visualizeinternals = nil local visualizeparagraphs = nil local visualizeobjects = nil local collectedsuspects = { } statistics.register("pdf tags", function() if #collectedsuspects > 0 then return formatters["suspects: % t"](collectedsuspects) end end) local function checkvisualize() if not visualizetags then visualizetags = nodes.visualizers.register("tags") visualizespecials = nodes.visualizers.register("specials",nil,nil,2.5,true) visualizeblobs = nodes.visualizers.register("blobs",nil,nil,2.5,true) visualizesuspects = nodes.visualizers.register("suspects") visualizeinternals = nodes.visualizers.register("internals") visualizeparagraphs = nodes.visualizers.register("paragraphs") visualizeobjects = nodes.visualizers.register("objects") end end local function tagtracer(name) checkvisualize() return visualizetags(name), visualizetags() end setmetatableindex(tagtracers,function(t,k) if trace_attribute then checkvisualize() local visualize = nodes.visualizers.register(k) local v = function(name,specification) local a = specification.attribute if a then name = name .. " " .. a end return visualize(name), visualize() end t[k] = v return v else t[k] = tagtracer return tagtracer end end) local endpoints, getendpoint do local nofendpoints = 0 local trace = false endpoints = { Part = "p", Div = "p", Sect = "p", } lpdf.endpoints = endpoints local get = function(prev,at,ap,oldmode) local ptag = prev.tag local capsule = prev.capsule local etag = endpoints[capsule] -- or capsule -- print("END1",ptag,capsule,etg,prev,mode) if not etag then if capsule == "NonStruct" then local newmode = prev.mode if oldmode == "display" then if newmode == "display" then -- okay else etag = "m" end elseif oldmode == "inline" then if newmode == "inline" then -- okay else etag = "m" end end end end -- print("END2",ptag,capsule,etg,prev,mode) if not etag then if trace then report("unknown endpoint for %a",ptag) end elseif ptag == etag then -- we're okay else nofendpoints = nofendpoints + 1 local bad = etag .. ">" .. nofendpoints specifications[bad] = { tagname = etag, attribute = at or 0, parnumber = ap or 0, prevtag = ptag, } if trace then report("adding endpoint %a to %a",etag,ptag) end return bad end end getendpoint = get -- we add them by default trackers .register("structures.tags.endpoints", function(v) trace = v end) directives.register("structures.tags.endpoints", function(v) getendpoint = v and get or false end) end local makeelement do do local f_tagid = formatters["math-%04X"] -- todo: auto adapt to nofblobs local f_tagfn = formatters["math-%04X.xml"] local shared = { } local bindex = 0 local btags = false function blobfunctions.math(tagname,specification) local tagindex = specification.tagindex local id = f_tagid(tagindex) local blob = specification.blob local af = nil if blob then local blobindex = mathematics.getblobindex("pdf",blob) if blobindex then local index = shared[blobindex] if not btags then btags = { } job.variables.tobesaved.mathblobs = btags end if index then af = index[1] btags[blob] = index[2] else bindex = bindex + 1 local blobname = f_tagid(blobindex) local blobfile = f_tagfn(blobindex) local blobdata = mathematics.getmathblob("pdf",blob) af = codeinjections.embedfile { force = true, data = blobdata, name = blobname, file = blobfile, -- hash = hash, hash = id, forcereference = true, -- title = "whatever", mimetype = "application/mathml+xml", relation = "Supplement", -- bah } af = pdfreference(pdfflushobject(pdfarray { af })) -- maybe also share this shared[blobindex] = { af, blobindex } btags[blob] = blobindex end local actualtext = mathematics.gettextblob("pdf",specification.language or "en",blob) if actualtext then if trace_blobs then actualtexts[blob] = { index and true or false, blobindex, actualtext } end actualtext = pdfunicode(actualtext) end return id, af, actualtext end else -- af = job.fileobjreferences.collected[id] -- if af then -- local r = pdfreference(af) -- af = pdfarray { r } -- -- embeddedfilelist[#embeddedfilelist+1] = r -- end end end local blobdone = { } function tagtracers.math(name,specification,blob) checkvisualize() if blob and not blobdone[blob] then local bname = btags[blob] if bname then bname = "M " .. blob .. " " .. f_tagid(bname) else bname = "M " .. blob end blobdone[blob] = true if detailedmath then return visualizeblobs(bname,name), visualizeblobs() else return visualizetags(bname), visualizetags() end else return visualizetags(name), visualizetags() end end function tagtracers.suspect(name) checkvisualize() return visualizesuspects("S " .. "mrow"), visualizesuspects() end function tagtracers.paragraph(n) checkvisualize() return visualizeparagraphs("P " .. n), visualizeparagraphs() end function tagtracers.objects(n) if n then checkvisualize() return visualizeobjects("O " .. n), visualizeobjects() end end function tagtracers.internallink(internal) checkvisualize() return visualizeinternals("L " .. internal), visualizeinternals() end function tagtracers.internalreference(internal) checkvisualize() return visualizeinternals("R " .. internal), visualizeinternals() end end do function tagtracers.link(name,specification,blob) checkvisualize() return visualizespecials(name), visualizespecials() end function tagtracers.reference(name,specification,blob) checkvisualize() return visualizespecials(name), visualizespecials() end end do local f_tagid = formatters["cite-%s"] -- todo: auto adapt to nofblobs local f_tagfn = formatters["cite-%s.bib"] local shared = { } local bindex = 0 local btags = { } local function citeblobs(tagname,specification) local detail = specification.detail if detail then local dataset, tag = match(detail,"^(.+)::(.+)$") local index = shared[tag] local id = f_tagid(tag) local af = nil if index then af = index[1] btags[tag] = index[2] else bindex = bindex + 1 local data = publications.datasets[dataset].luadata[tag] or "no data" local blobname = id -- f_tagid(tag) local blobfile = f_tagfn(tag) local blobdata = publications.savers.bib(false,false,{ [tag] = data }) -- converttoxml(dataset,true,false,true,false,true,true) af = codeinjections.embedfile { force = true, data = gsub(blobdata,"\n+$",""), name = blobname, file = blobfile, -- hash = hash, hash = id, forcereference = true, -- title = "whatever", mimetype = "application/x-bibtex", relation = "Supplement", -- bah } -- af = pdfarray { af } -- maybe also share this af = pdfreference(pdfflushobject(pdfarray { af })) -- maybe also share this shared[tag] = { af, blobname } btags[tag] = blobname end local actualtext = publications.meanings[tag] if actualtext then actualtext = pdfunicode(actualtext) end return id, af, actualtext end end function tagtracers.cite(name,specification) checkvisualize() local detail = specification.detail if detail then local dataset, tag = match(detail,"^(.-)::(.-)$") local bname = btags[tag] if bname then return visualizetags("C " .. bname), visualizetags() end end return visualizetags(name), visualizetags() end experiments.register("structure.tags.blobs.cite",function(v) blobfunctions.cite = v and citeblobs or nil end) blobfunctions.cite = citeblobs -- enabled by default end do local f_tagid = formatters["%s-%s"] -- todo: auto adapt to nofblobs local f_tagfn = formatters["%s-%s.txt"] local shared = { } local bindex = 0 local btags = { } local delayed = { [interfaces.variables.columns] = true, [interfaces.variables.page ] = true, } local function descriptionblobs(tagname,specification) local detail = specification.detail if detail then local taglist = specification.taglist local fulltag = taglist[#taglist] local indices = structurestags.getdescriptionindex(fulltag) if not indices then return end local data = structures.lists.collected[indices.listindex] if not data then return end local tag = indices.noteindex if not delayed[data.references.delay or false] then return end data = data.titledata.bookmark or data.titledata.title if not data then return end local index = shared[tag] local id = f_tagid(detail,tag) local af = nil if index then af = index[1] btags[tag] = index[2] else bindex = bindex + 1 local blobname = id -- f_tagid(detail,tag) local blobfile = f_tagfn(detail,tag) local blobdata = data af = codeinjections.embedfile { force = true, data = blobdata, -- gsub(blobdata,"\n+$",""), name = blobname, file = blobfile, -- hash = hash, hash = id, forcereference = true, -- title = "whatever", mimetype = "application/text", relation = "Supplement", -- bah } -- af = pdfarray { af } -- maybe also share this af = pdfreference(pdfflushobject(pdfarray { af })) -- maybe also share this shared[tag] = { af, blobname } btags[tag] = blobname end local actualtext = blobname -- blobdata if actualtext then actualtext = pdfunicode(actualtext) end return id, af, actualtext end end -- function tagtracers.description(name,specification) -- checkvisualize() -- local detail = specification.detail -- if detail then -- local tag = "..." -- local bname = btags[tag] -- if bname then -- return visualizetags("C " .. bname), visualizetags() -- end -- end -- return visualizetags(name), visualizetags() -- end experiments.register("structure.tags.blobs.description",function(v) blobfunctions.description = v and descriptionblobs or nil end) -- blobfunctions.description = nil-- disabled by default end local lastid = 0 local f_id = formatters["%X"] local symbols = setmetatableindex ( { -- None ["1"] = "Disc", ["2"] = "Circle", ["3"] = "Square", ["n"] = "Decimal", ["I"] = "UpperRoman", ["i"] = "LowerRoman", ["A"] = "UpperAlpha", ["a"] = "LowerAlpha", }, function(t,k) return tonumber(k) and "Unordered" or "Ordered" end ) -- Play safe for the crappy and evolving standard. We want to validate now and -- in the future without ever looking back. So, we always add an endpoint, even -- when it's not needed because we don't know what will change. We use "text" as -- it is a short tag "inline" or "display" could also work weren't it that one -- can't know in advance what a user prefers. -- todo: use a plugin model makeelement = function(fulltag,parent) local specification = specifications[fulltag] local tagname = specification and specification.tagname or "ignore" -- we always have a specification -- some catches .. todo if tagname == "ignore" then return false elseif tagname == "mstacker" or tagname == "mstackertop" or tagname == "mstackerbot" or tagname == "mstackermid" then -- test this in an mp stacker in the math manual, basically any private one return true elseif tagname == "mrow" then -- todo: alttext return false end local tagnameused = tagname local original = tagname local attributes = nil if tagname == "tabulatecell" then local d = structurestags.gettabulatecell(fulltag) if d and d.kind == 1 then tagnameused = "tabulateheadcell" end elseif tagname == "tablecell" then -- will become a plugin model local d = structurestags.gettablecell(fulltag) if d then if d.kind == 1 then tagnameused = "tableheadcell" end local rows = d.rows or 1 local cols = d.columns or 1 if rows > 1 or cols > 1 then attributes = pdfdictionary { -- The usual inconsistency on short and long keys: O = pdfconstant("Table"), RowSpan = rows > 1 and rows or nil, ColSpan = cols > 1 and cols or nil, } end end end -- local detail = specification.detail local userdata = specification.userdata local namespace = nil local mode = "mixed" local capsule = false -- if version == 1 then -- not here -- todo: register ns -> rolemap else namespace, mode, capsule = checknamespace(tagname,tagnameused,detail,specification) end -- -- needs checking, moved -- if tagname == "itemgroup" then local data = structurestags.getitemgroup(fulltag) local symbol = symbols[data.symbol or "None"] or "None" -- why do we care, we could just go none attributes = pdfdictionary { O = pdfconstant("List"), ListNumbering = pdfconstant(symbol), ContinuedList = data.continue and true or nil, } elseif capsule == "L" then -- so not in level 1 -- We have to add something but not None, so we can have an -- optional setter if needed. For now we just assume Ordered. -- local symbol = props.symbol and symbols[props.symbol] or "Ordered" attributes = pdfdictionary { O = pdfconstant("List"), ListNumbering = pdfconstant("Ordered"), -- ListNumbering = pdfconstant(symbol), } end -- usedmapping[tagname] = true -- local af = nil local id = nil local actualtext = nil -- if embeddedtags[tagname] then local action = blobfunctions[tagname] if action then id, af, actualtext = action(tagname,specification) end end -- if tagname == "sorting" then actualtext = structurestags.getsorting(fulltag) elseif tagname == "synonym" then actualtext = structurestags.getsynonym(fulltag) end -- if isrolemapped then if version > 1 then if namespace then namespace = namespaces[namespace].ref or nil else local p = properties[tagname] if p then namespace = namespaces[p.namespace].ref or nil else namespace = nil -- do we need a fallback like: -- namespace = "user" -- properties[tagname] = { namespace = namespace, pdf = "Span", nature = "inline" } end end end end local kids = pdfarray() local tag = usedlabels[tagnameused] or tagnameused local subtype = pdfconstant(tag) local pref = parent.pref local pkids = parent.kids local element local dref, dnum original = original ~= tagnameused and pdfconstant(original) or nil -- Pg : only needed when K is integer or array with integers if indirectlocalkids then local knum = pdfreserveobject() local dict = pdfdictionary { -- Type = pdf_struct_element, -- optional, saves bytes S = subtype, T = detail and detail or nil, P = pref, Pg = pageref, K = pdfreference(knum), A = attributes, Alt = actualtext or nil, NS = namespace, -- ActualText = actualtext or nil, -- shared object with Alt? AF = af or nil, LMTX_S = original, } dnum = pdfflushobject(dict) dref = pdfreference(dnum) element = { blob = af and true or false, -- also actualtext tag = tag, pnum = pagenum, pref = dref, kids = kids, knum = knum, dnum = dnum, ref = tag == "link" and specification.reference or nil, des = tag == "reference" and specification.destination or nil, mode = mode, capsule = capsule, } else local dict = pdfdictionary { -- Type = pdf_struct_element, -- optional, saves bytes S = subtype, T = detail and detail or nil, P = pref, Pg = pageref, K = kids, A = attributes, Alt = actualtext or nil, NS = namespace, -- ActualText = actualtext or nil, -- shared object with Alt? AF = af or nil, LMTX_S = original, LMTX_A = trace_attribute and specification.attribute or nil, } dnum = pdfreserveobject() dref = pdfreference(dnum) element = { blob = af and true or false, -- also actualtext tag = tag, pnum = pagenum, pref = dref, kids = kids, dict = dict, dnum = dnum, ref = tag == "link" and specification.reference or nil, des = tag == "reference" and specification.destination or nil, mode = mode, capsule = capsule, } end if id and names then names[id] = dref end pkids[#pkids+1] = dref elements[fulltag] = element nofelements = nofelements + 1 elementsorder[nofelements] = fulltag return element end end local f_BDC = formatters["/%s <> BDC"] local a_destination = attributes.private('destination') local a_reference = attributes.private('reference') local references = { } local function makecontent(start,parent,id,specification,range) local tag = parent.tag local kids = parent.kids local last = index index = index + 1 if id == "image" or id == "mpgraphic" then -- when we are in \startTEXpage .... \stopTEXpage we have not really -- an element in the end of the list as all is unstructured local list = specification.taglist local data = usewithcare[id][list[#list]] -- export image generator local d = pdfdictionary { Type = pdf_mcr, Pg = pageref, MCID = last, } if data then local alt = data.alternativetext or "" parent.dict.Alt = pdfunicode(alt ~= "" and alt or id) else -- not really tagged as image end kids[#kids+1] = pdfreference(pdfflushobject(d)) -- kids[#kids+1] = d elseif pagenum == parent.pnum then kids[#kids+1] = last else local d = pdfdictionary { Type = pdf_mcr, Pg = pageref, MCID = last, } kids[#kids+1] = pdfreference(pdfflushobject(d)) -- kids[#kids+1] = d end -- list[index] = parent.pref -- page related list -- return f_BDC(tag,last) end local function makeignore(specification,range) return "/Artifact BMC" end -- no need to adapt head, as we always operate on lists local EMCliteral = nil -- local enabled = true -- updaters.register("tagging.state.enable", function() enabled = true end) -- updaters.register("tagging.state.disable", function() enabled = false end) local tag_ignore_level = 1 local tag_document_level = 2 local tag_image_state = -1 local tag_ignore_state = -2 local tag_link_state = -3 local tag_reference_state = -4 local tag_rule_state = -5 function nodeinjections.addtags(head,ispage) if tex.systemmodes.export then return head elseif not tex.conditionals.c_strc_tags_global then return head end -- if not enabled then -- return head -- end local pdflanguage = languages.pdflanguage local listtoutf = nodes.listtoutf if not EMCliteral then EMCliteral = register(setstate("EMC")) end local last = nil local ranges = { } local range = nil local nofranges = 0 if not root then structure_kids = pdfarray() structure_ref = pdfreserveobject() parent_ref = pdfreserveobject() root = { pref = pdfreference(structure_ref), kids = structure_kids } names = pdfarray() end initializepage() local mblob = false local ablob = { } -- dummy link if handlelinks and ispage then pushtag() -- tag_document_level starttag("dummy") local ac = starttag("navigationpage", { detail = pagenum } ) stoptag() stoptag() local n = nodepool.boundary() setlink(n,getlist(head)) setlist(head,n) nofranges = nofranges + 1 ranges[nofranges] = { ac, false, "navigationpage", n, n, head } poptag() end -- if handlelinks and ispage then -- pushtag() -- tag_document_level -- starttag("dummy") -- local ac = starttag("navigationpage", { detail = pagenum } ) -- stoptag() -- stoptag() -- nofranges = nofranges + 1 -- ranges[nofranges] = { ac, false, "navigationpage" } -- poptag() -- end -- todo: survive across pages local lastpar = 0 local lastparat = 0 -- most are glyphs and lists anyway adn those take time local function collectranges(head,parent) -- for n, id, subtype in nextnode, head do -- for n, id, subtype, list in nextcontent, head do for n, id, subtype, list in nextpossible, head do -- check(n,id) if id == glyph_code then -- we no longer intercept char 0 here local at, blob, ap = getattrs(n,a_tagged,a_mathblob,a_taggedpar) if at == 0 then at = false elseif at == 1 then at = false elseif at then -- else at = false end -- no check for detailed math, see archive (2025-07) if not blob then mblob = false if last ~= at then range = { at, ap or 0, "glyph", n, n, parent } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = at lastparat = at lastpar = ap elseif range then if lastpar ~= ap and at and lastparat == at then -- local specification = taglist[at] pushtag(at < tag_document_level and tag_document_level or at) local ac = starttag("break") stoptag() poptag() range = { ac, ap or 0, "break", n, false } nofranges = nofranges + 1 ranges[nofranges] = range lastpar = ap range = { at, ap or 0, "glyph", n, n, parent } -- , false, false, ap } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = at lastparat = at else range[5] = n -- stop end end elseif blob == mblob and (last and last > 0) then if range then range[5] = n -- stop end last = at else mblob = blob local a = ablob[blob] if not a then a = tag_document_level if at then local t = taglist[at].taglist -- no check for simple math, see archive (2025-07) -- we could store the index in specifications but we only need it once for i=1,#t do local s = specifications[t[i]] if s.tagname == "math" then a = s.attribute break end end end ablob[blob] = a end range = { a, ap or 0, "math", n, n, parent, blob } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = at end elseif id == hlist_code or id == vlist_code then local at, img = getattrs(n,a_tagged,a_image) -- todo: img id 3 == mp if img then -- combine i.e. store img number range = { at or false, false, img == 3 and "mpgraphic" or "image", n, n, parent } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = tag_image_state mblob = false elseif at == 0 then range = { false, false, "ignore", n, n, parent } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = tag_ignore_state mblob = false else -- 1 also process -- at is always true as we already checked for zero if handlelinks and at then local r, d = getattrs(n,a_reference,a_destination) if r and not references[r] then -- bah local b = getattr(n,a_mathblob) if b then at = ablob[b] end -- -- The wrapping in a dummy is really needed as links can't be in some -- places, not even when tagged as Artifact but wrapping them in an -- Artifact actually does work. -- pushtag(at < tag_document_level and tag_document_level or at) starttag("dummy", { }) local ac = starttag("link", { reference = r }) stoptag() stoptag() poptag() range = { ac, false, "link", n, false, parent } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = tag_link_state references[r] = true -- ac end if d and not destinations[d] then -- bah local b = getattr(n,a_mathblob) if b then at = ablob[b] end -- pushtag(at < tag_document_level and tag_document_level or at) local ac = starttag("reference", { destination = d }) stoptag() poptag() range = { ac, false, "reference", n, false, parent } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = tag_reference_state mblob = false destinations[d] = true -- ac end end -- local list = getlist(n) if list then collectranges(list,n) end end -- elseif id == disc_code then -- -- can't happen elseif id == glue_code then if subtype >= leaders_code then -- local list = getleader(n) if list then collectranges(list,n) end end elseif id == rule_code then -- This is nasty: struts are also changing the state! if subtype == empty_rule_code then -- skip else local w, h, d = getruledimensions(n) if (w ~= 0) and (h + d ~= 0) then local at, blob = getattrs(n,a_tagged,a_mathblob) if blob then at = false end -- if blob then -- else if not at then at = false elseif at == 0 then at = false elseif at == 1 then at = false end if last ~= tag_rule_state and last ~= at then range = { false, false, "rule", n, n, parent, blob } -- attr id start stop list nofranges = nofranges + 1 ranges[nofranges] = range last = at last = tag_rule_state mblob = false elseif range then range[5] = n -- stop end -- end else -- print("rule needs tagging",w,h,d,nodes.rulecodes[subtype]) end end end end end collectranges(head) if trace_tags then report("") report(ispage and "begin page" or "begin object") report("") for i=1,nofranges do local range = ranges[i] local attr = range[1] local id = range[3] local start = range[4] local stop = range[5] local pdf = "" -- if trace_tags == "pdf" then -- local specification = taglist[attr] -- if attr then -- local tagname = specification.tagname -- local tagnameused = specification.tagname -- local detail = specification.detail -- tagname, tagnameused, detail = checkoverload(tagname,tagname,detail,specification) -- local p = properties[tagnameused] -- if p then -- pdf = p.pdf -- end -- if pdf then -- pdf = " (" .. pdf .. ")" -- else -- pdf = "" -- end -- end -- end local tags = taglist[attr] if tags then -- not ok ... only first lines local s = concattags(tags) if id == "reference" then report("R %5i %s%s",attr,s,pdf) elseif id == "link" then report("L %5i %s%s",attr,s,pdf) elseif id == "break" then report("B %5i %s%s",attr,s,pdf) elseif id == "navigationpage" then report("P %5i %s : %i%s",attr,s,pagenum or 0,pdf) else report("T %5i %s : %s%s",attr,s,listtoutf(start,false,true,stop),pdf) end else report("-------") end end report("") report(ispage and "end page" or "end object") report("") end local top = nil local noftop = 0 local blobdone = { } local function inject(start,stop,list,literal,left,right) -- can move out local prev = getprev(start) if prev then setlink(prev,literal) end if left then setlink(literal,left,start) else setlink(literal,start) end if list and not prev then setlist(list,literal) end local finish = copy_node(EMCliteral) if stop then -- use insert instead: local next = getnext(stop) if next then setlink(finish,next) end if right then setlink(stop,right,finish) else setlink(stop,finish) end else local next = getnext(literal) if next then setlink(finish,next) end if right then setlink(literal,right,finish) else setlink(literal,finish) end end end local language = texgetcount("mainlanguagenumber") for i=1,nofranges do local range = ranges[i] local mblob = false local attr = range[1] local id = range[3] local start = range[4] local stop = range[5] local list = range[6] if attr == 0 then -- should be false then local literal = setstate(makeignore(false,range)) inject(start,stop,list,literal) elseif attr then local blob = range[7] local specification = taglist[attr] local currentlist = specification.taglist local noftags = #currentlist local common = 0 local literal = nil local ignore = false if top then for i=1,noftags >= noftop and noftop or noftags do if top[i] == currentlist[i] then common = i else break end end end local prev = common > 0 and elements[currentlist[common]] or root if blob and not detailedmath then for j=common+1,noftags do local tag = currentlist[j] local prv = elements[tag] or makeelement(tag,prev) if prv == false then -- ignore this one prev = false ignore = true break elseif prv == true then -- skip this one else prev = prv end -- this is an ugly hack but ok for now -- elements property if find(tag,"^math>") then break end end else for j=common+1,noftags do local tag = currentlist[j] local etg = elements[tag] local prv = etg or makeelement(tag,prev) if prv == false then -- ignore this one prev = false ignore = true break elseif prv == true then -- skip this one else prev = prv end end end if prev then -- We run backwards. Integrating in the above loops also takes time -- and adds tests too so we have to suffer this performance hit. local pmode = false local cmode = prev.mode for i=noftags-1,1,-1 do local e = elements[currentlist[i]] local m = e.mode if m and m ~= "mixed" then pmode = m break end end -- print("MODE",prev.tag,pmode,cmode) if getendpoint then local bad = getendpoint(prev,attr,range[2],pmode) if bad then prev = makeelement(bad,prev) end end -- we will do this more explicit if id == "glyph" then -- It makes little sense to check if we change a language as that can happen -- mixed in a running text as well as in nested elements. One should just use -- proper environments. So, we don't even bother to warn here for the few -- cases where we can detect it. We delegate that to the \TEX\ end. local lan = getlanguage(start) if lan and lan > 0 and lan ~= language then -- if lan == getlanguage(stop) then local dict = prev.dict if dict then dict.Lang = pdflanguage(lan) end -- else -- unreliable -- report("confusing language %a range: %s",languagenumbers[lan],listtoutf(start,false,true,stop)) -- end end end literal = setstate(makecontent(start,prev,id,specification,range)) elseif ignore then literal = setstate(makeignore(specification,range)) else -- maybe ignore too end -- make trace info artifact if not literal then -- skip elseif not trace_visualized then -- no need for checking inject(start,stop,list,literal) else -- a lot of checking local left, right if trace_attribute then local name = specification.tagname if name then left, right = tagtracers[name](name,specification) end elseif trace_info or trace_math then local name = specification.tagname if name then left, right = tagtracers[name](name,specification,trace_math and blob or nil) end end if not left and trace_suspects then local name = specification.tagname if name == "mrow" then -- todo collectedsuspects[#collectedsuspects+1] = formatters["%i:%s"](pagenum,name) left, right = tagtracers.suspect(name) end end if handlelinks and not left and trace_internals then if id == "link" then left, right = tagtracers.internallink(attr or 0) elseif id == "reference" then left, right = tagtracers.internalreference(attr or 0) end end if not left and trace_paragraphs then local p = range[2] if p then left, right = tagtracers.paragraph(p) end end if not left and trace_objects then if currentlist then local e = elements[currentlist[#currentlist]] if e then left, right = tagtracers.objects(e.dnum) end end end inject(start,stop,list,literal,left,right) end top = currentlist noftop = noftags else local literal = setstate(makeignore(specification,range)) inject(start,stop,list,literal) end end finishpage() return head end -- variant: more structure but funny collapsing in viewer, only in lua -- file as commented anyway -- this belongs elsewhere (export is not pdf related) local permitted = true local enabled = false local shipout = true directives.register("structures.tags.shipout",function(v) shipout = v end) function codeinjections.settaggingsupport(option) if option == false then if enabled then disableaction("shipouts","structures.tags.handler") disableaction("math","noads.handlers.tags") enabled = false end if permitted then if trace_tags then report("blocking structure tags") end permitted = false end isrolemapped = false else isrolemapped = true end end do local where = structurestags.where local modes = structurestags.modes local profiles = { } local comment = nil local options = { level = 0 } local compact = false -- can be an option function structurestags.getoption(name) return options[name] end local function register(preset) local filename = "lpdf-tag-imp-" .. preset .. ".lmt" local fullname = resolvers.findfile(filename) or "" if fullname == "" then report("%s tag preset %a (file: %s)","unknown",preset,filename) else report("%s tag preset %a (file: %s)","using",preset,filename) local data = table.load(fullname) if data then local includes = data.includes local mapping = data.mapping local endpoints = data.endpoints local remapping = data.remapping includes = type(includes) == "table" and #includes > 0 and includes or nil mapping = type(mapping) == "table" and next(mapping) and mapping or nil endpoints = type(endpoints) == "table" and next(endpoints) and endpoints or nil if mapping or includes or remapping or endpoints then local patterns = { } if remapping then for i=1,#remapping do local r = remapping[i] local e = r.element if e then local d = r.detail local p = r.parent if d then e = e .. ":" .. d elseif p then e = p .. "/" .. e end patterns[#patterns+1] = e end end table.sort(patterns) end profiles[#profiles+1] = { preset = preset, name = data.name, version = data.version, size = file.size(fullname), validated = data.validated, includes = includes, mapping = sortedkeys(mapping), remapping = patterns, endpoints = sortedkeys(endpoints), } end return includes, mapping, endpoints, remapping end end end local function namespacename(detail,parent) if parent or detail then usernamecount = usernamecount + 1 return formatters["user-%04X"](usernamecount) else return "user" end end local function loadtags(preset,n,c,e) local includes, mapping, endpoints, remapping = register(preset) if includes then for i=1,#includes do n, c, e = loadtags(includes[i],n,c,e) end end if endpoints then for k, v in next, endpoints do rawset(endpoints,k,v) e = e + 1 end end -- old method if mapping then if not remapping then remapping = { } end for k, v in sortedhash(mapping) do local p = properties[k] remapping[#remapping+1] = { element = k, pdf = v.pdf or (p and p.pdf ) or pdffallback, namespace = v.namespace or (p and p.namespace) or "user", -- nature = v.nature or (p and p.nature ) or "mixed", } end mapping = nil end -- new method if remapping then for i=1,#remapping do ::continue:: local m = remapping[i] local element = m.element if element then local detail = m.detail local parent = m.parent local pdf = m.pdf if unsupported[pdf] then report("unsupported tag %a remapped to %a",pdf,pdffallback) pdf = pdffallback end if compact then for i=1,usernamecount do local ns = namespacename(detail,parent) local usernamespace = usernamespaces[ns] if not usernamespace[element] then usernamespace[element] = pdf if detail then local d = detailmapping[element] if d then d[detail] = ns else detailmapping[element] = { [detail] = ns } end elseif parent then local p = parentmapping[element] if p then p[parent] = ns else parentmapping[element] = { [parent] = ns } end else directmapping[element] = ns warnedmapping[element] = m.warning end goto continue end end end local ns = namespacename(detail,parent) local u = usernamespaces[ns] if u then u[element] = pdf else usernamespaces[ns] = { [element] = pdf } end if detail then local d = detailmapping[element] if d then d[detail] = ns else detailmapping[element] = { [detail] = ns } end elseif parent then local p = parentmapping[element] if p then p[parent] = ns else parentmapping[element] = { [parent] = ns } end else directmapping[element] = ns warnedmapping[element] = m.warning end end end c = c + #remapping -- for now end return n, c, e end local function settaggingpresets(specification) local preset = specification.preset if type(preset) == "string" then local list = settings_to_array(preset) -- just fetch a list local level = tonumber(specification.level) if level then options.level = level end if list then local n = 0 local c = 0 local e = 0 for i=1,#list do n, c, e = loadtags(list[i],n,c,e) end if n > 0 or c > 0 or e > 0 then report("%i pdf tags overloaded, %i crappy tags added, %i endpoints set, cross your fingers",n,c,e) end end end end local disclaimer = [[The structure mapping is an attempt to map the internal ConTeXt structure to PDF tags best as possible given the constraints. It can result in a degradation of the original document structure. The authors take no responsonility for that.]] local function settaggingcomment(str) comment = type(str) == "string" and str or nil end local function settaggingoption(str) local o = utilities.parsers.settings_to_hash(str) if o.interaction then handlelinks = true end end function lpdf.gettaggingstatus() if isrolemapped then if enabled then local disclaimer = pdfunicode(disclaimer) local comment = comment and pdfunicode(comment) if #profiles > 0 then return { origin = "additional basic or user maping", disclaimer = disclaimer, comment = comment, profiles = profiles, } else return { origin = "only build in default mapping", disclaimer = pdfunicode(disclaimer), comment = comment, } end end end end -- this should be a proper codeinjections: interfaces.implement { name = "settaggingpresets", actions = settaggingpresets, arguments = { { { "preset" }, { "level" }, } } } interfaces.implement { name = "settaggingcomment", actions = settaggingcomment, arguments = "string", } interfaces.implement { name = "settaggingoption", actions = settaggingoption, arguments = "string", } end function codeinjections.enabletags() if permitted and not enabled then structures.tags.handler = nodeinjections.addtags if shipout then enableaction("shipouts","structures.tags.handler") end -- -- already done when export is set: -- enableaction("math","noads.handlers.tags") -- if not embeddedtags then embedsupportedtags() end -- if trace_tags then report("enabling structure tags") end -- enabled = true version = lpdf.majorversion() -- detailedmath = detailedmath or (version < 2) -- always go for it -- bah updaters.apply("structures.tagging",version) structures.references.forceinnermode() end end function codeinjections.discardpages(state) if state == true then state = false else state = true end lpdf.setpagestate(state) end -- We now delegate as w need to push and pop anyway: do local stack = { } local level = 0 local count = 0 -- local a_taggedpar = attributes.private("taggedpar") local unsetvalue = attributes.unsetvalue local report = logs.reporter("structure","paragraphs") local trace = false trackers.register("structure.paragraphs", function(v) trace = v end) local texsetattribute = tex.setattribute local texgetattribute = tex.getattribute -- interfaces.implement { -- name = "strc_tags_parcounter_check", -- -- public = true, -- protected = true, -- noaligned = true, -- actions = function() -- if level == 0 then -- count = 1 -- level = 1 -- stack[level] = count -- texsetattribute(a_taggedpar,count,"global") -- if trace then -- report("%wforcing, new %s",level,count) -- end -- end -- end -- } interfaces.implement { name = "strc_tags_parcounter_push", -- public = true, protected = true, -- noaligned = true, actions = function() local old = texgetattribute(a_taggedpar) local new = count + 1 level = level + 1 stack[level] = old texsetattribute(a_taggedpar,new,"global") if trace then report("%wpushing, old %s, new %s",level, old == unsetvalue and "*" or old, new == unsetvalue and "*" or new ) end count = new end } interfaces.implement { name = "strc_tags_parcounter_pop", -- public = true, protected = true, -- noaligned = true, actions = function() local old = texgetattribute(a_taggedpar) local new = stack[level] or 0 if trace then report("%wpopping, old %s, new %s",level, old == unsetvalue and "*" or old, new == unsetvalue and "*" or new ) end if level > 0 then level = level - 1 else -- error end texsetattribute(a_taggedpar,new,"global") end } end