#lang racket ;;; --------------------------------------------------------------------;;; ;;; kameindex v1.1 source code ;;; ;;; Author: Antoine Bossard (Kanagawa University, Japan) ;;; ;;; Licence: MIT licence ;;; ;;; Usage: kameindex indexfile.idx [stylefile.ist] ;;; ;;; Homepage: https://www.sci.kanagawa-u.ac.jp/info/abossard/kameindex/ ;;; ;;; Copyright (c) 2018-2019 - A. Bossard ;;; ;;; --------------------------------------------------------------------;;; ; returns a list of entries, each of the form (key (pages) (ranges) (subentries)), ie. with one new or updated entry (define (insert subitems pagenumber-format type entries) ; subitems: a non-empty list of entry keys, eg. ("hello" "ecole@école" ...) ; pagenumber, format, type: page or range number, number format (eg. "textit"), entry type (0: page, 1: range...) ; entries: a list of entries, each of the form (key (pages) (ranges) (subentries)) ; search for an entry in entries whose key is the current entry key. If not found, entries-right is '() (let-values ([(entries-left entries-right) (splitf-at entries (lambda (e) (not (equal? (car subitems) (first e)))))]) (if (null? entries-right) ; create a new entry (repeat entry creation until no more subitem) (let* ([items (reverse subitems)] [new-entry (foldl (lambda (item acc) ; return a new entry with acc as (unique) subentry (list item '() '() (list acc))) ; initial value: the last entry, with page number and format (if (= type 1) (list (car items) '() (list pagenumber-format) '()) ; range (list (car items) (list pagenumber-format) '() '())) ; page number (incl. see / see also cross-ref.) (cdr items))]) ; all but the first item, which has been processed as initial value for fold (cons new-entry entries)) ; existing entry: add a new page/range number or subentry (let* ([cur-entry (car entries-right)] ; current entry (ie. the one matching (car subitems)) [new-entry ; create a new entry based on the current entry (ie. update page/range number or subentries) (if (null? (cdr subitems)) ; last subitem: recreate entry with a new page/range number (if (= type 1) ; range (list (first cur-entry) ; same key (second cur-entry) ; same page numbers (cons pagenumber-format (third cur-entry)) ; add a new range page number (fourth cur-entry)) ; same subentries ; otherwise, page number (incl. see / see also cross-ref.) (list (first cur-entry) ; same key (cons pagenumber-format (second cur-entry)) ; add a new page number (third cur-entry) ; same range page numbers (fourth cur-entry))) ; same subentries ; not last subitem: recursively update subentries (list (first cur-entry) ; same key as the current entry (second cur-entry) ; same pages as the current entry (third cur-entry) ; same ranges as the current entry (insert (cdr subitems) pagenumber-format type (fourth cur-entry))) ; updated subentries )]) (cons new-entry (append entries-left (cdr entries-right))) ; return the new entries )))) (define (merge entry) ; input: 1 entry, ie. of the form (key (pages) (ranges) (subentries)) ; output: 1 entry, with ranges merged (including ranges of subentries) (let ([ranges (third entry)]) (if (null? ranges) ; recreate entry as is, removing duplicates for pages, except for subentries that are recursively merged (list (first entry) (remove-duplicates (second entry) #:key car) '() (map merge (fourth entry))) ; otherwise (let ([range-pairs ; create ranges as (beg . end) pairs (let aux ([l (reverse ranges)] [res '()]) (if (null? l) res (aux (cddr l) (cons (cons (car l) (cadr l)) res))))]) (let-values ([(pages range-pairs) (let aux ([ranges range-pairs] [res-pages (second entry)] [res-ranges '()]) ; iterate ranges until no change (if (null? ranges) ; all range passes done. Lastly, remove page duplicates (some may have been created while looping) (values (remove-duplicates res-pages #:key car) res-ranges) ; otherwise, continue processing ranges (let* ([cur-range (car ranges)] [beg (caar cur-range)] [end (cadr cur-range)]) (if (= beg end) ; a range with same beg & end is transformed into a page number (aux (append res-ranges (cdr ranges)) (cons (car cur-range) res-pages) '()) ; reset ranges & res ; otherwise (let* (; 1. extend the current range if possible [new-beg (let ([beg-1 (sub1 beg)]) (if (memf (lambda (page) (= (car page) beg-1)) res-pages) beg-1 beg))] [new-end (let ([end+1 (add1 end)]) (if (memf (lambda (page) (= (car page) end+1)) res-pages) end+1 end))] ; 2. remove page numbers that are included in the current range [new-res-pages (filter-not (lambda (page) (let ([page-number (car page)]) (<= new-beg page-number new-end))) res-pages)] ) (if (and (= beg new-beg) (= end new-end) (= (length res-pages) (length new-res-pages))) ; no change happened (ie. pages & ranges) with respect to the current range (aux (cdr ranges) res-pages (cons cur-range res-ranges)) ; some changes happened (ie. pages or ranges): reset ranges & res (aux (append res-ranges (list (cons (cons new-beg (cdar cur-range)) (cons new-end (cddr cur-range)))) ranges) new-res-pages '()) ))))))]) ; merge consecutive & overlapping ranges (in one pass since ranges are sorted) (let* ([sorted-pairs (sort range-pairs < #:key caar)] [range-pairs (if (null? sorted-pairs) '() (reverse (foldl (lambda (r2 acc) (let* ([r2-beg (caar r2)] [r2-end (cadr r2)] [r1 (car acc)] [r1-beg (caar r1)] [r1-end (cadr r1)]) (if (> (- r2-beg r1-end) 1) ; no merge (cons r2 acc) ; else, merge r1 & r2 by creating a new range in place of both (cons (cons (cons r1-beg (cdar r1)) (if (>= r1-end r2-end) (cons r1-end (cddr r1)) (cons r2-end (cddr r2)))) (cdr acc))))) (list (car sorted-pairs)) ; initial accumulator value (cdr sorted-pairs))))]) ; recreate entry with the new ranges (and possibly page numbers) (list (first entry) pages range-pairs (map merge (fourth entry)))) ))))) (define (unescape str) (string-replace (string-replace str #rx"(?string/utf-8 (car l))] ; convert from bytes #"..." to string "..." [label (car e)] ; index entry label, eg. hello|textit [pagenumber (string->number (cadr e))] [pipesplit (regexp-split #rx"(?