# Source code for swh.loader.cvs.cvs2gitdump.cvs2gitdump

#!/usr/local/bin/python

#
# Copyright (c) 2012 YASUOKA Masahiko <yasuoka@yasuoka.net>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# Usage
#
#   First import:
#   % git init --bare /git/openbsd.git
#   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
#       > openbsd.dump
#   % git --git-dir /git/openbsd.git fast-import < openbsd.dump
#
#   Periodic import:
#   % sudo cvsync
#   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
#       /git/openbsd.git > openbsd2.dump
#   % git --git-dir /git/openbsd.git fast-import < openbsd2.dump
#

from collections import defaultdict
import copy
import getopt
import os
import re
import subprocess
import sys
import time
from typing import Dict, List, Optional, Tuple, TypeVar

import swh.loader.cvs.rcsparse as rcsparse

# Default time window, in seconds, used when comparing changeset keys (see
# ChangeSetKey._cmp); main() lets the -z command-line option override it.
CHANGESET_FUZZ_SEC = 300



[docs]
def usage():
    """Write the command-line synopsis to standard error."""
    synopsis = '\n'.join((
        'usage: cvs2gitdump [-ah] [-z fuzz] [-e email_domain] '
        '[-E log_encodings]',
        '\t[-k rcs_keywords] [-b branch] [-m module] [-l last_revision]',
        '\tcvsroot [git_dir]',
    ))
    sys.stderr.write(synopsis + '\n')




[docs]
def main() -> None:
    """Convert a CVS repository into a ``git fast-import`` stream on stdout.

    Parses the command line (see usage()), walks the CVS tree with CvsConv
    and writes one fast-import ``commit`` per changeset, followed by a
    ``reset refs/tags/...`` for every tag attached to that changeset.

    When a second positional argument (an existing git directory) is given
    the run is incremental: the tip of the target branch (or of the revision
    given with ``-l``) is looked up with ``git log`` and every changeset up
    to and including the one matching that commit's author and time is
    skipped.

    Raises:
        Exception: in incremental mode, when no changeset matches the last
            imported commit.
    """
    email_domain = None
    do_incremental = False
    git_tip = None
    git_branch = 'master'
    dump_all = False
    log_encoding = 'utf-8,iso-8859-1'
    rcs = RcsKeywords()
    modules = []
    last_revision = None
    fuzzsec = CHANGESET_FUZZ_SEC

    try:
        # NOTE: '-t' is accepted by the option string but has no handler.
        opts, args = getopt.getopt(sys.argv[1:], 'ab:hm:z:e:E:k:t:l:')
        for opt, v in opts:
            if opt == '-z':
                fuzzsec = int(v)
            elif opt == '-e':
                email_domain = v
            elif opt == '-a':
                dump_all = True
            elif opt == '-b':
                git_branch = v
            elif opt == '-E':
                log_encoding = v
            elif opt == '-k':
                rcs.add_id_keyword(v)
            elif opt == '-m':
                if v == '.git':
                    print('Cannot handle the path named \'.git\'',
                          file=sys.stderr)
                    sys.exit(1)
                modules.append(v)
            elif opt == '-l':
                last_revision = v
            elif opt == '-h':
                usage()
                sys.exit(1)
    except getopt.GetoptError as msg:
        print(msg, file=sys.stderr)
        usage()
        sys.exit(1)

    if len(args) == 0 or len(args) > 2 or not args[0]:
        usage()
        sys.exit(1)

    log_encodings = log_encoding.split(',')

    # Drop trailing slashes, but keep a bare "/" intact instead of indexing
    # into an empty string.
    cvsroot = args[0].rstrip('/') or '/'

    if len(args) == 2:
        do_incremental = True
        outs = _git_last_commit(args[1], git_branch)
        git_tip = outs[2].strip()

        if last_revision is not None:
            outs = _git_last_commit(args[1], last_revision)
        last_author = outs[0].strip()
        last_ctime = float(outs[1].split()[0])

        # strip off the domain part from the last author since cvs doesn't have
        # the domain part.
        if email_domain is not None and \
                last_author.lower().endswith(('@' + email_domain).lower()):
            last_author = last_author[:-1 * (1 + len(email_domain))]

    cvs = CvsConv(cvsroot, rcs, not do_incremental, fuzzsec)
    print('** walk cvs tree', file=sys.stderr)
    if not modules:
        cvs.walk()
    else:
        for module in modules:
            cvs.walk(module)

    changesets = sorted(cvs.changesets)
    nchangesets = len(changesets)
    print('** cvs has %d changeset' % (nchangesets), file=sys.stderr)

    if nchangesets <= 0:
        sys.exit(0)

    if not dump_all:
        # don't use last 10 minutes for safety
        max_time_max = changesets[-1].max_time - 600
    else:
        max_time_max = changesets[-1].max_time

    # Paths collected by CvsConv.walk() are bytes, so the root must be bytes
    # too before it is handed to file_path().
    cvsroot_bytes = os.fsencode(cvs.cvsroot)

    found_last_revision = False
    markseq = cvs.markseq
    extags = set()
    for k in changesets:
        if do_incremental and not found_last_revision:
            # Still inside the already-imported history: remember its tags
            # so they are not reset again, and skip the changeset.
            if k.min_time == last_ctime and k.author == last_author:
                found_last_revision = True
            extags.update(k.tags)
            continue
        if k.max_time > max_time_max:
            break

        marks = {}

        for f in k.revs:
            if not do_incremental:
                # Blobs were already emitted while walking the tree.
                marks[f.markseq] = f
            else:
                markseq = markseq + 1
                # NOTE(review): f.path is a bytes path here while
                # git_dump_file() is annotated as taking str - confirm.
                git_dump_file(f.path, f.rev, rcs, markseq)
                marks[markseq] = f
        log = rcsparse.rcsfile(k.revs[0].path).getlog(k.revs[0].rev)
        for i, e in enumerate(log_encodings):
            try:
                # The last candidate encoding never fails: undecodable bytes
                # are dropped instead.
                how = 'ignore' if i == len(log_encodings) - 1 else 'strict'
                log_str = log.decode(e, how)
                break
            except UnicodeError:
                pass
        log = log_str.encode('utf-8', 'ignore')

        output('commit refs/heads/' + git_branch)
        markseq = markseq + 1
        output('mark :%d' % (markseq))
        email = k.author if email_domain is None \
            else k.author + '@' + email_domain
        output('author %s <%s> %d +0000' % (k.author, email, k.min_time))
        output('committer %s <%s> %d +0000' % (k.author, email, k.min_time))

        output('data', len(log))
        output(log, end='')
        if do_incremental and git_tip is not None:
            # Only the first new commit needs an explicit parent.
            output('from', git_tip)
            git_tip = None

        for m, f in marks.items():
            mode = 0o100755 if os.access(f.path, os.X_OK) else 0o100644
            fn = file_path(cvsroot_bytes, os.fsencode(f.path))
            # File names are raw bytes: build the command as bytes so it is
            # written verbatim rather than as a "b'...'" repr.
            if f.state == 'dead':
                output(b'D ' + fn)
            else:
                output(('M %o :%d ' % (mode, m)).encode('ascii') + fn)
        output('')
        for tag in k.tags:
            if tag in extags:
                continue
            output('reset refs/tags/%s' % (tag))
            output('from :%d' % (markseq))
            output('')

    if do_incremental and not found_last_revision:
        raise Exception('could not find the last revision')

    print('** dumped', file=sys.stderr)


def _git_last_commit(git_dir: str, revision: str) -> List[str]:
    """Return [author email, raw author date, hash] of *revision*'s newest commit.

    Exits the process when git fails or prints fewer than three lines.
    """
    proc = subprocess.run(
        ['git', '--git-dir=' + git_dir, '-c',
         'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
         '--date=raw', '--format=%ae%n%ad%n%H', revision],
        encoding='utf-8', stdout=subprocess.PIPE)
    if proc.returncode != 0:
        print("Couldn't exec git", file=sys.stderr)
        sys.exit(proc.returncode)
    lines = proc.stdout.splitlines()
    if len(lines) < 3:
        print('Unexpected git log output for %s' % revision, file=sys.stderr)
        sys.exit(1)
    return lines



#
# Always encode str objects as UTF-8, since that is the encoding git
# fast-import expects.  Bytes objects are written without conversion, because
# file bodies may be in arbitrary encodings.
#

[docs]
def output(*args, end='\n') -> None:
    """Write *args* to standard output as raw bytes, followed by *end*.

    Arguments are joined with single spaces. bytes-like arguments are
    written unchanged; every other argument is converted with str() and
    encoded as UTF-8. With no arguments only *end* is written.

    Args:
        *args: values to write.
        end: text appended after the arguments; pass '' to suppress it.
    """
    stream = sys.stdout.buffer
    if args:
        # Keep bytes-like values verbatim even when mixed with text, so that
        # e.g. output('D', b'path') does not emit a "b'path'" repr.
        stream.write(b' '.join(
            bytes(arg) if isinstance(arg, (bytes, bytearray, memoryview))
            else str(arg).encode('utf-8')
            for arg in args))
    if end:
        stream.write(end.encode('utf-8'))




[docs]
class FileRevision:
    """One revision of one RCS file, as recorded in a changeset."""

    def __init__(self, path: bytes, rev: str, state: str, markseq: int) -> None:
        # path/rev locate the revision; state is the RCS state string
        # (compared against 'dead' elsewhere); markseq is the blob mark.
        self.path, self.rev = path, rev
        self.state, self.markseq = state, markseq




[docs]
class ChangeSetKey:
    """Key describing one changeset, plus the file revisions it contains.

    Two keys compare equal when they carry the same commitid, or when they
    lie within ``fuzzsec`` seconds of each other and agree on log hash,
    branch and author (see _cmp()).
    """

    def __init__(
            self,
            branch: str,
            author,
            timestamp: int,
            log: bytes,
            commitid: Optional[str],
            fuzzsec: int
    ) -> None:
        self.branch, self.author = branch, author
        # A fresh key covers a single instant; merge() widens the range.
        self.min_time = self.max_time = timestamp
        self.commitid = commitid
        self.fuzzsec = fuzzsec
        self.revs: List[FileRevision] = []
        self.tags: List[str] = []
        # Polynomial (base 31) hash over the bytes of the log message.
        digest = 0
        for byte in log:
            digest = digest * 31 + byte
        self.log_hash = digest

    def __eq__(self, other) -> bool:
        return self._cmp(other) == 0

    def __ne__(self, other) -> bool:
        return self._cmp(other) != 0

    def __lt__(self, other) -> bool:
        return self._cmp(other) < 0

    def __le__(self, other) -> bool:
        return self._cmp(other) <= 0

    def __gt__(self, other) -> bool:
        return self._cmp(other) > 0

    def __ge__(self, other) -> bool:
        return self._cmp(other) >= 0

    def _cmp(self, anon) -> int:
        """Three-way compare: negative, zero or positive like a C comparator."""
        if not isinstance(anon, ChangeSetKey):
            raise TypeError()

        # Same non-null commitid on both sides: same changeset.
        by_commitid = _cmp2(self.commitid, anon.commitid)
        if by_commitid == 0 and self.commitid is not None:
            return 0

        by_time = self.min_time - anon.min_time
        gap_after = anon.min_time - self.max_time
        gap_before = self.min_time - anon.max_time
        # Too far apart in time: order purely by start time.
        if max(gap_after, gap_before) > self.fuzzsec:
            return by_time

        # Only one side has a commitid: different commits.
        if by_commitid != 0:
            return by_time or by_commitid

        # Fall back to log, then branch, then author.
        for mine, theirs in (
                (self.log_hash, anon.log_hash),
                (self.branch, anon.branch),
                (self.author, anon.author)):
            by_content = _cmp2(mine, theirs)
            if by_content != 0:
                return by_time if by_time != 0 else by_content
        return 0

    def merge(self, anot: "ChangeSetKey") -> None:
        """Absorb *anot*: widen the time range and take over its revisions."""
        self.min_time = min(self.min_time, anot.min_time)
        self.max_time = max(self.max_time, anot.max_time)
        self.revs += anot.revs

    def __hash__(self) -> int:
        return hash('/'.join((self.branch, self.author))) * 31 + self.log_hash

    def put_file(self, path: bytes, rev: str, state: str, markseq: int):
        """Record one file revision as part of this changeset."""
        self.revs.append(FileRevision(path, rev, state, markseq))




TCmp = TypeVar("TCmp", int, str)
def _cmp2(a: Optional[TCmp], b: Optional[TCmp]) -> int:
    _a = a is not None
    _b = b is not None
    return (a > b) - (a < b) if _a and _b else (_a > _b) - (_a < _b)  # type: ignore



[docs]
class CvsConv:
    """Walk a CVS repository and group its file revisions into changesets.

    Attributes:
        changesets: maps every ChangeSetKey to itself so that an equal key
            can be looked up and merged.
        tags: maps a tag name to the changeset it is currently attached to.
        markseq: last fast-import mark number handed out (only advanced when
            ``dumpfile`` is true).
    """

    def __init__(self, cvsroot: str, rcs: "RcsKeywords", dumpfile: bool, fuzzsec: int) -> None:
        self.cvsroot = cvsroot
        self.rcs = rcs
        self.changesets: Dict[ChangeSetKey, ChangeSetKey] = dict()
        self.dumpfile = dumpfile
        self.markseq = 0
        self.tags: Dict[str, ChangeSetKey] = dict()
        self.fuzzsec = fuzzsec

    def walk(self, module: Optional[str] = None) -> None:
        """Parse every ``,v`` file below the CVS root (or below *module*).

        Entries named ``.git`` are reported on stderr and skipped. After
        the walk every known tag is attached to its changeset; calling
        walk() several times does not attach a tag twice.
        """
        p = [self.cvsroot]
        if module is not None:
            p.append(module)
        path = os.path.join(*p)

        for root, dirs, files in os.walk(os.fsencode(path)):
            # Paths are bytes here; decode them for the message so it does
            # not show a "b'...'" repr.
            if b'.git' in dirs:
                print('Ignore %s: cannot handle the path named \'.git\'' % (
                      os.fsdecode(os.path.join(root, b'.git'))),
                      file=sys.stderr)
                dirs.remove(b'.git')
            if b'.git' in files:
                print('Ignore %s: cannot handle the path named \'.git\'' % (
                      os.fsdecode(os.path.join(root, b'.git'))),
                      file=sys.stderr)
                files.remove(b'.git')
            for f in files:
                if not f.endswith(b',v'):
                    continue
                self.parse_file(os.path.join(root, f))

        for t, c in self.tags.items():
            # walk() runs once per module; do not duplicate tags that an
            # earlier call already attached.
            if t not in c.tags:
                c.tags.append(t)

    def parse_file(self, path: str) -> None:
        """Parse one RCS file and file its trunk/vendor revisions into changesets.

        *path* is whatever walk() produced (a bytes path when called from
        there). Revisions on other branches are ignored. When ``dumpfile``
        is set, each kept revision is also emitted as a blob.
        """
        rtags: Dict[str, List[str]] = defaultdict(list)
        rcsfile = rcsparse.rcsfile(path)

        # Maps a branch number to the branch name used in the changeset key.
        branches = {'1': 'HEAD', '1.1.1': 'VENDOR'}

        for k, v_ in list(rcsfile.symbols.items()):
            r = v_.split('.')
            if len(r) == 3:
                branches[v_] = 'VENDOR'
            elif len(r) >= 3 and r[-2] == '0':
                # "magic" branch number x.y.0.z names branch x.y.z
                branches['.'.join(r[:-2] + r[-1:])] = k
            elif len(r) == 2 and branches.get(r[0]) == 'HEAD':
                rtags[v_].append(k)

        # Each value is indexed below as: [0] revision, [1] timestamp,
        # [2] author, [3] state, [6] commitid.
        revs: List[Tuple[str, Tuple[str, int, str, str, List[str], str, str]]] = list(rcsfile.revs.items())
        # sort by revision descending to prioritize 1.1.1.1 over 1.1
        revs.sort(key=lambda a: a[1][0], reverse=True)
        # sort by time
        revs.sort(key=lambda a: a[1][1])
        novendor = False
        have_initial_revision = False
        last_vendor_status = None
        for k, v in revs:
            r = k.split('.')
            if len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1' \
                    and r[3] == '1':
                # 1.1.1.1: initial vendor import
                if have_initial_revision:
                    continue
                if v[3] == 'dead':
                    continue
                last_vendor_status = v[3]
                have_initial_revision = True
            elif len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1':
                # later vendor revisions count only until the trunk moves on
                if novendor:
                    continue
                last_vendor_status = v[3]
            elif len(r) == 2:
                # ensure revision targets head branch
                branches[r[0]] = 'HEAD'
                if r[0] == '1' and r[1] == '1':
                    if have_initial_revision:
                        continue
                    if v[3] == 'dead':
                        continue
                    have_initial_revision = True
                elif r[0] == '1' and r[1] != '1':
                    novendor = True
                if last_vendor_status == 'dead' and v[3] == 'dead':
                    last_vendor_status = None
                    continue
                last_vendor_status = None
            else:
                # trunk only
                continue

            if self.dumpfile:
                self.markseq = self.markseq + 1
                git_dump_file(path, k, self.rcs, self.markseq)

            b = '.'.join(r[:-1])
            try:
                a = ChangeSetKey(
                    branches[b], v[2], v[1], rcsfile.getlog(v[0]), v[6],
                    self.fuzzsec)
            except Exception:
                print('Aborted at %s %s' % (path, v[0]), file=sys.stderr)
                raise

            a.put_file(path, k, v[3], self.markseq)
            # Merging widens the time range, which may make the result equal
            # to yet another stored changeset - repeat until none matches.
            while a in self.changesets:
                c = self.changesets[a]
                del self.changesets[a]
                c.merge(a)
                a = c
            self.changesets[a] = a
            if k in rtags:
                for t in rtags[k]:
                    # A tag follows the latest changeset that carries it.
                    if t not in self.tags or \
                            self.tags[t].max_time < a.max_time:
                        self.tags[t] = a





[docs]
def file_path(r: bytes, p: bytes) -> bytes:
    """Map the RCS file path *p* to its path relative to the CVS root *r*.

    Strips a trailing ``,v``, removes an ``Attic`` directory that directly
    contains the file, and drops the leading root. A path outside *r* is
    returned without the root being stripped. str arguments are accepted
    and encoded with os.fsencode().
    """
    root = os.fsencode(r).rstrip(b'/')
    path = os.fsencode(p)
    if path.endswith(b',v'):
        path = path[:-2]               # drop ",v"
    parts = path.split(b'/')
    # A bare file name has no parent component to inspect.
    if len(parts) > 1 and parts[-2] == b'Attic':
        path = b'/'.join(parts[:-2] + parts[-1:])
    if path == root:
        return b''
    # Match on a whole path component so "/cvs/src" is not taken as a
    # prefix of "/cvs/src2/...".
    if path.startswith(root + b'/'):
        path = path[len(root) + 1:]
    return path




[docs]
def git_dump_file(path: str, k, rcs, markseq) -> None:
    """Emit revision *k* of the RCS file *path* as a fast-import blob.

    The content is checked out through ``rcs.expand_keyword`` and written
    with mark number *markseq*. A RuntimeError raised while expanding is
    reported on stderr and terminates the process with status 1.
    """
    try:
        blob = rcs.expand_keyword(path, rcsparse.rcsfile(path), k, [])
    except RuntimeError as err:
        for words in (
                ('Unexpected runtime error on parsing', path, k, ':', err),
                ('unlimit the resource limit may fix this problem.',)):
            print(*words, file=sys.stderr)
        sys.exit(1)

    output('blob')
    output('mark :%d' % markseq)
    size = len(blob)
    output('data', size)
    output(blob)




[docs]
class RcsKeywords:
    """RCS keyword expansion ($Id$, $Log$, ...) for checked-out file content.

    Each instance owns its own keyword table, so keywords registered with
    add_id_keyword() on one instance do not affect other instances or the
    class-level defaults.
    """

    # Bit flags naming the pieces of information a keyword expands to.
    RCS_KW_AUTHOR   = (1 << 0)
    RCS_KW_DATE     = (1 << 1)
    RCS_KW_LOG      = (1 << 2)
    RCS_KW_NAME     = (1 << 3)
    RCS_KW_RCSFILE  = (1 << 4)
    RCS_KW_REVISION = (1 << 5)
    RCS_KW_SOURCE   = (1 << 6)
    RCS_KW_STATE    = (1 << 7)
    RCS_KW_FULLPATH = (1 << 8)
    RCS_KW_MDOCDATE = (1 << 9)
    RCS_KW_LOCKER   = (1 << 10)

    RCS_KW_ID       = (RCS_KW_RCSFILE | RCS_KW_REVISION | RCS_KW_DATE |
                       RCS_KW_AUTHOR | RCS_KW_STATE)
    RCS_KW_HEADER   = (RCS_KW_ID | RCS_KW_FULLPATH)

    # Default keyword table; instances copy it in __init__.
    rcs_expkw = {
        b"Author":   RCS_KW_AUTHOR,
        b"Date":     RCS_KW_DATE,
        b"Header":   RCS_KW_HEADER,
        b"Id":       RCS_KW_ID,
        b"Log":      RCS_KW_LOG,
        b"Name":     RCS_KW_NAME,
        b"RCSfile":  RCS_KW_RCSFILE,
        b"Revision": RCS_KW_REVISION,
        b"Source":   RCS_KW_SOURCE,
        b"State":    RCS_KW_STATE,
        b"Mdocdate": RCS_KW_MDOCDATE,
        b"Locker":   RCS_KW_LOCKER
    }

    RCS_KWEXP_NONE    = (1 << 0)
    RCS_KWEXP_NAME    = (1 << 1)    # include keyword name
    RCS_KWEXP_VAL     = (1 << 2)    # include keyword value
    RCS_KWEXP_LKR     = (1 << 3)    # include name of locker
    RCS_KWEXP_OLD     = (1 << 4)    # generate old keyword string
    RCS_KWEXP_ERR     = (1 << 5)    # mode has an error
    RCS_KWEXP_DEFAULT = (RCS_KWEXP_NAME | RCS_KWEXP_VAL)
    RCS_KWEXP_KVL     = (RCS_KWEXP_NAME | RCS_KWEXP_VAL | RCS_KWEXP_LKR)

    def __init__(self) -> None:
        # Copy the class-level table so add_id_keyword() only changes this
        # instance instead of mutating state shared by all instances.
        self.rcs_expkw = dict(self.rcs_expkw)
        self.rerecomple()

    def rerecomple(self) -> None:
        """(Re)build the regex that finds the next keyword on a line."""
        # Escape the names: custom keywords come from the command line and
        # must be matched literally.
        pat = b'|'.join(re.escape(kw) for kw in self.rcs_expkw)
        self.re_kw = re.compile(b".*?\\$(" + pat + b")[\\$:]")

    def add_id_keyword(self, keyword) -> None:
        """Register *keyword* (ASCII str) as an alias expanding like ``Id``."""
        self.rcs_expkw[keyword.encode('ascii')] = self.RCS_KW_ID
        self.rerecomple()

    def kflag_get(self, flags: Optional[str]) -> int:
        """Translate an RCS ``-k`` substitution mode string into RCS_KWEXP_* bits.

        ``None`` selects RCS_KWEXP_DEFAULT. Unknown characters, and 'o' or
        'b' combined with anything else, set RCS_KWEXP_ERR.
        """
        if flags is None:
            return self.RCS_KWEXP_DEFAULT
        fl = 0
        for fc in flags:
            if fc == 'k':
                fl |= self.RCS_KWEXP_NAME
            elif fc == 'v':
                fl |= self.RCS_KWEXP_VAL
            elif fc == 'l':
                fl |= self.RCS_KWEXP_LKR
            elif fc == 'o':
                if len(flags) != 1:
                    fl |= self.RCS_KWEXP_ERR
                fl |= self.RCS_KWEXP_OLD
            elif fc == 'b':
                if len(flags) != 1:
                    fl |= self.RCS_KWEXP_ERR
                fl |= self.RCS_KWEXP_NONE
            else:
                fl |= self.RCS_KWEXP_ERR
        return fl

    def expand_keyword(self, filename: str, rcs: "rcsparse.rcsfile", r: str, excluded_keywords: List[str], filename_encoding="utf-8") -> bytes:
        """
        Check out a file with keywords expanded. Expansion rules are specific
        to each keyword, and some cases specific to undocumented behaviour of CVS.
        Our implementation does not expand some keywords (see comments in the code).
        For a list of keywords and their expansion rules, see:
        https://www.gnu.org/software/trans-coord/manual/cvs/cvs.html#Keyword-list
        (also available in 'info cvs' if cvs is installed)

        Args:
            filename: name used for the RCSfile/Source/Header/Id/Log values.
            rcs: parsed RCS file; its ``revs``, ``expand``, ``checkout`` and
                ``getlog`` members are used.
            r: revision to check out (a key of ``rcs.revs``).
            excluded_keywords: keyword names that must not be expanded.
            filename_encoding: encoding applied to the expanded text.

        Returns:
            The file content of revision *r* as bytes.
        """
        rev = rcs.revs[r]

        mode = self.kflag_get(rcs.expand)
        if (mode & (self.RCS_KWEXP_NONE | self.RCS_KWEXP_OLD)) != 0:
            # -kb / -ko: content is used verbatim
            return rcs.checkout(rev[0])

        ret = []
        for line in rcs.checkout(rev[0]).splitlines(keepends=True):
            logbuf = None
            m = self.re_kw.match(line)
            if m is None:
                # No RCS Keywords, use it as it is
                ret.append(line)
                continue

            expkw = 0
            # line0 accumulates the already-expanded head of this line.
            line0 = b''
            while m is not None:
                logbuf = None
                try:
                    dsign = m.end(1) + line[m.end(1):].index(b'$')
                except ValueError:
                    # Unterminated keyword: keep the rest as it is, but do not
                    # lose what was already expanded earlier on this line.
                    ret.append(line0 + line)
                    break
                prefix = line[:m.start(1) - 1]
                next_match_segment = line[dsign:]
                expbuf = ''
                try:
                    kwname = m.group(1).decode('ascii')
                except UnicodeDecodeError:
                    # Not a valid RCS keyword, use it as it is
                    ret.append(line0 + line)
                    break
                if kwname in excluded_keywords:
                    line0 += prefix + m.group(1)
                    m = self.re_kw.match(next_match_segment)
                    if m:
                        line = next_match_segment
                        continue
                    else:
                        ret.append(line0 + line[dsign + 1:])
                        break
                line = line[dsign + 1:]
                if (mode & self.RCS_KWEXP_NAME) != 0:
                    expbuf += '$%s' % kwname
                    if (mode & self.RCS_KWEXP_VAL) != 0:
                        expbuf += ': '
                if (mode & self.RCS_KWEXP_VAL) != 0:
                    expkw = self.rcs_expkw[m.group(1)]
                    if (expkw & self.RCS_KW_RCSFILE) != 0:
                        expbuf += filename \
                            if (expkw & self.RCS_KW_FULLPATH) != 0 \
                            else os.path.basename(filename)
                        expbuf += " "
                    if (expkw & self.RCS_KW_REVISION) != 0:
                        expbuf += rev[0]
                        expbuf += " "
                    if (expkw & self.RCS_KW_DATE) != 0:
                        expbuf += time.strftime(
                            "%Y/%m/%d %H:%M:%S ", time.gmtime(rev[1]))
                    if (expkw & self.RCS_KW_MDOCDATE) != 0:
                        d = time.gmtime(rev[1])
                        expbuf += time.strftime(
                            "%B%e %Y " if (d.tm_mday < 10) else "%B %e %Y ", d)
                    if (expkw & self.RCS_KW_AUTHOR) != 0:
                        expbuf += rev[2]
                        expbuf += " "
                    if (expkw & self.RCS_KW_STATE) != 0:
                        expbuf += rev[3]
                        expbuf += " "
                    if (expkw & self.RCS_KW_LOG) != 0:
                        # Unlike other keywords, the Log keyword expands over multiple lines.
                        # The terminating '$' of the Log keyword appears on the line which
                        # contains the log keyword itself. Then follow all log message lines,
                        # and those lines are followed by content which follows the Log keyword.
                        # For example, the line:
                        #
                        #    foo $Log$content which follows
                        #
                        # will be expanded like this by CVS:
                        #
                        #   foo $Log: delta,v $
                        #   foo Revision 1.2  2021/11/29 14:24:18  stsp
                        #   foo log message line 1
                        #   foo log message line 2
                        #   foocontent which follows
                        #
                        # (Side note: Trailing whitespace is stripped from "foo " when
                        # the content which follows gets written to the output file.)
                        #
                        # If we did not trim the Log keyword's trailing "$" here then
                        # the last line would read instead:
                        #
                        #   foo$content which follows
                        assert(next_match_segment[0] == ord('$'))
                        next_match_segment = next_match_segment[1:]
                        expbuf += filename \
                            if (expkw & self.RCS_KW_FULLPATH) != 0 \
                            else os.path.basename(filename)
                        expbuf += " "
                        logbuf = prefix + (
                            'Revision %s  %s  %s\n' % (
                                rev[0], time.strftime(
                                    "%Y/%m/%d %H:%M:%S", time.gmtime(rev[1])),
                                rev[2])).encode('ascii')
                        for lline in rcs.getlog(rev[0]).splitlines(keepends=True):
                            logbuf += prefix + lline
                    if (expkw & self.RCS_KW_SOURCE) != 0:
                        expbuf += filename
                        expbuf += " "
                    if (expkw & (self.RCS_KW_NAME | self.RCS_KW_LOCKER)) != 0:
                        # We do not expand Name and Locker keywords.
                        # The Name keyword is only expanded when a file is checked
                        # out with an explicit tag name; perhaps this will be needed
                        # if the loader learns about CVS tags some day.
                        # The Locker keyword only expands if the file is currently
                        # locked via 'cvs admin -l', which is not part of the
                        # information we want to preserve about source code.
                        expbuf += " "
                if (mode & self.RCS_KWEXP_NAME) != 0:
                    expbuf += '$'
                if logbuf is not None:
                    ret.append(prefix + expbuf.encode(filename_encoding) + b'\n' + logbuf)
                else:
                    line0 += prefix + expbuf[:255].encode(filename_encoding)
                m = self.re_kw.match(next_match_segment)
                if m:
                    line = next_match_segment
                    if (mode & self.RCS_KWEXP_NAME) != 0 and expkw and (expkw & self.RCS_KW_LOG) == 0 and line0[-1] == ord('$'):
                        # There is another keyword on this line that needs expansion.
                        # Avoid a double "$$" in the expanded string. This $ terminates
                        # the previous keyword and marks the beginning of the next one.
                        line0 = line0[:-1]
                elif logbuf is not None:
                    # Trim whitespace from tail of prefix if appending a suffix which
                    # followed the Log keyword on the same line.
                    # Testing suggests that this matches CVS's behaviour.
                    ret.append(line0 + prefix.rstrip() + line)
                else:
                    ret.append(line0 + line)
        return b''.join(ret)




# ----------------------------------------------------------------------
# entry point
# ----------------------------------------------------------------------
if __name__ == '__main__':
    # Run the converter only when executed as a script, not when imported.
    main()