# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import functools
import json
from typing import Dict
from pygments.lexers import get_all_lexers, get_lexer_for_filename
from pygments.util import ClassNotFound
from django.contrib.staticfiles.finders import find
@functools.lru_cache()
def _hljs_languages_data():
with open(str(find("json/highlightjs-languages.json")), "r") as hljs_languages_file:
return json.load(hljs_languages_file)
# set of languages ids that can be highlighted by highlight.js library
@functools.lru_cache()
def _hljs_languages():
return set(_hljs_languages_data()["languages"])
# languages aliases defined in highlight.js
@functools.lru_cache()
def _hljs_languages_aliases():
language_aliases = _hljs_languages_data()["languages_aliases"]
language_aliases.pop("robots.txt", None)
return {
**language_aliases,
"ml": "ocaml",
"bsl": "1c",
"ep": "mojolicious",
"lc": "livecode",
"p": "parser3",
"pde": "processing",
"rsc": "routeros",
"s": "armasm",
"sl": "rsl",
"4dm": "4d",
"kaos": "chaos",
"dfy": "dafny",
"ejs": "eta",
"nev": "never",
"m": "octave",
"shader": "hlsl",
"fx": "hlsl",
"prg": "xsharp",
"xs": "xsharp",
"scd": "sclang",
"c3t": "c3",
"cbl": "cobol",
"lkml": "lookml",
"sdm": "sdml",
"pow": "poweron",
"ttcn": "ttcn3",
"ttcnpp": "ttcn3",
}
# dictionary mapping pygment lexers to hljs languages
_pygments_lexer_to_hljs_language: Dict[str, str] = {}
# dictionary mapping mime types to hljs languages
_mime_type_to_hljs_language = {
"text/x-c": "c",
"text/x-c++": "cpp",
"text/x-msdos-batch": "dos",
"text/x-lisp": "lisp",
"text/x-shellscript": "bash",
}
# dictionary mapping filenames to hljs languages
_filename_to_hljs_language = {
"cmakelists.txt": "cmake",
".htaccess": "apache",
"httpd.conf": "apache",
"access.log": "accesslog",
"nginx.log": "accesslog",
"resolv.conf": "dns",
"dockerfile": "docker",
"nginx.conf": "nginx",
"pf.conf": "pf",
"robots.txt": "robots-txt",
}
# function to fill the above dictionaries
def _init_pygments_to_hljs_map():
if len(_pygments_lexer_to_hljs_language) == 0:
hljs_languages = _hljs_languages()
hljs_languages_aliases = _hljs_languages_aliases()
for lexer in get_all_lexers():
lexer_name = lexer[0]
lang_aliases = lexer[1]
lang_mime_types = lexer[3]
lang = None
for lang_alias in lang_aliases:
if lang_alias in hljs_languages:
lang = lang_alias
_pygments_lexer_to_hljs_language[lexer_name] = lang_alias
break
if lang_alias in hljs_languages_aliases:
lang = hljs_languages_aliases[lang_alias]
_pygments_lexer_to_hljs_language[lexer_name] = lang_alias
break
if lang:
for lang_mime_type in lang_mime_types:
if lang_mime_type not in _mime_type_to_hljs_language:
_mime_type_to_hljs_language[lang_mime_type] = lang
[docs]
def get_hljs_language_from_filename(filename):
"""Function that tries to associate a language supported by highlight.js
from a filename.
Args:
filename: input filename
Returns:
highlight.js language id or None if no correspondence has been found
"""
_init_pygments_to_hljs_map()
if filename:
filename_lower = filename.lower()
if filename_lower in _filename_to_hljs_language:
return _filename_to_hljs_language[filename_lower]
if filename_lower in _hljs_languages():
return filename_lower
exts = filename_lower.split(".")
# check if file extension matches an hljs language
# also handle .ext.in cases
for ext in reversed(exts[-2:]):
if ext in _hljs_languages():
return ext
if ext in _hljs_languages_aliases():
return _hljs_languages_aliases()[ext]
# otherwise use Pygments language database
lexer = None
# try to find a Pygment lexer
try:
lexer = get_lexer_for_filename(filename)
except ClassNotFound:
pass
# if there is a correspondence between the lexer and an hljs
# language, return it
if lexer and lexer.name in _pygments_lexer_to_hljs_language:
return _pygments_lexer_to_hljs_language[lexer.name]
# otherwise, try to find a match between the file extensions
# associated to the lexer and the hljs language aliases
if lexer:
exts = [ext.replace("*.", "") for ext in lexer.filenames]
for ext in exts:
if ext in _hljs_languages_aliases():
return _hljs_languages_aliases()[ext]
return None
[docs]
def get_hljs_language_from_mime_type(mime_type):
"""Function that tries to associate a language supported by highlight.js
from a mime type.
Args:
mime_type: input mime type
Returns:
highlight.js language id or None if no correspondence has been found
"""
_init_pygments_to_hljs_map()
if mime_type and mime_type in _mime_type_to_hljs_language:
return _mime_type_to_hljs_language[mime_type]
return None
[docs]
@functools.lru_cache()
def get_supported_languages():
"""
Return the list of programming languages that can be highlighted using the
highlight.js library.
Returns:
List[str]: the list of supported languages
"""
return sorted(list(_hljs_languages()))