Some languages have the flexibility to use upper or lower case
characters interchangeably in integer and float literal definitions.
I'd like to be able to enforce a consistent case style in one of my
projects, so I added this clang-format style option to control it.
With this .clang-format configuration:
```yaml
NumericLiteralCaseStyle:
  UpperCasePrefix: Never
  UpperCaseHexDigit: Always
  UpperCaseSuffix: Never
```
This line of code:
```C
unsigned long long x = 0XdEaDbEeFUll;
```
gets reformatted into this line of code:
```C
unsigned long long x = 0xDEADBEEFull;
```
-----
I'm new to this project, so please let me know if I missed anything in
the process. I modeled this PR after
[IntegerLiteralSeparatorFixer](https://reviews.llvm.org/D140543).
178 lines
5.9 KiB
C++
178 lines
5.9 KiB
C++
//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
/// This file implements NumericLiteralCaseFixer that standardizes character
|
|
/// case within numeric literals.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "NumericLiteralCaseFixer.h"
|
|
#include "NumericLiteralInfo.h"
|
|
|
|
#include "llvm/ADT/StringExtras.h"
|
|
|
|
#include <algorithm>
|
|
|
|
namespace clang {
|
|
namespace format {
|
|
|
|
static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
|
|
// Check if language is supported.
|
|
switch (Style.Language) {
|
|
case FormatStyle::LK_C:
|
|
case FormatStyle::LK_Cpp:
|
|
case FormatStyle::LK_ObjC:
|
|
case FormatStyle::LK_CSharp:
|
|
case FormatStyle::LK_Java:
|
|
case FormatStyle::LK_JavaScript:
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
// Check if style options are set.
|
|
const auto &Option = Style.NumericLiteralCase;
|
|
const auto Leave = FormatStyle::NLCS_Leave;
|
|
return Option.Prefix != Leave || Option.HexDigit != Leave ||
|
|
Option.ExponentLetter != Leave || Option.Suffix != Leave;
|
|
}
|
|
|
|
static std::string
|
|
transformComponent(StringRef Component,
|
|
FormatStyle::NumericLiteralComponentStyle ConfigValue) {
|
|
switch (ConfigValue) {
|
|
case FormatStyle::NLCS_Upper:
|
|
return Component.upper();
|
|
case FormatStyle::NLCS_Lower:
|
|
return Component.lower();
|
|
default:
|
|
// Covers FormatStyle::NLCS_Leave.
|
|
return Component.str();
|
|
}
|
|
}
|
|
|
|
/// Test if Suffix matches a C++ literal reserved by the library.
|
|
/// Matches against all suffixes reserved in the C++23 standard.
|
|
static bool matchesReservedSuffix(StringRef Suffix) {
|
|
static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
|
|
"d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y",
|
|
};
|
|
|
|
// This can be static_assert when we have access to constexpr is_sorted in
|
|
// C++ 20.
|
|
assert(llvm::is_sorted(SortedReservedSuffixes) &&
|
|
"Must be sorted as precondition for lower_bound().");
|
|
|
|
auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix);
|
|
if (entry == SortedReservedSuffixes.cend())
|
|
return false;
|
|
return *entry == Suffix;
|
|
}
|
|
|
|
/// Rebuild \p NumericLiteral with the case of each component adjusted
/// according to Style.NumericLiteralCase.
///
/// The literal is cut into up to four consecutive pieces, using the
/// positions reported by NumericLiteralInfo: the base prefix (everything up
/// to and including the base letter), the digits, the exponent (from the
/// exponent letter up to the suffix) and the suffix. Each piece is converted
/// with its own option and the pieces are concatenated in order.
///
/// \returns The reformatted literal text.
static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
  // The digit separator is ' for C++ styles and _ otherwise.
  const NumericLiteralInfo Info(NumericLiteral, Style.isCpp() ? '\'' : '_');
  const auto &CaseOptions = Style.NumericLiteralCase;

  const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos;
  const bool HasExponent = Info.ExponentLetterPos != StringRef::npos;
  const bool HasSuffix = Info.SuffixPos != StringRef::npos;

  // Resolve the component boundaries once. A missing component collapses
  // onto the start of whatever follows it.
  const size_t DigitsBegin = HasBaseLetter ? 1 + Info.BaseLetterPos : 0;
  const size_t SuffixBegin =
      HasSuffix ? Info.SuffixPos : NumericLiteral.size();
  const size_t DigitsEnd = HasExponent ? Info.ExponentLetterPos : SuffixBegin;

  std::string Formatted;

  if (HasBaseLetter) {
    Formatted += transformComponent(NumericLiteral.take_front(DigitsBegin),
                                    CaseOptions.Prefix);
  }

  // The digit run is always treated as HexDigit: binary, octal and decimal
  // digits have no case, so converting them changes nothing.
  Formatted += transformComponent(NumericLiteral.slice(DigitsBegin, DigitsEnd),
                                  CaseOptions.HexDigit);

  if (HasExponent) {
    Formatted += transformComponent(
        NumericLiteral.slice(Info.ExponentLetterPos, SuffixBegin),
        CaseOptions.ExponentLetter);
  }

  if (HasSuffix) {
    const StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
    // In C++, it is idiomatic, but NOT standardized to define user-defined
    // literals with a leading '_'. Such suffixes, and the suffixes reserved
    // by the standard library, are copied through verbatim.
    // NOTE(review): this exemption is applied for every language, not only
    // C++ (e.g. a "d" suffix in another language is also left untouched) --
    // confirm that this is intended.
    const bool KeepVerbatim =
        matchesReservedSuffix(Suffix) || Suffix.front() == '_';
    Formatted += KeepVerbatim
                     ? Suffix.str()
                     : transformComponent(Suffix, CaseOptions.Suffix);
  }

  return Formatted;
}
|
|
|
|
std::pair<tooling::Replacements, unsigned>
|
|
NumericLiteralCaseFixer::process(const Environment &Env,
|
|
const FormatStyle &Style) {
|
|
if (!isNumericLiteralCaseFixerNeeded(Style))
|
|
return {};
|
|
|
|
const auto &SourceMgr = Env.getSourceManager();
|
|
AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
|
|
|
|
const auto ID = Env.getFileID();
|
|
const auto LangOpts = getFormattingLangOpts(Style);
|
|
Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
|
|
Lex.SetCommentRetentionState(true);
|
|
|
|
Token Tok;
|
|
tooling::Replacements Result;
|
|
|
|
for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
|
|
// Skip tokens that are too small to contain a formattable literal.
|
|
// Size=2 is the smallest possible literal that could contain formattable
|
|
// components, for example "1u".
|
|
auto Length = Tok.getLength();
|
|
if (Length < 2)
|
|
continue;
|
|
|
|
// Service clang-format off/on comments.
|
|
auto Location = Tok.getLocation();
|
|
auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
|
|
if (Tok.is(tok::comment)) {
|
|
if (isClangFormatOff(Text))
|
|
Skip = true;
|
|
else if (isClangFormatOn(Text))
|
|
Skip = false;
|
|
continue;
|
|
}
|
|
|
|
if (Skip || Tok.isNot(tok::numeric_constant) ||
|
|
!AffectedRangeMgr.affectsCharSourceRange(
|
|
CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
|
|
continue;
|
|
}
|
|
|
|
const auto Formatted = format(Text, Style);
|
|
if (Formatted != Text) {
|
|
cantFail(Result.add(
|
|
tooling::Replacement(SourceMgr, Location, Length, Formatted)));
|
|
}
|
|
}
|
|
|
|
return {Result, 0};
|
|
}
|
|
|
|
} // namespace format
|
|
} // namespace clang
|