2020#undef __STRICT_ANSI__
2121#endif
2222
23+ // #define LOG_EMACS_MARKER
24+
2325#include " path.h"
2426#include " utils.h"
2527
2628#include < algorithm>
29+ #include < cstdio>
2730#include < cstdlib>
31+ #ifdef LOG_EMACS_MARKER
32+ #include < iostream>
33+ #endif
2834#include < sys/stat.h>
2935#include < unordered_set>
3036#include < utility>
@@ -235,7 +241,7 @@ bool Path::isCPP(const std::string &path)
235241bool Path::acceptFile (const std::string &path, const std::set<std::string> &extra)
236242{
237243 bool header = false ;
238- return (identify (path, &header) != Standards::Language::None && !header) || extra.find (getFilenameExtension (path)) != extra.end ();
244+ return (identify (path, false , &header) != Standards::Language::None && !header) || extra.find (getFilenameExtension (path)) != extra.end ();
239245}
240246
241247// cppcheck-suppress unusedFunction
@@ -245,13 +251,99 @@ bool Path::isHeader(const std::string &path)
245251 return startsWith (extension, " .h" );
246252}
247253
248- Standards::Language Path::identify (const std::string &path, bool *header)
254+ static bool hasEmacsCppMarker (const char * path)
255+ {
256+ // TODO: identify is called three times for each file
257+ // Preprocessor::loadFiles() -> createDUI()
258+ // Preprocessor::preprocess() -> createDUI()
259+ // TokenList::createTokens() -> TokenList::determineCppC()
260+ #ifdef LOG_EMACS_MARKER
261+ std::cout << path << ' \n ' ;
262+ #endif
263+
264+ FILE *fp = fopen (path, " rt" );
265+ if (!fp)
266+ return false ;
267+ std::string buf (128 , ' \0 ' );
268+ {
269+ // TODO: read the whole first line only
270+ const char * const res = fgets (const_cast <char *>(buf.data ()), buf.size (), fp);
271+ fclose (fp);
272+ fp = nullptr ;
273+ if (!res)
274+ return false ; // failed to read file
275+ }
276+ // TODO: replace with regular expression
277+ const auto pos1 = buf.find (" -*-" );
278+ if (pos1 == std::string::npos)
279+ return false ; // no start marker
280+ const auto pos_nl = buf.find_first_of (" \r\n " );
281+ if (pos_nl != std::string::npos && (pos_nl < pos1)) {
282+ #ifdef LOG_EMACS_MARKER
283+ std::cout << path << " - Emacs marker not on the first line" << ' \n ' ;
284+ #endif
285+ return false ; // not on first line
286+ }
287+ const auto pos2 = buf.find (" -*-" , pos1 + 3 );
288+ // TODO: make sure we have read the whole line before bailing out
289+ if (pos2 == std::string::npos) {
290+ #ifdef LOG_EMACS_MARKER
291+ std::cout << path << " - Emacs marker not terminated" << ' \n ' ;
292+ #endif
293+ return false ; // no end marker
294+ }
295+ #ifdef LOG_EMACS_MARKER
296+ std::cout << " Emacs marker: '" << buf.substr (pos1, (pos2 + 3 ) - pos1) << " '" << ' \n ' ;
297+ #endif
298+ // TODO: support /* */ comments
299+ const std::string buf_trim = trim (buf); // trim whitespaces
300+ if (buf_trim[0 ] != ' /' || buf_trim[1 ] != ' /' ) {
301+ #ifdef LOG_EMACS_MARKER
302+ std::cout << path << " - Emacs marker not in a comment: '" << buf.substr (pos1, (pos2 + 3 ) - pos1) << " '" << ' \n ' ;
303+ #endif
304+ return false ; // not a comment
305+ }
306+
307+ // there are more variations with lowercase and no whitespaces
308+ // -*- C++ -*-
309+ // -*- Mode: C++; -*-
310+ // -*- Mode: C++; c-basic-offset: 8 -*-
311+ std::string marker = trim (buf.substr (pos1 + 3 , pos2 - pos1 - 3 ), " ;" );
312+ // cut off additional attributes
313+ const auto pos_semi = marker.find (' ;' );
314+ if (pos_semi != std::string::npos)
315+ marker.resize (pos_semi);
316+ findAndReplace (marker, " mode:" , " " );
317+ findAndReplace (marker, " Mode:" , " " );
318+ marker = trim (marker);
319+ if (marker == " C++" || marker == " c++" )
320+ return true ; // C++ marker found
321+
322+ // if (marker == "C" || marker == "c")
323+ // return false;
324+ #ifdef LOG_EMACS_MARKER
325+ std::cout << path << " - unmatched Emacs marker: '" << marker << " '" << ' \n ' ;
326+ #endif
327+
328+ return false ; // marker is not a C++ one
329+ }
330+
331+ Standards::Language Path::identify (const std::string &path, bool cppHeaderProbe, bool *header)
249332{
250333 // cppcheck-suppress uninitvar - TODO: FP
251334 if (header)
252335 *header = false ;
253336
254337 std::string ext = getFilenameExtension (path);
338+ // standard library headers have no extension
339+ if (cppHeaderProbe && ext.empty ()) {
340+ if (hasEmacsCppMarker (path.c_str ())) {
341+ if (header)
342+ *header = true ;
343+ return Standards::Language::CPP;
344+ }
345+ return Standards::Language::None;
346+ }
255347 if (ext == " .C" )
256348 return Standards::Language::CPP;
257349 if (c_src_exts.find (ext) != c_src_exts.end ())
@@ -262,7 +354,9 @@ Standards::Language Path::identify(const std::string &path, bool *header)
262354 if (ext == " .h" ) {
263355 if (header)
264356 *header = true ;
265- return Standards::Language::C; // treat as C for now
357+ if (cppHeaderProbe && hasEmacsCppMarker (path.c_str ()))
358+ return Standards::Language::CPP;
359+ return Standards::Language::C;
266360 }
267361 if (cpp_src_exts.find (ext) != cpp_src_exts.end ())
268362 return Standards::Language::CPP;
@@ -277,7 +371,7 @@ Standards::Language Path::identify(const std::string &path, bool *header)
277371bool Path::isHeader2 (const std::string &path)
278372{
279373 bool header;
280- (void )Path:: identify (path, &header);
374+ (void )identify (path, false , &header);
281375 return header;
282376}
283377
0 commit comments